Branch data Line data Source code
1 : : /* Vectorizer Specific Loop Manipulations
2 : : Copyright (C) 2003-2025 Free Software Foundation, Inc.
3 : : Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 : : and Ira Rosen <irar@il.ibm.com>
5 : :
6 : : This file is part of GCC.
7 : :
8 : : GCC is free software; you can redistribute it and/or modify it under
9 : : the terms of the GNU General Public License as published by the Free
10 : : Software Foundation; either version 3, or (at your option) any later
11 : : version.
12 : :
13 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 : : for more details.
17 : :
18 : : You should have received a copy of the GNU General Public License
19 : : along with GCC; see the file COPYING3. If not see
20 : : <http://www.gnu.org/licenses/>. */
21 : :
22 : : #include "config.h"
23 : : #include "system.h"
24 : : #include "coretypes.h"
25 : : #include "backend.h"
26 : : #include "tree.h"
27 : : #include "gimple.h"
28 : : #include "cfghooks.h"
29 : : #include "tree-pass.h"
30 : : #include "ssa.h"
31 : : #include "fold-const.h"
32 : : #include "cfganal.h"
33 : : #include "gimplify.h"
34 : : #include "gimple-iterator.h"
35 : : #include "gimplify-me.h"
36 : : #include "tree-cfg.h"
37 : : #include "tree-ssa-loop-manip.h"
38 : : #include "tree-into-ssa.h"
39 : : #include "tree-ssa.h"
40 : : #include "cfgloop.h"
41 : : #include "tree-scalar-evolution.h"
42 : : #include "tree-vectorizer.h"
43 : : #include "tree-ssa-loop-ivopts.h"
44 : : #include "gimple-fold.h"
45 : : #include "tree-ssa-loop-niter.h"
46 : : #include "internal-fn.h"
47 : : #include "stor-layout.h"
48 : : #include "optabs-query.h"
49 : : #include "vec-perm-indices.h"
50 : : #include "insn-config.h"
51 : : #include "rtl.h"
52 : : #include "recog.h"
53 : : #include "langhooks.h"
54 : : #include "tree-vector-builder.h"
55 : : #include "optabs-tree.h"
56 : :
57 : : /*************************************************************************
58 : : Simple Loop Peeling Utilities
59 : :
60 : : Utilities to support loop peeling for vectorization purposes.
61 : : *************************************************************************/
62 : :
63 : :
64 : : /* Renames the use *OP_P. */
65 : :
66 : : static void
67 : 901818 : rename_use_op (use_operand_p op_p)
68 : : {
69 : 901818 : tree new_name;
70 : :
71 : 901818 : if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
72 : : return;
73 : :
74 : 898374 : new_name = get_current_def (USE_FROM_PTR (op_p));
75 : :
76 : : /* Something defined outside of the loop. */
77 : 898374 : if (!new_name)
78 : : return;
79 : :
80 : : /* An ordinary ssa name defined in the loop. */
81 : :
82 : 774883 : SET_USE (op_p, new_name);
83 : : }
84 : :
85 : :
86 : : /* Renames the variables in basic block BB. Allow renaming of PHI arguments
87 : : on edges incoming from the outer-loop header if RENAME_FROM_OUTER_LOOP is
88 : : true. */
89 : :
90 : : static void
91 : 111162 : rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
92 : : {
93 : 111162 : gimple *stmt;
94 : 111162 : use_operand_p use_p;
95 : 111162 : ssa_op_iter iter;
96 : 111162 : edge e;
97 : 111162 : edge_iterator ei;
98 : 111162 : class loop *loop = bb->loop_father;
99 : 111162 : class loop *outer_loop = NULL;
100 : :
101 : 111162 : if (rename_from_outer_loop)
102 : : {
103 : 829 : gcc_assert (loop);
104 : 829 : outer_loop = loop_outer (loop);
105 : : }
106 : :
107 : 735930 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
108 : 513606 : gsi_next (&gsi))
109 : : {
110 : 513606 : stmt = gsi_stmt (gsi);
111 : 1264117 : FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
112 : 750511 : rename_use_op (use_p);
113 : : }
114 : :
115 : 227120 : FOR_EACH_EDGE (e, ei, bb->preds)
116 : : {
117 : 115958 : if (!flow_bb_inside_loop_p (loop, e->src))
118 : : {
119 : 34623 : if (!rename_from_outer_loop)
120 : 34363 : continue;
121 : 260 : if (e->src != outer_loop->header)
122 : : {
123 : 152 : if (outer_loop->inner->next)
124 : : {
125 : : /* If outer_loop has 2 inner loops, allow there to
126 : : be an extra basic block which decides which of the
127 : : two loops to use using LOOP_VECTORIZED. */
128 : 149 : if (!single_pred_p (e->src)
129 : 26 : || single_pred (e->src) != outer_loop->header)
130 : 123 : continue;
131 : : }
132 : : }
133 : : }
134 : 184791 : for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
135 : 103319 : gsi_next (&gsi))
136 : 103319 : rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
137 : : }
138 : 111162 : }
139 : :
140 : :
141 : : struct adjust_info
142 : : {
143 : : tree from, to;
144 : : basic_block bb;
145 : : };
146 : :
147 : : /* A stack of values to be adjusted in debug stmts. We have to
148 : : process them LIFO, so that the closest substitution applies. If we
149 : : processed them FIFO, without the stack, we might substitute uses
150 : : with a PHI DEF that would soon become non-dominant, and when we got
151 : : to the suitable one, it wouldn't have anything to substitute any
152 : : more. */
153 : : static vec<adjust_info, va_heap> adjust_vec;
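: : /* For example (with illustrative SSA names): if peeling first
: : records x_1 -> x_2 for a PHI in an early block and later records
: : x_1 -> x_3 for a closer PHI, popping the stack applies x_1 -> x_3
: : to the debug uses it dominates first; FIFO processing could
: : instead rewrite those uses to x_2, whose definition may no longer
: : dominate them, leaving nothing for the later entry to fix up. */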
154 : :
155 : : /* Adjust any debug stmts that referenced AI->from values to use the
156 : : loop-closed AI->to, if the references are dominated by AI->bb and
157 : : not by the definition of AI->from. */
158 : :
159 : : static void
160 : 62964 : adjust_debug_stmts_now (adjust_info *ai)
161 : : {
162 : 62964 : basic_block bbphi = ai->bb;
163 : 62964 : tree orig_def = ai->from;
164 : 62964 : tree new_def = ai->to;
165 : 62964 : imm_use_iterator imm_iter;
166 : 62964 : gimple *stmt;
167 : 62964 : basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
168 : :
169 : 62964 : gcc_assert (dom_info_available_p (CDI_DOMINATORS));
170 : :
171 : : /* Adjust any debug stmts that held onto non-loop-closed
172 : : references. */
173 : 361130 : FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
174 : : {
175 : 235202 : use_operand_p use_p;
176 : 235202 : basic_block bbuse;
177 : :
178 : 235202 : if (!is_gimple_debug (stmt))
179 : 178155 : continue;
180 : :
181 : 57047 : gcc_assert (gimple_debug_bind_p (stmt));
182 : :
183 : 57047 : bbuse = gimple_bb (stmt);
184 : :
185 : 57047 : if ((bbuse == bbphi
186 : 57047 : || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
187 : 58297 : && !(bbuse == bbdef
188 : 625 : || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
189 : : {
190 : 0 : if (new_def)
191 : 0 : FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
192 : 0 : SET_USE (use_p, new_def);
193 : : else
194 : : {
195 : 0 : gimple_debug_bind_reset_value (stmt);
196 : 0 : update_stmt (stmt);
197 : : }
198 : : }
199 : 62964 : }
200 : 62964 : }
201 : :
202 : : /* Adjust debug stmts as scheduled before. */
203 : :
204 : : static void
205 : 33415 : adjust_vec_debug_stmts (void)
206 : : {
207 : 33415 : if (!MAY_HAVE_DEBUG_BIND_STMTS)
208 : : return;
209 : :
210 : 12408 : gcc_assert (adjust_vec.exists ());
211 : :
212 : 75261 : while (!adjust_vec.is_empty ())
213 : : {
214 : 62853 : adjust_debug_stmts_now (&adjust_vec.last ());
215 : 62853 : adjust_vec.pop ();
216 : : }
217 : : }
218 : :
219 : : /* Adjust any debug stmts that referenced FROM values to use the
220 : : loop-closed TO, if the references are dominated by BB and not by
221 : : the definition of FROM. If adjust_vec is non-NULL, adjustments
222 : : will be postponed until adjust_vec_debug_stmts is called. */
223 : :
224 : : static void
225 : 90736 : adjust_debug_stmts (tree from, tree to, basic_block bb)
226 : : {
227 : 90736 : adjust_info ai;
228 : :
229 : 90736 : if (MAY_HAVE_DEBUG_BIND_STMTS
230 : 90736 : && TREE_CODE (from) == SSA_NAME
231 : 81366 : && ! SSA_NAME_IS_DEFAULT_DEF (from)
232 : 170765 : && ! virtual_operand_p (from))
233 : : {
234 : 62964 : ai.from = from;
235 : 62964 : ai.to = to;
236 : 62964 : ai.bb = bb;
237 : :
238 : 62964 : if (adjust_vec.exists ())
239 : 62853 : adjust_vec.safe_push (ai);
240 : : else
241 : 111 : adjust_debug_stmts_now (&ai);
242 : : }
243 : 90736 : }
244 : :
245 : : /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
246 : : to adjust any debug stmts that referenced the old phi arg,
247 : : presumably non-loop-closed references left over from other
248 : : transformations. */
249 : :
250 : : static void
251 : 163390 : adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
252 : : {
253 : 163390 : tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
254 : :
255 : 163390 : gcc_assert (TREE_CODE (orig_def) != SSA_NAME
256 : : || orig_def != new_def);
257 : :
258 : 163390 : SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
259 : :
260 : 163390 : if (MAY_HAVE_DEBUG_BIND_STMTS)
261 : 67670 : adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
262 : : gimple_bb (update_phi));
263 : 163390 : }
264 : :
265 : : /* Define one loop rgroup control CTRL from loop LOOP. INIT_CTRL is the value
266 : : that the control should have during the first iteration and NEXT_CTRL is the
267 : : value that it should have on subsequent iterations. */
268 : :
269 : : static void
270 : 86 : vect_set_loop_control (class loop *loop, tree ctrl, tree init_ctrl,
271 : : tree next_ctrl)
272 : : {
273 : 86 : gphi *phi = create_phi_node (ctrl, loop->header);
274 : 86 : add_phi_arg (phi, init_ctrl, loop_preheader_edge (loop), UNKNOWN_LOCATION);
275 : 86 : add_phi_arg (phi, next_ctrl, loop_latch_edge (loop), UNKNOWN_LOCATION);
276 : 86 : }
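: : /* The result is a header PHI of the form
: :
: : ctrl = PHI <init_ctrl (preheader), next_ctrl (latch)>
: :
: : so the control starts out as INIT_CTRL and is replaced by
: : NEXT_CTRL on each subsequent iteration. */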
277 : :
278 : : /* Add SEQ to the end of LOOP's preheader block. */
279 : :
280 : : static void
281 : 18 : add_preheader_seq (class loop *loop, gimple_seq seq)
282 : : {
283 : 18 : if (seq)
284 : : {
285 : 15 : edge pe = loop_preheader_edge (loop);
286 : 15 : basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
287 : 15 : gcc_assert (!new_bb);
288 : : }
289 : 18 : }
290 : :
291 : : /* Add SEQ to the beginning of LOOP's header block. */
292 : :
293 : : static void
294 : 0 : add_header_seq (class loop *loop, gimple_seq seq)
295 : : {
296 : 0 : if (seq)
297 : : {
298 : 0 : gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
299 : 0 : gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
300 : : }
301 : 0 : }
302 : :
303 : : /* Return true if the target can interleave elements of two vectors.
304 : : OFFSET is 0 if the first half of the vectors should be interleaved
305 : : or 1 if the second half should. When returning true, store the
306 : : associated permutation in INDICES. */
307 : :
308 : : static bool
309 : 0 : interleave_supported_p (vec_perm_indices *indices, tree vectype,
310 : : unsigned int offset)
311 : : {
312 : 0 : poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
313 : 0 : poly_uint64 base = exact_div (nelts, 2) * offset;
314 : 0 : vec_perm_builder sel (nelts, 2, 3);
315 : 0 : for (unsigned int i = 0; i < 3; ++i)
316 : : {
317 : 0 : sel.quick_push (base + i);
318 : 0 : sel.quick_push (base + i + nelts);
319 : : }
320 : 0 : indices->new_vector (sel, 2, nelts);
321 : 0 : return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
322 : 0 : *indices);
323 : 0 : }
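: : /* For example, for a 4-element vector type, OFFSET 0 yields the
: : low-half interleave selector { 0, 4, 1, 5 } and OFFSET 1 the
: : high-half selector { 2, 6, 3, 7 }; the builder above encodes just
: : the first three index pairs and vec_perm_indices extrapolates the
: : rest, so the same test also works for variable-length vectors. */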
324 : :
325 : : /* Try to use permutes to define the masks in DEST_RGM using the masks
326 : : in SRC_RGM, given that the former has twice as many masks as the
327 : : latter. Return true on success, adding any new statements to SEQ. */
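: : /* For example, on a little-endian target, two destination masks
: : can be produced from one source mask either by unpacking
: :
: : dest[0] = VEC_UNPACK_LO_EXPR <src[0]>;
: : dest[1] = VEC_UNPACK_HI_EXPR <src[0]>;
: :
: : or, when the mask types match, by interleaving the source mask
: : with itself so that every mask bit is duplicated:
: :
: : dest[i] = VEC_PERM_EXPR <src[0], src[0], interleave_lo/hi>; */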
328 : :
329 : : static bool
330 : 0 : vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm,
331 : : rgroup_controls *src_rgm)
332 : : {
333 : 0 : tree src_masktype = src_rgm->type;
334 : 0 : tree dest_masktype = dest_rgm->type;
335 : 0 : machine_mode src_mode = TYPE_MODE (src_masktype);
336 : 0 : insn_code icode1, icode2;
337 : 0 : if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
338 : 0 : && (icode1 = optab_handler (vec_unpacku_hi_optab,
339 : : src_mode)) != CODE_FOR_nothing
340 : 0 : && (icode2 = optab_handler (vec_unpacku_lo_optab,
341 : : src_mode)) != CODE_FOR_nothing)
342 : : {
343 : : /* Unpacking the source masks gives at least as many mask bits as
344 : : we need. We can then VIEW_CONVERT any excess bits away. */
345 : 0 : machine_mode dest_mode = insn_data[icode1].operand[0].mode;
346 : 0 : gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
347 : 0 : tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
348 : 0 : for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
349 : : {
350 : 0 : tree src = src_rgm->controls[i / 2];
351 : 0 : tree dest = dest_rgm->controls[i];
352 : 0 : tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
353 : 0 : ? VEC_UNPACK_HI_EXPR
354 : : : VEC_UNPACK_LO_EXPR);
355 : 0 : gassign *stmt;
356 : 0 : if (dest_masktype == unpack_masktype)
357 : 0 : stmt = gimple_build_assign (dest, code, src);
358 : : else
359 : : {
360 : 0 : tree temp = make_ssa_name (unpack_masktype);
361 : 0 : stmt = gimple_build_assign (temp, code, src);
362 : 0 : gimple_seq_add_stmt (seq, stmt);
363 : 0 : stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
364 : : build1 (VIEW_CONVERT_EXPR,
365 : : dest_masktype, temp));
366 : : }
367 : 0 : gimple_seq_add_stmt (seq, stmt);
368 : : }
369 : : return true;
370 : : }
371 : 0 : vec_perm_indices indices[2];
372 : 0 : if (dest_masktype == src_masktype
373 : 0 : && interleave_supported_p (&indices[0], src_masktype, 0)
374 : 0 : && interleave_supported_p (&indices[1], src_masktype, 1))
375 : : {
376 : : /* The destination requires twice as many mask bits as the source, so
377 : : we can use interleaving permutes to double up the number of bits. */
378 : : tree masks[2];
379 : 0 : for (unsigned int i = 0; i < 2; ++i)
380 : 0 : masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
381 : 0 : for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
382 : : {
383 : 0 : tree src = src_rgm->controls[i / 2];
384 : 0 : tree dest = dest_rgm->controls[i];
385 : 0 : gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
386 : 0 : src, src, masks[i & 1]);
387 : 0 : gimple_seq_add_stmt (seq, stmt);
388 : : }
389 : 0 : return true;
390 : : }
391 : : return false;
392 : 0 : }
393 : :
394 : : /* Populate DEST_RGM->controls, given that they should add up to STEP.
395 : :
396 : : STEP = MIN_EXPR <ivtmp_34, VF>;
397 : :
398 : : First length (MIN (X, VF/N)):
399 : : loop_len_15 = MIN_EXPR <STEP, VF/N>;
400 : :
401 : : Second length:
402 : : tmp = STEP - loop_len_15;
403 : : loop_len_16 = MIN (tmp, VF/N);
404 : :
405 : : Third length:
406 : : tmp2 = tmp - loop_len_16;
407 : : loop_len_17 = MIN (tmp2, VF/N);
408 : :
409 : : Last length:
410 : : loop_len_18 = tmp2 - loop_len_17;
411 : : */
412 : :
413 : : static void
414 : 0 : vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
415 : : rgroup_controls *dest_rgm, tree step)
416 : : {
417 : 0 : tree ctrl_type = dest_rgm->type;
418 : 0 : poly_uint64 nitems_per_ctrl
419 : 0 : = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
420 : 0 : tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
421 : :
422 : 0 : for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
423 : : {
424 : 0 : tree ctrl = dest_rgm->controls[i];
425 : 0 : if (i == 0)
426 : : {
427 : : /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N]. */
428 : 0 : gassign *assign
429 : 0 : = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
430 : 0 : gimple_seq_add_stmt (seq, assign);
431 : : }
432 : 0 : else if (i == dest_rgm->controls.length () - 1)
433 : : {
434 : : /* Last iteration: Remain capped to the range [0, VF/N]. */
435 : 0 : gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
436 : 0 : dest_rgm->controls[i - 1]);
437 : 0 : gimple_seq_add_stmt (seq, assign);
438 : : }
439 : : else
440 : : {
441 : : /* (MIN (remain, VF*I/N)) capped to the range [0, VF/N]. */
442 : 0 : step = gimple_build (seq, MINUS_EXPR, iv_type, step,
443 : 0 : dest_rgm->controls[i - 1]);
444 : 0 : gassign *assign
445 : 0 : = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
446 : 0 : gimple_seq_add_stmt (seq, assign);
447 : : }
448 : : }
449 : 0 : }
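: : /* For example, with four controls and VF/N = 4, STEP = 10
: : decomposes as 4 + 4 + 2 + 0: each control takes
: : MIN (remaining, 4) and the last takes whatever is left, so the
: : lengths always sum to STEP. */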
450 : :
451 : : /* Stores in BSI the standard position for the induction variable increment
452 : : belonging to LOOP_EXIT (just before the exit condition of the given exit).
453 : : INSERT_AFTER is set to true if the increment should be inserted after
454 : : *BSI. */
455 : :
456 : : void
457 : 60938 : vect_iv_increment_position (edge loop_exit, gimple_stmt_iterator *bsi,
458 : : bool *insert_after)
459 : : {
460 : 60938 : basic_block bb = loop_exit->src;
461 : 60938 : *bsi = gsi_last_bb (bb);
462 : 60938 : *insert_after = false;
463 : 60938 : }
464 : :
465 : : /* Helper for vect_set_loop_condition_partial_vectors. Generate definitions
466 : : for all the rgroup controls in RGC and return a control that is nonzero
467 : : when the loop needs to iterate. Add any new preheader statements to
468 : : PREHEADER_SEQ. Use LOOP_COND_GSI to insert code before the exit gcond.
469 : :
470 : : RGC belongs to loop LOOP. The loop originally iterated NITERS
471 : : times and has been vectorized according to LOOP_VINFO.
472 : :
473 : : If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
474 : : starts with NITERS_SKIP dummy iterations of the scalar loop before
475 : : the real work starts. The mask elements for these dummy iterations
476 : : must be 0, to ensure that the extra iterations do not have an effect.
477 : :
478 : : It is known that:
479 : :
480 : : NITERS * RGC->max_nscalars_per_iter * RGC->factor
481 : :
482 : : does not overflow. However, MIGHT_WRAP_P says whether an induction
483 : : variable that starts at 0 and has step:
484 : :
485 : : VF * RGC->max_nscalars_per_iter * RGC->factor
486 : :
487 : : might overflow before hitting a value above:
488 : :
489 : : (NITERS + NITERS_SKIP) * RGC->max_nscalars_per_iter * RGC->factor
490 : :
491 : : This means that we cannot guarantee that such an induction variable
492 : : would ever hit a value that produces a set of all-false masks or zero
493 : : lengths for RGC.
494 : :
495 : : Note: the cost of the code generated by this function is modeled
496 : : by vect_estimate_min_profitable_iters, so changes here may need
497 : : corresponding changes there. */
498 : :
499 : : static tree
500 : 0 : vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
501 : : gimple_seq *preheader_seq,
502 : : gimple_seq *header_seq,
503 : : gimple_stmt_iterator loop_cond_gsi,
504 : : rgroup_controls *rgc, tree niters,
505 : : tree niters_skip, bool might_wrap_p,
506 : : tree *iv_step, tree *compare_step)
507 : : {
508 : 0 : tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
509 : 0 : tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
510 : 0 : bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
511 : :
512 : 0 : tree ctrl_type = rgc->type;
513 : 0 : unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
514 : 0 : poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type) * rgc->factor;
515 : 0 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
516 : 0 : tree length_limit = NULL_TREE;
517 : : /* For length, we need length_limit to ensure length in range. */
518 : 0 : if (!use_masks_p)
519 : 0 : length_limit = build_int_cst (compare_type, nitems_per_ctrl);
520 : :
521 : : /* Calculate the maximum number of item values that the rgroup
522 : : handles in total, the number that it handles for each iteration
523 : : of the vector loop, and the number that it should skip during the
524 : : first iteration of the vector loop. */
525 : 0 : tree nitems_total = niters;
526 : 0 : tree nitems_step = build_int_cst (iv_type, vf);
527 : 0 : tree nitems_skip = niters_skip;
528 : 0 : if (nitems_per_iter != 1)
529 : : {
530 : : /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
531 : : these multiplications don't overflow. */
532 : 0 : tree compare_factor = build_int_cst (compare_type, nitems_per_iter);
533 : 0 : tree iv_factor = build_int_cst (iv_type, nitems_per_iter);
534 : 0 : nitems_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
535 : : nitems_total, compare_factor);
536 : 0 : nitems_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
537 : : nitems_step, iv_factor);
538 : 0 : if (nitems_skip)
539 : 0 : nitems_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
540 : : nitems_skip, compare_factor);
541 : : }
542 : :
543 : : /* Create an induction variable that counts the number of items
544 : : processed. */
545 : 0 : tree index_before_incr, index_after_incr;
546 : 0 : gimple_stmt_iterator incr_gsi;
547 : 0 : bool insert_after;
548 : 0 : edge exit_e = LOOP_VINFO_IV_EXIT (loop_vinfo);
549 : 0 : vect_iv_increment_position (exit_e, &incr_gsi, &insert_after);
550 : 0 : if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
551 : : {
552 : : /* Create an IV that counts down from niters_total and whose step
553 : : is the (variable) amount processed in the current iteration:
554 : : ...
555 : : _10 = (unsigned long) count_12(D);
556 : : ...
557 : : # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
558 : : _36 = (MIN_EXPR | SELECT_VL) <ivtmp_9, POLY_INT_CST [4, 4]>;
559 : : ...
560 : : vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
561 : : ...
562 : : ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
563 : : ...
564 : : if (ivtmp_9 > POLY_INT_CST [4, 4])
565 : : goto <bb 4>; [83.33%]
566 : : else
567 : : goto <bb 5>; [16.67%]
568 : : */
569 : 0 : nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
570 : 0 : tree step = rgc->controls.length () == 1 ? rgc->controls[0]
571 : 0 : : make_ssa_name (iv_type);
572 : : /* Create decrement IV. */
573 : 0 : if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
574 : : {
575 : 0 : create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
576 : : insert_after, &index_before_incr, &index_after_incr);
577 : 0 : tree vectype = build_zero_cst (rgc->type);
578 : 0 : tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type,
579 : : index_before_incr, nitems_step,
580 : : vectype);
581 : 0 : gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len));
582 : : }
583 : : else
584 : : {
585 : 0 : create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
586 : : &incr_gsi, insert_after, &index_before_incr,
587 : : &index_after_incr);
588 : 0 : gimple_seq_add_stmt (header_seq,
589 : 0 : gimple_build_assign (step, MIN_EXPR,
590 : : index_before_incr,
591 : : nitems_step));
592 : : }
593 : 0 : *iv_step = step;
594 : 0 : *compare_step = nitems_step;
595 : 0 : return LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? index_after_incr
596 : 0 : : index_before_incr;
597 : : }
598 : :
599 : : /* Create increment IV. */
600 : 0 : create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
601 : : loop, &incr_gsi, insert_after, &index_before_incr,
602 : : &index_after_incr);
603 : :
604 : 0 : tree zero_index = build_int_cst (compare_type, 0);
605 : 0 : tree test_index, test_limit, first_limit;
606 : 0 : gimple_stmt_iterator *test_gsi;
607 : 0 : if (might_wrap_p)
608 : : {
609 : : /* In principle the loop should stop iterating once the incremented
610 : : IV reaches a value greater than or equal to:
611 : :
612 : : NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP
613 : :
614 : : However, there's no guarantee that this addition doesn't overflow
615 : : the comparison type, or that the IV hits a value above it before
616 : : wrapping around. We therefore adjust the limit down by one
617 : : IV step:
618 : :
619 : : (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
620 : : -[infinite-prec] NITEMS_STEP
621 : :
622 : : and compare the IV against this limit _before_ incrementing it.
623 : : Since the comparison type is unsigned, we actually want the
624 : : subtraction to saturate at zero:
625 : :
626 : : (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
627 : : -[sat] NITEMS_STEP
628 : :
629 : : And since NITEMS_SKIP < NITEMS_STEP, we can reassociate this as:
630 : :
631 : : NITEMS_TOTAL -[sat] (NITEMS_STEP - NITEMS_SKIP)
632 : :
633 : : where the rightmost subtraction can be done directly in
634 : : COMPARE_TYPE. */
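: : /* For example, with an 8-bit unsigned COMPARE_TYPE,
: : NITEMS_TOTAL = 250, NITEMS_SKIP = 10 and NITEMS_STEP = 16, the
: : exact limit 250 + 10 - 16 = 244 is computed as
: : MAX (250, 16 - 10) - (16 - 10) = 244, without ever forming
: : 250 + 10 = 260, which would wrap to 4 in eight bits. */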
635 : 0 : test_index = index_before_incr;
636 : 0 : tree adjust = gimple_convert (preheader_seq, compare_type,
637 : : nitems_step);
638 : 0 : if (nitems_skip)
639 : 0 : adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
640 : : adjust, nitems_skip);
641 : 0 : test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
642 : : nitems_total, adjust);
643 : 0 : test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
644 : : test_limit, adjust);
645 : 0 : test_gsi = &incr_gsi;
646 : :
647 : : /* Get a safe limit for the first iteration. */
648 : 0 : if (nitems_skip)
649 : : {
650 : : /* The first vector iteration can handle at most NITEMS_STEP
651 : : items. NITEMS_STEP <= CONST_LIMIT, and adding
652 : : NITEMS_SKIP to that cannot overflow. */
653 : 0 : tree const_limit = build_int_cst (compare_type,
654 : 0 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)
655 : 0 : * nitems_per_iter);
656 : 0 : first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
657 : : nitems_total, const_limit);
658 : 0 : first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
659 : : first_limit, nitems_skip);
660 : : }
661 : : else
662 : : /* For the first iteration it doesn't matter whether the IV hits
663 : : a value above NITEMS_TOTAL. That only matters for the latch
664 : : condition. */
665 : : first_limit = nitems_total;
666 : : }
667 : : else
668 : : {
669 : : /* Test the incremented IV, which will always hit a value above
670 : : the bound before wrapping. */
671 : 0 : test_index = index_after_incr;
672 : 0 : test_limit = nitems_total;
673 : 0 : if (nitems_skip)
674 : 0 : test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
675 : : test_limit, nitems_skip);
676 : : test_gsi = &loop_cond_gsi;
677 : :
678 : : first_limit = test_limit;
679 : : }
680 : :
681 : : /* Convert the IV value to the comparison type (either a no-op or
682 : : a demotion). */
683 : 0 : gimple_seq test_seq = NULL;
684 : 0 : test_index = gimple_convert (&test_seq, compare_type, test_index);
685 : 0 : gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
686 : :
687 : : /* Provide a definition of each control in the group. */
688 : 0 : tree next_ctrl = NULL_TREE;
689 : 0 : tree ctrl;
690 : 0 : unsigned int i;
691 : 0 : FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
692 : : {
693 : : /* Previous controls will cover BIAS items. This control covers the
694 : : next batch. */
695 : 0 : poly_uint64 bias = nitems_per_ctrl * i;
696 : 0 : tree bias_tree = build_int_cst (compare_type, bias);
697 : :
698 : : /* See whether the first iteration of the vector loop is known
699 : : to have a full control. */
700 : 0 : poly_uint64 const_limit;
701 : 0 : bool first_iteration_full
702 : 0 : = (poly_int_tree_p (first_limit, &const_limit)
703 : 0 : && known_ge (const_limit, (i + 1) * nitems_per_ctrl));
704 : :
705 : : /* Rather than have a new IV that starts at BIAS and goes up to
706 : : TEST_LIMIT, prefer to use the same 0-based IV for each control
707 : : and adjust the bound down by BIAS. */
708 : 0 : tree this_test_limit = test_limit;
709 : 0 : if (i != 0)
710 : : {
711 : 0 : this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
712 : : compare_type, this_test_limit,
713 : : bias_tree);
714 : 0 : this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
715 : : compare_type, this_test_limit,
716 : : bias_tree);
717 : : }
718 : :
719 : : /* Create the initial control. First include all items that
720 : : are within the loop limit. */
721 : 0 : tree init_ctrl = NULL_TREE;
722 : 0 : if (!first_iteration_full)
723 : : {
724 : 0 : tree start, end;
725 : 0 : if (first_limit == test_limit)
726 : : {
727 : : /* Use a natural test between zero (the initial IV value)
728 : : and the loop limit. The "else" block would be valid too,
729 : : but this choice can avoid the need to load BIAS_TREE into
730 : : a register. */
731 : : start = zero_index;
732 : : end = this_test_limit;
733 : : }
734 : : else
735 : : {
736 : : /* FIRST_LIMIT is the maximum number of items handled by the
737 : : first iteration of the vector loop. Test the portion
738 : : associated with this control. */
739 : 0 : start = bias_tree;
740 : 0 : end = first_limit;
741 : : }
742 : :
743 : 0 : if (use_masks_p)
744 : 0 : init_ctrl = vect_gen_while (preheader_seq, ctrl_type,
745 : : start, end, "max_mask");
746 : : else
747 : : {
748 : 0 : init_ctrl = make_temp_ssa_name (compare_type, NULL, "max_len");
749 : 0 : gimple_seq seq = vect_gen_len (init_ctrl, start,
750 : : end, length_limit);
751 : 0 : gimple_seq_add_seq (preheader_seq, seq);
752 : : }
753 : : }
754 : :
755 : : /* Now AND out the bits that are within the number of skipped
756 : : items. */
757 : 0 : poly_uint64 const_skip;
758 : 0 : if (nitems_skip
759 : 0 : && !(poly_int_tree_p (nitems_skip, &const_skip)
760 : 0 : && known_le (const_skip, bias)))
761 : : {
762 : 0 : gcc_assert (use_masks_p);
763 : 0 : tree unskipped_mask = vect_gen_while_not (preheader_seq, ctrl_type,
764 : : bias_tree, nitems_skip);
765 : 0 : if (init_ctrl)
766 : 0 : init_ctrl = gimple_build (preheader_seq, BIT_AND_EXPR, ctrl_type,
767 : : init_ctrl, unskipped_mask);
768 : : else
769 : : init_ctrl = unskipped_mask;
770 : : }
771 : :
772 : 0 : if (!init_ctrl)
773 : : {
774 : : /* First iteration is full. */
775 : 0 : if (use_masks_p)
776 : 0 : init_ctrl = build_minus_one_cst (ctrl_type);
777 : : else
778 : : init_ctrl = length_limit;
779 : : }
780 : :
781 : : /* Get the control value for the next iteration of the loop. */
782 : 0 : if (use_masks_p)
783 : : {
784 : 0 : gimple_seq stmts = NULL;
785 : 0 : next_ctrl = vect_gen_while (&stmts, ctrl_type, test_index,
786 : : this_test_limit, "next_mask");
787 : 0 : gsi_insert_seq_before (test_gsi, stmts, GSI_SAME_STMT);
788 : : }
789 : : else
790 : : {
791 : 0 : next_ctrl = make_temp_ssa_name (compare_type, NULL, "next_len");
792 : 0 : gimple_seq seq = vect_gen_len (next_ctrl, test_index, this_test_limit,
793 : : length_limit);
794 : 0 : gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT);
795 : : }
796 : :
797 : 0 : vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
798 : : }
799 : :
800 : 0 : int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
801 : 0 : if (partial_load_bias != 0)
802 : : {
803 : 0 : tree adjusted_len = rgc->bias_adjusted_ctrl;
804 : 0 : gassign *minus = gimple_build_assign (adjusted_len, PLUS_EXPR,
805 : 0 : rgc->controls[0],
806 : : build_int_cst
807 : 0 : (TREE_TYPE (rgc->controls[0]),
808 : 0 : partial_load_bias));
809 : 0 : gimple_seq_add_stmt (header_seq, minus);
810 : : }
811 : :
812 : : return next_ctrl;
813 : : }
814 : :
815 : : /* Set up the iteration condition and rgroup controls for LOOP, given
816 : : that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
817 : : loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
818 : : the number of iterations of the original scalar loop that should be
819 : : handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as
820 : : for vect_set_loop_condition.
821 : :
822 : : Insert the branch-back condition before LOOP_COND_GSI and return the
823 : : final gcond. */
824 : :
825 : : static gcond *
826 : 0 : vect_set_loop_condition_partial_vectors (class loop *loop, edge exit_edge,
827 : : loop_vec_info loop_vinfo, tree niters,
828 : : tree final_iv, bool niters_maybe_zero,
829 : : gimple_stmt_iterator loop_cond_gsi)
830 : : {
831 : 0 : gimple_seq preheader_seq = NULL;
832 : 0 : gimple_seq header_seq = NULL;
833 : :
834 : 0 : bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
835 : 0 : tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
836 : 0 : unsigned int compare_precision = TYPE_PRECISION (compare_type);
837 : 0 : tree orig_niters = niters;
838 : :
839 : : /* Type of the initial value of NITERS. */
840 : 0 : tree ni_actual_type = TREE_TYPE (niters);
841 : 0 : unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
842 : 0 : tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
843 : 0 : if (niters_skip)
844 : 0 : niters_skip = gimple_convert (&preheader_seq, compare_type, niters_skip);
845 : :
846 : : /* Convert NITERS to the same size as the compare. */
847 : 0 : if (compare_precision > ni_actual_precision
848 : 0 : && niters_maybe_zero)
849 : : {
850 : : /* We know that there is always at least one iteration, so if the
851 : : count is zero then it must have wrapped. Cope with this by
852 : : subtracting 1 before the conversion and adding 1 to the result. */
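: : /* E.g. an 8-bit NITERS of 0 stands for 256 iterations:
: : 0 - 1 == 255 in eight bits, widening gives 255, and adding 1
: : yields the intended 256. */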
853 : 0 : gcc_assert (TYPE_UNSIGNED (ni_actual_type));
854 : 0 : niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
855 : : niters, build_minus_one_cst (ni_actual_type));
856 : 0 : niters = gimple_convert (&preheader_seq, compare_type, niters);
857 : 0 : niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
858 : : niters, build_one_cst (compare_type));
859 : : }
860 : : else
861 : 0 : niters = gimple_convert (&preheader_seq, compare_type, niters);
862 : :
863 : : /* Iterate over all the rgroups and fill in their controls. We could use
864 : : the first control from any rgroup for the loop condition; here we
865 : : arbitrarily pick the last. */
866 : 0 : tree test_ctrl = NULL_TREE;
867 : 0 : tree iv_step = NULL_TREE;
868 : 0 : tree compare_step = NULL_TREE;
869 : 0 : rgroup_controls *rgc;
870 : 0 : rgroup_controls *iv_rgc = nullptr;
871 : 0 : unsigned int i;
872 : 0 : auto_vec<rgroup_controls> *controls = use_masks_p
873 : 0 : ? &LOOP_VINFO_MASKS (loop_vinfo).rgc_vec
874 : : : &LOOP_VINFO_LENS (loop_vinfo);
875 : 0 : FOR_EACH_VEC_ELT (*controls, i, rgc)
876 : 0 : if (!rgc->controls.is_empty ())
877 : : {
878 : : /* First try using permutes. This adds a single vector
879 : : instruction to the loop for each mask, but needs no extra
880 : : loop invariants or IVs. */
881 : 0 : unsigned int nmasks = i + 1;
882 : 0 : if (use_masks_p && (nmasks & 1) == 0)
883 : : {
884 : 0 : rgroup_controls *half_rgc = &(*controls)[nmasks / 2 - 1];
885 : 0 : if (!half_rgc->controls.is_empty ()
886 : 0 : && vect_maybe_permute_loop_masks (&header_seq, rgc, half_rgc))
887 : 0 : continue;
888 : : }
889 : :
890 : 0 : if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
891 : 0 : || !iv_rgc
892 : 0 : || (iv_rgc->max_nscalars_per_iter * iv_rgc->factor
893 : 0 : != rgc->max_nscalars_per_iter * rgc->factor))
894 : : {
895 : : /* See whether zero-based IV would ever generate all-false masks
896 : : or zero length before wrapping around. */
897 : 0 : bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
898 : :
899 : : /* Set up all controls for this group. */
900 : 0 : test_ctrl
901 : 0 : = vect_set_loop_controls_directly (loop, loop_vinfo,
902 : : &preheader_seq, &header_seq,
903 : : loop_cond_gsi, rgc, niters,
904 : : niters_skip, might_wrap_p,
905 : : &iv_step, &compare_step);
906 : :
907 : 0 : iv_rgc = rgc;
908 : : }
909 : :
910 : 0 : if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
911 : 0 : && rgc->controls.length () > 1)
912 : : {
913 : : /* vect_set_loop_controls_directly creates an IV whose step
914 : : is equal to the expected sum of RGC->controls. Use that
915 : : information to populate RGC->controls. */
916 : 0 : tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
917 : 0 : gcc_assert (iv_step);
918 : 0 : vect_adjust_loop_lens_control (iv_type, &header_seq, rgc, iv_step);
919 : : }
920 : : }
921 : :
922 : : /* Emit all accumulated statements. */
923 : 0 : add_preheader_seq (loop, preheader_seq);
924 : 0 : add_header_seq (loop, header_seq);
925 : :
926 : : /* Get a boolean result that tells us whether to iterate. */
927 : 0 : gcond *cond_stmt;
928 : 0 : if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
929 : 0 : && !LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
930 : : {
931 : 0 : gcc_assert (compare_step);
932 : 0 : tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
933 : 0 : cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
934 : : NULL_TREE);
935 : 0 : }
936 : : else
937 : : {
938 : 0 : tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
939 : 0 : tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
940 : 0 : cond_stmt
941 : 0 : = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
942 : : }
943 : 0 : gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
944 : :
945 : : /* The loop iterates (NITERS - 1) / VF + 1 times.
946 : : Subtract one from this to get the latch count. */
947 : 0 : tree step = build_int_cst (compare_type,
948 : 0 : LOOP_VINFO_VECT_FACTOR (loop_vinfo));
949 : 0 : tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
950 : : build_minus_one_cst (compare_type));
951 : 0 : loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
952 : : niters_minus_one, step);
953 : :
954 : 0 : if (final_iv)
955 : : {
956 : 0 : gassign *assign;
957 : : /* If vectorizing an inverted early break loop we have to restart the
958 : : scalar loop at niters - vf. This matches what we do in
959 : : vect_gen_vector_loop_niters_mult_vf for non-masked loops. */
960 : 0 : if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
961 : : {
962 : 0 : tree ftype = TREE_TYPE (orig_niters);
963 : 0 : tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
964 : 0 : assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
965 : : }
966 : : else
967 : 0 : assign = gimple_build_assign (final_iv, orig_niters);
968 : 0 : gsi_insert_on_edge_immediate (exit_edge, assign);
969 : : }
970 : :
971 : 0 : return cond_stmt;
972 : : }
973 : :
974 : : /* Set up the iteration condition and rgroup controls for LOOP in AVX512
975 : : style, given that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the
976 : : vectorized loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
977 : : the number of iterations of the original scalar loop that should be
978 : : handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as
979 : : for vect_set_loop_condition.
980 : :
981 : : Insert the branch-back condition before LOOP_COND_GSI and return the
982 : : final gcond. */
983 : :
984 : : static gcond *
985 : 18 : vect_set_loop_condition_partial_vectors_avx512 (class loop *loop,
986 : : edge exit_edge,
987 : : loop_vec_info loop_vinfo, tree niters,
988 : : tree final_iv,
989 : : bool niters_maybe_zero,
990 : : gimple_stmt_iterator loop_cond_gsi)
991 : : {
992 : 18 : tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
993 : 18 : tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
994 : 18 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
995 : 18 : tree orig_niters = niters;
996 : 18 : gimple_seq preheader_seq = NULL;
997 : :
998 : : /* Create an IV that counts down from niters and whose step
999 : : is the number of iterations processed in the current iteration.
1000 : : Produce the controls with compares like the following.
1001 : :
1002 : : # iv_2 = PHI <niters, iv_3>
1003 : : rem_4 = MIN <iv_2, VF>;
1004 : : remv_6 = { rem_4, rem_4, rem_4, ... }
1005 : : mask_5 = { 0, 0, 1, 1, 2, 2, ... } < remv_6;
1006 : : iv_3 = iv_2 - VF;
1007 : : if (iv_2 > VF)
1008 : : continue;
1009 : :
1010 : : Where the constant is built with elements at most VF - 1 and
1011 : : repetitions according to max_nscalars_per_iter which is guaranteed
1012 : : to be the same within a group. */
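: : /* For example, with VF = 4 and max_nscalars_per_iter = 2 the
: : constant is { 0, 0, 1, 1, 2, 2, 3, 3 }; when three scalar
: : iterations remain, the compare against the splat { 3, 3, ... }
: : yields the mask { 1, 1, 1, 1, 1, 1, 0, 0 }, enabling the lanes of
: : the first three iterations. */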
1013 : :
1014 : : /* Convert NITERS to the determined IV type. */
1015 : 18 : if (TYPE_PRECISION (iv_type) > TYPE_PRECISION (TREE_TYPE (niters))
1016 : 18 : && niters_maybe_zero)
1017 : : {
1018 : : /* We know that there is always at least one iteration, so if the
1019 : : count is zero then it must have wrapped. Cope with this by
1020 : : subtracting 1 before the conversion and adding 1 to the result. */
1021 : 0 : gcc_assert (TYPE_UNSIGNED (TREE_TYPE (niters)));
1022 : 0 : niters = gimple_build (&preheader_seq, PLUS_EXPR, TREE_TYPE (niters),
1023 : 0 : niters, build_minus_one_cst (TREE_TYPE (niters)));
1024 : 0 : niters = gimple_convert (&preheader_seq, iv_type, niters);
1025 : 0 : niters = gimple_build (&preheader_seq, PLUS_EXPR, iv_type,
1026 : : niters, build_one_cst (iv_type));
1027 : : }
1028 : : else
1029 : 18 : niters = gimple_convert (&preheader_seq, iv_type, niters);
1030 : :
1031 : : /* Bias the initial value of the IV in case we need to skip iterations
1032 : : at the beginning. */
1033 : 18 : tree niters_adj = niters;
1034 : 18 : if (niters_skip)
1035 : : {
1036 : 1 : tree skip = gimple_convert (&preheader_seq, iv_type, niters_skip);
1037 : 1 : niters_adj = gimple_build (&preheader_seq, PLUS_EXPR,
1038 : : iv_type, niters, skip);
1039 : : }
1040 : :
1041 : : /* The iteration step is the vectorization factor. */
1042 : 18 : tree iv_step = build_int_cst (iv_type, vf);
1043 : :
1044 : : /* Create the decrement IV. */
1045 : 18 : tree index_before_incr, index_after_incr;
1046 : 18 : gimple_stmt_iterator incr_gsi;
1047 : 18 : bool insert_after;
1048 : 18 : vect_iv_increment_position (exit_edge, &incr_gsi, &insert_after);
1049 : 18 : create_iv (niters_adj, MINUS_EXPR, iv_step, NULL_TREE, loop,
1050 : : &incr_gsi, insert_after, &index_before_incr,
1051 : : &index_after_incr);
1052 : :
1053 : : /* Iterate over all the rgroups and fill in their controls. */
1054 : 80 : for (auto &rgc : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec)
1055 : : {
1056 : 26 : if (rgc.controls.is_empty ())
1057 : 6 : continue;
1058 : :
1059 : 20 : tree ctrl_type = rgc.type;
1060 : 20 : poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);
1061 : :
1062 : 20 : tree vectype = rgc.compare_type;
1063 : :
1064 : : /* index_after_incr is the IV specifying the remaining iterations in
1065 : : the next iteration. */
1066 : 20 : tree rem = index_after_incr;
1067 : : /* When the data type for the compare to produce the mask is
1068 : : smaller than the IV type we need to saturate. Saturate to
1069 : : the smallest possible value (IV_TYPE) so we only have to
1070 : : saturate once (CSE will catch redundant ones we add). */
1071 : 20 : if (TYPE_PRECISION (TREE_TYPE (vectype)) < TYPE_PRECISION (iv_type))
1072 : 9 : rem = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
1073 : : UNKNOWN_LOCATION,
1074 : 9 : MIN_EXPR, TREE_TYPE (rem), rem, iv_step);
1075 : 20 : rem = gimple_convert (&incr_gsi, false, GSI_CONTINUE_LINKING,
1076 : 20 : UNKNOWN_LOCATION, TREE_TYPE (vectype), rem);
1077 : :
1078 : : /* Build a data vector composed of the remaining iterations. */
1079 : 20 : rem = gimple_build_vector_from_val (&incr_gsi, false, GSI_CONTINUE_LINKING,
1080 : : UNKNOWN_LOCATION, vectype, rem);
1081 : :
1082 : : /* Provide a definition of each vector in the control group. */
1083 : 20 : tree next_ctrl = NULL_TREE;
1084 : 20 : tree first_rem = NULL_TREE;
1085 : 20 : tree ctrl;
1086 : 20 : unsigned int i;
1087 : 238 : FOR_EACH_VEC_ELT_REVERSE (rgc.controls, i, ctrl)
1088 : : {
1089 : : /* Previous controls will cover BIAS items. This control covers the
1090 : : next batch. */
1091 : 86 : poly_uint64 bias = nitems_per_ctrl * i;
1092 : :
1093 : : /* Build the constant to compare the remaining iters against,
1094 : : this is something like { 0, 0, 1, 1, 2, 2, 3, 3, ... } appropriately
1095 : : split into pieces. */
1096 : 86 : unsigned n = TYPE_VECTOR_SUBPARTS (ctrl_type).to_constant ();
1097 : 86 : tree_vector_builder builder (vectype, n, 1);
1098 : 1266 : for (unsigned i = 0; i < n; ++i)
1099 : : {
1100 : 1180 : unsigned HOST_WIDE_INT val
1101 : 1180 : = (i + bias.to_constant ()) / rgc.max_nscalars_per_iter;
1102 : 1180 : gcc_assert (val < vf.to_constant ());
1103 : 1180 : builder.quick_push (build_int_cst (TREE_TYPE (vectype), val));
1104 : : }
1105 : 86 : tree cmp_series = builder.build ();
1106 : :
1107 : : /* Create the initial control. First include all items that
1108 : : are within the loop limit. */
1109 : 86 : tree init_ctrl = NULL_TREE;
1110 : 86 : poly_uint64 const_limit;
1111 : : /* See whether the first iteration of the vector loop is known
1112 : : to have a full control. */
1113 : 86 : if (poly_int_tree_p (niters, &const_limit)
1114 : 86 : && known_ge (const_limit, (i + 1) * nitems_per_ctrl))
1115 : 1 : init_ctrl = build_minus_one_cst (ctrl_type);
1116 : : else
1117 : : {
1118 : : /* The remaining work items initially are niters. Saturate,
1119 : : splat and compare. */
1120 : 85 : if (!first_rem)
1121 : : {
1122 : 19 : first_rem = niters;
1123 : 19 : if (TYPE_PRECISION (TREE_TYPE (vectype))
1124 : 19 : < TYPE_PRECISION (iv_type))
1125 : 9 : first_rem = gimple_build (&preheader_seq,
1126 : 9 : MIN_EXPR, TREE_TYPE (first_rem),
1127 : : first_rem, iv_step);
1128 : 19 : first_rem = gimple_convert (&preheader_seq, TREE_TYPE (vectype),
1129 : : first_rem);
1130 : 19 : first_rem = gimple_build_vector_from_val (&preheader_seq,
1131 : : vectype, first_rem);
1132 : : }
1133 : 85 : init_ctrl = gimple_build (&preheader_seq, LT_EXPR, ctrl_type,
1134 : : cmp_series, first_rem);
1135 : : }
1136 : :
1137 : : /* Now AND out the bits that are within the number of skipped
1138 : : items. */
1139 : 86 : poly_uint64 const_skip;
1140 : 86 : if (niters_skip
1141 : 86 : && !(poly_int_tree_p (niters_skip, &const_skip)
1142 : 1 : && known_le (const_skip, bias)))
1143 : : {
1144 : : /* For integer mode masks it's cheaper to shift out the bits
1145 : : since that avoids loading a constant. */
1146 : 1 : gcc_assert (GET_MODE_CLASS (TYPE_MODE (ctrl_type)) == MODE_INT);
1147 : 1 : init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
1148 : 1 : lang_hooks.types.type_for_mode
1149 : 1 : (TYPE_MODE (ctrl_type), 1),
1150 : : init_ctrl);
1151 : : /* ??? But when the shift amount isn't constant this requires
1152 : : a round-trip to GPRs. We could apply the bias to either
1153 : : side of the compare instead. */
1154 : 2 : tree shift = gimple_build (&preheader_seq, MINUS_EXPR,
1155 : 1 : TREE_TYPE (niters_skip), niters_skip,
1156 : 1 : build_int_cst (TREE_TYPE (niters_skip),
1157 : 1 : bias));
1158 : 2 : shift = gimple_build (&preheader_seq, MULT_EXPR,
1159 : 1 : TREE_TYPE (niters_skip), shift,
1160 : 1 : build_int_cst (TREE_TYPE (niters_skip),
1161 : 1 : rgc.max_nscalars_per_iter));
1162 : 1 : init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
1163 : 1 : TREE_TYPE (init_ctrl),
1164 : : init_ctrl, shift);
1165 : 1 : init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
1166 : : ctrl_type, init_ctrl);
1167 : : }
1168 : :
1169 : : /* Get the control value for the next iteration of the loop. */
1170 : 86 : next_ctrl = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
1171 : : UNKNOWN_LOCATION,
1172 : : LT_EXPR, ctrl_type, cmp_series, rem);
1173 : :
1174 : 86 : vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
1175 : 86 : }
1176 : : }
1177 : :
1178 : : /* Emit all accumulated statements. */
1179 : 18 : add_preheader_seq (loop, preheader_seq);
1180 : :
1181 : : /* Adjust the exit test using the decrementing IV. */
1182 : 18 : tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
1183 : : /* When we peel for alignment with niter_skip != 0, niter + niter_skip
1184 : : can wrap, and since we compare the value before the decrement
1185 : : here, we would get a false early exit.
1186 : : We can't compare the value after the decrement either, because
1187 : : that decrement could wrap as well, since it is not a saturating
1188 : : decrement. To avoid this situation we force a larger
1189 : : iv_type. */
1190 : 18 : gcond *cond_stmt = gimple_build_cond (code, index_before_incr, iv_step,
1191 : : NULL_TREE, NULL_TREE);
1192 : 18 : gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
1193 : :
1194 : : /* The loop iterates (NITERS - 1 + NITERS_SKIP) / VF + 1 times.
1195 : : Subtract one from this to get the latch count. */
1196 : 18 : tree niters_minus_one
1197 : 18 : = fold_build2 (PLUS_EXPR, TREE_TYPE (orig_niters), orig_niters,
1198 : : build_minus_one_cst (TREE_TYPE (orig_niters)));
1199 : 18 : tree niters_adj2 = fold_convert (iv_type, niters_minus_one);
1200 : 18 : if (niters_skip)
1201 : 1 : niters_adj2 = fold_build2 (PLUS_EXPR, iv_type, niters_minus_one,
1202 : : fold_convert (iv_type, niters_skip));
1203 : 18 : loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, iv_type,
1204 : : niters_adj2, iv_step);
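: : /* For example, NITERS = 10, NITERS_SKIP = 2 and VF = 4 give a
: : latch count of (10 - 1 + 2) / 4 = 2, i.e. the vector loop body
: : executes ceil ((10 + 2) / 4) = 3 times. */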
1205 : :
1206 : 18 : if (final_iv)
1207 : : {
1208 : 0 : gassign *assign;
1209 : : /* If vectorizing an inverted early break loop we have to restart the
1210 : : scalar loop at niters - vf. This matches what we do in
1211 : : vect_gen_vector_loop_niters_mult_vf for non-masked loops. */
1212 : 0 : if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
1213 : : {
1214 : 0 : tree ftype = TREE_TYPE (orig_niters);
1215 : 0 : tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1216 : 0 : assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
1217 : : }
1218 : : else
1219 : 0 : assign = gimple_build_assign (final_iv, orig_niters);
1220 : 0 : gsi_insert_on_edge_immediate (exit_edge, assign);
1221 : : }
1222 : :
1223 : 18 : return cond_stmt;
1224 : : }
1225 : :
1226 : :
1227 : : /* Like vect_set_loop_condition, but handle the case in which the vector
1228 : : loop handles exactly VF scalars per iteration. */
1229 : :
1230 : : static gcond *
1231 : 60853 : vect_set_loop_condition_normal (loop_vec_info /* loop_vinfo */, edge exit_edge,
1232 : : class loop *loop, tree niters, tree step,
1233 : : tree final_iv, bool niters_maybe_zero,
1234 : : gimple_stmt_iterator loop_cond_gsi)
1235 : : {
1236 : 60853 : tree indx_before_incr, indx_after_incr;
1237 : 60853 : gcond *cond_stmt;
1238 : 60853 : gcond *orig_cond;
1239 : 60853 : edge pe = loop_preheader_edge (loop);
1240 : 60853 : gimple_stmt_iterator incr_gsi;
1241 : 60853 : bool insert_after;
1242 : 60853 : enum tree_code code;
1243 : 60853 : tree niters_type = TREE_TYPE (niters);
1244 : :
1245 : 60853 : orig_cond = get_loop_exit_condition (exit_edge);
1246 : 60853 : gcc_assert (orig_cond);
1247 : 60853 : loop_cond_gsi = gsi_for_stmt (orig_cond);
1248 : :
1249 : 60853 : tree init, limit;
1250 : 60853 : if (!niters_maybe_zero && integer_onep (step))
1251 : : {
1252 : : /* In this case we can use a simple 0-based IV:
1253 : :
1254 : : A:
1255 : : x = 0;
1256 : : do
1257 : : {
1258 : : ...
1259 : : x += 1;
1260 : : }
1261 : : while (x < NITERS); */
1262 : 60853 : code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
1263 : 60853 : init = build_zero_cst (niters_type);
1264 : 60853 : limit = niters;
1265 : : }
1266 : : else
1267 : : {
1268 : : /* The following works for all values of NITERS except 0:
1269 : :
1270 : : B:
1271 : : x = 0;
1272 : : do
1273 : : {
1274 : : ...
1275 : : x += STEP;
1276 : : }
1277 : : while (x <= NITERS - STEP);
1278 : :
1279 : : so that the loop continues to iterate if x + STEP - 1 < NITERS
1280 : : but stops if x + STEP - 1 >= NITERS.
1281 : :
1282 : : However, if NITERS is zero, x never hits a value above NITERS - STEP
1283 : : before wrapping around. There are two obvious ways of dealing with
1284 : : this:
1285 : :
1286 : : - start at STEP - 1 and compare x before incrementing it
1287 : : - start at -1 and compare x after incrementing it
1288 : :
1289 : : The latter is simpler and is what we use. The loop in this case
1290 : : looks like:
1291 : :
1292 : : C:
1293 : : x = -1;
1294 : : do
1295 : : {
1296 : : ...
1297 : : x += STEP;
1298 : : }
1299 : : while (x < NITERS - STEP);
1300 : :
1301 : : In both cases the loop limit is NITERS - STEP. */
1302 : 0 : gimple_seq seq = NULL;
1303 : 0 : limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
1304 : 0 : limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
1305 : 0 : if (seq)
1306 : : {
1307 : 0 : basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1308 : 0 : gcc_assert (!new_bb);
1309 : : }
1310 : 0 : if (niters_maybe_zero)
1311 : : {
1312 : : /* Case C. */
1313 : 0 : code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
1314 : 0 : init = build_all_ones_cst (niters_type);
1315 : : }
1316 : : else
1317 : : {
1318 : : /* Case B. */
1319 : 0 : code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
1320 : 0 : init = build_zero_cst (niters_type);
1321 : : }
1322 : : }
1323 : :
1324 : 60853 : vect_iv_increment_position (exit_edge, &incr_gsi, &insert_after);
1325 : 60853 : create_iv (init, PLUS_EXPR, step, NULL_TREE, loop,
1326 : : &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
1327 : 60853 : indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
1328 : : true, NULL_TREE, true,
1329 : : GSI_SAME_STMT);
1330 : 60853 : limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
1331 : : true, GSI_SAME_STMT);
1332 : :
1333 : 60853 : cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
1334 : : NULL_TREE);
1335 : :
1336 : 60853 : gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
1337 : :
1338 : : /* Record the number of latch iterations. */
1339 : 60853 : if (limit == niters)
1340 : : /* Case A: the loop iterates NITERS times. Subtract one to get the
1341 : : latch count. */
1342 : 60853 : loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
1343 : : build_int_cst (niters_type, 1));
1344 : : else
1345 : : /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
1346 : : Subtract one from this to get the latch count. */
1347 : 0 : loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
1348 : : limit, step);
1349 : :
1350 : 60853 : if (final_iv)
1351 : : {
1352 : 0 : gassign *assign;
1353 : 0 : gcc_assert (single_pred_p (exit_edge->dest));
1354 : 0 : tree phi_dest
1355 : 0 : = integer_zerop (init) ? final_iv : copy_ssa_name (indx_after_incr);
1356 : : /* Make sure to maintain LC SSA form here and elide the subtraction
1357 : : if the value is zero. */
1358 : 0 : gphi *phi = create_phi_node (phi_dest, exit_edge->dest);
1359 : 0 : add_phi_arg (phi, indx_after_incr, exit_edge, UNKNOWN_LOCATION);
1360 : 0 : if (!integer_zerop (init))
1361 : : {
1362 : 0 : assign = gimple_build_assign (final_iv, MINUS_EXPR,
1363 : : phi_dest, init);
1364 : 0 : gimple_stmt_iterator gsi = gsi_after_labels (exit_edge->dest);
1365 : 0 : gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
1366 : : }
1367 : : }
1368 : :
1369 : 60853 : return cond_stmt;
1370 : : }
1371 : :
1372 : : /* If we're using fully-masked loops, make LOOP iterate:
1373 : :
1374 : : N == (NITERS - 1) / STEP + 1
1375 : :
1376 : : times. When NITERS is zero, this is equivalent to making the loop
1377 : : execute (1 << M) / STEP times, where M is the precision of NITERS.
1378 : : NITERS_MAYBE_ZERO is true if this last case might occur.
1379 : :
1380 : : If we're not using fully-masked loops, make LOOP iterate:
1381 : :
1382 : : N == (NITERS - STEP) / STEP + 1
1383 : :
1384 : : times, where NITERS is known to be outside the range [1, STEP - 1].
1385 : : This is equivalent to making the loop execute NITERS / STEP times
1386 : : when NITERS is nonzero and (1 << M) / STEP times otherwise.
1387 : : NITERS_MAYBE_ZERO again indicates whether this last case might occur.
1388 : :
1389 : : If FINAL_IV is nonnull, it is an SSA name that should be set to
1390 : : N * STEP on exit from the loop.
1391 : :
1392 : : Assumption: the exit-condition of LOOP is the last stmt in the loop. */
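: : /* For example, for NITERS = 10 and STEP = 4 a fully-masked loop
: : iterates (10 - 1) / 4 + 1 = 3 times (the ceiling of 10/4), while
: : a non-masked loop iterates (10 - 4) / 4 + 1 = 2 times, any
: : remaining scalar iterations typically being handled by a separate
: : epilogue loop. */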
1393 : :
1394 : : void
1395 : 60871 : vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo,
1396 : : tree niters, tree step, tree final_iv,
1397 : : bool niters_maybe_zero)
1398 : : {
1399 : 60871 : gcond *cond_stmt;
1400 : 60871 : gcond *orig_cond = get_loop_exit_condition (loop_e);
1401 : 60871 : gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
1402 : :
1403 : 60871 : if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
1404 : : {
1405 : 18 : if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) == vect_partial_vectors_avx512)
1406 : 18 : cond_stmt = vect_set_loop_condition_partial_vectors_avx512 (loop, loop_e,
1407 : : loop_vinfo,
1408 : : niters, final_iv,
1409 : : niters_maybe_zero,
1410 : : loop_cond_gsi);
1411 : : else
1412 : 0 : cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_e,
1413 : : loop_vinfo,
1414 : : niters, final_iv,
1415 : : niters_maybe_zero,
1416 : : loop_cond_gsi);
1417 : : }
1418 : : else
1419 : 60853 : cond_stmt = vect_set_loop_condition_normal (loop_vinfo, loop_e, loop,
1420 : : niters,
1421 : : step, final_iv,
1422 : : niters_maybe_zero,
1423 : : loop_cond_gsi);
1424 : :
1425 : : /* Remove old loop exit test. */
1426 : 60871 : stmt_vec_info orig_cond_info;
1427 : 60871 : if (loop_vinfo
1428 : 60871 : && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
1429 : 60465 : loop_vinfo->remove_stmt (orig_cond_info);
1430 : : else
1431 : 406 : gsi_remove (&loop_cond_gsi, true);
1432 : :
1433 : 60871 : if (dump_enabled_p ())
1434 : 10702 : dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
1435 : : (gimple *) cond_stmt);
1436 : 60871 : }
1437 : :
1438 : : /* Get the virtual operand live on E. The precondition is that immediate
1439 : : dominators are valid and that an actual virtual definition dominates E. */
1440 : : /* ??? Costly band-aid. For the use in question we can populate a
1441 : : live-on-exit/end-of-BB virtual operand when copying stmts. */
1442 : :
1443 : : static tree
1444 : 10 : get_live_virtual_operand_on_edge (edge e)
1445 : : {
1446 : 10 : basic_block bb = e->src;
1447 : 22 : do
1448 : : {
1449 : 61 : for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
1450 : : {
1451 : 37 : gimple *stmt = gsi_stmt (gsi);
1452 : 55 : if (gimple_vdef (stmt))
1453 : 10 : return gimple_vdef (stmt);
1454 : 39 : if (gimple_vuse (stmt))
1455 : : return gimple_vuse (stmt);
1456 : : }
1457 : 8 : if (gphi *vphi = get_virtual_phi (bb))
1458 : 2 : return gimple_phi_result (vphi);
1459 : 6 : bb = get_immediate_dominator (CDI_DOMINATORS, bb);
1460 : 6 : }
1461 : : while (1);
1462 : : }
1463 : :
1464 : : /* Given LOOP this function generates a new copy of it and puts it
1465 : : on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
1466 : : non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
1467 : : basic blocks from SCALAR_LOOP instead of LOOP, but to either the
1468 : : entry or exit of LOOP. If FLOW_LOOPS then connect LOOP to SCALAR_LOOP as a
1469 : : continuation. This is correct for cases where one loop continues from the
1470 : : other like in the vectorizer, but not true for uses in e.g. loop distribution
1471 : : where the contents of the loop body are split but the iteration space of both
1472 : : copies remains the same.
1473 : :
1474 : : If UPDATED_DOMS is not NULL it is updated with the list of basic blocks whose
1475 : : dominators were updated during the peeling. When doing early break
1476 : : vectorization, LOOP_VINFO needs to be provided and is used to keep track of
1477 : : memory references that need to be updated should we decide to vectorize. */
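 : :
 : : /* A rough sketch of the common vectorizer use, peeling the copy to the
 : : exit edge E (an illustration only, block names are not from the
 : : sources):
 : :
 : : preheader preheader
 : : | |
 : : LOOP ==> LOOP (vectorized)
 : : | |
 : : exit new_preheader
 : : |
 : : NEW_LOOP (epilogue)
 : : |
 : : exit */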
1478 : :
1479 : : class loop *
1480 : 34486 : slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
1481 : : class loop *scalar_loop,
1482 : : edge scalar_exit, edge e, edge *new_e,
1483 : : bool flow_loops,
1484 : : vec<basic_block> *updated_doms)
1485 : : {
1486 : 34486 : class loop *new_loop;
1487 : 34486 : basic_block *new_bbs, *bbs, *pbbs;
1488 : 34486 : bool at_exit;
1489 : 34486 : bool was_imm_dom;
1490 : 34486 : basic_block exit_dest;
1491 : 34486 : edge exit, new_exit;
1492 : 34486 : bool duplicate_outer_loop = false;
1493 : :
1494 : 34486 : exit = loop_exit;
1495 : 34486 : at_exit = (e == exit);
1496 : 34486 : if (!at_exit && e != loop_preheader_edge (loop))
1497 : : return NULL;
1498 : :
1499 : 34486 : if (scalar_loop == NULL)
1500 : : {
1501 : 31972 : scalar_loop = loop;
1502 : 31972 : scalar_exit = loop_exit;
1503 : : }
1504 : 2514 : else if (scalar_loop == loop)
1505 : 0 : scalar_exit = loop_exit;
1506 : : else
1507 : : {
1508 : : /* Loop has been versioned, match exits up using the aux index. */
1509 : 7542 : for (edge exit : get_loop_exit_edges (scalar_loop))
1510 : 2514 : if (exit->aux == loop_exit->aux)
1511 : : {
1512 : 2514 : scalar_exit = exit;
1513 : 2514 : break;
1514 : 2514 : }
1515 : :
1516 : 2514 : gcc_assert (scalar_exit);
1517 : : }
1518 : :
1519 : 34486 : bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1520 : 34486 : pbbs = bbs + 1;
1521 : 34486 : get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
1522 : : /* Allow duplication of outer loops. */
1523 : 34486 : if (scalar_loop->inner)
1524 : 123 : duplicate_outer_loop = true;
1525 : :
1526 : : /* Generate new loop structure. */
1527 : 34486 : new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
1528 : 34486 : duplicate_subloops (scalar_loop, new_loop);
1529 : :
1530 : 34486 : exit_dest = exit->dest;
1531 : 34486 : was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
1532 : 34486 : exit_dest) == exit->src ?
1533 : : true : false);
1534 : :
1535 : : /* Also copy the pre-header, this avoids jumping through hoops to
1536 : : duplicate the loop entry PHI arguments. Create an empty
1537 : : pre-header unconditionally for this. */
1538 : 34486 : basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
1539 : 34486 : edge entry_e = single_pred_edge (preheader);
1540 : 34486 : bbs[0] = preheader;
1541 : 34486 : new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1542 : :
1543 : 34486 : copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
1544 : : &scalar_exit, 1, &new_exit, NULL,
1545 : : at_exit ? loop->latch : e->src, true);
1546 : 34486 : exit = loop_exit;
1547 : 34486 : basic_block new_preheader = new_bbs[0];
1548 : :
1549 : 34486 : gcc_assert (new_exit);
1550 : :
1551 : : /* Record the new loop exit information. new_loop doesn't have SCEV data and
1552 : : so we must initialize the exit information. */
1553 : 34486 : if (new_e)
1554 : 33415 : *new_e = new_exit;
1555 : :
1556 : : /* Before installing PHI arguments make sure that the edges
1557 : : into them match that of the scalar loop we analyzed. This
1558 : : makes sure the SLP tree matches up between the main vectorized
1559 : : loop and the epilogue vectorized copies. */
1560 : 34486 : if (single_succ_edge (preheader)->dest_idx
1561 : 34486 : != single_succ_edge (new_bbs[0])->dest_idx)
1562 : : {
1563 : 29546 : basic_block swap_bb = new_bbs[1];
1564 : 29546 : gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
1565 : 29546 : std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
1566 : 29546 : EDGE_PRED (swap_bb, 0)->dest_idx = 0;
1567 : 29546 : EDGE_PRED (swap_bb, 1)->dest_idx = 1;
1568 : : }
1569 : 34486 : if (duplicate_outer_loop)
1570 : : {
1571 : 123 : class loop *new_inner_loop = get_loop_copy (scalar_loop->inner);
1572 : 123 : if (loop_preheader_edge (scalar_loop)->dest_idx
1573 : 123 : != loop_preheader_edge (new_inner_loop)->dest_idx)
1574 : : {
1575 : 98 : basic_block swap_bb = new_inner_loop->header;
1576 : 98 : gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
1577 : 98 : std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
1578 : 98 : EDGE_PRED (swap_bb, 0)->dest_idx = 0;
1579 : 98 : EDGE_PRED (swap_bb, 1)->dest_idx = 1;
1580 : : }
1581 : : }
1582 : :
1583 : 34486 : add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
1584 : :
1585 : : /* Skip the new preheader since it's deleted if the loop copy is added at entry. */
1586 : 180134 : for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
1587 : 111162 : rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
1588 : :
1589 : : /* Rename the exit uses. */
1590 : 139864 : for (edge exit : get_loop_exit_edges (new_loop))
1591 : 36406 : for (auto gsi = gsi_start_phis (exit->dest);
1592 : 84394 : !gsi_end_p (gsi); gsi_next (&gsi))
1593 : : {
1594 : 47988 : tree orig_def = PHI_ARG_DEF_FROM_EDGE (gsi.phi (), exit);
1595 : 47988 : rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), exit));
1596 : 47988 : if (MAY_HAVE_DEBUG_BIND_STMTS)
1597 : 23066 : adjust_debug_stmts (orig_def, PHI_RESULT (gsi.phi ()), exit->dest);
1598 : 34486 : }
1599 : :
1600 : 34486 : auto loop_exits = get_loop_exit_edges (loop);
1601 : 34486 : bool multiple_exits_p = loop_exits.length () > 1;
1602 : 34486 : auto_vec<basic_block> doms;
1603 : :
1604 : 34486 : if (at_exit) /* Add the loop copy at exit. */
1605 : : {
1606 : 33009 : if (scalar_loop != loop && new_exit->dest != exit_dest)
1607 : : {
1608 : 2510 : new_exit = redirect_edge_and_branch (new_exit, exit_dest);
1609 : 2510 : flush_pending_stmts (new_exit);
1610 : : }
1611 : :
1612 : 33009 : bool need_virtual_phi = get_virtual_phi (loop->header);
1613 : :
1614 : : /* For the main loop exit preserve the LC PHI nodes. For vectorization
1615 : : we need them to continue or finalize reductions. Since we do not
1616 : : copy the loop exit blocks we have to materialize PHIs at the
1617 : : new destination before redirecting edges. */
1618 : 33009 : for (auto gsi_from = gsi_start_phis (loop_exit->dest);
1619 : 78100 : !gsi_end_p (gsi_from); gsi_next (&gsi_from))
1620 : : {
1621 : 45091 : tree res = gimple_phi_result (*gsi_from);
1622 : 45091 : create_phi_node (copy_ssa_name (res), new_preheader);
1623 : : }
1624 : 33009 : edge e = redirect_edge_and_branch (loop_exit, new_preheader);
1625 : 33009 : gcc_assert (e == loop_exit);
1626 : 33009 : flush_pending_stmts (loop_exit);
1627 : 33009 : set_immediate_dominator (CDI_DOMINATORS, new_preheader, loop_exit->src);
1628 : :
1629 : : /* If we ended up choosing an exit leading to a path not using memory
1630 : : we can end up without a virtual LC PHI. Create it when it is
1631 : : needed because of the epilog loop continuation. */
1632 : 33009 : if (need_virtual_phi && !get_virtual_phi (loop_exit->dest))
1633 : : {
1634 : 8 : tree header_def = gimple_phi_result (get_virtual_phi (loop->header));
1635 : 8 : gphi *vphi = create_phi_node (copy_ssa_name (header_def),
1636 : : new_preheader);
1637 : 8 : add_phi_arg (vphi, get_live_virtual_operand_on_edge (loop_exit),
1638 : : loop_exit, UNKNOWN_LOCATION);
1639 : : }
1640 : :
1641 : 33009 : bool multiple_exits_p = loop_exits.length () > 1;
1642 : 66018 : basic_block main_loop_exit_block = new_preheader;
1643 : 33009 : basic_block alt_loop_exit_block = NULL;
1644 : : /* Create the CFG for multiple exits.
1645 : : | loop_exit | alt1 | altN
1646 : : v v ... v
1647 : : main_loop_exit_block: alt_loop_exit_block:
1648 : : | /
1649 : : v v
1650 : : new_preheader:
1651 : : where in the new preheader we need merge PHIs for
1652 : : the continuation values into the epilogue header.
1653 : : Do not bother with exit PHIs for the early exits except for
1654 : : their live virtual operand. We'll fix up things below. */
1655 : 33009 : if (multiple_exits_p)
1656 : : {
1657 : 1482 : edge loop_e = single_succ_edge (new_preheader);
1658 : 1482 : new_preheader = split_edge (loop_e);
1659 : :
1660 : 1482 : gphi *vphi = NULL;
1661 : 1482 : alt_loop_exit_block = new_preheader;
1662 : 7615 : for (auto exit : loop_exits)
1663 : 3169 : if (exit != loop_exit)
1664 : : {
1665 : 1687 : tree vphi_def = NULL_TREE;
1666 : 1687 : if (gphi *evphi = get_virtual_phi (exit->dest))
1667 : 623 : vphi_def = gimple_phi_arg_def_from_edge (evphi, exit);
1668 : 1687 : edge res = redirect_edge_and_branch (exit, alt_loop_exit_block);
1669 : 1687 : gcc_assert (res == exit);
1670 : 1687 : redirect_edge_var_map_clear (exit);
1671 : 1687 : if (alt_loop_exit_block == new_preheader)
1672 : 1482 : alt_loop_exit_block = split_edge (exit);
1673 : 1687 : if (!need_virtual_phi)
1674 : 1241 : continue;
1675 : : /* When the edge has no virtual LC PHI get at the live
1676 : : virtual operand by other means. */
1677 : 446 : if (!vphi_def)
1678 : 2 : vphi_def = get_live_virtual_operand_on_edge (exit);
1679 : 446 : if (!vphi)
1680 : 424 : vphi = create_phi_node (copy_ssa_name (vphi_def),
1681 : : alt_loop_exit_block);
1682 : : else
1683 : : /* Edge redirection might re-allocate the PHI node
1684 : : so we have to rediscover it. */
1685 : 22 : vphi = get_virtual_phi (alt_loop_exit_block);
1686 : 446 : add_phi_arg (vphi, vphi_def, exit, UNKNOWN_LOCATION);
1687 : : }
1688 : :
1689 : 1482 : set_immediate_dominator (CDI_DOMINATORS, new_preheader,
1690 : : loop->header);
1691 : :
1692 : : /* Fix up the profile counts of the new exit blocks.
1693 : : main_loop_exit_block was created by duplicating the
1694 : : preheader, so needs its count scaling according to the main
1695 : : exit edge's probability. The remaining count from the
1696 : : preheader goes to the alt_loop_exit_block, since all
1697 : : alternative exits have been redirected there. */
1698 : 1482 : main_loop_exit_block->count = loop_exit->count ();
1699 : 1482 : alt_loop_exit_block->count
1700 : 1482 : = preheader->count - main_loop_exit_block->count;
1701 : : }
1702 : :
1703 : : /* Adjust the epilog loop PHI entry values to continue iteration.
1704 : : This adds remaining necessary LC PHI nodes to the main exit
1705 : : and creates merge PHIs when we have multiple exits with
1706 : : their appropriate continuation. */
1707 : 33009 : if (flow_loops)
1708 : : {
1709 : 33009 : edge loop_entry = single_succ_edge (new_preheader);
1710 : 33009 : bool peeled_iters = single_pred (loop->latch) != loop_exit->src;
1711 : :
1712 : : /* Record the new SSA names in the cache so that we can skip
1713 : : materializing them again when we fill in the rest of the LC SSA
1714 : : variables. */
1715 : 33009 : hash_map <tree, tree> new_phi_args;
1716 : 33009 : for (auto psi = gsi_start_phis (main_loop_exit_block);
1717 : 78108 : !gsi_end_p (psi); gsi_next (&psi))
1718 : : {
1719 : 45099 : gphi *phi = *psi;
1720 : 45099 : tree new_arg = gimple_phi_arg_def_from_edge (phi, loop_exit);
1721 : 45099 : if (TREE_CODE (new_arg) != SSA_NAME)
1722 : 377 : continue;
1723 : :
1724 : : /* If the loop doesn't have a virtual def then the epilog LC PHI for
1725 : : it isn't needed either; remove it and avoid creating new defs. */
1726 : 44813 : if (virtual_operand_p (new_arg) && !need_virtual_phi)
1727 : : {
1728 : 91 : auto gsi = gsi_for_stmt (phi);
1729 : 91 : remove_phi_node (&gsi, true);
1730 : 91 : continue;
1731 : 91 : }
1732 : :
1733 : : /* If we decided not to remove the PHI node we should also not
1734 : : rematerialize it later on. */
1735 : 44722 : new_phi_args.put (new_arg, gimple_phi_result (phi));
1736 : : }
1737 : :
1738 : : /* Create the merge PHI nodes in new_preheader and populate the
1739 : : arguments for the exits. */
1740 : 33009 : if (multiple_exits_p)
1741 : : {
1742 : 1482 : for (auto gsi_from = gsi_start_phis (loop->header),
1743 : 1482 : gsi_to = gsi_start_phis (new_loop->header);
1744 : 4699 : !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
1745 : 3217 : gsi_next (&gsi_from), gsi_next (&gsi_to))
1746 : : {
1747 : 3217 : gimple *from_phi = gsi_stmt (gsi_from);
1748 : 3217 : gimple *to_phi = gsi_stmt (gsi_to);
1749 : :
1750 : : /* When the vector loop is peeled we need to use the
1751 : : value at the start of the loop, otherwise the main loop exit
1752 : : should use the final iter value. */
1753 : 3217 : tree new_arg;
1754 : 3217 : if (peeled_iters)
1755 : 55 : new_arg = gimple_phi_result (from_phi);
1756 : : else
1757 : 3162 : new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
1758 : : loop_latch_edge (loop));
1759 : :
1760 : : /* Check if we've already created a new phi node during edge
1761 : : redirection and re-use it if so. Otherwise create a
1762 : : LC PHI node to feed the merge PHI. */
1763 : 3217 : tree *res;
1764 : 6434 : if (virtual_operand_p (new_arg))
1765 : : {
1766 : : /* Use the existing virtual LC SSA from exit block. */
1767 : 424 : gphi *vphi = get_virtual_phi (main_loop_exit_block);
1768 : 424 : new_arg = gimple_phi_result (vphi);
1769 : : }
1770 : 2793 : else if ((res = new_phi_args.get (new_arg)))
1771 : 104 : new_arg = *res;
1772 : : else
1773 : : {
1774 : : /* Create the LC PHI node for the exit. */
1775 : 2689 : tree new_def = copy_ssa_name (new_arg);
1776 : 2689 : gphi *lc_phi
1777 : 2689 : = create_phi_node (new_def, main_loop_exit_block);
1778 : 2689 : SET_PHI_ARG_DEF (lc_phi, 0, new_arg);
1779 : 2689 : new_arg = new_def;
1780 : : }
1781 : :
1782 : : /* Create the PHI node in the merge block merging the
1783 : : main and early exit values. */
1784 : 3217 : tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
1785 : 3217 : gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
1786 : 3217 : edge main_e = single_succ_edge (main_loop_exit_block);
1787 : 3217 : SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, main_e, new_arg);
1788 : :
1789 : : /* And adjust the epilog entry value. */
1790 : 3217 : adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
1791 : : }
1792 : :
1793 : : /* After creating the merge PHIs handle the early exits; those
1794 : : should use the values at the start of the loop. */
1795 : 1482 : for (auto gsi_from = gsi_start_phis (loop->header),
1796 : 1482 : gsi_to = gsi_start_phis (new_preheader);
1797 : 4699 : !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
1798 : 3217 : gsi_next (&gsi_from), gsi_next (&gsi_to))
1799 : : {
1800 : 3217 : gimple *from_phi = gsi_stmt (gsi_from);
1801 : 3217 : gimple *to_phi = gsi_stmt (gsi_to);
1802 : :
1803 : : /* Now update the virtual PHI nodes with the right value. */
1804 : 3217 : tree alt_arg = gimple_phi_result (from_phi);
1805 : 6434 : if (virtual_operand_p (alt_arg))
1806 : : {
1807 : 424 : gphi *vphi = get_virtual_phi (alt_loop_exit_block);
1808 : 424 : alt_arg = gimple_phi_result (vphi);
1809 : : }
1810 : : /* For other live args we didn't create LC PHI nodes.
1811 : : Do so here. */
1812 : : else
1813 : : {
1814 : 2793 : tree alt_def = copy_ssa_name (alt_arg);
1815 : 2793 : gphi *lc_phi
1816 : 2793 : = create_phi_node (alt_def, alt_loop_exit_block);
1817 : 5985 : for (unsigned i = 0; i < gimple_phi_num_args (lc_phi);
1818 : : ++i)
1819 : 3192 : SET_PHI_ARG_DEF (lc_phi, i, alt_arg);
1820 : : alt_arg = alt_def;
1821 : : }
1822 : 3217 : edge alt_e = single_succ_edge (alt_loop_exit_block);
1823 : 3217 : SET_PHI_ARG_DEF_ON_EDGE (to_phi, alt_e, alt_arg);
1824 : : }
1825 : : }
1826 : : /* For the single exit case only create the missing LC PHI nodes
1827 : : for the continuation of the loop IVs that are not also already
1828 : : reductions and thus had LC PHI nodes on the exit already. */
1829 : : else
1830 : : {
1831 : 31527 : for (auto gsi_from = gsi_start_phis (loop->header),
1832 : 31527 : gsi_to = gsi_start_phis (new_loop->header);
1833 : 120608 : !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
1834 : 89081 : gsi_next (&gsi_from), gsi_next (&gsi_to))
1835 : : {
1836 : 89081 : gimple *from_phi = gsi_stmt (gsi_from);
1837 : 89081 : gimple *to_phi = gsi_stmt (gsi_to);
1838 : 89081 : tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
1839 : : loop_latch_edge (loop));
1840 : :
1841 : : /* Check if we've already created a new phi node during edge
1842 : : redirection. If we have, only propagate the value
1843 : : downwards. */
1844 : 89081 : if (tree *res = new_phi_args.get (new_arg))
1845 : : {
1846 : 42074 : adjust_phi_and_debug_stmts (to_phi, loop_entry, *res);
1847 : 42074 : continue;
1848 : : }
1849 : :
1850 : 47007 : tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
1851 : 47007 : gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
1852 : 47007 : SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, loop_exit, new_arg);
1853 : 47007 : adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
1854 : : }
1855 : : }
1856 : 33009 : }
1857 : :
1858 : 33009 : if (was_imm_dom || duplicate_outer_loop)
1859 : 32693 : set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
1860 : :
1861 : : /* And remove the unnecessary forwarder again. Keep the other
1862 : : one so we have a proper pre-header for the loop at the exit edge. */
1863 : 33009 : redirect_edge_pred (single_succ_edge (preheader),
1864 : : single_pred (preheader));
1865 : 33009 : delete_basic_block (preheader);
1866 : 33009 : set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1867 : 33009 : loop_preheader_edge (scalar_loop)->src);
1868 : :
1869 : : /* Finally after wiring the new epilogue we need to update its main exit
1870 : : to the original function exit we recorded. Other exits are already
1871 : : correct. */
1872 : 33009 : if (multiple_exits_p)
1873 : : {
1874 : 1482 : class loop *update_loop = new_loop;
1875 : 1482 : doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
1876 : 78325 : for (unsigned i = 0; i < doms.length (); ++i)
1877 : 76843 : if (flow_bb_inside_loop_p (loop, doms[i]))
1878 : 4637 : doms.unordered_remove (i);
1879 : :
1880 : 7615 : for (edge e : get_loop_exit_edges (update_loop))
1881 : : {
1882 : 3169 : edge ex;
1883 : 3169 : edge_iterator ei;
1884 : 6245 : FOR_EACH_EDGE (ex, ei, e->dest->succs)
1885 : : {
1886 : : /* Find the first non-fallthrough block as fall-throughs can't
1887 : : dominate other blocks. */
1888 : 3076 : if (single_succ_p (ex->dest))
1889 : : {
1890 : 1520 : doms.safe_push (ex->dest);
1891 : 1520 : ex = single_succ_edge (ex->dest);
1892 : : }
1893 : 3076 : doms.safe_push (ex->dest);
1894 : : }
1895 : 3169 : doms.safe_push (e->dest);
1896 : 1482 : }
1897 : :
1898 : 1482 : iterate_fix_dominators (CDI_DOMINATORS, doms, false);
1899 : 1482 : if (updated_doms)
1900 : 1482 : updated_doms->safe_splice (doms);
1901 : : }
1902 : : }
1903 : : else /* Add the copy at entry. */
1904 : : {
1905 : : /* Copy the current loop LC PHI nodes between the original loop exit
1906 : : block and the new loop header. This allows us to later split the
1907 : : preheader block and still find the right LC nodes. */
1908 : 1477 : if (flow_loops)
1909 : 406 : for (auto gsi_from = gsi_start_phis (new_loop->header),
1910 : 406 : gsi_to = gsi_start_phis (loop->header);
1911 : 1260 : !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
1912 : 854 : gsi_next (&gsi_from), gsi_next (&gsi_to))
1913 : : {
1914 : 854 : gimple *from_phi = gsi_stmt (gsi_from);
1915 : 854 : gimple *to_phi = gsi_stmt (gsi_to);
1916 : 854 : tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
1917 : : loop_latch_edge (new_loop));
1918 : 854 : adjust_phi_and_debug_stmts (to_phi, loop_preheader_edge (loop),
1919 : : new_arg);
1920 : : }
1921 : :
1922 : 1477 : if (scalar_loop != loop)
1923 : : {
1924 : : /* Remove the unnecessary forwarder of scalar_loop again. */
1925 : 4 : redirect_edge_pred (single_succ_edge (preheader),
1926 : : single_pred (preheader));
1927 : 4 : delete_basic_block (preheader);
1928 : 4 : set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1929 : 4 : loop_preheader_edge (scalar_loop)->src);
1930 : 4 : preheader = split_edge (loop_preheader_edge (loop));
1931 : 4 : entry_e = single_pred_edge (preheader);
1932 : : }
1933 : :
1934 : 1477 : redirect_edge_and_branch_force (entry_e, new_preheader);
1935 : 1477 : flush_pending_stmts (entry_e);
1936 : 1477 : set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
1937 : :
1938 : 1477 : redirect_edge_and_branch_force (new_exit, preheader);
1939 : 1477 : flush_pending_stmts (new_exit);
1940 : 1477 : set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
1941 : :
1942 : : /* And remove the unnecessary forwarder again. Keep the other
1943 : : one so we have a proper pre-header for the loop at the exit edge. */
1944 : 1477 : redirect_edge_pred (single_succ_edge (new_preheader),
1945 : : single_pred (new_preheader));
1946 : 1477 : delete_basic_block (new_preheader);
1947 : 1477 : set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
1948 : 1477 : loop_preheader_edge (new_loop)->src);
1949 : :
1950 : : /* Update dominators for multiple exits. */
1951 : 1477 : if (multiple_exits_p)
1952 : : {
1953 : 1089 : for (edge alt_e : loop_exits)
1954 : : {
1955 : 447 : if (alt_e == loop_exit)
1956 : 214 : continue;
1957 : 233 : basic_block old_dom
1958 : 233 : = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
1959 : 233 : if (flow_bb_inside_loop_p (loop, old_dom))
1960 : : {
1961 : 68 : auto_vec<basic_block, 8> queue;
1962 : 68 : for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
1963 : 219 : son; son = next_dom_son (CDI_DOMINATORS, son))
1964 : 151 : if (!flow_bb_inside_loop_p (loop, son))
1965 : 83 : queue.safe_push (son);
1966 : 287 : for (auto son : queue)
1967 : 83 : set_immediate_dominator (CDI_DOMINATORS,
1968 : : son, get_bb_copy (old_dom));
1969 : 68 : }
1970 : : }
1971 : : }
1972 : : }
1973 : :
1974 : 34486 : free (new_bbs);
1975 : 34486 : free (bbs);
1976 : :
1977 : 34486 : checking_verify_dominators (CDI_DOMINATORS);
1978 : :
1979 : 34486 : return new_loop;
1980 : 34486 : }
1981 : :
1982 : :
1983 : : /* Given the condition expression COND, put it as the last statement of
1984 : : GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
1985 : : DOM_BB; return the skip edge. GUARD_TO is the target basic block to
1986 : : skip the loop. PROBABILITY is the skip edge's probability. Mark the
1987 : : new edge as irreducible if IRREDUCIBLE_P is true. */
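 : :
 : : /* Illustration (not from the sources): after the call GUARD_BB ends in
 : : if (COND), the new true edge skips to GUARD_TO with probability
 : : PROBABILITY and the pre-existing successor edge becomes the false
 : : edge with the inverted probability:
 : :
 : : guard_bb: if (cond) --true--> guard_to (skip)
 : : |
 : : false
 : : v
 : : enter_e->dest (enter the loop) */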
1988 : :
1989 : : static edge
1990 : 50480 : slpeel_add_loop_guard (basic_block guard_bb, tree cond,
1991 : : basic_block guard_to, basic_block dom_bb,
1992 : : profile_probability probability, bool irreducible_p)
1993 : : {
1994 : 50480 : gimple_stmt_iterator gsi;
1995 : 50480 : edge new_e, enter_e;
1996 : 50480 : gcond *cond_stmt;
1997 : 50480 : gimple_seq gimplify_stmt_list = NULL;
1998 : :
1999 : 50480 : enter_e = EDGE_SUCC (guard_bb, 0);
2000 : 50480 : enter_e->flags &= ~EDGE_FALLTHRU;
2001 : 50480 : enter_e->flags |= EDGE_FALSE_VALUE;
2002 : 50480 : gsi = gsi_last_bb (guard_bb);
2003 : :
2004 : 50480 : cond = force_gimple_operand_1 (cond, &gimplify_stmt_list,
2005 : : is_gimple_condexpr_for_cond, NULL_TREE);
2006 : 50480 : if (gimplify_stmt_list)
2007 : 22771 : gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
2008 : :
2009 : 50480 : cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
2010 : 50480 : gsi = gsi_last_bb (guard_bb);
2011 : 50480 : gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
2012 : :
2013 : : /* Add new edge to connect guard block to the merge/loop-exit block. */
2014 : 50480 : new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
2015 : :
2016 : 50480 : new_e->probability = probability;
2017 : 50480 : if (irreducible_p)
2018 : 14 : new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
2019 : :
2020 : 50480 : enter_e->probability = probability.invert ();
2021 : 50480 : set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
2022 : :
2023 : : /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
2024 : 50480 : if (enter_e->dest->loop_father->header == enter_e->dest)
2025 : 409 : split_edge (enter_e);
2026 : :
2027 : 50480 : return new_e;
2028 : : }
2029 : :
2030 : :
2031 : : /* This function verifies that the following restrictions apply to LOOP:
2032 : : (1) it consists of exactly 2 basic blocks (a header and an empty latch)
2033 : : for an innermost loop, and of 5 basic blocks for an outer loop.
2034 : : (2) it is single entry, single exit
2035 : : (3) its exit condition is the last stmt in the header
2036 : : (4) E is the entry/exit edge of LOOP.
2037 : : */
2038 : :
2039 : : bool
2040 : 410810 : slpeel_can_duplicate_loop_p (const class loop *loop, const_edge exit_e,
2041 : : const_edge e)
2042 : : {
2043 : 410810 : edge entry_e = loop_preheader_edge (loop);
2044 : 410810 : gcond *orig_cond = get_loop_exit_condition (exit_e);
2045 : 410810 : gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
2046 : :
2047 : : /* All loops have an outer scope; the only case loop->outer is NULL is for
2048 : : the function itself. */
2049 : 410810 : if (!loop_outer (loop)
2050 : 410810 : || !empty_block_p (loop->latch)
2051 : : || !exit_e
2052 : : /* Verify that new loop exit condition can be trivially modified. */
2053 : 410810 : || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
2054 : 821620 : || (e != exit_e && e != entry_e))
2055 : 0 : return false;
2056 : :
2057 : 410810 : basic_block *bbs = XNEWVEC (basic_block, loop->num_nodes);
2058 : 410810 : get_loop_body_with_size (loop, bbs, loop->num_nodes);
2059 : 410810 : bool ret = can_copy_bbs_p (bbs, loop->num_nodes);
2060 : 410810 : free (bbs);
2061 : 410810 : return ret;
2062 : : }
2063 : :
2064 : : /* Function find_loop_location.
2065 : :
2066 : : Extract the location of the loop in the source code.
2067 : : If the loop is not well formed for vectorization, an estimated
2068 : : location is calculated.
2069 : : Return the loop location on success and NULL otherwise. */
2070 : :
2071 : : dump_user_location_t
2072 : 3617342 : find_loop_location (class loop *loop)
2073 : : {
2074 : 3617342 : gimple *stmt = NULL;
2075 : 3617342 : basic_block bb;
2076 : 3617342 : gimple_stmt_iterator si;
2077 : :
2078 : 3617342 : if (!loop)
2079 : 0 : return dump_user_location_t ();
2080 : :
2081 : : /* For the root of the loop tree return the function location. */
2082 : 3617342 : if (!loop_outer (loop))
2083 : 0 : return dump_user_location_t::from_function_decl (cfun->decl);
2084 : :
2085 : 3617342 : if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
2086 : : {
2087 : : /* We only care about the loop location, so use any exit with location
2088 : : information. */
2089 : 11370945 : for (edge e : get_loop_exit_edges (loop))
2090 : : {
2091 : 3735045 : stmt = get_loop_exit_condition (e);
2092 : :
2093 : 3735045 : if (stmt
2094 : 3735045 : && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
2095 : 3157442 : return stmt;
2096 : 3617342 : }
2097 : : }
2098 : :
2099 : : /* If we got here the loop is probably not "well formed",
2100 : : try to estimate the loop location. */
2101 : :
2102 : 459900 : if (!loop->header)
2103 : 0 : return dump_user_location_t ();
2104 : :
2105 : 459900 : bb = loop->header;
2106 : :
2107 : 1613194 : for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2108 : : {
2109 : 1023984 : stmt = gsi_stmt (si);
2110 : 1023984 : if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
2111 : 330590 : return stmt;
2112 : : }
2113 : :
2114 : 129310 : return dump_user_location_t ();
2115 : : }
2116 : :
2117 : : /* Return true if the phi described by STMT_INFO defines an IV of the
2118 : : loop to be vectorized. */
2119 : :
2120 : : static bool
2121 : 1113236 : iv_phi_p (stmt_vec_info stmt_info)
2122 : : {
2123 : 1113236 : gphi *phi = as_a <gphi *> (stmt_info->stmt);
2124 : 2226472 : if (virtual_operand_p (PHI_RESULT (phi)))
2125 : : return false;
2126 : :
2127 : 885268 : if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
2128 : 885268 : || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
2129 : 114688 : return false;
2130 : :
2131 : : return true;
2132 : : }
2133 : :
2134 : : /* Return true if vectorizer can peel for nonlinear iv. */
2135 : : static bool
2136 : 9854 : vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
2137 : : stmt_vec_info stmt_info)
2138 : : {
2139 : 9854 : enum vect_induction_op_type induction_type
2140 : : = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);
2141 : 9854 : tree niters_skip;
2142 : : /* Init_expr will be updated by vect_update_ivs_after_vectorizer
2143 : : if niters or vf is unknown:
2144 : : For shift, when the shift amount >= precision, there would be UB.
2145 : : For mult, we don't know how to generate
2146 : : init_expr * pow (step, niters) for variable niters.
2147 : : For neg, unknown niters are OK, since the niters of the vectorized
2148 : : main loop will always be a multiple of 2.
2149 : : See also PR113163, PR114196 and PR114485. */
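 : :
 : : /* For instance (an illustrative case, not from the sources): for a mult
 : : IV with init i0 and step 3, advancing past NITERS iterations needs
 : : i0 * 3^NITERS; with variable NITERS there is no cheap way to
 : : materialize that power, hence the checks below bail out. */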
2150 : 9854 : if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
2151 : 9854 : || LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
2152 : 9854 : || (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2153 : 6684 : && induction_type != vect_step_op_neg))
2154 : : {
2155 : 6522 : if (dump_enabled_p ())
2156 : 12 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2157 : : "Peeling for epilogue is not supported"
2158 : : " for this nonlinear induction"
2159 : : " when iteration count is unknown or"
2160 : : " when using partial vectorization.\n");
2161 : 6522 : return false;
2162 : : }
2163 : :
2164 : 3332 : if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
2165 : 238 : && induction_type == vect_step_op_mul)
2166 : : {
2167 : 24 : if (dump_enabled_p ())
2168 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2169 : : "Peeling is not supported for nonlinear mult"
2170 : : " induction using partial vectorization.\n");
2171 : 24 : return false;
2172 : : }
2173 : :
2174 : : /* Avoid compile time hog on vect_peel_nonlinear_iv_init. */
2175 : 3094 : if (induction_type == vect_step_op_mul)
2176 : : {
2177 : 401 : tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
2178 : 401 : tree type = TREE_TYPE (step_expr);
2179 : :
2180 : 796 : if (wi::exact_log2 (wi::to_wide (step_expr)) == -1
2181 : 401 : && LOOP_VINFO_INT_NITERS (loop_vinfo) >= TYPE_PRECISION (type))
2182 : : {
2183 : 6 : if (dump_enabled_p ())
2184 : 6 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2185 : : "Avoid compile time hog on"
2186 : : " vect_peel_nonlinear_iv_init"
2187 : : " for nonlinear induction vec_step_op_mul"
2188 : : " when iteration count is too big.\n");
2189 : 6 : return false;
2190 : : }
2191 : : }
2192 : :
2193 : : /* Also doesn't support peeling for neg when niter is variable.
2194 : : ??? generate something like niter_expr & 1 ? init_expr : -init_expr? */
2195 : 3302 : niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
2196 : 3302 : if ((niters_skip != NULL_TREE
2197 : 0 : && (TREE_CODE (niters_skip) != INTEGER_CST
2198 : 0 : || (HOST_WIDE_INT) TREE_INT_CST_LOW (niters_skip) < 0))
2199 : 3302 : || (!vect_use_loop_mask_for_alignment_p (loop_vinfo)
2200 : 3302 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0))
2201 : : {
2202 : 4 : if (dump_enabled_p ())
2203 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2204 : : "Peeling for alignment is not supported"
2205 : : " for nonlinear induction when niters_skip"
2206 : : " is not constant.\n");
2207 : 4 : return false;
2208 : : }
2209 : :
2210 : : return true;
2211 : : }
2212 : :
2213 : : /* Function vect_can_advance_ivs_p
2214 : :
2215 : : In case the number of iterations that LOOP executes is unknown at compile
2216 : : time, an epilog loop will be generated, and the loop induction variables
2217 : : (IVs) will be "advanced" to the value they are supposed to take just before
2218 : : the epilog loop. Here we check that the access function of the loop IVs
2219 : : and the expression that represents the loop bound are simple enough.
2220 : : These restrictions will be relaxed in the future. */
2221 : :
2222 : : bool
2223 : 417963 : vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2224 : : {
2225 : 417963 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2226 : 417963 : basic_block bb = loop->header;
2227 : 417963 : gphi_iterator gsi;
2228 : :
2229 : : /* Analyze phi functions of the loop header. */
2230 : :
2231 : 417963 : if (dump_enabled_p ())
2232 : 23680 : dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
2233 : 1428125 : for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2234 : : {
2235 : 1017721 : tree evolution_part;
2236 : 1017721 : enum vect_induction_op_type induction_type;
2237 : :
2238 : 1017721 : gphi *phi = gsi.phi ();
2239 : 1017721 : stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
2240 : 1017721 : if (dump_enabled_p ())
2241 : 68103 : dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
2242 : : phi_info->stmt);
2243 : :
2244 : : /* Skip virtual phi's. The data dependences that are associated with
2245 : : virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
2246 : :
2247 : : Skip reduction phis. */
2248 : 1017721 : if (!iv_phi_p (phi_info))
2249 : : {
2250 : 300023 : if (dump_enabled_p ())
2251 : 24056 : dump_printf_loc (MSG_NOTE, vect_location,
2252 : : "reduc or virtual phi. skip.\n");
2253 : 300023 : continue;
2254 : : }
2255 : :
2256 : 717698 : induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
2257 : 717698 : if (induction_type != vect_step_op_add)
2258 : : {
2259 : 9854 : if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, phi_info))
2260 : : return false;
2261 : :
2262 : 3298 : continue;
2263 : : }
2264 : :
2265 : : /* Analyze the evolution function. */
2266 : :
2267 : 707844 : evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
2268 : 707844 : if (evolution_part == NULL_TREE)
2269 : : {
2270 : 971 : if (dump_enabled_p ())
2271 : 78 : dump_printf (MSG_MISSED_OPTIMIZATION,
2272 : : "No access function or evolution.\n");
2273 : 971 : return false;
2274 : : }
2275 : :
2276 : : /* FORNOW: We do not transform initial conditions of IVs
2277 : : whose evolution functions are not invariant in the loop. */
2278 : :
2279 : 706873 : if (!expr_invariant_in_loop_p (loop, evolution_part))
2280 : : {
2281 : 32 : if (dump_enabled_p ())
2282 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2283 : : "evolution not invariant in loop.\n");
2284 : 32 : return false;
2285 : : }
2286 : :
2287 : : /* FORNOW: We do not transform initial conditions of IVs
2288 : : whose evolution functions are a polynomial of degree >= 2. */
2289 : :
2290 : 1717003 : if (tree_is_chrec (evolution_part))
2291 : : {
2292 : 0 : if (dump_enabled_p ())
2293 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 : : "evolution is chrec.\n");
2295 : 0 : return false;
2296 : : }
2297 : : }
2298 : :
2299 : : return true;
2300 : : }
2301 : :
2302 : :
2303 : : /* Function vect_update_ivs_after_vectorizer.
2304 : :
2305 : : "Advance" the induction variables of LOOP to the value they should take
2306 : : after the execution of LOOP. This is currently necessary because the
2307 : : vectorizer does not handle induction variables that are used after the
2308 : : loop. Such a situation occurs when the last iterations of LOOP are
2309 : : peeled, because:
2310 : : 1. We introduced new uses after LOOP for IVs that were not originally used
2311 : : after LOOP: the IVs of LOOP are now used by an epilog loop.
2312 : : 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2313 : : times, whereas the loop IVs should be bumped N times.
2314 : :
2315 : : Input:
2316 : : - LOOP - a loop that is going to be vectorized. The last few iterations
2317 : : of LOOP were peeled.
2318 : : vectorized), i.e., the number of times the ivs should be bumped.
2319 : : vectorized). i.e, the number of times the ivs should be bumped.
2320 : : - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2321 : : coming out from LOOP on which there are uses of the LOOP ivs
2322 : : (this is the path from LOOP->exit to epilog_loop->preheader).
2323 : :
2324 : : The new definitions of the ivs are placed in LOOP->exit.
2325 : : The phi args associated with the edge UPDATE_E in the bb
2326 : : UPDATE_E->dest are updated accordingly.
2327 : :
2328 : : - EARLY_EXIT_P - Indicates whether the exit is an early exit rather than
2329 : : the main latch exit.
2330 : :
2331 : : Assumption 1: Like the rest of the vectorizer, this function assumes
2332 : : a single loop exit that has a single predecessor.
2333 : :
2334 : : Assumption 2: The phi nodes in the LOOP header and in update_bb are
2335 : : organized in the same order.
2336 : :
2337 : : Assumption 3: The access function of the ivs is simple enough (see
2338 : : vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2339 : :
2340 : : Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2341 : : coming out of LOOP on which the ivs of LOOP are used (this is the path
2342 : : that leads to the epilog loop; other paths skip the epilog loop). This
2343 : : path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2344 : : needs to have its phis updated.
2345 : : */
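 : :
 : : /* For example (illustrative): a linear IV with start INIT and step
 : : STEP is advanced to INIT + NITERS * STEP (the vect_step_op_add case
 : : below); the epilog loop then continues the IV from that value. */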
2346 : :
2347 : : static void
2348 : 34491 : vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
2349 : : tree niters, edge update_e,
2350 : : bool early_exit_p)
2351 : : {
2352 : 34491 : gphi_iterator gsi, gsi1;
2353 : 34491 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2354 : 34491 : basic_block update_bb = update_e->dest;
2355 : 34491 : basic_block exit_bb = update_e->src;
2356 : 34491 : gimple_stmt_iterator last_gsi = gsi_last_bb (exit_bb);
2357 : :
2358 : 34491 : for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
2359 : 130006 : !gsi_end_p (gsi) && !gsi_end_p (gsi1);
2360 : 95515 : gsi_next (&gsi), gsi_next (&gsi1))
2361 : : {
2362 : 95515 : tree init_expr;
2363 : 95515 : tree step_expr, off;
2364 : 95515 : tree type;
2365 : 95515 : tree var, ni, ni_name;
2366 : :
2367 : 95515 : gphi *phi = gsi.phi ();
2368 : 95515 : gphi *phi1 = gsi1.phi ();
2369 : 95515 : stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
2370 : 95515 : if (dump_enabled_p ())
2371 : 12026 : dump_printf_loc (MSG_NOTE, vect_location,
2372 : : "vect_update_ivs_after_vectorizer: phi: %G",
2373 : : (gimple *) phi);
2374 : :
2375 : : /* Skip reduction and virtual phis. */
2376 : 95515 : if (!iv_phi_p (phi_info))
2377 : : {
2378 : 42633 : if (dump_enabled_p ())
2379 : 4630 : dump_printf_loc (MSG_NOTE, vect_location,
2380 : : "reduc or virtual phi. skip.\n");
2381 : 42633 : continue;
2382 : : }
2383 : :
2384 : 52882 : type = TREE_TYPE (gimple_phi_result (phi));
2385 : 52882 : step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
2386 : 52882 : step_expr = unshare_expr (step_expr);
2387 : :
2388 : : /* FORNOW: We do not support IVs whose evolution function is a polynomial
2389 : : of degree >= 2 or exponential. */
2390 : 52882 : gcc_assert (!tree_is_chrec (step_expr));
2391 : :
2392 : 52882 : init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2393 : 52882 : gimple_seq stmts = NULL;
2394 : 52882 : enum vect_induction_op_type induction_type
2395 : : = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
2396 : :
2397 : 52882 : if (induction_type == vect_step_op_add)
2398 : : {
2399 : 52754 : tree stype = TREE_TYPE (step_expr);
2400 : 52754 : off = fold_build2 (MULT_EXPR, stype,
2401 : : fold_convert (stype, niters), step_expr);
2402 : :
2403 : 52754 : if (POINTER_TYPE_P (type))
2404 : 3442 : ni = fold_build_pointer_plus (init_expr, off);
2405 : : else
2406 : 49312 : ni = fold_convert (type,
2407 : : fold_build2 (PLUS_EXPR, stype,
2408 : : fold_convert (stype, init_expr),
2409 : : off));
2410 : : }
2411 : : /* Don't bother calling vect_peel_nonlinear_iv_init. */
2412 : 128 : else if (induction_type == vect_step_op_neg)
2413 : : ni = init_expr;
2414 : : else
2415 : 84 : ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
2416 : : niters, step_expr,
2417 : : induction_type, early_exit_p);
2418 : :
2419 : 52882 : var = create_tmp_var (type, "tmp");
2420 : :
2421 : 52882 : gimple_seq new_stmts = NULL;
2422 : 52882 : ni_name = force_gimple_operand (ni, &new_stmts, false, var);
2423 : :
2424 : : /* Exit_bb shouldn't be empty, but we also can't insert after a control
2425 : : statement. */
2426 : 52882 : if (!gsi_end_p (last_gsi) && !is_ctrl_stmt (gsi_stmt (last_gsi)))
2427 : : {
2428 : 248 : gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
2429 : 248 : gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
2430 : : }
2431 : : else
2432 : : {
2433 : 52634 : gsi_insert_seq_before (&last_gsi, stmts, GSI_SAME_STMT);
2434 : 52634 : gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
2435 : : }
2436 : :
2437 : : /* Fix phi expressions in all out-of-loop bbs. */
2438 : 52882 : imm_use_iterator imm_iter;
2439 : 52882 : gimple *use_stmt;
2440 : 52882 : use_operand_p use_p;
2441 : 52882 : tree ic_var = PHI_ARG_DEF_FROM_EDGE (phi1, update_e);
2442 : 52882 : if (TREE_CODE (ic_var) == SSA_NAME)
2443 : : {
2444 : 158908 : FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, ic_var)
2445 : 53152 : if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2446 : 159456 : FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2447 : 106030 : SET_USE (use_p, ni_name);
2448 : : }
2449 : : else
2450 : 4 : adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
2451 : : }
2452 : 34491 : }
2453 : :
2454 : : /* Return a gimple value containing the misalignment (measured in vector
2455 : : elements) for the loop described by LOOP_VINFO, i.e. how many elements
2456 : : it is away from a perfectly aligned address. Add any new statements
2457 : : to SEQ. */
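 : :
 : : /* E.g. (illustrative numbers): with a start address whose low bits are
 : : 0xc, a 16-byte target alignment and 4-byte elements,
 : : misalign_in_bytes = 0xc & 15 = 12 and
 : : misalign_in_elems = 12 >> log2 (4) = 3. */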
2458 : :
2459 : : static tree
2460 : 193 : get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
2461 : : {
2462 : 193 : dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2463 : 193 : stmt_vec_info stmt_info = dr_info->stmt;
2464 : 193 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2465 : :
2466 : 193 : poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
2467 : 193 : unsigned HOST_WIDE_INT target_align_c;
2468 : 193 : tree target_align_minus_1;
2469 : :
2470 : 193 : bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
2471 : 193 : size_zero_node) < 0;
2472 : 193 : tree offset = (negative
2473 : 193 : ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2474 : : * TREE_INT_CST_LOW
2475 : : (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
2476 : 192 : : size_zero_node);
2477 : 193 : tree start_addr = vect_create_addr_base_for_vector_ref (loop_vinfo,
2478 : : stmt_info, seq,
2479 : : offset);
2480 : 193 : tree type = unsigned_type_for (TREE_TYPE (start_addr));
2481 : 193 : if (target_align.is_constant (&target_align_c))
2482 : 193 : target_align_minus_1 = build_int_cst (type, target_align_c - 1);
2483 : : else
2484 : : {
2485 : : tree vla = build_int_cst (type, target_align);
2486 : : target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla,
2487 : : build_int_cst (type, 1));
2488 : : }
2489 : :
2490 : 193 : HOST_WIDE_INT elem_size
2491 : 193 : = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
2492 : 386 : tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
2493 : :
2494 : : /* Create: misalign_in_bytes = addr & (target_align - 1). */
2495 : 193 : tree int_start_addr = fold_convert (type, start_addr);
2496 : 193 : tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
2497 : : target_align_minus_1);
2498 : :
2499 : : /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
2500 : 193 : tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
2501 : : elem_size_log);
2502 : :
2503 : 193 : return misalign_in_elems;
2504 : : }
2505 : :
2506 : : /* Function vect_gen_prolog_loop_niters
2507 : :
2508 : : Generate the number of iterations which should be peeled as prolog for the
2509 : : loop represented by LOOP_VINFO. It is calculated as the misalignment of
2510 : : DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
2511 : : As a result, after the execution of this loop, the data reference DR will
2512 : : refer to an aligned location. The following computation is generated:
2513 : :
2514 : : If the misalignment of DR is known at compile time:
2515 : : addr_mis = int mis = DR_MISALIGNMENT (dr);
2516 : : Else, compute address misalignment in bytes:
2517 : : addr_mis = addr & (target_align - 1)
2518 : :
2519 : : prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
2520 : :
2521 : : (elem_size = element type size; an element is the scalar element whose type
2522 : : is the inner type of the vectype)
2523 : :
2524 : : The computations will be emitted at the end of BB. We also compute and
2525 : : store upper bound (included) of the result in BOUND.
2526 : :
2527 : : When the step of the data-ref in the loop is not 1 (as in interleaved data
2528 : : and SLP), the number of iterations of the prolog must be divided by the step
2529 : : (which is equal to the size of interleaved group).
2530 : :
2531 : : The above formulas assume that VF == number of elements in the vector. This
2532 : : may not hold when there are multiple-types in the loop.
2533 : : In this case, for some data-references in the loop the VF does not represent
2534 : : the number of elements that fit in the vector. Therefore, instead of VF we
2535 : : use TYPE_VECTOR_SUBPARTS. */
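 : :
 : : /* A worked example (illustrative numbers): with 4-byte elements, a
 : : 32-byte target alignment (8 elements), a step of 1 and
 : : addr_mis = 3 elements, the formula above gives
 : : prolog_niters = (8 - 3) & (8 - 1) = 5, i.e. five peeled scalar
 : : iterations bring DR to a 32-byte boundary. */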
2536 : :
2537 : : static tree
2538 : 406 : vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
2539 : : basic_block bb, poly_int64 *bound)
2540 : : {
2541 : 406 : dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2542 : 406 : tree var;
2543 : 406 : tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
2544 : 406 : gimple_seq stmts = NULL, new_stmts = NULL;
2545 : 406 : tree iters, iters_name;
2546 : 406 : stmt_vec_info stmt_info = dr_info->stmt;
2547 : 406 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2548 : 406 : poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
2549 : :
2550 : 406 : if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2551 : : {
2552 : 213 : int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2553 : :
2554 : 213 : if (dump_enabled_p ())
2555 : 203 : dump_printf_loc (MSG_NOTE, vect_location,
2556 : : "known peeling = %d.\n", npeel);
2557 : :
2558 : 213 : iters = build_int_cst (niters_type, npeel);
2559 : 213 : *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2560 : : }
2561 : : else
2562 : : {
2563 : 193 : tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
2564 : 193 : tree type = TREE_TYPE (misalign_in_elems);
2565 : 193 : HOST_WIDE_INT elem_size
2566 : 193 : = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
2567 : : /* We only do prolog peeling if the target alignment is known at compile
2568 : : time. */
2569 : 193 : poly_uint64 align_in_elems =
2570 : 193 : exact_div (target_align, elem_size);
2571 : 193 : tree align_in_elems_minus_1 =
2572 : 193 : build_int_cst (type, align_in_elems - 1);
2573 : 193 : tree align_in_elems_tree = build_int_cst (type, align_in_elems);
2574 : :
2575 : : /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
2576 : : & (align_in_elems - 1)). */
2577 : 193 : bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
2578 : 193 : size_zero_node) < 0;
2579 : 193 : if (negative)
2580 : 1 : iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
2581 : : align_in_elems_tree);
2582 : : else
2583 : 192 : iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
2584 : : misalign_in_elems);
2585 : 193 : iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
2586 : 193 : iters = fold_convert (niters_type, iters);
2587 : 193 : *bound = align_in_elems;
2588 : : }
2589 : :
2590 : 406 : if (dump_enabled_p ())
2591 : 226 : dump_printf_loc (MSG_NOTE, vect_location,
2592 : : "niters for prolog loop: %T\n", iters);
2593 : :
2594 : 406 : var = create_tmp_var (niters_type, "prolog_loop_niters");
2595 : 406 : iters_name = force_gimple_operand (iters, &new_stmts, false, var);
2596 : :
2597 : 406 : if (new_stmts)
2598 : 193 : gimple_seq_add_seq (&stmts, new_stmts);
2599 : 406 : if (stmts)
2600 : : {
2601 : 193 : gcc_assert (single_succ_p (bb));
2602 : 193 : gimple_stmt_iterator gsi = gsi_last_bb (bb);
2603 : 193 : if (gsi_end_p (gsi))
2604 : 16 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2605 : : else
2606 : 177 : gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2607 : : }
2608 : 406 : return iters_name;
2609 : : }
2610 : :
2611 : :
2612 : : /* Function vect_update_init_of_dr
2613 : :
2614 : : If CODE is PLUS, the vector loop starts NITERS iterations after the
2615 : : scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
2616 : : iterations before the scalar one (using masking to skip inactive
2617 : : elements). This function updates the information recorded in DR to
2618 : : account for the difference. Specifically, it updates the OFFSET
2619 : : field of DR_INFO. */
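 : :
 : : /* E.g. (illustrative numbers): with NITERS = 5 and DR_STEP = 4 bytes,
 : : a PLUS update adds 5 * 4 = 20 bytes to DR_INFO's offset, so the
 : : vector loop's first access starts where the peeled scalar prolog
 : : left off. */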
2620 : :
2621 : : static void
2622 : 22508 : vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
2623 : : {
2624 : 22508 : struct data_reference *dr = dr_info->dr;
2625 : 22508 : tree offset = dr_info->offset;
2626 : 22508 : if (!offset)
2627 : 22508 : offset = build_zero_cst (sizetype);
2628 : :
2629 : 22508 : niters = fold_build2 (MULT_EXPR, sizetype,
2630 : : fold_convert (sizetype, niters),
2631 : : fold_convert (sizetype, DR_STEP (dr)));
2632 : 22508 : offset = fold_build2 (code, sizetype,
2633 : : fold_convert (sizetype, offset), niters);
2634 : 22508 : dr_info->offset = offset;
2635 : 22508 : }
2636 : :
2637 : :
2638 : : /* Function vect_update_inits_of_drs
2639 : :
2640 : : Apply vect_update_init_of_dr to all accesses in LOOP_VINFO.
2641 : : CODE and NITERS are as for vect_update_init_of_dr. */
2642 : :
2643 : : void
2644 : 7226 : vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
2645 : : tree_code code)
2646 : : {
2647 : 7226 : unsigned int i;
2648 : 7226 : vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2649 : 7226 : struct data_reference *dr;
2650 : :
2651 : 7226 : DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
2652 : : /* Adjust niters to sizetype. We used to insert the stmts on the loop preheader
2653 : : edge here, but since we might use these niters to update the epilogue's niters
2654 : : here, but since we might use these niters to update the epilogues niters
2655 : : and data references we can't insert them here as this definition might not
2656 : : always dominate its uses. */
2657 : 7226 : if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
2658 : 4467 : niters = fold_convert (sizetype, niters);
2659 : :
2660 : 37285 : FOR_EACH_VEC_ELT (datarefs, i, dr)
2661 : : {
2662 : 22934 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2663 : 22934 : if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt)
2664 : 22508 : && !STMT_VINFO_SIMD_LANE_ACCESS_P (dr_info->stmt))
2665 : 22508 : vect_update_init_of_dr (dr_info, niters, code);
2666 : : }
2667 : 7226 : }
2668 : :
2669 : : /* For the information recorded in LOOP_VINFO prepare the loop for peeling
2670 : : by masking. This involves calculating the number of iterations to
2671 : : be peeled and then aligning all memory references appropriately. */
2672 : :
2673 : : void
2674 : 1 : vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
2675 : : {
2676 : 1 : tree misalign_in_elems;
2677 : 1 : tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
2678 : :
2679 : 1 : gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
2680 : :
2681 : : /* From the information recorded in LOOP_VINFO get the number of iterations
2682 : : that need to be skipped via masking. */
2683 : 1 : if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2684 : : {
2685 : 1 : poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2686 : 1 : - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
2687 : 1 : misalign_in_elems = build_int_cst (type, misalign);
2688 : : }
2689 : : else
2690 : : {
2691 : 0 : gimple_seq seq1 = NULL, seq2 = NULL;
2692 : 0 : misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
2693 : 0 : misalign_in_elems = fold_convert (type, misalign_in_elems);
2694 : 0 : misalign_in_elems = force_gimple_operand (misalign_in_elems,
2695 : : &seq2, true, NULL_TREE);
2696 : 0 : gimple_seq_add_seq (&seq1, seq2);
2697 : 0 : if (seq1)
2698 : : {
2699 : 0 : edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2700 : 0 : basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
2701 : 0 : gcc_assert (!new_bb);
2702 : : }
2703 : : }
2704 : :
2705 : 1 : if (dump_enabled_p ())
2706 : 1 : dump_printf_loc (MSG_NOTE, vect_location,
2707 : : "misalignment for fully-masked loop: %T\n",
2708 : : misalign_in_elems);
2709 : :
2710 : 1 : LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
2711 : :
2712 : 1 : vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
2713 : 1 : }
2714 : :
2715 : : /* This function builds ni_name = number of iterations. Statements
2716 : : are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
2717 : : it to TRUE if a new ssa_var is generated. */
2718 : :
2719 : : tree
2720 : 60871 : vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
2721 : : {
2722 : 60871 : tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2723 : 60871 : if (TREE_CODE (ni) == INTEGER_CST)
2724 : : return ni;
2725 : : else
2726 : : {
2727 : 25534 : tree ni_name, var;
2728 : 25534 : gimple_seq stmts = NULL;
2729 : 25534 : edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2730 : :
2731 : 25534 : var = create_tmp_var (TREE_TYPE (ni), "niters");
2732 : 25534 : ni_name = force_gimple_operand (ni, &stmts, false, var);
2733 : 25534 : if (stmts)
2734 : : {
2735 : 24486 : gsi_insert_seq_on_edge_immediate (pe, stmts);
2736 : 24486 : if (new_var_p != NULL)
2737 : 235 : *new_var_p = true;
2738 : : }
2739 : :
2740 : 25534 : return ni_name;
2741 : : }
2742 : : }
2743 : :
2744 : : /* Calculate the number of iterations above which the vectorized loop is
2745 : : preferred over the scalar loop. NITERS_PROLOG is the number of iterations
2746 : : of the prolog loop. If it's an integer constant, its value is also passed
2747 : : in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the
2748 : : number of iterations of the prolog loop. BOUND_EPILOG is the corresponding
2749 : : value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the
2750 : : threshold below which the scalar (rather than vectorized) loop will be
2751 : : executed. This function stores the upper bound (inclusive) of the result
2752 : : in BOUND_SCALAR. */
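 : :
 : : /* A worked example (illustrative numbers): with NITERS_PROLOG = 3 and
 : : BOUND_EPILOG = 4 the scalar path covers at most 3 + 4 iterations;
 : : with CHECK_PROFITABILITY and TH = 16 the whole loop may instead run
 : : scalar whenever fewer than TH iterations remain, so the stored
 : : bound becomes max (3 + 4, 16 - 1) = 15. */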
2753 : :
2754 : : static tree
2755 : 24539 : vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
2756 : : poly_int64 bound_prolog, poly_int64 bound_epilog,
2757 : : int th, poly_uint64 *bound_scalar,
2758 : : bool check_profitability)
2759 : : {
2760 : 24539 : tree type = TREE_TYPE (niters_prolog);
2761 : 24539 : tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
2762 : : build_int_cst (type, bound_epilog));
2763 : :
2764 : 24539 : *bound_scalar = bound_prolog + bound_epilog;
2765 : 24539 : if (check_profitability)
2766 : : {
2767 : : /* TH indicates the minimum niters of vectorized loop, while we
2768 : : compute the maximum niters of scalar loop. */
2769 : 15699 : th--;
2770 : : /* Peeling for constant times. */
2771 : 15699 : if (int_niters_prolog >= 0)
2772 : : {
2773 : 15661 : *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
2774 : 15661 : return build_int_cst (type, *bound_scalar);
2775 : : }
2776 : : /* Peeling an unknown number of times. Note that both BOUND_PROLOG
2777 : : and BOUND_EPILOG are inclusive upper bounds. */
2778 : 38 : if (known_ge (th, bound_prolog + bound_epilog))
2779 : : {
2780 : 0 : *bound_scalar = th;
2781 : 0 : return build_int_cst (type, th);
2782 : : }
2783 : : /* Need to do runtime comparison. */
2784 : 38 : else if (maybe_gt (th, bound_epilog))
2785 : : {
2786 : 38 : *bound_scalar = upper_bound (*bound_scalar, th);
2787 : 38 : return fold_build2 (MAX_EXPR, type,
2788 : : build_int_cst (type, th), niters);
2789 : : }
2790 : : }
2791 : : return niters;
2792 : : }
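 : :
 : : /* A worked example with made-up numbers: NITERS_PROLOG = 3 (so
 : : INT_NITERS_PROLOG = 3), BOUND_EPILOG = 4, TH = 10 and
 : : CHECK_PROFITABILITY. TH is first decremented to 9 and the
 : : constant-peeling case yields
 : :
 : : *BOUND_SCALAR = upper_bound (3 + 4, 9) = 9
 : :
 : : so the scalar loop is known to run at most 9 iterations. */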
2793 : :
2794 : : /* NITERS is the number of times that the original scalar loop executes
2795 : : after peeling. Work out the maximum number of iterations N that can
2796 : : be handled by the vectorized form of the loop and then either:
2797 : :
2798 : : a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
2799 : :
2800 : : niters_vector = N
2801 : :
2802 : : b) set *STEP_VECTOR_PTR to one and generate:
2803 : :
2804 : : niters_vector = N / vf
2805 : :
2806 : : In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
2807 : : any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
2808 : : is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero).
2809 : :
2810 : : Case (a) is used for LOOP_VINFO_USING_PARTIAL_VECTORS_P or if VF is
2811 : : variable. As stated above, NITERS_VECTOR then equals the number
2812 : : of scalar iterations and vect_set_loop_condition will handle the
2813 : : step. As opposed to (b) we don't know anything about NITERS_VECTOR's
2814 : : range here.
2815 : : */
2816 : :
2817 : : void
2818 : 33757 : vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
2819 : : tree *niters_vector_ptr, tree *step_vector_ptr,
2820 : : bool niters_no_overflow)
2821 : : {
2822 : 33757 : tree ni_minus_gap, var;
2823 : 33757 : tree niters_vector, step_vector, type = TREE_TYPE (niters);
2824 : 33757 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2825 : 33757 : edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2826 : :
2827 : : /* If epilogue loop is required because of data accesses with gaps, we
2828 : : subtract one iteration from the total number of iterations here for
2829 : : correct calculation of RATIO. */
2830 : 33757 : if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2831 : : {
2832 : 353 : ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
2833 : : build_one_cst (type));
2834 : 353 : if (!is_gimple_val (ni_minus_gap))
2835 : : {
2836 : 169 : var = create_tmp_var (type, "ni_gap");
2837 : 169 : gimple *stmts = NULL;
2838 : 169 : ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
2839 : : true, var);
2840 : 169 : gsi_insert_seq_on_edge_immediate (pe, stmts);
2841 : : }
2842 : : }
2843 : : else
2844 : : ni_minus_gap = niters;
2845 : :
2846 : : /* To silence some unexpected warnings, simply initialize to 0. */
2847 : 33757 : unsigned HOST_WIDE_INT const_vf = 0;
2848 : 33757 : if (vf.is_constant (&const_vf)
2849 : 33757 : && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2850 : : {
2851 : : /* Create: niters / vf as a truncating division, which is equivalent
2852 : : to niters >> log2(vf) when vf is a power of two. */
2854 : : /* If it's known that niters == number of latch executions + 1 doesn't
2855 : : overflow, we can generate niters / vf; otherwise we generate
2856 : : (niters - vf) / vf + 1 by using the fact that we know ratio
2857 : : will be at least one. */
2858 : 33739 : tree var_vf = build_int_cst (type, const_vf);
2859 : 33739 : if (niters_no_overflow)
2860 : 33518 : niters_vector = fold_build2 (TRUNC_DIV_EXPR, type, ni_minus_gap,
2861 : : var_vf);
2862 : : else
2863 : 221 : niters_vector
2864 : 221 : = fold_build2 (PLUS_EXPR, type,
2865 : : fold_build2 (TRUNC_DIV_EXPR, type,
2866 : : fold_build2 (MINUS_EXPR, type,
2867 : : ni_minus_gap,
2868 : : var_vf),
2869 : : var_vf),
2870 : : build_int_cst (type, 1));
2871 : 33739 : step_vector = build_one_cst (type);
2872 : : }
2873 : : else
2874 : : {
2875 : 18 : niters_vector = ni_minus_gap;
2876 : 18 : step_vector = build_int_cst (type, vf);
2877 : : }
2878 : :
2879 : 33757 : if (!is_gimple_val (niters_vector))
2880 : : {
2881 : 25292 : var = create_tmp_var (type, "bnd");
2882 : 25292 : gimple_seq stmts = NULL;
2883 : 25292 : niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
2884 : 25292 : gsi_insert_seq_on_edge_immediate (pe, stmts);
2885 : : /* The peeling algorithm guarantees that the vector loop bound is at
2886 : : least ONE, so we set range information to make the niters
2887 : : analyzer's life easier. Note the number of latch iterations can be
2888 : : TYPE_MAX_VALUE, so the vector niter has to represent
2889 : : (TYPE_MAX_VALUE + 1) / vf. */
2889 : 25292 : if (stmts != NULL
2890 : 25292 : && integer_onep (step_vector))
2891 : : {
2892 : 25277 : if (niters_no_overflow)
2893 : : {
2894 : 25074 : int_range<1> vr (type,
2895 : 50148 : wi::one (TYPE_PRECISION (type)),
2896 : 50148 : wi::div_trunc (wi::max_value
2897 : 25074 : (TYPE_PRECISION (type),
2898 : 25074 : TYPE_SIGN (type)),
2899 : : const_vf,
2900 : 50148 : TYPE_SIGN (type)));
2901 : 25074 : set_range_info (niters_vector, vr);
2902 : 25074 : }
2903 : : /* For VF == 1 the vector IV might also overflow so we cannot
2904 : : assert a minimum value of 1. */
2905 : 203 : else if (const_vf > 1)
2906 : : {
2907 : 167 : int_range<1> vr (type,
2908 : 334 : wi::one (TYPE_PRECISION (type)),
2909 : 501 : wi::rshift (wi::max_value (TYPE_PRECISION (type),
2910 : 167 : TYPE_SIGN (type))
2911 : 334 : - (const_vf - 1),
2912 : 334 : exact_log2 (const_vf), TYPE_SIGN (type))
2913 : 668 : + 1);
2914 : 167 : set_range_info (niters_vector, vr);
2915 : 167 : }
2916 : : }
2917 : : }
2918 : 33757 : *niters_vector_ptr = niters_vector;
2919 : 33757 : *step_vector_ptr = step_vector;
2920 : :
2921 : 33757 : return;
2922 : : }
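 : :
 : : /* For illustration (SSA names are made up), with a constant VF of 4
 : : and partial vectors not in use, case (b) emits on the preheader
 : : edge either
 : :
 : : bnd_8 = niters_6 / 4;
 : :
 : : when NITERS_NO_OVERFLOW, or the overflow-safe form
 : :
 : : bnd_8 = (niters_6 - 4) / 4 + 1;
 : :
 : : with *STEP_VECTOR_PTR = 1. Otherwise case (a) passes NITERS
 : : through unchanged and sets *STEP_VECTOR_PTR to VF. */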
2923 : :
2924 : : /* Given NITERS_VECTOR which is the number of iterations for vectorized
2925 : : loop specified by LOOP_VINFO after vectorization, compute the number
2926 : : of iterations before vectorization (niters_vector * vf) and store it
2927 : : to NITERS_VECTOR_MULT_VF_PTR. */
2928 : :
2929 : : static void
2930 : 33009 : vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
2931 : : tree niters_vector,
2932 : : tree *niters_vector_mult_vf_ptr)
2933 : : {
2934 : : /* We should be using a step_vector of VF if VF is variable. */
2935 : 33009 : int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
2936 : 33009 : tree type = TREE_TYPE (niters_vector);
2937 : 33009 : tree tree_vf = build_int_cst (type, vf);
2938 : 33009 : basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
2939 : :
2940 : 33009 : gcc_assert (niters_vector_mult_vf_ptr != NULL);
2941 : 33009 : tree niters_vector_mult_vf = fold_build2 (MULT_EXPR, type,
2942 : : niters_vector, tree_vf);
2943 : :
2944 : : /* If we've peeled a vector iteration then subtract one full vector
2945 : : iteration. */
2946 : 33009 : if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
2947 : 21 : niters_vector_mult_vf = fold_build2 (MINUS_EXPR, type,
2948 : : niters_vector_mult_vf, tree_vf);
2949 : :
2950 : 33009 : if (!is_gimple_val (niters_vector_mult_vf))
2951 : : {
2952 : 24566 : tree var = create_tmp_var (type, "niters_vector_mult_vf");
2953 : 24566 : gimple_seq stmts = NULL;
2954 : 24566 : niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
2955 : : &stmts, true, var);
2956 : 24566 : gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
2957 : 24566 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2958 : : }
2959 : 33009 : *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
2960 : 33009 : }
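 : :
 : : /* For illustration, with VF = 4 the exit block receives roughly
 : :
 : : niters_vector_mult_vf_9 = bnd_8 * 4;
 : :
 : : and when an early-break vector iteration was peeled a further
 : : "- 4" is folded into the expression. */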
2961 : :
2962 : : /* Function slpeel_add_loop_guard adds guard skipping from the beginning
2963 : : of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
2964 : : are two pred edges of the merge point before UPDATE_LOOP. The two loops
2965 : : appear like below:
2966 : :
2967 : : guard_bb:
2968 : : if (cond)
2969 : : goto merge_bb;
2970 : : else
2971 : : goto skip_loop;
2972 : :
2973 : : skip_loop:
2974 : : header_a:
2975 : : i_1 = PHI<i_0, i_2>;
2976 : : ...
2977 : : i_2 = i_1 + 1;
2978 : : if (cond_a)
2979 : : goto latch_a;
2980 : : else
2981 : : goto exit_a;
2982 : : latch_a:
2983 : : goto header_a;
2984 : :
2985 : : exit_a:
2986 : : i_5 = PHI<i_2>;
2987 : :
2988 : : merge_bb:
2989 : : ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
2990 : :
2991 : : update_loop:
2992 : : header_b:
2993 : : i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
2994 : : ...
2995 : : i_4 = i_3 + 1;
2996 : : if (cond_b)
2997 : : goto latch_b;
2998 : : else
2999 : : goto exit_bb;
3000 : : latch_b:
3001 : : goto header_b;
3002 : :
3003 : : exit_bb:
3004 : :
3005 : : This function creates PHI nodes at merge_bb and replaces the use of i_5
3006 : : in the update_loop's PHI node with the new PHI's result. */
3007 : :
3008 : : static void
3009 : 24945 : slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
3010 : : class loop *update_loop,
3011 : : edge guard_edge, edge merge_edge)
3012 : : {
3013 : 24945 : location_t merge_loc, guard_loc;
3014 : 24945 : edge orig_e = loop_preheader_edge (skip_loop);
3015 : 24945 : edge update_e = loop_preheader_edge (update_loop);
3016 : 24945 : gphi_iterator gsi_orig, gsi_update;
3017 : :
3018 : 24945 : for ((gsi_orig = gsi_start_phis (skip_loop->header),
3019 : 24945 : gsi_update = gsi_start_phis (update_loop->header));
3020 : 91338 : !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
3021 : 66393 : gsi_next (&gsi_orig), gsi_next (&gsi_update))
3022 : : {
3023 : 66393 : gphi *orig_phi = gsi_orig.phi ();
3024 : 66393 : gphi *update_phi = gsi_update.phi ();
3025 : :
3026 : : /* Generate new phi node at merge bb of the guard. */
3027 : 66393 : tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
3028 : 66393 : gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
3029 : :
3030 : : /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
3031 : : args in NEW_PHI for these edges. */
3032 : 66393 : tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
3033 : 66393 : tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
3034 : 66393 : merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
3035 : 66393 : guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
3036 : 66393 : add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
3037 : 66393 : add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
3038 : :
3039 : : /* Update phi in UPDATE_PHI. */
3040 : 66393 : adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
3041 : : }
3042 : 24945 : }
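 : :
 : : /* In the diagram above, for the IV i this creates in merge_bb
 : :
 : : i_x = PHI <i_5 (merge_edge), i_0 (guard_edge)>
 : :
 : : and rewrites the i_5 argument of header_b's PHI to i_x. */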
3043 : :
3044 : : /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped.
3045 : : Return a value that equals:
3046 : :
3047 : : - MAIN_LOOP_VALUE when LOOP_VINFO is entered from the main loop and
3048 : : - SKIP_VALUE when the main loop is skipped. */
3049 : :
3050 : : tree
3051 : 3956 : vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value,
3052 : : tree skip_value)
3053 : : {
3054 : 3956 : gcc_assert (loop_vinfo->main_loop_edge);
3055 : :
3056 : 3956 : tree phi_result = make_ssa_name (TREE_TYPE (main_loop_value));
3057 : 3956 : basic_block bb = loop_vinfo->main_loop_edge->dest;
3058 : 3956 : gphi *new_phi = create_phi_node (phi_result, bb);
3059 : 3956 : add_phi_arg (new_phi, main_loop_value, loop_vinfo->main_loop_edge,
3060 : : UNKNOWN_LOCATION);
3061 : 3956 : add_phi_arg (new_phi, skip_value,
3062 : : loop_vinfo->skip_main_loop_edge, UNKNOWN_LOCATION);
3063 : 3956 : return phi_result;
3064 : : }
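 : :
 : : /* For illustration (names are made up): a caller passing
 : : MAIN_LOOP_VALUE = sum_10 and SKIP_VALUE = 0 gets back
 : :
 : : res_11 = PHI <sum_10 (main_loop_edge), 0 (skip_main_loop_edge)>
 : :
 : : created in the block where the two paths meet. */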
3065 : :
3066 : : /* Function vect_do_peeling.
3067 : :
3068 : : Input:
3069 : : - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
3070 : :
3071 : : preheader:
3072 : : LOOP:
3073 : : header_bb:
3074 : : loop_body
3075 : : if (exit_loop_cond) goto exit_bb
3076 : : else goto header_bb
3077 : : exit_bb:
3078 : :
3079 : : - NITERS: The number of iterations of the loop.
3080 : : - NITERSM1: The number of iterations of the loop's latch.
3081 : : - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
3082 : : - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
3083 : : CHECK_PROFITABILITY is true.
3084 : : Output:
3085 : : - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
3086 : : iterate after vectorization; see vect_set_loop_condition for details.
3087 : : - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
3088 : : should be set to the number of scalar iterations handled by the
3089 : : vector loop. The SSA name is only used on exit from the loop.
3090 : :
3091 : : This function peels prolog and epilog from the loop, adds guards skipping
3092 : : PROLOG and EPILOG for various conditions. As a result, the changed CFG
3093 : : would look like:
3094 : :
3095 : : guard_bb_1:
3096 : : if (prefer_scalar_loop) goto merge_bb_1
3097 : : else goto guard_bb_2
3098 : :
3099 : : guard_bb_2:
3100 : : if (skip_prolog) goto merge_bb_2
3101 : : else goto prolog_preheader
3102 : :
3103 : : prolog_preheader:
3104 : : PROLOG:
3105 : : prolog_header_bb:
3106 : : prolog_body
3107 : : if (exit_prolog_cond) goto prolog_exit_bb
3108 : : else goto prolog_header_bb
3109 : : prolog_exit_bb:
3110 : :
3111 : : merge_bb_2:
3112 : :
3113 : : vector_preheader:
3114 : : VECTOR LOOP:
3115 : : vector_header_bb:
3116 : : vector_body
3117 : : if (exit_vector_cond) goto vector_exit_bb
3118 : : else goto vector_header_bb
3119 : : vector_exit_bb:
3120 : :
3121 : : guard_bb_3:
3122 : : if (skip_epilog) goto merge_bb_3
3123 : : else goto epilog_preheader
3124 : :
3125 : : merge_bb_1:
3126 : :
3127 : : epilog_preheader:
3128 : : EPILOG:
3129 : : epilog_header_bb:
3130 : : epilog_body
3131 : : if (exit_epilog_cond) goto merge_bb_3
3132 : : else goto epilog_header_bb
3133 : :
3134 : : merge_bb_3:
3135 : :
3136 : : Note this function peels the prolog and epilog, and adds the
3137 : : corresponding guards, only when necessary.
3138 : : This function returns the epilogue loop if a decision was made to vectorize
3139 : : it, otherwise NULL.
3140 : :
3141 : : The analysis resulting in this epilogue loop's loop_vec_info was performed
3142 : : in the same vect_analyze_loop call as the main loop's. At that time
3143 : : vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
3144 : : vectorization factors than the main loop. This list is chained in the
3145 : : loop's loop_vec_info in the 'epilogue_vinfo' member. When we decide to
3146 : : vectorize the epilogue loop for a lower vectorization factor, the
3147 : : loop_vec_info in epilogue_vinfo is updated and linked to the epilogue loop.
3148 : : This is later used to vectorize the epilogue.
3149 : : The reason the loop_vec_info needs updating is that it was
3150 : : constructed based on the original main loop, and the epilogue loop is a
3151 : : copy of this loop, so all links pointing to statements in the original loop
3152 : : need updating. Furthermore, these loop_vec_infos share the
3153 : : data_reference's records, which will also need to be updated.
3154 : :
3155 : : TODO: Guard for prefer_scalar_loop should be emitted along with
3156 : : versioning conditions if loop versioning is needed. */
3157 : :
3158 : :
3159 : : class loop *
3160 : 60465 : vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
3161 : : tree *niters_vector, tree *step_vector,
3162 : : tree *niters_vector_mult_vf_var, int th,
3163 : : bool check_profitability, bool niters_no_overflow,
3164 : : tree *advance)
3165 : : {
3166 : 60465 : edge e, guard_e;
3167 : 60465 : tree type = TREE_TYPE (niters), guard_cond;
3168 : 60465 : basic_block guard_bb, guard_to;
3169 : 60465 : profile_probability prob_prolog, prob_vector, prob_epilog;
3170 : 60465 : int estimated_vf;
3171 : 60465 : int prolog_peeling = 0;
3172 : 60465 : bool vect_epilogues = loop_vinfo->epilogue_vinfo != NULL;
3173 : :
3174 : 60465 : if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
3175 : 60464 : prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3176 : :
3177 : 60465 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3178 : 60465 : poly_uint64 bound_epilog = 0;
3179 : 60465 : if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
3180 : 60447 : && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
3181 : 31983 : bound_epilog += vf - 1;
3182 : 60465 : if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
3183 : 353 : bound_epilog += 1;
3184 : :
3185 : : /* For early breaks the scalar loop needs to execute at most VF times
3186 : : to find the element that caused the break. */
3187 : 60465 : if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3188 : 1482 : bound_epilog = vf;
3189 : :
3190 : 60465 : bool epilog_peeling = maybe_ne (bound_epilog, 0U);
3191 : 60465 : poly_uint64 bound_scalar = bound_epilog;
3192 : :
3193 : 60465 : if (!prolog_peeling && !epilog_peeling)
3194 : : return NULL;
3195 : :
3196 : : /* Before doing any peeling make sure to reset debug binds outside of
3197 : : the loop referring to defs not in LC SSA. */
3198 : 33040 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3199 : 101105 : for (unsigned i = 0; i < loop->num_nodes; ++i)
3200 : : {
3201 : 68065 : basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
3202 : 68065 : imm_use_iterator ui;
3203 : 68065 : gimple *use_stmt;
3204 : 160957 : for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
3205 : 92892 : gsi_next (&gsi))
3206 : : {
3207 : 386923 : FOR_EACH_IMM_USE_STMT (use_stmt, ui, gimple_phi_result (gsi.phi ()))
3208 : 221082 : if (gimple_debug_bind_p (use_stmt)
3209 : 19943 : && loop != gimple_bb (use_stmt)->loop_father
3210 : 19959 : && !flow_loop_nested_p (loop,
3211 : 16 : gimple_bb (use_stmt)->loop_father))
3212 : : {
3213 : 2 : gimple_debug_bind_reset_value (use_stmt);
3214 : 2 : update_stmt (use_stmt);
3215 : 92892 : }
3216 : : }
3217 : 631643 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3218 : 495513 : gsi_next (&gsi))
3219 : : {
3220 : 495513 : ssa_op_iter op_iter;
3221 : 495513 : def_operand_p def_p;
3222 : 804199 : FOR_EACH_SSA_DEF_OPERAND (def_p, gsi_stmt (gsi), op_iter, SSA_OP_DEF)
3223 : 1080261 : FOR_EACH_IMM_USE_STMT (use_stmt, ui, DEF_FROM_PTR (def_p))
3224 : 499674 : if (gimple_debug_bind_p (use_stmt)
3225 : 36785 : && loop != gimple_bb (use_stmt)->loop_father
3226 : 36812 : && !flow_loop_nested_p (loop,
3227 : 27 : gimple_bb (use_stmt)->loop_father))
3228 : : {
3229 : 0 : gimple_debug_bind_reset_value (use_stmt);
3230 : 0 : update_stmt (use_stmt);
3231 : 308686 : }
3232 : : }
3233 : : }
3234 : :
3235 : 33040 : prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
3236 : 33040 : estimated_vf = vect_vf_for_cost (loop_vinfo);
3237 : 33040 : if (estimated_vf == 2)
3238 : 6399 : estimated_vf = 3;
3239 : 33040 : prob_prolog = prob_epilog = profile_probability::guessed_always ()
3240 : 33040 : .apply_scale (estimated_vf - 1, estimated_vf);
3241 : :
3242 : 33040 : class loop *prolog = NULL, *epilog = NULL;
3243 : 33040 : class loop *first_loop = loop;
3244 : 33040 : bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
3245 : :
3246 : : /* SSA form needs to be up-to-date since we are going to manually
3247 : : update SSA form in slpeel_tree_duplicate_loop_to_edge_cfg and delete
3248 : : all SSA update state after that, so we have to make sure not to lose
3249 : : any pending update needs. */
3250 : 33040 : gcc_assert (!need_ssa_update_p (cfun));
3251 : :
3252 : : /* If we're vectorizing an epilogue loop, we have ensured that the
3253 : : virtual operand is in SSA form throughout the vectorized main loop.
3254 : : Normally it is possible to trace the updated
3255 : : vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses
3256 : : back to scalar-stmt vuses, meaning that the effect of the SSA update
3257 : : remains local to the main loop. However, there are rare cases in
3258 : : which the vectorized loop should have vdefs even when the original scalar
3259 : : loop didn't. For example, vectorizing a load with IFN_LOAD_LANES
3260 : : introduces clobbers of the temporary vector array, which in turn
3261 : : needs new vdefs. If the scalar loop doesn't write to memory, these
3262 : : new vdefs will be the only ones in the vector loop.
3263 : : We currently defer updating virtual SSA form and the creation
3264 : : of a virtual PHI for this case so we do not have to make sure the
3265 : : newly introduced virtual def is in LCSSA form. */
3266 : :
3267 : 33040 : if (MAY_HAVE_DEBUG_BIND_STMTS)
3268 : : {
3269 : 12329 : gcc_assert (!adjust_vec.exists ());
3270 : 12329 : adjust_vec.create (32);
3271 : : }
3272 : 33040 : initialize_original_copy_tables ();
3273 : :
3274 : : /* Record the anchor bb at which the guard should be placed if the scalar
3275 : : loop might be preferred. */
3276 : 33040 : basic_block anchor = loop_preheader_edge (loop)->src;
3277 : :
3278 : : /* Generate the number of iterations for the prolog loop. We do this here
3279 : : so that we can also get the upper bound on the number of iterations. */
3280 : 33040 : tree niters_prolog;
3281 : 33040 : poly_int64 bound_prolog = 0;
3282 : 33040 : if (prolog_peeling)
3283 : : {
3284 : 406 : niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
3285 : : &bound_prolog);
3286 : : /* If alignment peeling is known, we will always execute the prolog. */
3287 : 406 : if (TREE_CODE (niters_prolog) == INTEGER_CST)
3288 : 213 : prob_prolog = profile_probability::always ();
3289 : : }
3290 : : else
3291 : 32634 : niters_prolog = build_int_cst (type, 0);
3292 : :
3293 : 33040 : loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo;
3294 : 33040 : tree niters_vector_mult_vf = NULL_TREE;
3295 : : /* Saving NITERs before the loop, as this may be changed by prologue. */
3296 : 33040 : tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
3297 : 33040 : edge update_e = NULL, skip_e = NULL;
3298 : 33040 : unsigned int lowest_vf = constant_lower_bound (vf);
3299 : : /* Prolog loop may be skipped. */
3300 : 33040 : bool skip_prolog = (prolog_peeling != 0);
3301 : : /* Skip to the epilog when there are not enough iterations to enter the
3302 : : vectorized loop. If true we should perform a runtime check on
3303 : : NITERS to decide whether to skip the current vectorized loop. If
3304 : : we know the number of scalar iterations, we add such a check when
3305 : : that number may be smaller than the number of iterations required
3306 : : to enter this loop; for this we use the upper bounds on the prolog
3307 : : and epilog peeling. When we don't know the number of iterations
3308 : : and don't require versioning, we have already asserted that there
3309 : : are enough scalar iterations to enter the main loop, so this skip
3310 : : is not necessary. When we are versioning we only add such a skip
3311 : : if we have chosen to vectorize the epilogue. */
3313 : 8484 : bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3314 : 33040 : ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
3315 : 8484 : bound_prolog + bound_epilog)
3316 : 24593 : : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo)
3317 : 8538 : || vect_epilogues));
3318 : :
3319 : : /* Epilog loop must be executed if the number of iterations for epilog
3320 : : loop is known at compile time, otherwise we need to add a check at
3321 : : the end of vector loop and skip to the end of epilog loop. */
3322 : 33040 : bool skip_epilog = (prolog_peeling < 0
3323 : 32847 : || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3324 : 33040 : || !vf.is_constant ());
3325 : : /* PEELING_FOR_GAPS and peeling for early breaks are special because epilog
3326 : : loop must be executed. */
3327 : 33040 : if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
3328 : 32687 : || LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3329 : 1835 : skip_epilog = false;
3330 : :
3331 : 33040 : class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3332 : 33040 : auto_vec<profile_count> original_counts;
3333 : 33040 : basic_block *original_bbs = NULL;
3334 : :
3335 : 33040 : if (skip_vector)
3336 : : {
3337 : 24539 : split_edge (loop_preheader_edge (loop));
3338 : :
3339 : 24539 : if (epilog_peeling && (vect_epilogues || scalar_loop == NULL))
3340 : : {
3341 : 22328 : original_bbs = get_loop_body (loop);
3342 : 67426 : for (unsigned int i = 0; i < loop->num_nodes; i++)
3343 : 45098 : original_counts.safe_push(original_bbs[i]->count);
3344 : : }
3345 : :
3346 : : /* Due to the order in which we peel prolog and epilog, we first
3347 : : propagate probability to the whole loop. The purpose is to
3348 : : avoid adjusting probabilities of both prolog and vector loops
3349 : : separately. Note in this case, the probability of epilog loop
3350 : : needs to be scaled back later. */
3351 : 24539 : basic_block bb_before_loop = loop_preheader_edge (loop)->src;
3352 : 24539 : if (prob_vector.initialized_p ())
3353 : : {
3354 : 24539 : scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
3355 : 24539 : scale_loop_profile (loop, prob_vector, -1);
3356 : : }
3357 : : }
3358 : :
3359 : 33040 : if (vect_epilogues)
3360 : : {
3361 : : /* Make sure to set the epilogue's epilogue scalar loop, such that we can
3362 : : use the original scalar loop as remaining epilogue if necessary. */
3363 : 6819 : LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
3364 : 6819 : = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3365 : 6819 : LOOP_VINFO_SCALAR_IV_EXIT (epilogue_vinfo)
3366 : 6819 : = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo);
3367 : : }
3368 : :
3369 : 33040 : if (prolog_peeling)
3370 : : {
3371 : 406 : e = loop_preheader_edge (loop);
3372 : 406 : edge exit_e = LOOP_VINFO_IV_EXIT (loop_vinfo);
3373 : 406 : gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, exit_e, e)
3374 : : && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo));
3375 : :
3376 : : /* Peel prolog and put it on preheader edge of loop. */
3377 : 406 : edge scalar_e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo);
3378 : 406 : edge prolog_e = NULL;
3379 : 406 : prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e,
3380 : : scalar_loop, scalar_e,
3381 : : e, &prolog_e);
3382 : 406 : gcc_assert (prolog);
3383 : 406 : prolog->force_vectorize = false;
3384 : :
3385 : 406 : first_loop = prolog;
3386 : 406 : reset_original_copy_tables ();
3387 : :
3388 : : /* Update the number of iterations for prolog loop. */
3389 : 406 : tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
3390 : 406 : vect_set_loop_condition (prolog, prolog_e, NULL, niters_prolog,
3391 : : step_prolog, NULL_TREE, false);
3392 : :
3393 : : /* Skip the prolog loop. */
3394 : 406 : if (skip_prolog)
3395 : : {
3396 : 406 : guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
3397 : : niters_prolog, build_int_cst (type, 0));
3398 : 406 : guard_bb = loop_preheader_edge (prolog)->src;
3399 : 406 : basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
3400 : 406 : guard_to = split_edge (loop_preheader_edge (loop));
3401 : 406 : guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
3402 : : guard_to, guard_bb,
3403 : : prob_prolog.invert (),
3404 : : irred_flag);
3405 : 1857 : for (edge alt_e : get_loop_exit_edges (prolog))
3406 : : {
3407 : 639 : if (alt_e == prolog_e)
3408 : 406 : continue;
3409 : 233 : basic_block old_dom
3410 : 233 : = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
3411 : 233 : if (flow_bb_inside_loop_p (prolog, old_dom))
3412 : : {
3413 : 68 : auto_vec<basic_block, 8> queue;
3414 : 68 : for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
3415 : 219 : son; son = next_dom_son (CDI_DOMINATORS, son))
3416 : 151 : if (!flow_bb_inside_loop_p (prolog, son))
3417 : 83 : queue.safe_push (son);
3418 : 287 : for (auto son : queue)
3419 : 83 : set_immediate_dominator (CDI_DOMINATORS, son, guard_bb);
3420 : 68 : }
3421 : 406 : }
3422 : :
3423 : 406 : e = EDGE_PRED (guard_to, 0);
3424 : 406 : e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
3425 : 406 : slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
3426 : :
3427 : 406 : scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
3428 : 406 : scale_loop_profile (prolog, prob_prolog,
3429 : 406 : estimated_poly_value (bound_prolog) - 1);
3430 : : }
3431 : :
3432 : : /* Update init address of DRs. */
3433 : 406 : vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
3434 : : /* Update niters for vector loop. */
3435 : 406 : LOOP_VINFO_NITERS (loop_vinfo)
3436 : 406 : = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
3437 : 406 : LOOP_VINFO_NITERSM1 (loop_vinfo)
3438 : 406 : = fold_build2 (MINUS_EXPR, type,
3439 : : LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
3440 : 406 : bool new_var_p = false;
3441 : 406 : niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
3442 : : /* It's guaranteed that vector loop bound before vectorization is at
3443 : : least VF, so set range information for newly generated var. */
3444 : 406 : if (new_var_p)
3445 : : {
3446 : 235 : int_range<1> vr (type,
3447 : 470 : wi::to_wide (build_int_cst (type, lowest_vf)),
3448 : 470 : wi::to_wide (TYPE_MAX_VALUE (type)));
3449 : 235 : set_range_info (niters, vr);
3450 : 235 : }
3451 : :
3452 : : /* Prolog iterates at most bound_prolog times, latch iterates at
3453 : : most bound_prolog - 1 times. */
3454 : 406 : if (bound_prolog.is_constant ())
3455 : 406 : record_niter_bound (prolog, bound_prolog.to_constant () - 1, false,
3456 : : true);
3457 : 406 : delete_update_ssa ();
3458 : 406 : adjust_vec_debug_stmts ();
3459 : 406 : scev_reset ();
3460 : : }
3461 : 33040 : basic_block bb_before_epilog = NULL;
3462 : :
3463 : 33040 : if (epilog_peeling)
3464 : : {
3465 : 33009 : e = LOOP_VINFO_IV_EXIT (loop_vinfo);
3466 : 33009 : gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e, e));
3467 : :
3468 : : /* Peel epilog and put it on exit edge of loop. If we are vectorizing
3469 : : said epilog then we should use a copy of the main loop as a starting
3470 : : point. This loop may have already had some preliminary transformations
3471 : : to allow for more optimal vectorization, for example if-conversion.
3472 : : If we are not vectorizing the epilog then we should use the scalar loop
3473 : : as the transformations mentioned above make less or no sense when not
3474 : : vectorizing. */
3475 : 33009 : edge scalar_e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo);
3476 : 33009 : epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
3477 : 6819 : edge epilog_e = vect_epilogues ? e : scalar_e;
3478 : 33009 : edge new_epilog_e = NULL;
3479 : 33009 : auto_vec<basic_block> doms;
3480 : 33009 : epilog
3481 : 33009 : = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
3482 : : &new_epilog_e, true, &doms);
3483 : :
3484 : 33009 : LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo) = new_epilog_e;
3485 : 33009 : gcc_assert (epilog);
3486 : 33009 : gcc_assert (new_epilog_e);
3487 : 33009 : epilog->force_vectorize = false;
3488 : 33009 : bb_before_epilog = loop_preheader_edge (epilog)->src;
3489 : :
3490 : : /* The scalar version of the loop may be preferred. In this case, add
3491 : : a guard and skip to the epilog. Note this only happens when the
3492 : : number of iterations of the loop is unknown at compile time,
3493 : : otherwise this won't be vectorized. */
3494 : 33009 : if (skip_vector)
3495 : : {
3496 : : /* Additional epilogue iteration is peeled if gap exists. */
3497 : 24539 : tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
3498 : 24539 : bound_prolog, bound_epilog,
3499 : : th, &bound_scalar,
3500 : : check_profitability);
3501 : : /* Build guard against NITERSM1 since NITERS may overflow. */
3502 : 24539 : guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
3503 : 24539 : guard_bb = anchor;
3504 : 24539 : guard_to = split_edge (loop_preheader_edge (epilog));
3505 : 24539 : guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
3506 : : guard_to, guard_bb,
3507 : : prob_vector.invert (),
3508 : : irred_flag);
3509 : 24539 : skip_e = guard_e;
3510 : 24539 : e = EDGE_PRED (guard_to, 0);
3511 : 24539 : e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
3512 : :
3513 : : /* Handle any remaining dominator updates needed after
3514 : : inserting the loop skip edge above. */
3515 : 24539 : if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
3516 : 299 : && prolog_peeling)
3517 : : {
3518 : : /* Adding a skip edge to skip a loop with multiple exits
3519 : : means the dominator of the join blocks for all exits shifts
3520 : : from the prolog skip guard to the loop skip guard. */
3521 : 176 : auto prolog_skip_bb
3522 : 176 : = single_pred (loop_preheader_edge (prolog)->src);
3523 : 176 : auto needs_update
3524 : 176 : = get_dominated_by (CDI_DOMINATORS, prolog_skip_bb);
3525 : :
3526 : : /* Update everything except for the immediate children of
3527 : : the prolog skip block (the prolog and vector preheaders).
3528 : : Those should remain dominated by the prolog skip block itself,
3529 : : since the loop guard edge goes to the epilogue. */
3530 : 926 : for (auto bb : needs_update)
3531 : 398 : if (bb != EDGE_SUCC (prolog_skip_bb, 0)->dest
3532 : 398 : && bb != EDGE_SUCC (prolog_skip_bb, 1)->dest)
3533 : 46 : set_immediate_dominator (CDI_DOMINATORS, bb, guard_bb);
3534 : 176 : }
3535 : :
3536 : 24539 : slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
3537 : :
3538 : : /* Simply propagate profile info from guard_bb to guard_to which is
3539 : : a merge point of control flow. */
3540 : 24539 : profile_count old_count = guard_to->count;
3541 : 24539 : guard_to->count = guard_bb->count;
3542 : :
3543 : : /* Restore the counts of the epilog loop if we didn't use the scalar loop. */
3544 : 24539 : if (vect_epilogues || scalar_loop == NULL)
3545 : : {
3546 : 22328 : gcc_assert(epilog->num_nodes == loop->num_nodes);
3547 : 22328 : basic_block *bbs = get_loop_body (epilog);
3548 : 67426 : for (unsigned int i = 0; i < epilog->num_nodes; i++)
3549 : : {
3550 : 45098 : gcc_assert(get_bb_original (bbs[i]) == original_bbs[i]);
3551 : 45098 : bbs[i]->count = original_counts[i];
3552 : : }
3553 : 22328 : free (bbs);
3554 : 22328 : free (original_bbs);
3555 : : }
3556 : 2211 : else if (old_count.nonzero_p ())
3557 : 2211 : scale_loop_profile (epilog, guard_to->count.probability_in (old_count), -1);
3558 : :
3559 : : /* Only need to handle basic block before epilog loop if it's not
3560 : : the guard_bb, which is the case when skip_vector is true. */
3561 : 24539 : if (guard_bb != bb_before_epilog && single_pred_p (bb_before_epilog))
3562 : 24240 : bb_before_epilog->count = single_pred_edge (bb_before_epilog)->count ();
3563 : 24539 : bb_before_epilog = loop_preheader_edge (epilog)->src;
3564 : : }
3565 : :
3566 : : /* If the loop is peeled a non-zero constant number of times, niters now
3567 : : refers to orig_niters - prolog_peeling; it won't overflow even if
3568 : : orig_niters overflows. */
3569 : 33009 : niters_no_overflow |= (prolog_peeling > 0);
3570 : 33009 : vect_gen_vector_loop_niters (loop_vinfo, niters,
3571 : : niters_vector, step_vector,
3572 : : niters_no_overflow);
3573 : 33009 : if (!integer_onep (*step_vector))
3574 : : {
3575 : : /* On exit from the loop we will have an easy way of calculating
3576 : : NITERS_VECTOR / STEP * STEP. Install a dummy definition
3577 : : until then. */
3578 : 0 : niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
3579 : 0 : SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
3580 : 0 : *niters_vector_mult_vf_var = niters_vector_mult_vf;
3581 : : }
3582 : : else
3583 : 33009 : vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
3584 : : &niters_vector_mult_vf);
3585 : : /* Update IVs of original loop as if they were advanced by
3586 : : niters_vector_mult_vf steps. */
3587 : 33009 : gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
3588 : 33009 : if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3589 : 1482 : update_e = single_succ_edge (LOOP_VINFO_IV_EXIT (loop_vinfo)->dest);
3590 : :
3591 : : /* If we have a peeled vector iteration we will never skip the epilog loop
3592 : : and we can simplify the cfg a lot by not doing the edge split. */
3593 : 33009 : if (skip_epilog
3594 : 33009 : || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
3595 : 1482 : && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)))
3596 : : {
3597 : 25535 : guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
3598 : : niters, niters_vector_mult_vf);
3599 : :
3600 : 25535 : guard_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
3601 : 25535 : edge epilog_e = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo);
3602 : 25535 : guard_to = epilog_e->dest;
3603 : 26710 : guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
3604 : : skip_vector ? anchor : guard_bb,
3605 : : prob_epilog.invert (),
3606 : : irred_flag);
3607 : 25535 : doms.safe_push (guard_to);
3608 : 25535 : if (vect_epilogues)
3609 : 5577 : epilogue_vinfo->skip_this_loop_edge = guard_e;
3610 : 25535 : edge main_iv = LOOP_VINFO_IV_EXIT (loop_vinfo);
3611 : 25535 : gphi_iterator gsi2 = gsi_start_phis (main_iv->dest);
3612 : 25535 : for (gphi_iterator gsi = gsi_start_phis (guard_to);
3613 : 61777 : !gsi_end_p (gsi); gsi_next (&gsi))
3614 : : {
3615 : : /* We expect all of the PHIs we have on epilog_e
3616 : : to also be on the main loop exit. But sometimes
3617 : : a stray virtual definition can appear at epilog_e
3618 : : which we can then take as the same on all exits;
3619 : : we removed the LC SSA PHI on the main exit earlier
3620 : : so we don't need to create a loop PHI for it. */
3621 : 36242 : if (virtual_operand_p (gimple_phi_result (*gsi))
3622 : 36242 : && (gsi_end_p (gsi2)
3623 : 35922 : || !virtual_operand_p (gimple_phi_result (*gsi2))))
3624 : 182 : add_phi_arg (*gsi,
3625 : 91 : gimple_phi_arg_def_from_edge (*gsi, epilog_e),
3626 : : guard_e, UNKNOWN_LOCATION);
3627 : : else
3628 : : {
3629 : 36151 : add_phi_arg (*gsi, gimple_phi_result (*gsi2), guard_e,
3630 : : UNKNOWN_LOCATION);
3631 : 36151 : gsi_next (&gsi2);
3632 : : }
3633 : : }
3634 : :
3635 : : /* Only need to handle basic block before epilog loop if it's not
3636 : : the guard_bb, which is the case when skip_vector is true. */
3637 : 25535 : if (guard_bb != bb_before_epilog)
3638 : : {
3639 : 25532 : prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
3640 : :
3641 : 25532 : scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
3642 : : }
3643 : 25535 : scale_loop_profile (epilog, prob_epilog, -1);
3644 : : }
3645 : :
3646 : : /* Identify the right forward edge for the non-early-break case; this
3647 : : must be done after splitting the epilog edge. */
3648 : 33009 : if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3649 : 31527 : update_e = skip_vector ? e : loop_preheader_edge (epilog);
3650 : :
3651 : : /* If we have a peeled vector iteration all exits are the same, and so
3652 : : the main exit needs to be treated like the alternative exits: we
3653 : : leave their updates to vectorizable_live_operations. */
3655 : 33009 : tree vector_iters_vf = niters_vector_mult_vf;
3656 : 33009 : if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3657 : : {
3658 : 1482 : tree scal_iv_ty = signed_type_for (TREE_TYPE (vector_iters_vf));
3659 : 1482 : tree tmp_niters_vf = make_ssa_name (scal_iv_ty);
3660 : 1482 : basic_block exit_bb = NULL;
3661 : 1482 : edge update_e = NULL;
3662 : :
3663 : : /* Identify the early exit merge block. I wish we had stored this. */
3664 : 4446 : for (auto e : get_loop_exit_edges (loop))
3665 : 1482 : if (e != LOOP_VINFO_IV_EXIT (loop_vinfo))
3666 : : {
3667 : 1482 : exit_bb = e->dest;
3668 : 1482 : update_e = single_succ_edge (exit_bb);
3669 : 1482 : break;
3670 : 1482 : }
3671 : 1482 : vect_update_ivs_after_vectorizer (loop_vinfo, tmp_niters_vf,
3672 : : update_e, true);
3673 : :
3674 : 1482 : if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
3675 : 21 : vector_iters_vf = tmp_niters_vf;
3676 : :
3677 : 1482 : LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo) = tmp_niters_vf;
3678 : : }
3679 : :
3680 : 33009 : bool recalculate_peel_niters_init
3681 : 33009 : = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
3682 : 33009 : vect_update_ivs_after_vectorizer (loop_vinfo, vector_iters_vf,
3683 : : update_e,
3684 : : recalculate_peel_niters_init);
3685 : :
3686 : : /* Recalculate the dominators after adding the guard edge. */
3687 : 33009 : if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3688 : 1482 : iterate_fix_dominators (CDI_DOMINATORS, doms, false);
3689 : :
3690 : : /* When we do not have a loop-around edge to the epilog we know
3691 : : the vector loop covered at least VF scalar iterations unless
3692 : : we have early breaks.
3693 : : Update any known upper bound with this knowledge. */
3694 : 33009 : if (! skip_vector
3695 : 8470 : && ! LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3696 : : {
3697 : 7287 : if (epilog->any_upper_bound)
3698 : 7287 : epilog->nb_iterations_upper_bound -= lowest_vf;
3699 : 7287 : if (epilog->any_likely_upper_bound)
3700 : 7287 : epilog->nb_iterations_likely_upper_bound -= lowest_vf;
3701 : 7287 : if (epilog->any_estimate)
3702 : 7285 : epilog->nb_iterations_estimate -= lowest_vf;
3703 : : }
3704 : :
3705 : 33009 : unsigned HOST_WIDE_INT bound;
3706 : 33009 : if (bound_scalar.is_constant (&bound))
3707 : : {
3708 : 33009 : gcc_assert (bound != 0);
3709 : : /* Adjust the upper bound by the extra peeled vector iteration if we
3710 : : are an epilogue of a peeled vect loop and not VLA. For VLA the
3711 : : loop bounds are unknown. */
3712 : 33009 : if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
3713 : 33009 : && vf.is_constant ())
3714 : 21 : bound += vf.to_constant ();
3715 : : /* -1 to convert loop iterations to latch iterations. */
3716 : 33009 : record_niter_bound (epilog, bound - 1, false, true);
3717 : 33009 : scale_loop_profile (epilog, profile_probability::always (),
3718 : : bound - 1);
3719 : : }
3720 : :
3721 : 33009 : delete_update_ssa ();
3722 : 33009 : adjust_vec_debug_stmts ();
3723 : 33009 : scev_reset ();
3724 : 33009 : }
3725 : :
3726 : 33040 : if (vect_epilogues)
3727 : : {
3728 : 6819 : epilog->aux = epilogue_vinfo;
3729 : 6819 : LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
3730 : 6819 : LOOP_VINFO_IV_EXIT (epilogue_vinfo)
3731 : 6819 : = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo);
3732 : :
3733 : 6819 : loop_constraint_clear (epilog, LOOP_C_INFINITE);
3734 : :
3735 : : /* We must now calculate the number of iterations NITERS performed by
3736 : : the previous loop and the EPILOGUE_NITERS to be performed by the epilogue. */
3737 : 6819 : tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
3738 : : niters_prolog, niters_vector_mult_vf);
3739 : :
3740 : : /* If skip_vector we may skip the previous loop; we insert a phi-node to
3741 : : determine whether we are coming from the previous vectorized loop
3742 : : via the update_e edge or from the skip_vector basic block via the
3743 : : skip_e edge. */
3744 : 6819 : if (skip_vector)
3745 : : {
3746 : 5646 : gcc_assert (update_e != NULL && skip_e != NULL);
3747 : 5646 : gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
3748 : : update_e->dest);
3749 : 5646 : tree new_ssa = make_ssa_name (TREE_TYPE (niters));
3750 : 5646 : gimple *stmt = gimple_build_assign (new_ssa, niters);
3751 : 5646 : gimple_stmt_iterator gsi;
3752 : 5646 : if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
3753 : 5646 : && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
3754 : : {
3755 : 5646 : gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
3756 : 5646 : gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
3757 : : }
3758 : : else
3759 : : {
3760 : 0 : gsi = gsi_last_bb (update_e->src);
3761 : 0 : gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
3762 : : }
3763 : :
3764 : 5646 : niters = new_ssa;
3765 : 5646 : add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
3766 : 5646 : add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
3767 : : UNKNOWN_LOCATION);
3768 : 5646 : niters = PHI_RESULT (new_phi);
3769 : 5646 : epilogue_vinfo->main_loop_edge = update_e;
3770 : 5646 : epilogue_vinfo->skip_main_loop_edge = skip_e;
3771 : : }
3772 : :
3773 : : /* Set ADVANCE to the number of iterations performed by the previous
3774 : : loop and its prologue. */
3775 : 6819 : *advance = niters;
3776 : :
3777 : : /* Subtract the number of iterations performed by the vectorized loop
3778 : : from the number of total iterations. */
3779 : 6819 : tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
3780 : : before_loop_niters,
3781 : : niters);
3782 : :
3783 : 6819 : LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
3784 : 6819 : LOOP_VINFO_NITERSM1 (epilogue_vinfo)
3785 : 6819 : = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
3786 : : epilogue_niters,
3787 : : build_one_cst (TREE_TYPE (epilogue_niters)));
3788 : : }
3789 : :
3790 : 33040 : adjust_vec.release ();
3791 : 33040 : free_original_copy_tables ();
3792 : :
3793 : 33040 : return vect_epilogues ? epilog : NULL;
3794 : 33040 : }
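 : :
 : : /* An illustrative walk-through of a common case: with unknown NITERS,
 : : PROLOG_PEELING == 0 and PEELING_FOR_NITER, only the epilog is
 : : peeled. A guard "nitersm1 < niters_prolog + bound_epilog"
 : : (possibly maxed with the profitability threshold) skips the vector
 : : loop entirely, and a second guard "niters == niters_vector_mult_vf"
 : : skips the epilog when the vector loop consumed all iterations. */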
3795 : :
3796 : : /* Function vect_create_cond_for_niters_checks.
3797 : :
3798 : : Create a conditional expression that represents the run-time checks for
3799 : : loop's niter. The loop is guaranteed to terminate if the run-time
3800 : : checks hold.
3801 : :
3802 : : Input:
3803 : : COND_EXPR - input conditional expression. New conditions will be chained
3804 : : with logical AND operation.
3805 : : LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the
3806 : : assumptions on the loop's niters to be checked.
3808 : :
3809 : : Output:
3810 : : COND_EXPR - conditional expression.
3811 : :
3812 : : The returned COND_EXPR is the conditional expression to be used in the
3813 : : if statement that controls which version of the loop gets executed at
3814 : : runtime. */
3815 : :
3816 : : static void
3817 : 394 : vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
3818 : : {
3819 : 394 : tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
3820 : :
3821 : 394 : if (*cond_expr)
3822 : 394 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3823 : : *cond_expr, part_cond_expr);
3824 : : else
3825 : 0 : *cond_expr = part_cond_expr;
3826 : 394 : }
3827 : :
3828 : : /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
3829 : : and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
3830 : :
3831 : : static void
3832 : 334 : chain_cond_expr (tree *cond_expr, tree part_cond_expr)
3833 : : {
3834 : 334 : if (*cond_expr)
3835 : 334 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3836 : : *cond_expr, part_cond_expr);
3837 : : else
3838 : 0 : *cond_expr = part_cond_expr;
3839 : 334 : }
3840 : :
3841 : : /* Function vect_create_cond_for_align_checks.
3842 : :
3843 : : Create a conditional expression that represents the alignment checks for
3844 : : all of data references (array element references) whose alignment must be
3845 : : checked at runtime.
3846 : :
3847 : : Input:
3848 : : COND_EXPR - input conditional expression. New conditions will be chained
3849 : : with logical AND operation.
3850 : : LOOP_VINFO - three fields of the loop information are used.
3851 : : LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
3852 : : LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
3853 : : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT indicates which check applies.
3854 : :
3855 : : Output:
3856 : : COND_EXPR_STMT_LIST - statements needed to construct the conditional
3857 : : expression.
3858 : : The returned value is the conditional expression to be used in the if
3859 : : statement that controls which version of the loop gets executed at runtime.
3860 : :
3861 : : Based on the boolean value of LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT, we decide
3862 : : which type of check should be applied and create two different expressions
3863 : : accordingly.
3864 : : 1) When LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is false, we see if all data refs
3865 : : to be checked are already aligned to an alignment boundary. We create
3866 : : an expression of "(a_1 | a_2 | a_3 | ... | a_n) & mask", where "a_i" is
3867 : : the address of i'th data reference.
3868 : : 2) When LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true, we see if all data refs
3869 : : can be aligned to a boundary after a certain amount of peeling, in other
3870 : : words, their addresses have the same bottom bits according to the mask.
3871 : : We create "((a_1 ^ a_2) | (a_2 ^ a_3) | ... | (a_n-1 ^ a_n)) & mask",
3872 : : where "a_i" is the address of i'th data reference.
3873 : :
3874 : : Both algorithms make two assumptions:
3875 : : 1) The number of bytes "n" in a vector is a power of 2.
3876 : : 2) An address "a" is aligned if a%n is zero and this
3877 : : test can be done as a&(n-1) == 0. For example, for 16
3878 : : byte vectors the test is a&0xf == 0. */
3879 : :
3880 : : static void
3881 : 50 : vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
3882 : : tree *cond_expr,
3883 : : gimple_seq *cond_expr_stmt_list)
3884 : : {
3885 : 50 : const vec<stmt_vec_info> &may_misalign_stmts
3886 : : = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
3887 : 50 : stmt_vec_info stmt_info;
3888 : 50 : poly_uint64 mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
3889 : 50 : tree mask_cst;
3890 : 50 : unsigned int i;
3891 : 50 : tree int_ptrsize_type;
3892 : 50 : char tmp_name[30];
3893 : 50 : tree or_tmp_name = NULL_TREE;
3894 : 50 : tree prev_addr_tmp_name = NULL_TREE;
3895 : 50 : tree and_tmp_name;
3896 : 50 : gimple *and_stmt;
3897 : 50 : tree ptrsize_zero;
3898 : 50 : tree part_cond_expr;
3899 : :
3900 : 50 : gcc_assert (known_ne (mask, 0U));
3901 : :
3902 : 50 : int_ptrsize_type = signed_type_for (ptr_type_node);
3903 : :
3904 : : /* If LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true, we should have at least two
3905 : : datarefs to check the mutual alignment. */
3906 : 50 : gcc_assert (may_misalign_stmts.length () > 1
3907 : : || !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo));
3908 : :
3909 : 125 : FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
3910 : : {
3911 : 75 : gimple_seq new_stmt_list = NULL;
3912 : 75 : tree addr_base;
3913 : 75 : tree addr_tmp_name;
3914 : 75 : tree xor_tmp_name;
3915 : 75 : tree new_or_tmp_name;
3916 : 75 : gimple *addr_stmt, *or_stmt, *xor_stmt;
3917 : 75 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3918 : 75 : bool negative = tree_int_cst_compare
3919 : 75 : (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
3920 : 75 : tree offset = negative
3921 : 75 : ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
3922 : : * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
3923 : 75 : : size_zero_node;
3924 : :
3925 : : /* create: addr_tmp = (int)(address_of_first_vector) */
3926 : 75 : addr_base =
3927 : 75 : vect_create_addr_base_for_vector_ref (loop_vinfo,
3928 : : stmt_info, &new_stmt_list,
3929 : : offset);
3930 : 75 : if (new_stmt_list != NULL)
3931 : 18 : gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
3932 : :
3933 : 75 : sprintf (tmp_name, "addr2int%d", i);
3934 : 75 : addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3935 : 75 : addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
3936 : 75 : gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
3937 : :
3938 : 75 : if (LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo))
3939 : : {
3940 : : /* Create "((a_1 ^ a_2) | (a_2 ^ a_3) | ... | (a_n-1 ^ a_n)) & mask"
3941 : : to check mutual alignment. */
3942 : 48 : if (prev_addr_tmp_name != NULL_TREE)
3943 : : {
3944 : 24 : sprintf (tmp_name, "xorptrs%d_%d", i - 1, i);
3945 : 24 : xor_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
3946 : : tmp_name);
3947 : 24 : xor_stmt = gimple_build_assign (xor_tmp_name, BIT_XOR_EXPR,
3948 : : prev_addr_tmp_name,
3949 : : addr_tmp_name);
3950 : 24 : gimple_seq_add_stmt (cond_expr_stmt_list, xor_stmt);
3951 : 24 : if (or_tmp_name == NULL_TREE)
3952 : : {
3953 : : /* Create the 1st XOR when the 2nd data ref is seen. */
3954 : : or_tmp_name = xor_tmp_name;
3955 : : }
3956 : : else
3957 : : {
3958 : : /* Create: or_tmp = or_tmp | new_xor_tmp. */
3959 : 0 : sprintf (tmp_name, "orxors%d", i - 1);
3960 : 0 : new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
3961 : : tmp_name);
3962 : 0 : or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
3963 : : or_tmp_name, xor_tmp_name);
3964 : 0 : gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
3965 : 0 : or_tmp_name = new_or_tmp_name;
3966 : : }
3967 : : }
3968 : : prev_addr_tmp_name = addr_tmp_name;
3969 : : }
3970 : : else
3971 : : {
3972 : : /* Create: "(a_1 | a_2 | a_3 | ... | a_n) & mask" to check if all
3973 : : addresses are already aligned. */
3974 : 27 : if (or_tmp_name != NULL_TREE)
3975 : : {
3976 : : /* Create: or_tmp = or_tmp | addr_tmp. */
3977 : 1 : sprintf (tmp_name, "orptrs%d", i);
3978 : 1 : new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
3979 : : tmp_name);
3980 : 1 : or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
3981 : : or_tmp_name, addr_tmp_name);
3982 : 1 : gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
3983 : 1 : or_tmp_name = new_or_tmp_name;
3984 : : }
3985 : : else
3986 : : or_tmp_name = addr_tmp_name;
3987 : : }
3988 : :
3989 : : } /* end for i */
3990 : :
3991 : 50 : mask_cst = build_int_cst (int_ptrsize_type, mask);
3992 : :
3993 : : /* create: and_tmp = or_tmp & mask */
3994 : 50 : and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
3995 : :
3996 : 50 : and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
3997 : : or_tmp_name, mask_cst);
3998 : 50 : gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
3999 : :
4000 : : /* Make and_tmp the left operand of the conditional test against zero.
4001 : : If and_tmp has a nonzero bit then some address is unaligned. */
4002 : 50 : ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
4003 : 50 : part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
4004 : : and_tmp_name, ptrsize_zero);
4005 : 50 : chain_cond_expr (cond_expr, part_cond_expr);
4006 : 50 : }
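 : :
 : : /* A concrete instance with made-up names: for two data refs and
 : : 16-byte vectors (MASK = 0xf), the emitted sequence is roughly
 : :
 : : addr2int0 = (ptr-sized int) &a[i];
 : : addr2int1 = (ptr-sized int) &b[i];
 : : orptrs1 = addr2int0 | addr2int1;
 : : andmask = orptrs1 & 0xf;
 : :
 : : with versioning condition "andmask == 0"; the mutual-alignment
 : : variant ORs XORs of the addresses instead, testing
 : : "((addr2int0 ^ addr2int1) & 0xf) == 0". */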
4007 : :
4008 : : /* Function vect_create_cond_for_vla_spec_read.
4009 : :
4010 : : Create a conditional expression that represents the run-time checks with
4011 : : max speculative read amount in VLA modes. We check two things:
4012 : : 1) if the max speculative read amount exceeds the min page size
4013 : : 2) if the VF is power-of-2 - done by checking the max read amount instead
4014 : :
4015 : : Input:
4016 : : COND_EXPR - input conditional expression. New conditions will be chained
4017 : : with logical AND operation.
4018 : : LOOP_VINFO - field LOOP_VINFO_MAX_SPEC_READ_AMOUNT contains the max
4019 : : possible speculative read amount in VLA modes.
4020 : :
4021 : : Output:
4022 : : COND_EXPR - conditional expression.
4023 : :
4024 : : The returned COND_EXPR is the conditional expression to be used in the
4025 : : if statement that controls which version of the loop gets executed at
4026 : : runtime. */
4027 : :
4028 : : static void
4029 : 0 : vect_create_cond_for_vla_spec_read (loop_vec_info loop_vinfo, tree *cond_expr)
4030 : : {
4031 : 0 : poly_uint64 read_amount_poly = LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo);
4032 : 0 : tree amount = build_int_cst (long_unsigned_type_node, read_amount_poly);
4033 : :
4034 : : /* Both the read amount and the VF must be non-constant poly values, and
4035 : : the read amount must be a constant power-of-2 multiple of the VF. */
4036 : 0 : unsigned HOST_WIDE_INT multiple;
4037 : 0 : gcc_assert (!read_amount_poly.is_constant ()
4038 : : && !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
4039 : : && constant_multiple_p (read_amount_poly,
4040 : : LOOP_VINFO_VECT_FACTOR (loop_vinfo),
4041 : : &multiple)
4042 : : && pow2p_hwi (multiple));
4043 : :
4044 : : tree cst_ul_zero = build_int_cstu (long_unsigned_type_node, 0U);
4045 : : tree cst_ul_one = build_int_cstu (long_unsigned_type_node, 1U);
4046 : : tree cst_ul_pagesize = build_int_cstu (long_unsigned_type_node,
4047 : : (unsigned long) param_min_pagesize);
4048 : :
4049 : :   /* Create "(amount & (amount - 1)) == 0": true iff the (nonzero) AMOUNT is a power of two.  */
4050 : : tree amount_m1 = fold_build2 (MINUS_EXPR, long_unsigned_type_node,
4051 : : amount, cst_ul_one);
4052 : : tree amount_and_expr = fold_build2 (BIT_AND_EXPR, long_unsigned_type_node,
4053 : : amount, amount_m1);
4054 : : tree powof2_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
4055 : : amount_and_expr, cst_ul_zero);
4056 : : chain_cond_expr (cond_expr, powof2_cond_expr);
4057 : :
4058 : :   /* Create the expression "amount <= cst_ul_pagesize".  */
4059 : : tree pagesize_cond_expr = fold_build2 (LE_EXPR, boolean_type_node,
4060 : : amount, cst_ul_pagesize);
4061 : : chain_cond_expr (cond_expr, pagesize_cond_expr);
4062 : : }
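     : :
     : : /* Editor's sketch (an illustration under stated assumptions, not GCC
     : :    code): the runtime condition built above is equivalent to the scalar
     : :    check below, where AMOUNT stands for the max speculative read amount
     : :    and PAGESZ for param_min_pagesize.  */
     : :
     : : static inline bool
     : : vla_spec_read_check_sketch (unsigned long amount, unsigned long pagesz)
     : : {
     : :   /* A power of two has exactly one bit set, so clearing the lowest set
     : :      bit via amount & (amount - 1) must yield zero.  */
     : :   bool pow2_p = (amount & (amount - 1)) == 0;
     : :   /* The speculative read window must fit within the minimum page.  */
     : :   return pow2_p && amount <= pagesz;
     : : }
     : :
     : : /* E.g. amount == 64 with pagesz == 4096 passes; amount == 96 fails the
     : :    power-of-two test since 96 & 95 == 64.  */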
4063 : :
4064 : : /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
4065 : : create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
4066 : : Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
4067 : : and this new condition are true. Treat a null *COND_EXPR as "true". */
4068 : :
4069 : : static void
4070 : 3231 : vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
4071 : : {
4072 : 3231 : const vec<vec_object_pair> &pairs
4073 : : = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
4074 : 3231 : unsigned int i;
4075 : 3231 : vec_object_pair *pair;
4076 : 3239 : FOR_EACH_VEC_ELT (pairs, i, pair)
4077 : : {
4078 : 8 : tree addr1 = build_fold_addr_expr (pair->first);
4079 : 8 : tree addr2 = build_fold_addr_expr (pair->second);
4080 : 8 : tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
4081 : : addr1, addr2);
4082 : 8 : chain_cond_expr (cond_expr, part_cond_expr);
4083 : : }
4084 : 3231 : }
4085 : :
4086 : : /* Create an expression that is true when all lower-bound conditions for
4087 : : the vectorized loop are met. Chain this condition with *COND_EXPR. */
4088 : :
4089 : : static void
4090 : 3231 : vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
4091 : : {
4092 : 3231 : const vec<vec_lower_bound> &lower_bounds
4093 : : = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
4094 : 3507 : for (unsigned int i = 0; i < lower_bounds.length (); ++i)
4095 : : {
4096 : 276 : tree expr = lower_bounds[i].expr;
4097 : 276 : tree type = unsigned_type_for (TREE_TYPE (expr));
4098 : 276 : expr = fold_convert (type, expr);
4099 : 276 : poly_uint64 bound = lower_bounds[i].min_value;
4100 : 276 : if (!lower_bounds[i].unsigned_p)
4101 : : {
4102 : 107 : expr = fold_build2 (PLUS_EXPR, type, expr,
4103 : : build_int_cstu (type, bound - 1));
4104 : 107 : bound += bound - 1;
4105 : : }
4106 : 276 : tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
4107 : : build_int_cstu (type, bound));
4108 : 276 : chain_cond_expr (cond_expr, part_cond_expr);
4109 : : }
4110 : 3231 : }
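     : :
     : : /* Editor's worked example (illustrative; assumes a 32-bit signed EXPR):
     : :    for min_value BOUND == 4 the code above tests
     : :    (unsigned) EXPR + 3 >= 7, which by unsigned wrap-around holds exactly
     : :    when |EXPR| >= 4:
     : :      EXPR =  5 -> 8 >= 7                              (pass)
     : :      EXPR =  3 -> 6 <  7                              (fail)
     : :      EXPR = -2 -> 0xfffffffe + 3 == 1 <  7            (fail)
     : :      EXPR = -4 -> 0xfffffffc + 3 == 0xffffffff >= 7   (pass)  */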
4111 : :
4112 : : /* Function vect_create_cond_for_alias_checks.
4113 : :
4114 : : Create a conditional expression that represents the run-time checks for
4115 : : overlapping of address ranges represented by a list of data references
4116 : : relations passed as input.
4117 : :
4118 : : Input:
4119 : :    COND_EXPR  - input conditional expression.  New conditions will be chained
4120 : :                 with a logical AND operation.  A null *COND_EXPR is
4121 : :                 treated as "true", as in the helpers above.
4122 : : LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
4123 : : to be checked.
4124 : :
4125 : : Output:
4126 : : COND_EXPR - conditional expression.
4127 : :
4128 : : The returned COND_EXPR is the conditional expression to be used in the if
4129 : : statement that controls which version of the loop gets executed at runtime.
4130 : : */
4131 : :
4132 : : void
4133 : 3231 : vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
4134 : : {
4135 : 3231 : const vec<dr_with_seg_len_pair_t> &comp_alias_ddrs =
4136 : : LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
4137 : :
4138 : 3231 : if (comp_alias_ddrs.is_empty ())
4139 : : return;
4140 : :
4141 : 3088 : create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
4142 : : &comp_alias_ddrs, cond_expr);
4143 : 3088 : if (dump_enabled_p ())
4144 : 1262 : dump_printf_loc (MSG_NOTE, vect_location,
4145 : : "created %u versioning for alias checks.\n",
4146 : : comp_alias_ddrs.length ());
4147 : : }
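     : :
     : : /* Editor's sketch (hedged; the actual checks are produced by
     : :    create_runtime_alias_checks and may be merged or simplified): for a
     : :    single pair of data references with start addresses A and B and
     : :    segment lengths SEG_A and SEG_B, the no-overlap condition is
     : :    conceptually the function below; uintptr_t and size_t are assumed
     : :    to be in scope via system.h.  */
     : :
     : : static inline bool
     : : segments_do_not_overlap_sketch (uintptr_t a, size_t seg_a,
     : :                                 uintptr_t b, size_t seg_b)
     : : {
     : :   /* Vectorization is safe if one segment ends before the other
     : :      begins.  */
     : :   return a + seg_a <= b || b + seg_b <= a;
     : : }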
4148 : :
4149 : :
4150 : : /* Function vect_loop_versioning.
4151 : :
4152 : :    If the loop has data references that may or may not be aligned and/or
4153 : :    has data reference relations whose independence was not proven, then
4154 : :    two versions of the loop need to be generated, one which is vectorized
4155 : :    and one which isn't.  A test is then generated to control which of the
4156 : :    loops is executed.  The test checks for the alignment of all of the
4157 : :    data references that may or may not be aligned.  An additional
4158 : :    sequence of runtime tests is generated for each pair of DDRs whose
4159 : :    independence was not proven.  The vectorized version of the loop is
4160 : :    executed only if both the alias and alignment tests pass.
4161 : :
4162 : :    The test generated to check which version of the loop is executed
4163 : :    is modified to also check for profitability as indicated by the
4164 : :    cost model threshold TH.
4165 : :
4166 : : The versioning precondition(s) are placed in *COND_EXPR and
4167 : : *COND_EXPR_STMT_LIST. */
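     : :
     : : /* Editor's sketch (illustrative only): versioning
     : :
     : :      for (i = 0; i < n; i++)
     : :        a[i] = b[i] + c[i];
     : :
     : :    under the runtime preconditions conceptually produces
     : :
     : :      if (cond)                    // alias/alignment/niters/cost checks
     : :        for (i = 0; i < n; i++)    // this copy gets vectorized
     : :          a[i] = b[i] + c[i];
     : :      else
     : :        for (i = 0; i < n; i++)    // scalar fallback version
     : :          a[i] = b[i] + c[i];  */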
4168 : :
4169 : : class loop *
4170 : 3704 : vect_loop_versioning (loop_vec_info loop_vinfo,
4171 : : gimple *loop_vectorized_call)
4172 : : {
4173 : 3704 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
4174 : 3704 : class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
4175 : 3704 : basic_block condition_bb;
4176 : 3704 : gphi_iterator gsi;
4177 : 3704 : gimple_stmt_iterator cond_exp_gsi;
4178 : 3704 : basic_block merge_bb;
4179 : 3704 : basic_block new_exit_bb;
4180 : 3704 : edge new_exit_e, e;
4181 : 3704 : gphi *orig_phi, *new_phi;
4182 : 3704 : tree cond_expr = NULL_TREE;
4183 : 3704 : gimple_seq cond_expr_stmt_list = NULL;
4184 : 3704 : tree arg;
4185 : 3704 : profile_probability prob = profile_probability::likely ();
4186 : 3704 : gimple_seq gimplify_stmt_list = NULL;
4187 : 3704 : tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
4188 : 3704 : bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
4189 : 3704 : bool version_spec_read = LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ (loop_vinfo);
4190 : 3704 : bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
4191 : 3704 : bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
4192 : 3704 : poly_uint64 versioning_threshold
4193 : : = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
4194 : 3704 : tree version_simd_if_cond
4195 : : = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
4196 : 3704 : unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
4197 : :
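     : :   /* Note (editor's addition): SCALAR_LOOP_ITERS is LOOP_VINFO_NITERSM1,
     : :      i.e. the iteration count minus one, so comparing it against TH - 1
     : :      below implements the check "niters >= TH".  */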
4198 : 3704 : if (vect_apply_runtime_profitability_check_p (loop_vinfo)
4199 : : && !ordered_p (th, versioning_threshold))
4200 : : cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
4201 : : build_int_cst (TREE_TYPE (scalar_loop_iters),
4202 : : th - 1));
4203 : 3704 : if (maybe_ne (versioning_threshold, 0U))
4204 : : {
4205 : 3696 : tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
4206 : : build_int_cst (TREE_TYPE (scalar_loop_iters),
4207 : : versioning_threshold - 1));
4208 : 3696 : if (cond_expr)
4209 : 0 : cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
4210 : : expr, cond_expr);
4211 : : else
4212 : 3696 : cond_expr = expr;
4213 : : }
4214 : :
4215 : 3704 : tree cost_name = NULL_TREE;
4216 : 3704 : profile_probability prob2 = profile_probability::always ();
4217 : 3704 : if (cond_expr
4218 : 3696 : && EXPR_P (cond_expr)
4219 : 2554 : && (version_niter
4220 : 2554 : || version_align
4221 : : || version_alias
4222 : 2198 : || version_simd_if_cond))
4223 : : {
4224 : 2554 : cost_name = cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
4225 : : &cond_expr_stmt_list,
4226 : : is_gimple_val, NULL_TREE);
4227 : :       /* Split PROB into two so that the overall probability of passing
4228 : :          both the cost-model and versioning checks equals the original PROB.  */
4229 : 2554 : prob2 = prob = prob.sqrt ();
4230 : : }
4231 : :
4232 : 3704 : if (version_niter)
4233 : 394 : vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
4234 : :
4235 : 3704 : if (cond_expr)
4236 : : {
4237 : 3696 : gimple_seq tem = NULL;
4238 : 3696 : cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
4239 : : &tem, is_gimple_condexpr_for_cond,
4240 : : NULL_TREE);
4241 : 3696 : gimple_seq_add_seq (&cond_expr_stmt_list, tem);
4242 : : }
4243 : :
4244 : 3704 : if (version_align)
4245 : 50 : vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
4246 : : &cond_expr_stmt_list);
4247 : :
4248 : 3704 : if (version_spec_read)
4249 : 0 : vect_create_cond_for_vla_spec_read (loop_vinfo, &cond_expr);
4250 : :
4251 : 3704 : if (version_alias)
4252 : : {
4253 : 3231 : vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
4254 : 3231 : vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
4255 : 3231 : vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
4256 : : }
4257 : :
4258 : 3704 : if (version_simd_if_cond)
4259 : : {
4260 : 58 : gcc_assert (dom_info_available_p (CDI_DOMINATORS));
4261 : 58 : if (flag_checking)
4262 : 58 : if (basic_block bb
4263 : 58 : = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
4264 : 58 : gcc_assert (bb != loop->header
4265 : : && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
4266 : : && (scalar_loop == NULL
4267 : : || (bb != scalar_loop->header
4268 : : && dominated_by_p (CDI_DOMINATORS,
4269 : : scalar_loop->header, bb))));
4270 : 58 : tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
4271 : 58 : tree c = fold_build2 (NE_EXPR, boolean_type_node,
4272 : : version_simd_if_cond, zero);
4273 : 58 : if (cond_expr)
4274 : 58 : cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
4275 : : c, cond_expr);
4276 : : else
4277 : 0 : cond_expr = c;
4278 : 58 : if (dump_enabled_p ())
4279 : 5 : dump_printf_loc (MSG_NOTE, vect_location,
4280 : : "created versioning for simd if condition check.\n");
4281 : : }
4282 : :
4283 : 3704 : cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
4284 : : &gimplify_stmt_list,
4285 : : is_gimple_condexpr_for_cond, NULL_TREE);
4286 : 3704 : gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
4287 : :
4288 : :   /* Compute the outermost loop that cond_expr and cond_expr_stmt_list
4289 : :      are invariant in.  */
4290 : 3704 : class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
4291 : 3704 : for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
4292 : 67317 : !gsi_end_p (gsi); gsi_next (&gsi))
4293 : : {
4294 : 63613 : gimple *stmt = gsi_stmt (gsi);
4295 : 63613 : update_stmt (stmt);
4296 : 63613 : ssa_op_iter iter;
4297 : 63613 : use_operand_p use_p;
4298 : 63613 : basic_block def_bb;
4299 : 146333 : FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
4300 : 82720 : if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
4301 : 82720 : && flow_bb_inside_loop_p (outermost, def_bb))
4302 : 2498 : outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
4303 : : }
4304 : :
4305 : : /* Search for the outermost loop we can version. Avoid versioning of
4306 : : non-perfect nests but allow if-conversion versioned loops inside. */
4307 : 3704 : class loop *loop_to_version = loop;
4308 : 3704 : if (flow_loop_nested_p (outermost, loop))
4309 : : {
4310 : 1444 : if (dump_enabled_p ())
4311 : 649 : dump_printf_loc (MSG_NOTE, vect_location,
4312 : : "trying to apply versioning to outer loop %d\n",
4313 : : outermost->num);
4314 : 1444 : if (outermost->num == 0)
4315 : 1340 : outermost = superloop_at_depth (loop, 1);
4316 : : /* And avoid applying versioning on non-perfect nests. */
4317 : : while (loop_to_version != outermost
4318 : 129 : && (e = single_exit (loop_outer (loop_to_version)))
4319 : 108 : && !(e->flags & EDGE_COMPLEX)
4320 : 107 : && (!loop_outer (loop_to_version)->inner->next
4321 : 61 : || vect_loop_vectorized_call (loop_to_version))
4322 : 1542 : && (!loop_outer (loop_to_version)->inner->next
4323 : 3 : || !loop_outer (loop_to_version)->inner->next->next))
4324 : : loop_to_version = loop_outer (loop_to_version);
4325 : : }
4326 : :
4327 : :   /* Apply versioning.  If there is already a scalar version created by
4328 : :      if-conversion, re-use that.  Note we cannot re-use the copy of
4329 : :      an if-converted outer loop when vectorizing the inner loop only.  */
4330 : 3704 : gcond *cond;
4331 : 3704 : if ((!loop_to_version->inner || loop == loop_to_version)
4332 : 3667 : && loop_vectorized_call)
4333 : : {
4334 : 89 : gcc_assert (scalar_loop);
4335 : 89 : condition_bb = gimple_bb (loop_vectorized_call);
4336 : 178 : cond = as_a <gcond *> (*gsi_last_bb (condition_bb));
4337 : 89 : gimple_cond_set_condition_from_tree (cond, cond_expr);
4338 : 89 : update_stmt (cond);
4339 : :
4340 : 89 : if (cond_expr_stmt_list)
4341 : : {
4342 : 89 : cond_exp_gsi = gsi_for_stmt (loop_vectorized_call);
4343 : 89 : gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
4344 : : GSI_SAME_STMT);
4345 : : }
4346 : :
4347 : :       /* If-conversion uses profile_probability::always () for both paths,
4348 : :          so reset the probabilities of both paths appropriately.  */
4349 : 89 : edge te, fe;
4350 : 89 : extract_true_false_edges_from_block (condition_bb, &te, &fe);
4351 : 89 : te->probability = prob;
4352 : 89 : fe->probability = prob.invert ();
4353 : :       /* We can scale loop counts immediately but have to postpone
4354 : :          scaling the scalar loop because we re-use it during peeling.
4355 : :
4356 : :          If-conversion duplicates the loop preheader and loop body and produces
4357 : :          a basic block after the loop exit.  We need to scale all of that.  */
4358 : 89 : basic_block preheader = loop_preheader_edge (loop_to_version)->src;
4359 : 89 : preheader->count = preheader->count.apply_probability (prob * prob2);
4360 : 89 : scale_loop_frequencies (loop_to_version, prob * prob2);
4361 : :       /* When the loop has multiple exits then we can only version the loop itself.
4362 : : This is denoted by loop_to_version == loop. In this case we can
4363 : : do the versioning by selecting the exit edge the vectorizer is
4364 : : currently using. */
4365 : 89 : edge exit_edge;
4366 : 89 : if (loop_to_version == loop)
4367 : 89 : exit_edge = LOOP_VINFO_IV_EXIT (loop_vinfo);
4368 : : else
4369 : 0 : exit_edge = single_exit (loop_to_version);
4370 : 89 : exit_edge->dest->count = preheader->count;
4371 : 89 : LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = (prob * prob2).invert ();
4372 : :
4373 : 89 : nloop = scalar_loop;
4374 : 89 : if (dump_enabled_p ())
4375 : 92 : dump_printf_loc (MSG_NOTE, vect_location,
4376 : : "reusing %sloop version created by if conversion\n",
4377 : : loop_to_version != loop ? "outer " : "");
4378 : 89 : }
4379 : : else
4380 : : {
4381 : 3615 : if (loop_to_version != loop
4382 : 3615 : && dump_enabled_p ())
4383 : 11 : dump_printf_loc (MSG_NOTE, vect_location,
4384 : : "applying loop versioning to outer loop %d\n",
4385 : : loop_to_version->num);
4386 : :
4387 : 3615 : unsigned orig_pe_idx = loop_preheader_edge (loop)->dest_idx;
4388 : :
4389 : 3615 : initialize_original_copy_tables ();
4390 : 7230 : nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
4391 : 3615 : prob * prob2, (prob * prob2).invert (),
4392 : 3615 : prob * prob2, (prob * prob2).invert (),
4393 : : true);
4394 : :
4395 : : /* If the PHI nodes in the loop header were reallocated, we need to fix up
4396 : : our internally stashed copies of those. */
4397 : 3615 : if (loop_to_version == loop)
4398 : 3578 : for (auto gsi = gsi_start_phis (loop->header);
4399 : 14160 : !gsi_end_p (gsi); gsi_next (&gsi))
4400 : 10582 : loop_vinfo->resync_stmt_addr (gsi.phi ());
4401 : :
4402 : :       /* We will later insert a second conditional so the overall outcome
4403 : :          of both is prob * prob2.  */
4404 : 3615 : edge true_e, false_e;
4405 : 3615 : extract_true_false_edges_from_block (condition_bb, &true_e, &false_e);
4406 : 3615 : true_e->probability = prob;
4407 : 3615 : false_e->probability = prob.invert ();
4408 : 3615 : gcc_assert (nloop);
4409 : 3615 : nloop = get_loop_copy (loop);
4410 : :
4411 : :       /* For cycle vectorization with SLP we rely on the PHI arguments
4412 : :          appearing in the same order as the SLP node operands, which for the
4413 : :          loop PHI nodes means the preheader edge dest index needs to remain
4414 : :          the same for the analyzed loop, which also becomes the vectorized one.
4415 : :          Make it so in case the state after versioning differs, by redirecting
4416 : :          the first edge into the header to the same destination, which moves
4417 : :          it last.  */
4418 : 3615 : if (loop_preheader_edge (loop)->dest_idx != orig_pe_idx)
4419 : : {
4420 : 280 : edge e = EDGE_PRED (loop->header, 0);
4421 : 280 : ssa_redirect_edge (e, e->dest);
4422 : 280 : flush_pending_stmts (e);
4423 : : }
4424 : 3615 : gcc_assert (loop_preheader_edge (loop)->dest_idx == orig_pe_idx);
4425 : :
4426 : :       /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy; nobody will
4427 : :          reap those otherwise, and they also refer to the original
4428 : :          loops.  */
4429 : : class loop *l = loop;
4430 : 3618 : while (gimple *call = vect_loop_vectorized_call (l))
4431 : : {
4432 : 3 : call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
4433 : 3 : fold_loop_internal_call (call, boolean_false_node);
4434 : 3 : l = loop_outer (l);
4435 : 3 : }
4436 : 3615 : free_original_copy_tables ();
4437 : :
4438 : 3615 : if (cond_expr_stmt_list)
4439 : : {
4440 : 3535 : cond_exp_gsi = gsi_last_bb (condition_bb);
4441 : 3535 : gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
4442 : : GSI_SAME_STMT);
4443 : : }
4444 : :
4445 : : /* Loop versioning violates an assumption we try to maintain during
4446 : : vectorization - that the loop exit block has a single predecessor.
4447 : : After versioning, the exit block of both loop versions is the same
4448 : :          basic block (i.e. it has two predecessors).  Just to simplify the
4449 : :          following transformations in the vectorizer, we fix this situation
4450 : :          here by adding a new (empty) block on the exit-edge of the loop,
4451 : :          with the proper loop-exit phis to maintain loop-closed-form.
4452 : :          If loop versioning wasn't done from loop, but from scalar_loop
4453 : :          instead, merge_bb will already have just a single predecessor.  */
4454 : :
4455 : :       /* When the loop has multiple exits then we can only version the loop itself.
4456 : : This is denoted by loop_to_version == loop. In this case we can
4457 : : do the versioning by selecting the exit edge the vectorizer is
4458 : : currently using. */
4459 : 3615 : edge exit_edge;
4460 : 3615 : if (loop_to_version == loop)
4461 : 3578 : exit_edge = LOOP_VINFO_IV_EXIT (loop_vinfo);
4462 : : else
4463 : 37 : exit_edge = single_exit (loop_to_version);
4464 : :
4465 : 3615 : gcc_assert (exit_edge);
4466 : 3615 : merge_bb = exit_edge->dest;
4467 : 3615 : if (EDGE_COUNT (merge_bb->preds) >= 2)
4468 : : {
4469 : 3615 : gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
4470 : 3615 : new_exit_bb = split_edge (exit_edge);
4471 : 3615 : new_exit_e = exit_edge;
4472 : 3615 : e = EDGE_SUCC (new_exit_bb, 0);
4473 : :
4474 : 7456 : for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
4475 : 3841 : gsi_next (&gsi))
4476 : : {
4477 : 3841 : tree new_res;
4478 : 3841 : orig_phi = gsi.phi ();
4479 : 3841 : new_res = copy_ssa_name (PHI_RESULT (orig_phi));
4480 : 3841 : new_phi = create_phi_node (new_res, new_exit_bb);
4481 : 3841 : arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
4482 : 3841 : add_phi_arg (new_phi, arg, new_exit_e,
4483 : : gimple_phi_arg_location_from_edge (orig_phi, e));
4484 : 3841 : adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
4485 : : }
4486 : : }
4487 : :
4488 : 3615 : update_ssa (TODO_update_ssa_no_phi);
4489 : : }
4490 : :
4491 : : /* Split the cost model check off to a separate BB. Costing assumes
4492 : : this is the only thing we perform when we enter the scalar loop
4493 : : from a failed cost decision. */
4494 : 3704 : if (cost_name && TREE_CODE (cost_name) == SSA_NAME)
4495 : : {
4496 : 2554 : gimple *def = SSA_NAME_DEF_STMT (cost_name);
4497 : 2554 : gcc_assert (gimple_bb (def) == condition_bb);
4498 : : /* All uses of the cost check are 'true' after the check we
4499 : : are going to insert. */
4500 : 2554 : replace_uses_by (cost_name, boolean_true_node);
4501 : : /* And we're going to build the new single use of it. */
4502 : 2554 : gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node,
4503 : : NULL_TREE, NULL_TREE);
4504 : 2554 : edge e = split_block (gimple_bb (def), def);
4505 : 2554 : gimple_stmt_iterator gsi = gsi_for_stmt (def);
4506 : 2554 : gsi_insert_after (&gsi, cond, GSI_NEW_STMT);
4507 : 2554 : edge true_e, false_e;
4508 : 2554 : extract_true_false_edges_from_block (e->dest, &true_e, &false_e);
4509 : 2554 : e->flags &= ~EDGE_FALLTHRU;
4510 : 2554 : e->flags |= EDGE_TRUE_VALUE;
4511 : 2554 : edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE);
4512 : 2554 : e->probability = prob2;
4513 : 2554 : e2->probability = prob2.invert ();
4514 : 2554 : e->dest->count = e->count ();
4515 : 2554 : set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src);
4516 : 2554 : auto_vec<basic_block, 3> adj;
4517 : 2554 : for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest);
4518 : 7594 : son;
4519 : 5040 : son = next_dom_son (CDI_DOMINATORS, son))
4520 : 7526 : if (EDGE_COUNT (son->preds) > 1)
4521 : 2486 : adj.safe_push (son);
4522 : 10148 : for (auto son : adj)
4523 : 2486 : set_immediate_dominator (CDI_DOMINATORS, son, e->src);
4526 : 2554 : }
4527 : :
4528 : 3704 : if (version_niter)
4529 : : {
4530 : :       /* The versioned loop could be infinite, so we need to clear the
4531 : :          existing niter information that was copied from the original loop.  */
4532 : 394 : gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
4533 : 394 : vect_free_loop_info_assumptions (nloop);
4534 : : }
4535 : :
4536 : 3704 : if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
4537 : 3704 : && dump_enabled_p ())
4538 : : {
4539 : 791 : if (version_alias)
4540 : 731 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
4541 : : vect_location,
4542 : : "loop versioned for vectorization because of "
4543 : : "possible aliasing\n");
4544 : 791 : if (version_align)
4545 : 20 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
4546 : : vect_location,
4547 : : "loop versioned for vectorization to enhance "
4548 : : "alignment\n");
4549 : :
4550 : : }
4551 : :
4552 : 3704 : return nloop;
4553 : : }
|