LCOV - code coverage report
Current view: top level - gcc - tree-ssa-loop-ivcanon.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 97.6 % 745 727
Test Date: 2026-02-28 14:20:25 Functions: 100.0 % 24 24
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Induction variable canonicalization and loop peeling.
       2              :    Copyright (C) 2004-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it
       7              : under the terms of the GNU General Public License as published by the
       8              : Free Software Foundation; either version 3, or (at your option) any
       9              : later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT
      12              : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : /* This pass detects the loops that iterate a constant number of times,
      21              :    adds a canonical induction variable (step -1, tested against 0)
      22              :    and replaces the exit test.  This enables the less powerful rtl
      23              :    level analysis to use this information.
      24              : 
      25              :    This might spoil the code in some cases (by increasing register pressure).
      26              :    Note that in the case the new variable is not needed, ivopts will get rid
      27              :    of it, so it might only be a problem when there are no other linear induction
      28              :    variables.  In that case the created optimization possibilities are likely
      29              :    to pay up.
      30              : 
      31              :    We also perform
      32              :      - complete unrolling (or peeling) when the loops is rolling few enough
      33              :        times
      34              :      - simple peeling (i.e. copying few initial iterations prior the loop)
      35              :        when number of iteration estimate is known (typically by the profile
      36              :        info).  */
      37              : 
      38              : #include "config.h"
      39              : #include "system.h"
      40              : #include "coretypes.h"
      41              : #include "backend.h"
      42              : #include "tree.h"
      43              : #include "gimple.h"
      44              : #include "cfghooks.h"
      45              : #include "tree-pass.h"
      46              : #include "ssa.h"
      47              : #include "cgraph.h"
      48              : #include "gimple-pretty-print.h"
      49              : #include "fold-const.h"
      50              : #include "profile.h"
      51              : #include "gimple-iterator.h"
      52              : #include "gimple-fold.h"
      53              : #include "tree-eh.h"
      54              : #include "tree-cfg.h"
      55              : #include "tree-ssa-loop-manip.h"
      56              : #include "tree-ssa-loop-niter.h"
      57              : #include "tree-ssa-loop.h"
      58              : #include "tree-into-ssa.h"
      59              : #include "cfgloop.h"
      60              : #include "tree-chrec.h"
      61              : #include "tree-scalar-evolution.h"
      62              : #include "tree-inline.h"
      63              : #include "tree-cfgcleanup.h"
      64              : #include "builtins.h"
      65              : #include "tree-ssa-sccvn.h"
      66              : #include "tree-vectorizer.h" /* For find_loop_location */
      67              : #include "dbgcnt.h"
      68              : #include "hierarchical_discriminator.h"
      69              : 
      70              : /* Specifies types of loops that may be unrolled.  */
      71              : 
      72              : enum unroll_level
      73              : {
      74              :   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
      75              :                            iteration.  */
      76              :   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
      77              :                            of code size.  */
      78              :   UL_ALL                /* All suitable loops.  */
      79              : };
      80              : 
      81              : /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
      82              :    is the exit edge whose condition is replaced.  The ssa versions of the new
      83              :    IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
      84              :    if they are not NULL.  */
      85              : 
      86              : void
      87       314391 : create_canonical_iv (class loop *loop, edge exit, tree niter,
      88              :                      tree *var_before = NULL, tree *var_after = NULL)
      89              : {
      90       314391 :   edge in;
      91       314391 :   tree type, var;
      92       314391 :   gcond *cond;
      93       314391 :   gimple_stmt_iterator incr_at;
      94       314391 :   enum tree_code cmp;
      95              : 
      96       314391 :   if (dump_file && (dump_flags & TDF_DETAILS))
      97              :     {
      98           41 :       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
      99           41 :       print_generic_expr (dump_file, niter, TDF_SLIM);
     100           41 :       fprintf (dump_file, " iterations.\n");
     101              :     }
     102              : 
     103       628782 :   cond = as_a <gcond *> (*gsi_last_bb (exit->src));
     104       314391 :   in = EDGE_SUCC (exit->src, 0);
     105       314391 :   if (in == exit)
     106        89280 :     in = EDGE_SUCC (exit->src, 1);
     107              : 
     108              :   /* Note that we do not need to worry about overflows, since
     109              :      type of niter is always unsigned and all comparisons are
     110              :      just for equality/nonequality -- i.e. everything works
     111              :      with a modulo arithmetics.  */
     112              : 
     113       314391 :   type = TREE_TYPE (niter);
     114       314391 :   niter = fold_build2 (PLUS_EXPR, type,
     115              :                        niter,
     116              :                        build_int_cst (type, 1));
     117       314391 :   incr_at = gsi_last_bb (in->src);
     118       314391 :   create_iv (niter, PLUS_EXPR,
     119              :              build_int_cst (type, -1),
     120              :              NULL_TREE, loop,
     121              :              &incr_at, false, var_before, &var);
     122       314391 :   if (var_after)
     123           96 :     *var_after = var;
     124              : 
     125       314391 :   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
     126       314391 :   gimple_cond_set_code (cond, cmp);
     127       314391 :   gimple_cond_set_lhs (cond, var);
     128       314391 :   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
     129       314391 :   update_stmt (cond);
     130       314391 : }
     131              : 
     132              : /* Describe size of loop as detected by tree_estimate_loop_size.  */
     133              : struct loop_size
     134              : {
     135              :   /* Number of instructions in the loop.  */
     136              :   int overall;
     137              : 
     138              :   /* Number of instructions that will be likely optimized out in
     139              :      peeled iterations of loop  (i.e. computation based on induction
     140              :      variable where induction variable starts at known constant.)  */
     141              :   int eliminated_by_peeling;
     142              : 
     143              :   /* Number of instructions that cannot be further optimized in the
     144              :      peeled loop, for example volatile accesses.  */
     145              :   int not_eliminatable_after_peeling;
     146              : 
     147              :   /* Same statistics for last iteration of loop: it is smaller because
     148              :      instructions after exit are not executed.  */
     149              :   int last_iteration;
     150              :   int last_iteration_eliminated_by_peeling;
     151              :   int last_iteration_not_eliminatable_after_peeling;
     152              : 
     153              :   /* If some IV computation will become constant.  */
     154              :   bool constant_iv;
     155              : 
     156              :   /* Number of call stmts that are not a builtin and are pure or const
     157              :      present on the hot path.  */
     158              :   int num_pure_calls_on_hot_path;
     159              :   /* Number of call stmts that are not a builtin and are not pure nor const
     160              :      present on the hot path.  */
     161              :   int num_non_pure_calls_on_hot_path;
     162              :   /* Number of statements other than calls in the loop.  */
     163              :   int non_call_stmts_on_hot_path;
     164              :   /* Number of branches seen on the hot path.  */
     165              :   int num_branches_on_hot_path;
     166              : };
     167              : 
     168              : /* Return true if OP in STMT will be constant after peeling LOOP.  */
     169              : 
     170              : static bool
     171     13128482 : constant_after_peeling (tree op, gimple *stmt, class loop *loop)
     172              : {
     173     13128482 :   if (CONSTANT_CLASS_P (op))
     174              :     return true;
     175              : 
     176              :   /* Get at the actual SSA operand.  */
     177     12335340 :   if (handled_component_p (op)
     178      1347561 :       && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
     179        30237 :     op = TREE_OPERAND (op, 0);
     180              : 
     181              :   /* We can still fold accesses to constant arrays when index is known.  */
     182     12335340 :   if (TREE_CODE (op) != SSA_NAME)
     183              :     {
     184              :       tree base = op;
     185              : 
     186              :       /* First make fast look if we see constant array inside.  */
     187      4076918 :       while (handled_component_p (base))
     188      2105291 :         base = TREE_OPERAND (base, 0);
     189      1971627 :       if ((DECL_P (base)
     190       902031 :            && ctor_for_folding (base) != error_mark_node)
     191      2814398 :           || CONSTANT_CLASS_P (base))
     192              :         {
     193              :           /* If so, see if we understand all the indices.  */
     194              :           base = op;
     195      3037443 :           while (handled_component_p (base))
     196              :             {
     197        66591 :               if (TREE_CODE (base) == ARRAY_REF
     198        66591 :                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
     199              :                 return false;
     200        53242 :               base = TREE_OPERAND (base, 0);
     201              :             }
     202              :           return true;
     203              :         }
     204              :       return false;
     205              :     }
     206              : 
     207              :   /* Induction variables are constants when defined in loop.  */
     208     20727426 :   if (loop_containing_stmt (stmt) != loop)
     209              :     return false;
     210      7655753 :   tree ev = analyze_scalar_evolution (loop, op);
     211      7655753 :   if (chrec_contains_undetermined (ev)
     212      7655753 :       || chrec_contains_symbols (ev))
     213              :     {
     214      5536566 :       if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
     215              :         {
     216      4078263 :           gassign *ass = nullptr;
     217      4078263 :           gphi *phi = nullptr;
     218      4078263 :           if (is_a <gassign *> (SSA_NAME_DEF_STMT (op)))
     219              :             {
     220      3668301 :               ass = as_a <gassign *> (SSA_NAME_DEF_STMT (op));
     221      3668301 :               if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
     222      2313307 :                 phi = dyn_cast <gphi *>
     223      2313307 :                         (SSA_NAME_DEF_STMT (gimple_assign_rhs1  (ass)));
     224              :             }
     225       409962 :           else if (is_a <gphi *> (SSA_NAME_DEF_STMT (op)))
     226              :             {
     227       321457 :               phi = as_a <gphi *> (SSA_NAME_DEF_STMT (op));
     228       321457 :               if (gimple_bb (phi) == loop->header)
     229              :                 {
     230       208361 :                   tree def = gimple_phi_arg_def_from_edge
     231       208361 :                     (phi, loop_latch_edge (loop));
     232       208361 :                   if (TREE_CODE (def) == SSA_NAME
     233       208361 :                       && is_a <gassign *> (SSA_NAME_DEF_STMT (def)))
     234       147808 :                     ass = as_a <gassign *> (SSA_NAME_DEF_STMT (def));
     235              :                 }
     236              :             }
     237      4078263 :           if (ass && phi)
     238              :             {
     239       560679 :               tree rhs1 = gimple_assign_rhs1 (ass);
     240       560679 :               if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
     241       452397 :                   && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
     242       335684 :                   && rhs1 == gimple_phi_result (phi)
     243       334776 :                   && gimple_bb (phi) == loop->header
     244       258792 :                   && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
     245       258792 :                       == gimple_assign_lhs (ass))
     246       760818 :                   && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
     247              :                                          (phi, loop_preheader_edge (loop)))))
     248              :                 return true;
     249              :             }
     250              :         }
     251      5523969 :       return false;
     252              :     }
     253              :   return true;
     254              : }
     255              : 
     256              : /* Computes an estimated number of insns in LOOP.
     257              :    EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
     258              :    iteration of the loop.
     259              :    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
     260              :    of loop.
     261              :    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
     262              :    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
     263              : 
     264              : static bool
     265       499040 : tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
     266              :                          struct loop_size *size, int upper_bound)
     267              : {
     268       499040 :   basic_block *body = get_loop_body (loop);
     269       499040 :   gimple_stmt_iterator gsi;
     270       499040 :   unsigned int i;
     271       499040 :   bool after_exit;
     272       499040 :   auto_vec<basic_block> path = get_loop_hot_path (loop);
     273              : 
     274       499040 :   size->overall = 0;
     275       499040 :   size->eliminated_by_peeling = 0;
     276       499040 :   size->not_eliminatable_after_peeling = 0;
     277       499040 :   size->last_iteration = 0;
     278       499040 :   size->last_iteration_eliminated_by_peeling = 0;
     279       499040 :   size->last_iteration_not_eliminatable_after_peeling = 0;
     280       499040 :   size->num_pure_calls_on_hot_path = 0;
     281       499040 :   size->num_non_pure_calls_on_hot_path = 0;
     282       499040 :   size->non_call_stmts_on_hot_path = 0;
     283       499040 :   size->num_branches_on_hot_path = 0;
     284       499040 :   size->constant_iv = 0;
     285              : 
     286       499040 :   if (dump_file && (dump_flags & TDF_DETAILS))
     287           65 :     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
     288      2836023 :   for (i = 0; i < loop->num_nodes; i++)
     289              :     {
     290      2233322 :       if (edge_to_cancel && body[i] != edge_to_cancel->src
     291      4098205 :           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
     292              :         after_exit = true;
     293              :       else
     294              :         after_exit = false;
     295      2343508 :       if (dump_file && (dump_flags & TDF_DETAILS))
     296          192 :         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
     297              :                  after_exit);
     298              : 
     299     14567234 :       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
     300              :         {
     301      9886743 :           gimple *stmt = gsi_stmt (gsi);
     302      9886743 :           int num = estimate_num_insns (stmt, &eni_size_weights);
     303      9886743 :           bool not_eliminatable_after_peeling = false;
     304      9886743 :           bool likely_eliminated = false;
     305      9886743 :           bool likely_eliminated_last = false;
     306      9886743 :           bool likely_eliminated_peeled = false;
     307              : 
     308      9886743 :           if (dump_file && (dump_flags & TDF_DETAILS))
     309              :             {
     310          977 :               fprintf (dump_file, "  size: %3i ", num);
     311          977 :               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
     312              :             }
     313              : 
     314              :           /* Look for reasons why we might optimize this stmt away. */
     315              : 
     316      9886743 :           if (gimple_has_side_effects (stmt))
     317              :             not_eliminatable_after_peeling = true;
     318              :           else
     319              :             {
     320              :               /* Exit conditional.  */
     321      7783475 :               if (exit && body[i] == exit->src
     322     13246006 :                   && stmt == *gsi_last_bb (exit->src))
     323              :                 {
     324       411107 :                   if (dump_file && (dump_flags & TDF_DETAILS))
     325           32 :                     fprintf (dump_file, "   Exit condition will be eliminated "
     326              :                              "in peeled copies.\n");
     327              :                   likely_eliminated_peeled = true;
     328              :                 }
     329      9199919 :               if (edge_to_cancel && body[i] == edge_to_cancel->src
     330     14780186 :                   && stmt == *gsi_last_bb (edge_to_cancel->src))
     331              :                 {
     332       478440 :                   if (dump_file && (dump_flags & TDF_DETAILS))
     333           59 :                     fprintf (dump_file, "   Exit condition will be eliminated "
     334              :                              "in last copy.\n");
     335              :                   likely_eliminated_last = true;
     336              :                 }
     337              :               /* Sets of IV variables  */
     338      9623418 :               if (gimple_code (stmt) == GIMPLE_ASSIGN
     339      9623418 :                   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
     340              :                 {
     341      1152729 :                   if (dump_file && (dump_flags & TDF_DETAILS))
     342          105 :                     fprintf (dump_file, "   Induction variable computation will"
     343              :                              " be folded away.\n");
     344              :                   likely_eliminated = true;
     345              :                 }
     346              :               /* Assignments of IV variables.  */
     347      8470689 :               else if (gimple_code (stmt) == GIMPLE_ASSIGN
     348      4737256 :                        && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
     349      3999407 :                        && constant_after_peeling (gimple_assign_rhs1 (stmt),
     350              :                                                   stmt, loop)
     351       182451 :                        && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
     352        94273 :                            || constant_after_peeling (gimple_assign_rhs2 (stmt),
     353              :                                                       stmt, loop))
     354      8571504 :                        && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
     355              :                 {
     356       100260 :                   size->constant_iv = true;
     357       100260 :                   if (dump_file && (dump_flags & TDF_DETAILS))
     358           13 :                     fprintf (dump_file,
     359              :                              "   Constant expression will be folded away.\n");
     360              :                   likely_eliminated = true;
     361              :                 }
     362              :               /* Conditionals.  */
     363      8370429 :               else if ((gimple_code (stmt) == GIMPLE_COND
     364      1261824 :                         && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
     365              :                                                    loop)
     366       431498 :                         && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
     367              :                                                    loop)
     368              :                         /* We don't simplify all constant compares so make sure
     369              :                            they are not both constant already.  See PR70288.  */
     370       407664 :                         && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
     371          183 :                             || ! is_gimple_min_invariant
     372          183 :                                  (gimple_cond_rhs (stmt))))
     373      9224772 :                        || (gimple_code (stmt) == GIMPLE_SWITCH
     374         1354 :                            && constant_after_peeling (gimple_switch_index (
     375              :                                                         as_a <gswitch *>
     376         1354 :                                                           (stmt)),
     377              :                                                       stmt, loop)
     378          290 :                            && ! is_gimple_min_invariant
     379          290 :                                    (gimple_switch_index
     380          290 :                                       (as_a <gswitch *> (stmt)))))
     381              :                 {
     382       407771 :                   if (dump_file && (dump_flags & TDF_DETAILS))
     383           29 :                     fprintf (dump_file, "   Constant conditional.\n");
     384              :                   likely_eliminated = true;
     385              :                 }
     386              :             }
     387              : 
     388      9886743 :           size->overall += num;
     389      9886743 :           if (likely_eliminated || likely_eliminated_peeled)
     390      1678937 :             size->eliminated_by_peeling += num;
     391      9886743 :           if (not_eliminatable_after_peeling)
     392       263325 :             size->not_eliminatable_after_peeling += num;
     393      9886743 :           if (!after_exit)
     394              :             {
     395      6268988 :               size->last_iteration += num;
     396      6268988 :               if (likely_eliminated || likely_eliminated_last)
     397      1235094 :                 size->last_iteration_eliminated_by_peeling += num;
     398      6268988 :               if (not_eliminatable_after_peeling)
     399       136187 :                 size->last_iteration_not_eliminatable_after_peeling += num;
     400              :             }
     401      9886743 :           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
     402      9886743 :               - size->last_iteration_eliminated_by_peeling) > upper_bound)
     403              :             {
     404         6525 :               free (body);
     405         6525 :               return true;
     406              :             }
     407              :         }
     408              :     }
     409      2207903 :   while (path.length ())
     410              :     {
     411      1715388 :       basic_block bb = path.pop ();
     412     11083063 :       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     413              :         {
     414      7652287 :           gimple *stmt = gsi_stmt (gsi);
     415      7652287 :           if (gimple_code (stmt) == GIMPLE_CALL
     416      7652287 :               && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
     417              :             {
     418       143427 :               int flags = gimple_call_flags (stmt);
     419       143427 :               if (flags & (ECF_PURE | ECF_CONST))
     420        28076 :                 size->num_pure_calls_on_hot_path++;
     421              :               else
     422       115351 :                 size->num_non_pure_calls_on_hot_path++;
     423       143427 :               size->num_branches_on_hot_path ++;
     424              :             }
     425              :           /* Count inexpensive calls as non-calls, because they will likely
     426              :              expand inline.  */
     427      7508860 :           else if (gimple_code (stmt) != GIMPLE_DEBUG)
     428      5810871 :             size->non_call_stmts_on_hot_path++;
     429      7652287 :           if (((gimple_code (stmt) == GIMPLE_COND
     430      1009053 :                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
     431       379936 :                     || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
     432              :                                                 loop)))
     433      6999781 :                || (gimple_code (stmt) == GIMPLE_SWITCH
     434          899 :                    && !constant_after_peeling (gimple_switch_index (
     435          899 :                                                  as_a <gswitch *> (stmt)),
     436              :                                                stmt, loop)))
     437      8305496 :               && (!exit || bb != exit->src))
     438       635827 :             size->num_branches_on_hot_path++;
     439              :         }
     440              :     }
     441              : 
     442       492515 :   if (dump_file && (dump_flags & TDF_DETAILS))
     443           65 :     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
     444              :              size->eliminated_by_peeling, size->last_iteration,
     445              :              size->last_iteration_eliminated_by_peeling);
     446              : 
     447       492515 :   free (body);
     448       492515 :   return false;
     449       499040 : }
     450              : 
     451              : /* Estimate number of insns of completely unrolled loop.
     452              :    It is (NUNROLL + 1) * size of loop body with taking into account
     453              :    the fact that in last copy everything after exit conditional
     454              :    is dead and that some instructions will be eliminated after
     455              :    peeling.  Set *EST_ELIMINATED to the number of stmts that could be
     456              :    optimistically eliminated by followup transforms.  */
     457              : static unsigned HOST_WIDE_INT
     458       489637 : estimated_unrolled_size (struct loop_size *size,
     459              :                          unsigned HOST_WIDE_INT *est_eliminated,
     460              :                          unsigned HOST_WIDE_INT nunroll)
     461              : {
     462       489637 :   HOST_WIDE_INT unr_insns = ((nunroll)
     463       489637 :                              * (HOST_WIDE_INT) (size->overall
     464       489637 :                                                 - size->eliminated_by_peeling));
     465       489637 :   HOST_WIDE_INT not_elim
     466       489637 :     = ((nunroll) * (HOST_WIDE_INT) size->not_eliminatable_after_peeling);
     467       489637 :   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
     468       489637 :   not_elim += size->last_iteration_not_eliminatable_after_peeling;
     469              : 
     470              :   /* Testcases rely on rounding up, so do not write as
     471              :      (unr_insns - not_elim) / 3.  */
     472       489637 :   *est_eliminated = unr_insns - not_elim - (unr_insns - not_elim) * 2 / 3;
     473       489637 :   return unr_insns;
     474              : }
     475              : 
     476              : /* Loop LOOP is known to not loop.  See if there is an edge in the loop
     477              :    body that can be remove to make the loop to always exit and at
     478              :    the same time it does not make any code potentially executed
     479              :    during the last iteration dead.
     480              : 
     481              :    After complete unrolling we still may get rid of the conditional
     482              :    on the exit in the last copy even if we have no idea what it does.
     483              :    This is quite common case for loops of form
     484              : 
     485              :      int a[5];
     486              :      for (i=0;i<b;i++)
     487              :        a[i]=0;
     488              : 
     489              :    Here we prove the loop to iterate 5 times but we do not know
     490              :    it from induction variable.
     491              : 
     492              :    For now we handle only simple case where there is exit condition
     493              :    just before the latch block and the latch block contains no statements
     494              :    with side effect that may otherwise terminate the execution of loop
     495              :    (such as by EH or by terminating the program or longjmp).
     496              : 
     497              :    In the general case we may want to cancel the paths leading to statements
     498              :    loop-niter identified as having undefined effect in the last iteration.
     499              :    The other cases are hopefully rare and will be cleaned up later.  */
     500              : 
     501              : static edge
     502        97190 : loop_edge_to_cancel (class loop *loop)
     503              : {
     504        97190 :   unsigned i;
     505        97190 :   edge edge_to_cancel;
     506        97190 :   gimple_stmt_iterator gsi;
     507              : 
     508              :   /* We want only one predecestor of the loop.  */
     509        97190 :   if (EDGE_COUNT (loop->latch->preds) > 1)
     510              :     return NULL;
     511              : 
     512        82087 :   auto_vec<edge> exits = get_loop_exit_edges (loop);
     513              : 
     514       256849 :   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
     515              :     {
     516              :        /* Find the other edge than the loop exit
     517              :           leaving the conditoinal.  */
     518        90724 :        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
     519           80 :          continue;
     520        90644 :        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
     521        23346 :          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
     522              :        else
     523              :          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
     524              : 
     525              :       /* We only can handle conditionals.  */
     526        90644 :       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
     527          378 :         continue;
     528              : 
     529              :       /* We should never have conditionals in the loop latch. */
     530        90266 :       gcc_assert (edge_to_cancel->dest != loop->header);
     531              : 
     532              :       /* Check that it leads to loop latch.  */
     533        90266 :       if (edge_to_cancel->dest != loop->latch)
     534        10130 :         continue;
     535              : 
     536              :       /* Verify that the code in loop latch does nothing that may end program
     537              :          execution without really reaching the exit.  This may include
     538              :          non-pure/const function calls, EH statements, volatile ASMs etc.  */
     539       286492 :       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
     540       126831 :         if (gimple_has_side_effects (gsi_stmt (gsi)))
     541              :            return NULL;
     542              :       return edge_to_cancel;
     543              :     }
     544              :   return NULL;
     545        82087 : }
     546              : 
     547              : /* Remove all tests for exits that are known to be taken after LOOP was
     548              :    peeled NPEELED times. Put gcc_unreachable before every statement
     549              :    known to not be executed.  */
     550              : 
     551              : static bool
     552       136914 : remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
     553              : {
     554       136914 :   class nb_iter_bound *elt;
     555       136914 :   bool changed = false;
     556              : 
     557       413246 :   for (elt = loop->bounds; elt; elt = elt->next)
     558              :     {
     559              :       /* If statement is known to be undefined after peeling, turn it
     560              :          into unreachable (or trap when debugging experience is supposed
     561              :          to be good).  */
     562       276332 :       if (!elt->is_exit
     563       276332 :           && wi::ltu_p (elt->bound, npeeled))
     564              :         {
     565        37204 :           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
     566        37204 :           location_t loc = gimple_location (elt->stmt);
     567        37204 :           gcall *stmt = gimple_build_builtin_unreachable (loc);
     568        37204 :           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
     569        37204 :           split_block (gimple_bb (stmt), stmt);
     570        37204 :           changed = true;
     571        37204 :           if (dump_file && (dump_flags & TDF_DETAILS))
     572              :             {
     573            4 :               fprintf (dump_file, "Forced statement unreachable: ");
     574            4 :               print_gimple_stmt (dump_file, elt->stmt, 0);
     575              :             }
     576              :         }
     577              :       /* If we know the exit will be taken after peeling, update.  */
     578       239128 :       else if (elt->is_exit
     579       239128 :                && wi::leu_p (elt->bound, npeeled))
     580              :         {
     581       104809 :           basic_block bb = gimple_bb (elt->stmt);
     582       104809 :           edge exit_edge = EDGE_SUCC (bb, 0);
     583              : 
     584       104809 :           if (dump_file && (dump_flags & TDF_DETAILS))
     585              :             {
     586           77 :               fprintf (dump_file, "Forced exit to be taken: ");
     587           77 :               print_gimple_stmt (dump_file, elt->stmt, 0);
     588              :             }
     589       104809 :           if (!loop_exit_edge_p (loop, exit_edge))
     590        38967 :             exit_edge = EDGE_SUCC (bb, 1);
     591       104809 :           exit_edge->probability = profile_probability::always ();
     592       104809 :           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
     593       104809 :           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
     594       104809 :           if (exit_edge->flags & EDGE_TRUE_VALUE)
     595        64114 :             gimple_cond_make_true (cond_stmt);
     596              :           else
     597        40695 :             gimple_cond_make_false (cond_stmt);
     598       104809 :           update_stmt (cond_stmt);
     599       104809 :           changed = true;
     600              :         }
     601              :     }
     602       136914 :   return changed;
     603              : }
     604              : 
     605              : /* Remove all exits that are known to be never taken because of the loop bound
     606              :    discovered.  */
     607              : 
     608              : static bool
     609      2255234 : remove_redundant_iv_tests (class loop *loop)
     610              : {
     611      2255234 :   class nb_iter_bound *elt;
     612      2255234 :   bool changed = false;
     613              : 
     614      2255234 :   if (!loop->any_upper_bound)
     615              :     return false;
     616      5403390 :   for (elt = loop->bounds; elt; elt = elt->next)
     617              :     {
     618              :       /* Exit is pointless if it won't be taken before loop reaches
     619              :          upper bound.  */
     620      1629010 :       if (elt->is_exit && loop->any_upper_bound
     621      5335232 :           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
     622              :         {
     623       310852 :           basic_block bb = gimple_bb (elt->stmt);
     624       310852 :           edge exit_edge = EDGE_SUCC (bb, 0);
     625       310852 :           class tree_niter_desc niter;
     626              : 
     627       310852 :           if (!loop_exit_edge_p (loop, exit_edge))
     628       224454 :             exit_edge = EDGE_SUCC (bb, 1);
     629              : 
     630              :           /* Only when we know the actual number of iterations, not
     631              :              just a bound, we can remove the exit.  */
     632       310852 :           if (!number_of_iterations_exit (loop, exit_edge,
     633              :                                           &niter, false, false)
     634       310707 :               || !integer_onep (niter.assumptions)
     635       310707 :               || !integer_zerop (niter.may_be_zero)
     636       203285 :               || !niter.niter
     637       203285 :               || TREE_CODE (niter.niter) != INTEGER_CST
     638       316770 :               || !wi::ltu_p (widest_int::from (loop->nb_iterations_upper_bound,
     639              :                                                SIGNED),
     640       313811 :                              wi::to_widest (niter.niter)))
     641       307893 :             continue;
     642              : 
     643         2959 :           if (dump_file && (dump_flags & TDF_DETAILS))
     644              :             {
     645            2 :               fprintf (dump_file, "Removed pointless exit: ");
     646            2 :               print_gimple_stmt (dump_file, elt->stmt, 0);
     647              :             }
     648         2959 :           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
     649         2959 :           if (exit_edge->flags & EDGE_TRUE_VALUE)
     650          855 :             gimple_cond_make_false (cond_stmt);
     651              :           else
     652         2104 :             gimple_cond_make_true (cond_stmt);
     653         2959 :           update_stmt (cond_stmt);
     654         2959 :           changed = true;
     655       310852 :         }
     656              :     }
     657              :   return changed;
     658              : }
     659              : 
     660              : /* Stores loops that will be unlooped and edges that will be removed
     661              :    after we process whole loop tree. */
     662              : static vec<loop_p> loops_to_unloop;
     663              : static vec<int> loops_to_unloop_nunroll;
     664              : static vec<edge> edges_to_remove;
     665              : /* Stores loops that has been peeled.  */
     666              : static bitmap peeled_loops;
     667              : 
     668              : /* Cancel all fully unrolled loops by putting __builtin_unreachable
     669              :    on the latch edge.
     670              :    We do it after all unrolling since unlooping moves basic blocks
     671              :    across loop boundaries trashing loop closed SSA form as well
     672              :    as SCEV info needed to be intact during unrolling.
     673              : 
     674              :    IRRED_INVALIDATED is used to bookkeep if information about
     675              :    irreducible regions may become invalid as a result
     676              :    of the transformation.
     677              :    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
     678              :    when we need to go into loop closed SSA form.  */
     679              : 
     680              : void
     681       293497 : unloop_loops (vec<class loop *> &loops_to_unloop,
     682              :               vec<int> &loops_to_unloop_nunroll,
     683              :               vec<edge> &edges_to_remove,
     684              :               bitmap loop_closed_ssa_invalidated,
     685              :               bool *irred_invalidated)
     686              : {
     687       430411 :   while (loops_to_unloop.length ())
     688              :     {
     689       136914 :       class loop *loop = loops_to_unloop.pop ();
     690       136914 :       int n_unroll = loops_to_unloop_nunroll.pop ();
     691       136914 :       basic_block latch = loop->latch;
     692       136914 :       edge latch_edge = loop_latch_edge (loop);
     693       136914 :       int flags = latch_edge->flags;
     694       136914 :       location_t locus = latch_edge->goto_locus;
     695       136914 :       gcall *stmt;
     696       136914 :       gimple_stmt_iterator gsi;
     697              : 
     698       136914 :       remove_exits_and_undefined_stmts (loop, n_unroll);
     699              : 
     700              :       /* Unloop destroys the latch edge.  */
     701       136914 :       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
     702              : 
     703              :       /* Create new basic block for the latch edge destination and wire
     704              :          it in.  */
     705       136914 :       stmt = gimple_build_builtin_unreachable (locus);
     706       136914 :       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
     707       136914 :       latch_edge->probability = profile_probability::never ();
     708       136914 :       latch_edge->flags |= flags;
     709       136914 :       latch_edge->goto_locus = locus;
     710              : 
     711       136914 :       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
     712       136914 :       latch_edge->dest->count = profile_count::zero ();
     713       136914 :       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
     714              : 
     715       136914 :       gsi = gsi_start_bb (latch_edge->dest);
     716       136914 :       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
     717              :     }
     718              : 
     719              :   /* Remove edges in peeled copies.  Given remove_path removes dominated
     720              :      regions we need to cope with removal of already removed paths.  */
     721       293497 :   unsigned i;
     722       293497 :   edge e;
     723       293497 :   auto_vec<int, 20> src_bbs;
     724       327194 :   src_bbs.reserve_exact (edges_to_remove.length ());
     725       876730 :   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
     726       289736 :     src_bbs.quick_push (e->src->index);
     727       583233 :   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
     728       289736 :     if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
     729              :       {
     730       289736 :         bool ok = remove_path (e, irred_invalidated,
     731              :                                loop_closed_ssa_invalidated);
     732       289736 :         gcc_assert (ok);
     733              :       }
     734       293497 :   edges_to_remove.release ();
     735       293497 : }
     736              : 
     737              : /* Tries to unroll LOOP completely, i.e. NITER times.
     738              :    UL determines which loops we are allowed to unroll.
     739              :    EXIT is the exit of the loop that should be eliminated.
     740              :    MAXITER specfy bound on number of iterations, -1 if it is
     741              :    not known or too large for HOST_WIDE_INT.  The location
     742              :    LOCUS corresponding to the loop is used when emitting
     743              :    a summary of the unroll to the dump file.  */
     744              : 
     745              : static bool
     746      2255234 : try_unroll_loop_completely (class loop *loop,
     747              :                             edge exit, tree niter, bool may_be_zero,
     748              :                             enum unroll_level ul,
     749              :                             HOST_WIDE_INT maxiter,
     750              :                             dump_user_location_t locus, bool allow_peel,
     751              :                             bool cunrolli)
     752              : {
     753      2255234 :   unsigned HOST_WIDE_INT n_unroll = 0;
     754      2255234 :   bool n_unroll_found = false;
     755      2255234 :   edge edge_to_cancel = NULL;
     756              : 
     757              :   /* See if we proved number of iterations to be low constant.
     758              : 
     759              :      EXIT is an edge that will be removed in all but last iteration of
     760              :      the loop.
     761              : 
     762              :      EDGE_TO_CACNEL is an edge that will be removed from the last iteration
     763              :      of the unrolled sequence and is expected to make the final loop not
     764              :      rolling.
     765              : 
     766              :      If the number of execution of loop is determined by standard induction
     767              :      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
     768              :      from the iv test.  */
     769      2255234 :   if (tree_fits_uhwi_p (niter))
     770              :     {
     771      1046894 :       n_unroll = tree_to_uhwi (niter);
     772      1046894 :       n_unroll_found = true;
     773      1046894 :       edge_to_cancel = EDGE_SUCC (exit->src, 0);
     774      1046894 :       if (edge_to_cancel == exit)
     775       343781 :         edge_to_cancel = EDGE_SUCC (exit->src, 1);
     776              :     }
     777              :   /* We do not know the number of iterations and thus we cannot eliminate
     778              :      the EXIT edge.  */
     779              :   else
     780              :     exit = NULL;
     781              : 
     782              :   /* See if we can improve our estimate by using recorded loop bounds.  */
     783      2255234 :   if ((maxiter == 0 || ul != UL_SINGLE_ITER)
     784      1575082 :       && maxiter >= 0
     785      1139819 :       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
     786              :     {
     787       405048 :       n_unroll = maxiter;
     788       405048 :       n_unroll_found = true;
     789              :       /* Loop terminates before the IV variable test, so we cannot
     790              :          remove it in the last iteration.  */
     791       405048 :       edge_to_cancel = NULL;
     792              :       /* If we do not allow peeling and we iterate just allow cases
     793              :          that do not grow code.  */
     794       405048 :       if (!allow_peel && maxiter != 0)
     795              :         ul = UL_NO_GROWTH;
     796              :     }
     797              : 
     798      1850186 :   if (!n_unroll_found)
     799              :     return false;
     800              : 
     801      1451728 :   if (!loop->unroll
     802      1449451 :       && n_unroll > (unsigned) param_max_completely_peel_times)
     803              :     {
     804       769451 :       if (dump_file && (dump_flags & TDF_DETAILS))
     805           33 :         fprintf (dump_file, "Not unrolling loop %d "
     806              :                  "(--param max-completely-peel-times limit reached).\n",
     807              :                  loop->num);
     808       769451 :       return false;
     809              :     }
     810              : 
     811       682277 :   if (!edge_to_cancel)
     812        97190 :     edge_to_cancel = loop_edge_to_cancel (loop);
     813              : 
     814       682277 :   if (n_unroll)
     815              :     {
     816       653699 :       if (ul == UL_SINGLE_ITER)
     817      2255234 :         return false;
     818              : 
     819       497960 :       if (loop->unroll)
     820              :         {
     821              :           /* If the unrolling factor is too large, bail out.  */
     822         1800 :           if (n_unroll > (unsigned)loop->unroll)
     823              :             {
     824         1265 :               if (dump_file && (dump_flags & TDF_DETAILS))
     825           44 :                 fprintf (dump_file,
     826              :                          "Not unrolling loop %d: "
     827              :                          "user didn't want it unrolled completely.\n",
     828              :                          loop->num);
     829         1265 :               return false;
     830              :             }
     831              :         }
     832              :       else
     833              :         {
     834       496160 :           struct loop_size size;
     835              :           /* EXIT can be removed only if we are sure it passes first N_UNROLL
     836              :              iterations.  */
     837       496160 :           bool remove_exit = (exit && niter
     838       411241 :                               && TREE_CODE (niter) == INTEGER_CST
     839       907401 :                               && wi::leu_p (n_unroll, wi::to_widest (niter)));
     840       496160 :           bool large
     841              :             = tree_estimate_loop_size
     842       496160 :                 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
     843              :                  param_max_completely_peeled_insns);
     844       496160 :           if (large)
     845              :             {
     846         6523 :               if (dump_file && (dump_flags & TDF_DETAILS))
     847            0 :                 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
     848              :                          loop->num);
     849       391206 :               return false;
     850              :             }
     851              : 
     852       489637 :           unsigned HOST_WIDE_INT ninsns = size.overall;
     853       489637 :           unsigned HOST_WIDE_INT est_eliminated;
     854       489637 :           unsigned HOST_WIDE_INT unr_insns
     855       489637 :             = estimated_unrolled_size (&size, &est_eliminated, n_unroll);
     856       489637 :           if (dump_file && (dump_flags & TDF_DETAILS))
     857              :             {
     858           62 :               fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
     859           62 :               fprintf (dump_file, "  Estimated size after unrolling: %d-%d\n",
     860              :                        (int) unr_insns, (int) est_eliminated);
     861              :             }
     862              : 
     863              :           /* If the code is going to shrink, we don't need to be extra
     864              :              cautious on guessing if the unrolling is going to be
     865              :              profitable.
     866              :              Move from estimated_unrolled_size to unroll small loops.  */
     867       489637 :           if (unr_insns - est_eliminated
     868              :               /* If there is IV variable that will become constant, we
     869              :                  save one instruction in the loop prologue we do not
     870              :                  account otherwise.  */
     871       489637 :               <= ninsns + (size.constant_iv != false))
     872              :             ;
     873              :           /* We unroll only inner loops, because we do not consider it
     874              :              profitable otheriwse.  We still can cancel loopback edge
     875              :              of not rolling loop; this is always a good idea.  */
     876       413985 :           else if (ul == UL_NO_GROWTH)
     877              :             {
     878       365302 :               if (dump_file && (dump_flags & TDF_DETAILS))
     879           24 :                 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
     880              :                          loop->num);
     881       365302 :               return false;
     882              :             }
     883              :           /* Outer loops tend to be less interesting candidates for
     884              :              complete unrolling unless we can do a lot of propagation
     885              :              into the inner loop body.  For now we disable outer loop
     886              :              unrolling when the code would grow.  */
     887        48683 :           else if (loop->inner)
     888              :             {
     889         4960 :               if (dump_file && (dump_flags & TDF_DETAILS))
     890            0 :                 fprintf (dump_file, "Not unrolling loop %d: "
     891              :                          "it is not innermost and code would grow.\n",
     892              :                          loop->num);
     893         4960 :               return false;
     894              :             }
     895              :           /* If there is call on a hot path through the loop, then
     896              :              there is most probably not much to optimize.  */
     897        43723 :           else if (size.num_non_pure_calls_on_hot_path)
     898              :             {
     899        10314 :               if (dump_file && (dump_flags & TDF_DETAILS))
     900            1 :                 fprintf (dump_file, "Not unrolling loop %d: "
     901              :                          "contains call and code would grow.\n",
     902              :                          loop->num);
     903        10314 :               return false;
     904              :             }
     905              :           /* If there is pure/const call in the function, then we can
     906              :              still optimize the unrolled loop body if it contains some
     907              :              other interesting code than the calls and code storing or
     908              :              cumulating the return value.  */
     909        33409 :           else if (size.num_pure_calls_on_hot_path
     910              :                    /* One IV increment, one test, one ivtmp store and
     911              :                       one useful stmt.  That is about minimal loop
     912              :                       doing pure call.  */
     913         2387 :                    && (size.non_call_stmts_on_hot_path
     914         2387 :                        <= 3 + size.num_pure_calls_on_hot_path))
     915              :             {
     916           30 :               if (dump_file && (dump_flags & TDF_DETAILS))
     917            0 :                 fprintf (dump_file, "Not unrolling loop %d: "
     918              :                          "contains just pure calls and code would grow.\n",
     919              :                          loop->num);
     920           30 :               return false;
     921              :             }
     922              :           /* Complete unrolling is major win when control flow is
     923              :              removed and one big basic block is created.  If the loop
     924              :              contains control flow the optimization may still be a win
     925              :              because of eliminating the loop overhead but it also may
     926              :              blow the branch predictor tables.  Limit number of
     927              :              branches on the hot path through the peeled sequence.  */
     928        33379 :           else if (size.num_branches_on_hot_path * (int)n_unroll
     929        33379 :                    > param_max_peel_branches)
     930              :             {
     931         2441 :               if (dump_file && (dump_flags & TDF_DETAILS))
     932            0 :                 fprintf (dump_file, "Not unrolling loop %d: "
     933              :                          "number of branches on hot path in the unrolled "
     934              :                          "sequence reaches --param max-peel-branches limit.\n",
     935              :                          loop->num);
     936         2441 :               return false;
     937              :             }
     938              :           /* Move 2 / 3 reduction from estimated_unrolled_size, but don't reduce
     939              :              unrolled size for innermost loop.
     940              :              1) It could increase register pressure.
     941              :              2) Big loop after completely unroll may not be vectorized
     942              :              by BB vectorizer.  */
     943        30938 :           else if ((cunrolli ? unr_insns : unr_insns - est_eliminated)
     944        30938 :                    > (unsigned) param_max_completely_peeled_insns)
     945              :             {
     946         1636 :               if (dump_file && (dump_flags & TDF_DETAILS))
     947            1 :                 fprintf (dump_file, "Not unrolling loop %d: "
     948              :                          "number of insns in the unrolled sequence reaches "
     949              :                          "--param max-completely-peeled-insns limit.\n",
     950              :                          loop->num);
     951         1636 :               return false;
     952              :             }
     953              :         }
     954              : 
     955       105489 :       if (!dbg_cnt (gimple_unroll))
     956              :         return false;
     957              : 
     958       105489 :       initialize_original_copy_tables ();
     959       105489 :       auto_sbitmap wont_exit (n_unroll + 1);
     960       105489 :       if (exit && niter
     961        96901 :           && TREE_CODE (niter) == INTEGER_CST
     962       202390 :           && wi::leu_p (n_unroll, wi::to_widest (niter)))
     963              :         {
     964        96901 :           bitmap_ones (wont_exit);
     965       193802 :           if (wi::eq_p (wi::to_widest (niter), n_unroll)
     966        96901 :               || edge_to_cancel)
     967        96896 :             bitmap_clear_bit (wont_exit, 0);
     968              :         }
     969              :       else
     970              :         {
     971         8588 :           exit = NULL;
     972         8588 :           bitmap_clear (wont_exit);
     973              :         }
     974       105489 :       if (may_be_zero)
     975            2 :         bitmap_clear_bit (wont_exit, 1);
     976              : 
     977              :       /* If loop was originally estimated to iterate too many times,
     978              :          reduce the profile to avoid new profile inconsistencies.  */
     979       105489 :       scale_loop_profile (loop, profile_probability::always (), n_unroll);
     980              : 
     981       105489 :       if (!gimple_duplicate_loop_body_to_header_edge (
     982              :             loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit,
     983              :             &edges_to_remove,
     984              :             DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL
     985              :             | DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR))
     986              :         {
     987            9 :           free_original_copy_tables ();
     988            9 :           if (dump_file && (dump_flags & TDF_DETAILS))
     989            0 :             fprintf (dump_file, "Failed to duplicate the loop\n");
     990            9 :           return false;
     991              :         }
     992              : 
     993       105480 :       free_original_copy_tables ();
     994       105489 :     }
     995              :   else
     996        28578 :     scale_loop_profile (loop, profile_probability::always (), 0);
     997              : 
     998              :   /* Remove the conditional from the last copy of the loop.  */
     999       134058 :   if (edge_to_cancel)
    1000              :     {
    1001       267952 :       gcond *cond = as_a <gcond *> (*gsi_last_bb (edge_to_cancel->src));
    1002       133976 :       force_edge_cold (edge_to_cancel, true);
    1003       133976 :       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
    1004        60091 :         gimple_cond_make_false (cond);
    1005              :       else
    1006        73885 :         gimple_cond_make_true (cond);
    1007       133976 :       update_stmt (cond);
    1008              :       /* Do not remove the path, as doing so may remove outer loop and
    1009              :          confuse bookkeeping code in tree_unroll_loops_completely.  */
    1010              :     }
    1011              : 
    1012              :   /* Store the loop for later unlooping and exit removal.  */
    1013       134058 :   loops_to_unloop.safe_push (loop);
    1014       134058 :   loops_to_unloop_nunroll.safe_push (n_unroll);
    1015              : 
    1016       134058 :   if (dump_enabled_p ())
    1017              :     {
    1018          169 :       if (!n_unroll)
    1019           62 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
    1020              :                          "loop turned into non-loop; it never loops\n");
    1021              :       else
    1022              :         {
    1023          107 :           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
    1024              :                            "loop with %d iterations completely unrolled",
    1025              :                            (int) n_unroll);
    1026          107 :           if (loop->header->count.initialized_p ())
    1027          106 :             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
    1028              :                          " (header execution count %d)",
    1029          106 :                          (int)loop->header->count.to_gcov_type ());
    1030          107 :           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
    1031              :         }
    1032              :     }
    1033              : 
    1034       134058 :   if (dump_file && (dump_flags & TDF_DETAILS))
    1035              :     {
    1036          100 :       if (exit)
    1037           75 :         fprintf (dump_file, "Exit condition of peeled iterations was "
    1038              :                  "eliminated.\n");
    1039          100 :       if (edge_to_cancel)
    1040          100 :         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
    1041              :       else
    1042            0 :         fprintf (dump_file, "Latch of last iteration was marked by "
    1043              :                  "__builtin_unreachable ().\n");
    1044              :     }
    1045              : 
    1046              :   return true;
    1047              : }
    1048              : 
    1049              : /* Return number of instructions after peeling.  */
    1050              : static unsigned HOST_WIDE_INT
    1051         2880 : estimated_peeled_sequence_size (struct loop_size *size,
    1052              :                                 unsigned HOST_WIDE_INT npeel)
    1053              : {
    1054         2880 :   return MAX (npeel * (HOST_WIDE_INT) (size->overall
    1055              :                                        - size->eliminated_by_peeling), 1);
    1056              : }
    1057              : 
    1058              : /* Update loop estimates after peeling LOOP by NPEEL.
    1059              :    If PRECISE is false only likely exists were duplicated and thus
    1060              :    do not update any estimates that are supposed to be always reliable.  */
    1061              : void
    1062       453914 : adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise)
    1063              : {
    1064       453914 :   if (loop->any_estimate)
    1065              :     {
    1066              :       /* Since peeling is mostly about loops where first few
    1067              :          iterations are special, it is not quite correct to
    1068              :          assume that the remaining iterations will behave
    1069              :          the same way.  However we do not have better info
    1070              :          so update the esitmate, since it is likely better
    1071              :          than keeping it as it is.
    1072              : 
    1073              :          Remove it if it looks wrong.
    1074              : 
    1075              :          TODO: We likely want to special case the situation where
    1076              :          peeling is optimizing out exit edges and only update
    1077              :          estimates here.  */
    1078       228048 :       if (wi::leu_p (npeel, loop->nb_iterations_estimate))
    1079       228031 :         loop->nb_iterations_estimate -= npeel;
    1080              :       else
    1081           17 :         loop->any_estimate = false;
    1082              :     }
    1083       453914 :   if (loop->any_upper_bound && precise)
    1084              :     {
    1085       268085 :       if (wi::leu_p (npeel, loop->nb_iterations_upper_bound))
    1086       268085 :         loop->nb_iterations_upper_bound -= npeel;
    1087              :       else
    1088              :         {
    1089              :           /* Peeling maximal number of iterations or more
    1090              :              makes no sense and is a bug.
    1091              :              We should peel completely.  */
    1092            0 :           gcc_unreachable ();
    1093              :         }
    1094              :     }
    1095       453914 :   if (loop->any_likely_upper_bound)
    1096              :     {
    1097       374379 :       if (wi::leu_p (npeel, loop->nb_iterations_likely_upper_bound))
    1098       373520 :         loop->nb_iterations_likely_upper_bound -= npeel;
    1099              :       else
    1100              :         {
    1101          859 :           loop->any_estimate = true;
    1102          859 :           loop->nb_iterations_estimate = 0;
    1103          859 :           loop->nb_iterations_likely_upper_bound = 0;
    1104              :         }
    1105              :     }
    1106       453914 : }
    1107              : 
    1108              : /* If the loop is expected to iterate N times and is
    1109              :    small enough, duplicate the loop body N+1 times before
    1110              :    the loop itself.  This way the hot path will never
    1111              :    enter the loop.
    1112              :    Parameters are the same as for try_unroll_loops_completely */
    1113              : 
    1114              : static bool
    1115       130504 : try_peel_loop (class loop *loop,
    1116              :                edge exit, tree niter, bool may_be_zero,
    1117              :                HOST_WIDE_INT maxiter)
    1118              : {
    1119       130504 :   HOST_WIDE_INT npeel;
    1120       130504 :   struct loop_size size;
    1121       130504 :   int peeled_size;
    1122              : 
    1123       130504 :   if (!flag_peel_loops
    1124       111968 :       || param_max_peel_times <= 0
    1125       111968 :       || !peeled_loops)
    1126              :     return false;
    1127              : 
    1128        84033 :   if (bitmap_bit_p (peeled_loops, loop->num))
    1129              :     {
    1130          843 :       if (dump_file)
    1131            2 :         fprintf (dump_file, "Not peeling: loop is already peeled\n");
    1132          843 :       return false;
    1133              :     }
    1134              : 
    1135              :   /* We don't peel loops that will be unrolled as this can duplicate a
    1136              :      loop more times than the user requested.  */
    1137        83190 :   if (loop->unroll)
    1138              :     {
    1139           95 :       if (dump_file)
    1140            0 :         fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
    1141           95 :       return false;
    1142              :     }
    1143              : 
    1144              :   /* Peel only innermost loops.
    1145              :      While the code is perfectly capable of peeling non-innermost loops,
    1146              :      the heuristics would probably need some improvements. */
    1147        83095 :   if (loop->inner)
    1148              :     {
    1149        15468 :       if (dump_file)
    1150            2 :         fprintf (dump_file, "Not peeling: outer loop\n");
    1151        15468 :       return false;
    1152              :     }
    1153              : 
    1154        67627 :   if (!optimize_loop_for_speed_p (loop))
    1155              :     {
    1156            0 :       if (dump_file)
    1157            0 :         fprintf (dump_file, "Not peeling: cold loop\n");
    1158            0 :       return false;
    1159              :     }
    1160              : 
    1161              :   /* Check if there is an estimate on the number of iterations.  */
    1162        67627 :   npeel = estimated_loop_iterations_int (loop);
    1163        67627 :   if (npeel < 0)
    1164        49650 :     npeel = likely_max_loop_iterations_int (loop);
    1165        67627 :   if (npeel < 0)
    1166              :     {
    1167        13798 :       if (dump_file)
    1168            0 :         fprintf (dump_file, "Not peeling: number of iterations is not "
    1169              :                  "estimated\n");
    1170        13798 :       return false;
    1171              :     }
    1172        53829 :   if (maxiter >= 0 && maxiter <= npeel)
    1173              :     {
    1174        49990 :       if (dump_file)
    1175           23 :         fprintf (dump_file, "Not peeling: upper bound is known so can "
    1176              :                  "unroll completely\n");
    1177        49990 :       return false;
    1178              :     }
    1179              : 
    1180              :   /* We want to peel estimated number of iterations + 1 (so we never
    1181              :      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
    1182              :      and be sure to avoid overflows.  */
    1183         3839 :   if (npeel > param_max_peel_times - 1)
    1184              :     {
    1185          959 :       if (dump_file)
    1186            0 :         fprintf (dump_file, "Not peeling: rolls too much "
    1187              :                  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
    1188          959 :       return false;
    1189              :     }
    1190         2880 :   npeel++;
    1191              : 
    1192              :   /* Check peeled loops size.  */
    1193         2880 :   tree_estimate_loop_size (loop, exit, NULL, &size,
    1194              :                            param_max_peeled_insns);
    1195         2880 :   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
    1196         2880 :       > param_max_peeled_insns)
    1197              :     {
    1198         2010 :       if (dump_file)
    1199            0 :         fprintf (dump_file, "Not peeling: peeled sequence size is too large "
    1200              :                  "(%i insns > --param max-peel-insns)", peeled_size);
    1201         2010 :       return false;
    1202              :     }
    1203              : 
    1204          870 :   if (!dbg_cnt (gimple_unroll))
    1205              :     return false;
    1206              : 
    1207              :   /* Duplicate possibly eliminating the exits.  */
    1208          870 :   initialize_original_copy_tables ();
    1209          870 :   auto_sbitmap wont_exit (npeel + 1);
    1210          870 :   if (exit && niter
    1211            9 :       && TREE_CODE (niter) == INTEGER_CST
    1212          879 :       && wi::leu_p (npeel, wi::to_widest (niter)))
    1213              :     {
    1214            9 :       bitmap_ones (wont_exit);
    1215            9 :       bitmap_clear_bit (wont_exit, 0);
    1216              :     }
    1217              :   else
    1218              :     {
    1219          861 :       exit = NULL;
    1220          861 :       bitmap_clear (wont_exit);
    1221              :     }
    1222          870 :   if (may_be_zero)
    1223            0 :     bitmap_clear_bit (wont_exit, 1);
    1224              : 
    1225          870 :   if (!gimple_duplicate_loop_body_to_header_edge (
    1226              :         loop, loop_preheader_edge (loop), npeel, wont_exit, exit,
    1227              :         &edges_to_remove,
    1228              :         DLTHE_FLAG_UPDATE_FREQ | DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR))
    1229              :     {
    1230            0 :       free_original_copy_tables ();
    1231            0 :       return false;
    1232              :     }
    1233          870 :   free_original_copy_tables ();
    1234          870 :   if (dump_file && (dump_flags & TDF_DETAILS))
    1235              :     {
    1236            3 :       fprintf (dump_file, "Peeled loop %d, %i times.\n",
    1237              :                loop->num, (int) npeel);
    1238              :     }
    1239          870 :   adjust_loop_info_after_peeling (loop, npeel, true);
    1240              : 
    1241          870 :   bitmap_set_bit (peeled_loops, loop->num);
    1242          870 :   return true;
    1243          870 : }
    1244              : /* Adds a canonical induction variable to LOOP if suitable.
    1245              :    CREATE_IV is true if we may create a new iv.  UL determines
    1246              :    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
    1247              :    to determine the number of iterations of a loop by direct evaluation.
    1248              :    Returns true if cfg is changed.   */
    1249              : 
    1250              : static bool
    1251      2255234 : canonicalize_loop_induction_variables (class loop *loop,
    1252              :                                        bool create_iv, enum unroll_level ul,
    1253              :                                        bool try_eval, bool allow_peel,
    1254              :                                        const_sbitmap innermost,
    1255              :                                        bool cunrolli)
    1256              : {
    1257      2255234 :   edge exit = NULL;
    1258      2255234 :   tree niter;
    1259      2255234 :   HOST_WIDE_INT maxiter;
    1260      2255234 :   bool modified = false;
    1261      2255234 :   class tree_niter_desc niter_desc;
    1262      2255234 :   bool may_be_zero = false;
    1263      2255234 :   bool by_eval = false;
    1264              : 
    1265              :   /* For unrolling allow conditional constant or zero iterations, thus
    1266              :      perform loop-header copying on-the-fly.  */
    1267      2255234 :   exit = single_exit (loop);
    1268      2255234 :   niter = chrec_dont_know;
    1269      2255234 :   if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
    1270              :     {
    1271      1120706 :       niter = niter_desc.niter;
    1272      1120706 :       may_be_zero
    1273      1120706 :         = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
    1274              :     }
    1275      2255234 :   if (TREE_CODE (niter) != INTEGER_CST)
    1276              :     {
    1277              :       /* For non-constant niter fold may_be_zero into niter again.  */
    1278      1558926 :       if (may_be_zero)
    1279              :         {
    1280       122659 :           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
    1281       122647 :             niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
    1282              :                                  niter_desc.may_be_zero,
    1283              :                                  build_int_cst (TREE_TYPE (niter), 0), niter);
    1284              :           else
    1285           12 :             niter = chrec_dont_know;
    1286              :           may_be_zero = false;
    1287              :         }
    1288              : 
    1289              :       /* If the loop has more than one exit, try checking all of them
    1290              :          for # of iterations determinable through scev.  */
    1291      1558926 :       if (!exit)
    1292       757381 :         niter = find_loop_niter (loop, &exit);
    1293              : 
    1294              :       /* Finally if everything else fails, try brute force evaluation.  */
    1295      1558926 :       if (try_eval
    1296      1558926 :           && (chrec_contains_undetermined (niter)
    1297       510501 :               || TREE_CODE (niter) != INTEGER_CST))
    1298              :         {
    1299       786229 :           niter = find_loop_niter_by_eval (loop, &exit);
    1300       786229 :           if (TREE_CODE (niter) == INTEGER_CST)
    1301        15038 :             by_eval = true;
    1302              :         }
    1303              : 
    1304      1558926 :       if (TREE_CODE (niter) != INTEGER_CST)
    1305      1208340 :         exit = NULL;
    1306              :     }
    1307              : 
    1308              :   /* We work exceptionally hard here to estimate the bound
    1309              :      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
    1310      3463574 :   if (niter && TREE_CODE (niter) == INTEGER_CST)
    1311              :     {
    1312      1046894 :       auto_vec<edge> exits = get_loop_exit_edges  (loop);
    1313      1046894 :       record_niter_bound (loop, wi::to_widest (niter),
    1314      1046894 :                           exit == single_likely_exit (loop, exits), true);
    1315      1046894 :     }
    1316              : 
    1317              :   /* Force re-computation of loop bounds so we can remove redundant exits.  */
    1318      2255234 :   maxiter = max_loop_iterations_int (loop);
    1319              : 
    1320          303 :   if (dump_file && (dump_flags & TDF_DETAILS)
    1321      2255479 :       && TREE_CODE (niter) == INTEGER_CST)
    1322              :     {
    1323          145 :       fprintf (dump_file, "Loop %d iterates ", loop->num);
    1324          145 :       print_generic_expr (dump_file, niter, TDF_SLIM);
    1325          145 :       fprintf (dump_file, " times.\n");
    1326              :     }
    1327          303 :   if (dump_file && (dump_flags & TDF_DETAILS)
    1328      2255479 :       && maxiter >= 0)
    1329              :     {
    1330          209 :       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
    1331              :                (int)maxiter);
    1332              :     }
    1333          303 :   if (dump_file && (dump_flags & TDF_DETAILS)
    1334      2255479 :       && likely_max_loop_iterations_int (loop) >= 0)
    1335              :     {
    1336          209 :       fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
    1337          209 :                loop->num, (int)likely_max_loop_iterations_int (loop));
    1338              :     }
    1339              : 
    1340              :   /* Remove exits that are known to be never taken based on loop bound.
    1341              :      Needs to be called after compilation of max_loop_iterations_int that
    1342              :      populates the loop bounds.  */
    1343      2255234 :   modified |= remove_redundant_iv_tests (loop);
    1344              : 
    1345      2255234 :   dump_user_location_t locus = find_loop_location (loop);
    1346              : 
    1347      2255234 :   bool innermost_cunrolli_p
    1348              :     = cunrolli
    1349       764294 :       && (unsigned) loop->num < SBITMAP_SIZE (innermost)
    1350      3016582 :       && bitmap_bit_p (innermost, loop->num);
    1351              : 
    1352      2255234 :   if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
    1353              :                                   maxiter, locus, allow_peel,
    1354              :                                   innermost_cunrolli_p))
    1355              :     return true;
    1356              : 
    1357      2121176 :   if ((create_iv || by_eval)
    1358       682566 :       && niter && !chrec_contains_undetermined (niter)
    1359      2435473 :       && exit && just_once_each_iteration_p (loop, exit->src))
    1360              :     {
    1361       314295 :       tree iv_niter = niter;
    1362       314295 :       if (may_be_zero)
    1363              :         {
    1364           21 :           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
    1365           21 :             iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
    1366              :                                     niter_desc.may_be_zero,
    1367              :                                     build_int_cst (TREE_TYPE (iv_niter), 0),
    1368              :                                     iv_niter);
    1369              :           else
    1370              :             iv_niter = NULL_TREE;
    1371              :         }
    1372       314295 :       if (iv_niter)
    1373       314295 :         create_canonical_iv (loop, exit, iv_niter);
    1374              :     }
    1375              : 
    1376      2121176 :   if (ul == UL_ALL)
    1377       130504 :     modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
    1378              : 
    1379              :   return modified;
    1380      2255234 : }
    1381              : 
    1382              : /* The main entry point of the pass.  Adds canonical induction variables
    1383              :    to the suitable loops.  */
    1384              : 
    1385              : unsigned int
    1386       241441 : canonicalize_induction_variables (void)
    1387              : {
    1388       241441 :   bool changed = false;
    1389       241441 :   bool irred_invalidated = false;
    1390       241441 :   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
    1391       482882 :   auto_sbitmap innermost (number_of_loops (cfun));
    1392       241441 :   bitmap_clear (innermost);
    1393              : 
    1394       241441 :   estimate_numbers_of_iterations (cfun);
    1395              : 
    1396      1404736 :   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    1397              :     {
    1398       680413 :       changed
    1399       680413 :         |= canonicalize_loop_induction_variables (loop,
    1400              :                                                   true, UL_SINGLE_ITER,
    1401              :                                                   true, false,
    1402       680413 :                                                   (const_sbitmap) innermost,
    1403              :                                                   false);
    1404       241441 :     }
    1405       241441 :   gcc_assert (!need_ssa_update_p (cfun));
    1406              : 
    1407       241441 :   unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove,
    1408              :                 loop_closed_ssa_invalidated, &irred_invalidated);
    1409       241441 :   loops_to_unloop.release ();
    1410       241441 :   loops_to_unloop_nunroll.release ();
    1411       241441 :   if (irred_invalidated
    1412       241441 :       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    1413            2 :     mark_irreducible_loops ();
    1414              : 
    1415              :   /* Clean up the information about numbers of iterations, since brute force
    1416              :      evaluation could reveal new information.  */
    1417       241441 :   free_numbers_of_iterations_estimates (cfun);
    1418       241441 :   scev_reset ();
    1419              : 
    1420       241441 :   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
    1421              :     {
    1422           25 :       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
    1423           25 :       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    1424              :     }
    1425       241441 :   BITMAP_FREE (loop_closed_ssa_invalidated);
    1426              : 
    1427       241441 :   if (changed)
    1428         1115 :     return TODO_cleanup_cfg;
    1429              :   return 0;
    1430       241441 : }
    1431              : 
    1432              : /* Process loops from innermost to outer, stopping at the innermost
    1433              :    loop we unrolled.  */
    1434              : 
    1435              : static bool
    1436      2141103 : tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
    1437              :                                 bitmap father_bbs, class loop *loop,
    1438              :                                 const_sbitmap innermost, bool cunrolli)
    1439              : {
    1440      2141103 :   class loop *loop_father;
    1441      2141103 :   bool changed = false;
    1442      2141103 :   class loop *inner;
    1443      2141103 :   enum unroll_level ul;
    1444      2141103 :   unsigned num = number_of_loops (cfun);
    1445              : 
    1446              :   /* Process inner loops first.  Don't walk loops added by the recursive
    1447              :      calls because SSA form is not up-to-date.  They can be handled in the
    1448              :      next iteration.  */
    1449      2141103 :   bitmap child_father_bbs = NULL;
    1450      3767938 :   for (inner = loop->inner; inner != NULL; inner = inner->next)
    1451      1626835 :     if ((unsigned) inner->num < num)
    1452              :       {
    1453      1624753 :         if (!child_father_bbs)
    1454       765229 :           child_father_bbs = BITMAP_ALLOC (NULL);
    1455      1624753 :         if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
    1456              :                                             child_father_bbs, inner,
    1457              :                                             innermost, cunrolli))
    1458              :           {
    1459       174581 :             bitmap_ior_into (father_bbs, child_father_bbs);
    1460       174581 :             bitmap_clear (child_father_bbs);
    1461       174581 :             changed = true;
    1462              :           }
    1463              :       }
    1464      2141103 :   if (child_father_bbs)
    1465       765229 :     BITMAP_FREE (child_father_bbs);
    1466              : 
    1467              :   /* If we changed an inner loop we cannot process outer loops in this
    1468              :      iteration because SSA form is not up-to-date.  Continue with
    1469              :      siblings of outer loops instead.  */
    1470      2141103 :   if (changed)
    1471              :     {
    1472              :       /* If we are recorded as father clear all other fathers that
    1473              :          are necessarily covered already to avoid redundant work.  */
    1474        90952 :       if (bitmap_bit_p (father_bbs, loop->header->index))
    1475              :         {
    1476        28165 :           bitmap_clear (father_bbs);
    1477        28165 :           bitmap_set_bit (father_bbs, loop->header->index);
    1478              :         }
    1479        90952 :       return true;
    1480              :     }
    1481              : 
    1482              :   /* Don't unroll #pragma omp simd loops until the vectorizer
    1483              :      attempts to vectorize those.  */
    1484      2050151 :   if (loop->force_vectorize)
    1485              :     return false;
    1486              : 
    1487              :   /* Try to unroll this loop.  */
    1488      2039881 :   loop_father = loop_outer (loop);
    1489      2039881 :   if (!loop_father)
    1490              :     return false;
    1491              : 
    1492      1574821 :   if (loop->unroll > 1)
    1493              :     ul = UL_ALL;
    1494       273522 :   else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
    1495              :       /* Unroll outermost loops only if asked to do so or they do
    1496              :          not cause code growth.  */
    1497      1838763 :       && (unroll_outer || loop_outer (loop_father)))
    1498              :     ul = UL_ALL;
    1499              :   else
    1500              :     ul = UL_NO_GROWTH;
    1501              : 
    1502      1574821 :   if (canonicalize_loop_induction_variables
    1503      1574866 :       (loop, false, ul, !flag_tree_loop_ivcanon || cunrolli, unroll_outer,
    1504              :        innermost, cunrolli))
    1505              :     {
    1506              :       /* If we'll continue unrolling, we need to propagate constants
    1507              :          within the new basic blocks to fold away induction variable
    1508              :          computations; otherwise, the size might blow up before the
    1509              :          iteration is complete and the IR eventually cleaned up.  */
    1510       134919 :       if (loop_outer (loop_father))
    1511              :         {
    1512              :           /* Once we process our father we will have processed
    1513              :              the fathers of our children as well, so avoid doing
    1514              :              redundant work and clear fathers we've gathered sofar.  */
    1515        36040 :           bitmap_clear (father_bbs);
    1516        36040 :           bitmap_set_bit (father_bbs, loop_father->header->index);
    1517              :         }
    1518        98879 :       else if (unroll_outer)
    1519              :         /* Trigger scalar cleanup once any outermost loop gets unrolled.  */
    1520        58556 :         cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
    1521              : 
    1522       134919 :       return true;
    1523              :     }
    1524              : 
    1525              :   return false;
    1526              : }
    1527              : 
    1528              : /* Unroll LOOPS completely if they iterate just few times.  Unless
    1529              :    MAY_INCREASE_SIZE is true, perform the unrolling only if the
    1530              :    size of the code does not increase.
    1531              :    cunrolli is true when passs is cunrolli.  */
    1532              : 
    1533              : static unsigned int
    1534       465069 : tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer, bool cunrolli)
    1535              : {
    1536       465069 :   bitmap father_bbs = BITMAP_ALLOC (NULL);
    1537       465069 :   bool changed;
    1538       465069 :   int iteration = 0;
    1539       465069 :   bool irred_invalidated = false;
    1540       930138 :   auto_sbitmap innermost (number_of_loops (cfun));
    1541       465069 :   bitmap_clear (innermost);
    1542              : 
    1543       465069 :   estimate_numbers_of_iterations (cfun);
    1544              : 
    1545              :   /* Mark all innermost loop at the beginning.  */
    1546      2793248 :   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    1547              :     {
    1548      1398041 :       if (!loop->inner)
    1549      1157345 :         bitmap_set_bit (innermost, loop->num);
    1550       465069 :     }
    1551              : 
    1552       516350 :   do
    1553              :     {
    1554       516350 :       changed = false;
    1555       516350 :       bitmap loop_closed_ssa_invalidated = NULL;
    1556              : 
    1557       516350 :       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
    1558       274361 :         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
    1559              : 
    1560       516350 :       free_numbers_of_iterations_estimates (cfun);
    1561       516350 :       estimate_numbers_of_iterations (cfun);
    1562              : 
    1563      1032700 :       changed = tree_unroll_loops_completely_1 (may_increase_size,
    1564              :                                                 unroll_outer, father_bbs,
    1565       516350 :                                                 current_loops->tree_root,
    1566       516350 :                                                 (const_sbitmap) innermost,
    1567              :                                                 cunrolli);
    1568       516350 :       if (changed)
    1569              :         {
    1570        51290 :           unsigned i;
    1571              : 
    1572        51290 :           unloop_loops (loops_to_unloop, loops_to_unloop_nunroll,
    1573              :                         edges_to_remove, loop_closed_ssa_invalidated,
    1574              :                         &irred_invalidated);
    1575        51290 :           loops_to_unloop.release ();
    1576        51290 :           loops_to_unloop_nunroll.release ();
    1577              : 
    1578              :           /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
    1579        51290 :           if (loop_closed_ssa_invalidated
    1580        51290 :               && !bitmap_empty_p (loop_closed_ssa_invalidated))
    1581         7478 :             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
    1582              :                                           TODO_update_ssa);
    1583              :           else
    1584        43812 :             update_ssa (TODO_update_ssa);
    1585              : 
    1586              :           /* father_bbs is a bitmap of loop father header BB indices.
    1587              :              Translate that to what non-root loops these BBs belong to now.  */
    1588        51290 :           bitmap_iterator bi;
    1589        51290 :           bitmap fathers = BITMAP_ALLOC (NULL);
    1590        77978 :           EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
    1591              :             {
    1592        26688 :               basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
    1593        26688 :               if (! unrolled_loop_bb)
    1594            0 :                 continue;
    1595        26688 :               if (loop_outer (unrolled_loop_bb->loop_father))
    1596        26688 :                 bitmap_set_bit (fathers,
    1597              :                                 unrolled_loop_bb->loop_father->num);
    1598              :             }
    1599        51290 :           bitmap_clear (father_bbs);
    1600              :           /* Propagate the constants within the new basic blocks.  */
    1601        77978 :           EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
    1602              :             {
    1603        26688 :               loop_p father = get_loop (cfun, i);
    1604        26688 :               bitmap exit_bbs = BITMAP_ALLOC (NULL);
    1605        26688 :               loop_exit *exit = father->exits->next;
    1606       125981 :               while (exit->e)
    1607              :                 {
    1608        99293 :                   bitmap_set_bit (exit_bbs, exit->e->dest->index);
    1609        99293 :                   exit = exit->next;
    1610              :                 }
    1611        26688 :               do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
    1612              :             }
    1613        51290 :           BITMAP_FREE (fathers);
    1614              : 
    1615              :           /* Clean up the information about numbers of iterations, since
    1616              :              complete unrolling might have invalidated it.  */
    1617        51290 :           scev_reset ();
    1618              : 
    1619              :           /* This will take care of removing completely unrolled loops
    1620              :              from the loop structures so we can continue unrolling now
    1621              :              innermost loops.  */
    1622        51290 :           if (cleanup_tree_cfg ())
    1623        51290 :             update_ssa (TODO_update_ssa_only_virtuals);
    1624              : 
    1625        51290 :           if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
    1626        32926 :             verify_loop_closed_ssa (true);
    1627              :         }
    1628       516350 :       if (loop_closed_ssa_invalidated)
    1629       274361 :         BITMAP_FREE (loop_closed_ssa_invalidated);
    1630              :     }
    1631              :   while (changed
    1632       516359 :          && ++iteration <= param_max_unroll_iterations);
    1633              : 
    1634       465069 :   BITMAP_FREE (father_bbs);
    1635              : 
    1636       465069 :   if (irred_invalidated
    1637       465069 :       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    1638           20 :     mark_irreducible_loops ();
    1639              : 
    1640       465069 :   return 0;
    1641       465069 : }
    1642              : 
    1643              : /* Canonical induction variable creation pass.  */
    1644              : 
    1645              : namespace {
    1646              : 
    1647              : const pass_data pass_data_iv_canon =
    1648              : {
    1649              :   GIMPLE_PASS, /* type */
    1650              :   "ivcanon", /* name */
    1651              :   OPTGROUP_LOOP, /* optinfo_flags */
    1652              :   TV_TREE_LOOP_IVCANON, /* tv_id */
    1653              :   ( PROP_cfg | PROP_ssa ), /* properties_required */
    1654              :   0, /* properties_provided */
    1655              :   0, /* properties_destroyed */
    1656              :   0, /* todo_flags_start */
    1657              :   0, /* todo_flags_finish */
    1658              : };
    1659              : 
    1660              : class pass_iv_canon : public gimple_opt_pass
    1661              : {
    1662              : public:
    1663       285722 :   pass_iv_canon (gcc::context *ctxt)
    1664       571444 :     : gimple_opt_pass (pass_data_iv_canon, ctxt)
    1665              :   {}
    1666              : 
    1667              :   /* opt_pass methods: */
    1668       241458 :   bool gate (function *) final override { return flag_tree_loop_ivcanon != 0; }
    1669              :   unsigned int execute (function *fun) final override;
    1670              : 
    1671              : }; // class pass_iv_canon
    1672              : 
    1673              : unsigned int
    1674       241441 : pass_iv_canon::execute (function *fun)
    1675              : {
    1676       482882 :   if (number_of_loops (fun) <= 1)
    1677              :     return 0;
    1678              : 
    1679       241441 :   return canonicalize_induction_variables ();
    1680              : }
    1681              : 
    1682              : } // anon namespace
    1683              : 
    1684              : gimple_opt_pass *
    1685       285722 : make_pass_iv_canon (gcc::context *ctxt)
    1686              : {
    1687       285722 :   return new pass_iv_canon (ctxt);
    1688              : }
    1689              : 
    1690              : /* Complete unrolling of loops.  */
    1691              : 
    1692              : namespace {
    1693              : 
    1694              : const pass_data pass_data_complete_unroll =
    1695              : {
    1696              :   GIMPLE_PASS, /* type */
    1697              :   "cunroll", /* name */
    1698              :   OPTGROUP_LOOP, /* optinfo_flags */
    1699              :   TV_COMPLETE_UNROLL, /* tv_id */
    1700              :   ( PROP_cfg | PROP_ssa ), /* properties_required */
    1701              :   0, /* properties_provided */
    1702              :   0, /* properties_destroyed */
    1703              :   0, /* todo_flags_start */
    1704              :   0, /* todo_flags_finish */
    1705              : };
    1706              : 
    1707              : class pass_complete_unroll : public gimple_opt_pass
    1708              : {
    1709              : public:
    1710       285722 :   pass_complete_unroll (gcc::context *ctxt)
    1711       571444 :     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
    1712              :   {}
    1713              : 
    1714              :   /* opt_pass methods: */
    1715              :   unsigned int execute (function *) final override;
    1716              : 
    1717              : }; // class pass_complete_unroll
    1718              : 
    1719              : unsigned int
    1720       241437 : pass_complete_unroll::execute (function *fun)
    1721              : {
    1722       482874 :   if (number_of_loops (fun) <= 1)
    1723              :     return 0;
    1724              : 
    1725              :   /* If we ever decide to run loop peeling more than once, we will need to
    1726              :      track loops already peeled in loop structures themselves to avoid
    1727              :      re-peeling the same loop multiple times.  */
    1728       241437 :   if (flag_peel_loops)
    1729        28045 :     peeled_loops = BITMAP_ALLOC (NULL);
    1730       241437 :   unsigned int val = tree_unroll_loops_completely (flag_cunroll_grow_size, true, false);
    1731       241437 :   if (peeled_loops)
    1732              :     {
    1733        28045 :       BITMAP_FREE (peeled_loops);
    1734        28045 :       peeled_loops = NULL;
    1735              :     }
    1736              :   return val;
    1737              : }
    1738              : 
    1739              : } // anon namespace
    1740              : 
    1741              : gimple_opt_pass *
    1742       285722 : make_pass_complete_unroll (gcc::context *ctxt)
    1743              : {
    1744       285722 :   return new pass_complete_unroll (ctxt);
    1745              : }
    1746              : 
    1747              : /* Complete unrolling of inner loops.  */
    1748              : 
    1749              : namespace {
    1750              : 
    1751              : const pass_data pass_data_complete_unrolli =
    1752              : {
    1753              :   GIMPLE_PASS, /* type */
    1754              :   "cunrolli", /* name */
    1755              :   OPTGROUP_LOOP, /* optinfo_flags */
    1756              :   TV_COMPLETE_UNROLL, /* tv_id */
    1757              :   ( PROP_cfg | PROP_ssa ), /* properties_required */
    1758              :   0, /* properties_provided */
    1759              :   0, /* properties_destroyed */
    1760              :   0, /* todo_flags_start */
    1761              :   0, /* todo_flags_finish */
    1762              : };
    1763              : 
    1764              : class pass_complete_unrolli : public gimple_opt_pass
    1765              : {
    1766              : public:
    1767       285722 :   pass_complete_unrolli (gcc::context *ctxt)
    1768       571444 :     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
    1769              :   {}
    1770              : 
    1771              :   /* opt_pass methods: */
    1772      1041484 :   bool gate (function *) final override { return optimize >= 2; }
    1773              :   unsigned int execute (function *) final override;
    1774              : 
    1775              : }; // class pass_complete_unrolli
    1776              : 
    1777              : unsigned int
    1778       964174 : pass_complete_unrolli::execute (function *fun)
    1779              : {
    1780       964174 :   unsigned ret = 0;
    1781              : 
    1782       964174 :   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
    1783      1928348 :   if (number_of_loops (fun) > 1)
    1784              :     {
    1785       223632 :       scev_initialize ();
    1786       223632 :       ret = tree_unroll_loops_completely (optimize >= 3, false, true);
    1787       223632 :       scev_finalize ();
    1788              :     }
    1789       964174 :   loop_optimizer_finalize ();
    1790              : 
    1791       964174 :   return ret;
    1792              : }
    1793              : 
    1794              : } // anon namespace
    1795              : 
    1796              : gimple_opt_pass *
    1797       285722 : make_pass_complete_unrolli (gcc::context *ctxt)
    1798              : {
    1799       285722 :   return new pass_complete_unrolli (ctxt);
    1800              : }
    1801              : 
    1802              : 
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.