LCOV - gcc.info - gcc/gimple-loop-jam.cc

LCOV - code coverage report

Current view:	top level - gcc - gimple-loop-jam.cc (source / functions)		Coverage	Total	Hit
Test:	gcc.info	Lines:	97.8 %	230	225
Test Date:	2026-02-28 14:20:25	Functions:	100.0 %	10	10
Legend:	Lines: hit not hit

            Line data    Source code

       1              : /* Loop unroll-and-jam.
       2              :    Copyright (C) 2017-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it
       7              : under the terms of the GNU General Public License as published by the
       8              : Free Software Foundation; either version 3, or (at your option) any
       9              : later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT
      12              : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "tree-pass.h"
      24              : #include "backend.h"
      25              : #include "tree.h"
      26              : #include "gimple.h"
      27              : #include "ssa.h"
      28              : #include "fold-const.h"
      29              : #include "tree-cfg.h"
      30              : #include "tree-ssa.h"
      31              : #include "tree-ssa-loop-niter.h"
      32              : #include "tree-ssa-loop.h"
      33              : #include "tree-ssa-loop-manip.h"
      34              : #include "cfgloop.h"
      35              : #include "tree-scalar-evolution.h"
      36              : #include "gimple-iterator.h"
      37              : #include "cfghooks.h"
      38              : #include "tree-data-ref.h"
      39              : #include "tree-ssa-loop-ivopts.h"
      40              : #include "tree-vectorizer.h"
      41              : #include "tree-ssa-sccvn.h"
      42              : #include "tree-cfgcleanup.h"
      43              : 
      44              : /* Unroll and Jam transformation
      45              : 
      46              :    This is a combination of two transformations, where the second
      47              :    is not always valid.  It's applicable if a loop nest has redundancies
      48              :    over the iterations of an outer loop while not having that with
      49              :    an inner loop.
      50              : 
      51              :    Given this nest:
      52              :        for (i) {
      53              :          for (j) {
      54              :            B(i,j)
      55              :          }
      56              :        }
      57              : 
      58              :    first unroll:
      59              :        for (i by 2) {
      60              :          for (j) {
      61              :            B(i,j)
      62              :          }
      63              :          for (j) {
      64              :            B(i+1,j)
      65              :          }
      66              :        }
      67              : 
      68              :    then fuse the two adjacent inner loops resulting from that:
      69              :        for (i by 2) {
      70              :          for (j) {
      71              :            B(i,j)
      72              :            B(i+1,j)
      73              :          }
      74              :        }
      75              : 
      76              :    As the order of evaluations of the body B changes this is valid
      77              :    only in certain situations: all distance vectors need to be forward.
      78              :    Additionally if there are multiple induction variables than just
      79              :    a counting control IV (j above) we can also deal with some situations.
      80              : 
      81              :    The validity is checked by unroll_jam_possible_p, and the data-dep
      82              :    testing below.
      83              : 
      84              :    A trivial example where the fusion is wrong would be when
      85              :    B(i,j) == x[j-1] = x[j];
      86              :        for (i by 2) {
      87              :          for (j) {
      88              :            x[j-1] = x[j];
      89              :          }
      90              :          for (j) {
      91              :            x[j-1] = x[j];
      92              :          }
      93              :        }  effect: move content to front by two elements
      94              :        -->
      95              :        for (i by 2) {
      96              :          for (j) {
      97              :            x[j-1] = x[j];
      98              :            x[j-1] = x[j];
      99              :          }
     100              :        }  effect: move content to front by one element
     101              : */
     102              : 
     103              : /* Modify the loop tree for the fact that all code once belonging
     104              :    to the OLD loop or the outer loop of OLD now is inside LOOP.  */
     105              : 
     106              : static void
     107           58 : merge_loop_tree (class loop *loop, class loop *old)
     108              : {
     109           58 :   basic_block *bbs;
     110           58 :   int i, n;
     111           58 :   class loop *subloop;
     112           58 :   edge e;
     113           58 :   edge_iterator ei;
     114              : 
     115              :   /* Find its nodes.  */
     116           58 :   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
     117           58 :   n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
     118              : 
     119          467 :   for (i = 0; i < n; i++)
     120              :     {
     121              :       /* If the block was direct child of OLD loop it's now part
     122              :          of LOOP.  If it was outside OLD, then it moved into LOOP
     123              :          as well.  This avoids changing the loop father for BBs
     124              :          in inner loops of OLD.  */
     125          752 :       if (bbs[i]->loop_father == old
     126          694 :           || loop_depth (bbs[i]->loop_father) < loop_depth (old))
     127              :         {
     128          343 :           remove_bb_from_loops (bbs[i]);
     129          343 :           add_bb_to_loop (bbs[i], loop);
     130          343 :           continue;
     131              :         }
     132              : 
     133              :       /* If we find a direct subloop of OLD, move it to LOOP.  */
     134           66 :       subloop = bbs[i]->loop_father;
     135           66 :       if (loop_outer (subloop) == old && subloop->header == bbs[i])
     136              :         {
     137            0 :           flow_loop_tree_node_remove (subloop);
     138            0 :           flow_loop_tree_node_add (loop, subloop);
     139              :         }
     140              :     }
     141              : 
     142              :   /* Update the information about loop exit edges.  */
     143          467 :   for (i = 0; i < n; i++)
     144              :     {
     145          884 :       FOR_EACH_EDGE (e, ei, bbs[i]->succs)
     146              :         {
     147          475 :           rescan_loop_exit (e, false, false);
     148              :         }
     149              :     }
     150              : 
     151           58 :   loop->num_nodes = n;
     152              : 
     153           58 :   free (bbs);
     154           58 : }
     155              : 
     156              : /* BB is part of the outer loop of an unroll-and-jam situation.
     157              :    Check if any statements therein would prevent the transformation.  */
     158              : 
     159              : static bool
     160         8283 : bb_prevents_fusion_p (basic_block bb)
     161              : {
     162         8283 :   gimple_stmt_iterator gsi;
     163              :   /* BB is duplicated by outer unrolling and then all N-1 first copies
     164              :      move into the body of the fused inner loop.  If BB exits the outer loop
     165              :      the last copy still does so, and the first N-1 copies are cancelled
     166              :      by loop unrolling, so also after fusion it's the exit block.
     167              :      But there might be other reasons that prevent fusion:
     168              :        * stores or unknown side-effects prevent fusion
     169              :        * loads don't
     170              :        * computations into SSA names: these aren't problematic.  Their
     171              :          result will be unused on the exit edges of the first N-1 copies
     172              :          (those aren't taken after unrolling).  If they are used on the
     173              :          other edge (the one leading to the outer latch block) they are
     174              :          loop-carried (on the outer loop) and the Nth copy of BB will
     175              :          compute them again (i.e. the first N-1 copies will be dead).  */
     176        31376 :   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     177              :     {
     178        15090 :       gimple *g = gsi_stmt (gsi);
     179        25463 :       if (gimple_vdef (g) || gimple_has_side_effects (g))
     180          280 :         return true;
     181              :     }
     182              :   return false;
     183              : }
     184              : 
     185              : /* Given an inner loop LOOP (of some OUTER loop) determine if
     186              :    we can safely fuse copies of it (generated by outer unrolling).
     187              :    If so return true, otherwise return false.  */
     188              : 
     189              : static bool
     190        16254 : unroll_jam_possible_p (class loop *outer, class loop *loop)
     191              : {
     192        16254 :   basic_block *bbs;
     193        16254 :   int i, n;
     194        16254 :   class tree_niter_desc niter;
     195              : 
     196              :   /* When fusing the loops we skip the latch block
     197              :      of the first one, so it mustn't have any effects to
     198              :      preserve.  */
     199        16254 :   if (!empty_block_p (loop->latch))
     200              :     return false;
     201              : 
     202        15429 :   edge exit;
     203        15429 :   if (!(exit = single_exit (loop)))
     204              :     return false;
     205              : 
     206              :   /* We need a perfect nest.  Quick check for adjacent inner loops.  */
     207        11118 :   if (outer->inner != loop || loop->next)
     208              :     return false;
     209              : 
     210              :   /* Prevent head-controlled inner loops, that we usually have.
     211              :      The guard block would need to be accepted
     212              :      (invariant condition either entering or skipping the loop),
     213              :      without also accepting arbitrary control flow.  When unswitching
     214              :      ran before us (as with -O3) this won't be a problem because its
     215              :      outer loop unswitching will have moved out the invariant condition.
     216              : 
     217              :      If we do that we need to extend fuse_loops() to cope with this
     218              :      by threading through the (still invariant) copied condition
     219              :      between the two loop copies.  */
     220         6743 :   if (!dominated_by_p (CDI_DOMINATORS, outer->latch, loop->header))
     221              :     return false;
     222              : 
     223              :   /* The number of iterations of the inner loop must be loop invariant
     224              :      with respect to the outer loop.  */
     225         4359 :   if (!number_of_iterations_exit (loop, single_exit (loop), &niter,
     226              :                                  false, true)
     227         3993 :       || niter.cmp == ERROR_MARK
     228         3993 :       || !integer_zerop (niter.may_be_zero)
     229         8142 :       || !expr_invariant_in_loop_p (outer, niter.niter))
     230         1076 :     return false;
     231              : 
     232              :   /* If the inner loop produces any values that are used inside the
     233              :      outer loop (except the virtual op) then it can flow
     234              :      back (perhaps indirectly) into the inner loop.  This prevents
     235              :      fusion: without fusion the value at the last iteration is used,
     236              :      with fusion the value after the initial iteration is used.
     237              : 
     238              :      If all uses are outside the outer loop this doesn't prevent fusion;
     239              :      the value of the last iteration is still used (and the values from
     240              :      all intermediate iterations are dead).  */
     241         3283 :   gphi_iterator psi;
     242         3283 :   for (psi = gsi_start_phis (single_exit (loop)->dest);
     243         5836 :        !gsi_end_p (psi); gsi_next (&psi))
     244              :     {
     245         3037 :       imm_use_iterator imm_iter;
     246         3037 :       use_operand_p use_p;
     247         3037 :       tree op = gimple_phi_result (psi.phi ());
     248         6074 :       if (virtual_operand_p (op))
     249         2462 :         continue;
     250         1259 :       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
     251              :         {
     252          593 :           gimple *use_stmt = USE_STMT (use_p);
     253          593 :           if (!is_gimple_debug (use_stmt)
     254          593 :               && flow_bb_inside_loop_p (outer, gimple_bb (use_stmt)))
     255          484 :             return false;
     256          575 :         }
     257              :     }
     258              : 
     259              :   /* And check blocks belonging to just outer loop.  */
     260         2799 :   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
     261         2799 :   n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun));
     262              : 
     263        16445 :   for (i = 0; i < n; i++)
     264        13961 :     if (bbs[i]->loop_father == outer
     265        13961 :         && (bb_prevents_fusion_p (bbs[i])
     266              :             /* Outer loop exits must come after the inner loop, otherwise
     267              :                we'll put the outer loop exit into the fused inner loop.  */
     268         8003 :             || (loop_exits_from_bb_p (outer, bbs[i])
     269         2529 :                 && !dominated_by_p (CDI_DOMINATORS, bbs[i], exit->src))))
     270              :       break;
     271         2799 :   free (bbs);
     272         2799 :   if (i != n)
     273              :     return false;
     274              : 
     275              :   /* For now we can safely fuse copies of LOOP only if all
     276              :      loop carried variables are inductions (or the virtual op).
     277              : 
     278              :      We could handle reductions as well (the initial value in the second
     279              :      body would be the after-iter value of the first body) if it's over
     280              :      an associative and commutative operation.  We wouldn't
     281              :      be able to handle unknown cycles.  */
     282         2484 :   bool inner_vdef = false;
     283         6388 :   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
     284              :     {
     285         4300 :       affine_iv iv;
     286         4300 :       tree op = gimple_phi_result (psi.phi ());
     287              : 
     288         8600 :       if (virtual_operand_p (op))
     289              :         {
     290         1737 :           inner_vdef = true;
     291         1737 :           continue;
     292              :         }
     293         2563 :       if (!simple_iv (loop, loop, op, &iv, true))
     294          396 :         return false;
     295              :       /* The inductions must be regular, loop invariant step and initial
     296              :          value.  */
     297         2537 :       if (!expr_invariant_in_loop_p (outer, iv.step)
     298         2537 :           || !expr_invariant_in_loop_p (outer, iv.base))
     299          370 :         return false;
     300              :       /* XXX With more effort we could also be able to deal with inductions
     301              :          where the initial value is loop variant but a simple IV in the
     302              :          outer loop.  The initial value for the second body would be
     303              :          the original initial value plus iv.base.step.  The next value
     304              :          for the fused loop would be the original next value of the first
     305              :          copy, _not_ the next value of the second body.  */
     306              :     }
     307              : 
     308              :   /* When there's no inner loop virtual PHI IV we cannot handle the update
     309              :      required to the inner loop if that doesn't already have one.  See
     310              :      PR117113.  */
     311         2088 :   if (!inner_vdef && get_virtual_phi (outer->header))
     312              :     return false;
     313              : 
     314              :   return true;
     315        16254 : }
     316              : 
     317              : /* Fuse LOOP with all further neighbors.  The loops are expected to
     318              :    be in appropriate form.  */
     319              : 
     320              : static void
     321           58 : fuse_loops (class loop *loop)
     322              : {
     323           58 :   class loop *next = loop->next;
     324              : 
     325          116 :   while (next)
     326              :     {
     327           58 :       edge e;
     328              : 
     329           58 :       remove_branch (single_pred_edge (loop->latch));
     330              :       /* Make delete_basic_block not fiddle with the loop structure.  */
     331           58 :       basic_block oldlatch = loop->latch;
     332           58 :       loop->latch = NULL;
     333           58 :       delete_basic_block (oldlatch);
     334           58 :       e = redirect_edge_and_branch (loop_latch_edge (next),
     335              :                                     loop->header);
     336           58 :       loop->latch = e->src;
     337           58 :       flush_pending_stmts (e);
     338              : 
     339           58 :       gcc_assert (EDGE_COUNT (next->header->preds) == 1);
     340              : 
     341              :       /* The PHI nodes of the second body (single-argument now)
     342              :          need adjustments to use the right values: either directly
     343              :          the value of the corresponding PHI in the first copy or
     344              :          the one leaving the first body which unrolling did for us.
     345              : 
     346              :          See also unroll_jam_possible_p() for further possibilities.  */
     347           58 :       gphi_iterator psi_first, psi_second;
     348           58 :       e = single_pred_edge (next->header);
     349           58 :       for (psi_first = gsi_start_phis (loop->header),
     350           58 :            psi_second = gsi_start_phis (next->header);
     351          175 :            !gsi_end_p (psi_first);
     352          117 :            gsi_next (&psi_first), gsi_next (&psi_second))
     353              :         {
     354          117 :           gphi *phi_first = psi_first.phi ();
     355          117 :           gphi *phi_second = psi_second.phi ();
     356          117 :           tree firstop = gimple_phi_result (phi_first);
     357              :           /* The virtual operand is correct already as it's
     358              :              always live at exit, hence has a LCSSA node and outer
     359              :              loop unrolling updated SSA form.  */
     360          234 :           if (virtual_operand_p (firstop))
     361           58 :             continue;
     362              : 
     363              :           /* Due to unroll_jam_possible_p() we know that this is
     364              :              an induction.  The second body goes over the same
     365              :              iteration space.  */
     366           59 :           add_phi_arg (phi_second, firstop, e,
     367              :                        gimple_location (phi_first));
     368              :         }
     369           58 :       gcc_assert (gsi_end_p (psi_second));
     370              : 
     371           58 :       merge_loop_tree (loop, next);
     372           58 :       gcc_assert (!next->num_nodes);
     373           58 :       class loop *ln = next->next;
     374           58 :       delete_loop (next);
     375           58 :       next = ln;
     376              :     }
     377           58 : }
     378              : 
     379              : /* Return true if any of the access functions for dataref A
     380              :    isn't invariant with respect to loop LOOP_NEST.  */
     381              : static bool
     382         3738 : any_access_function_variant_p (const struct data_reference *a,
     383              :                                const class loop *loop_nest)
     384              : {
     385         3738 :   vec<tree> fns = DR_ACCESS_FNS (a);
     386              : 
     387        11781 :   for (tree t : fns)
     388         4291 :     if (!evolution_function_is_invariant_p (t, loop_nest->num))
     389              :       return true;
     390              : 
     391              :   return false;
     392              : }
     393              : 
     394              : /* Returns true if the distance in DDR can be determined and adjusts
     395              :    the unroll factor in *UNROLL to make unrolling valid for that distance.
     396              :    Otherwise return false.  DDR is with respect to the outer loop of INNER.
     397              : 
     398              :    If this data dep can lead to a removed memory reference, increment
     399              :    *REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor
     400              :    for this to happen.  */
     401              : 
     402              : static bool
     403         3719 : adjust_unroll_factor (class loop *inner, struct data_dependence_relation *ddr,
     404              :                       unsigned *unroll, unsigned *profit_unroll,
     405              :                       unsigned *removed)
     406              : {
     407         3719 :   bool ret = false;
     408         3719 :   if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
     409              :     {
     410         3719 :       if (DDR_NUM_DIST_VECTS (ddr) == 0)
     411         3719 :         return false;
     412              :       unsigned i;
     413              :       lambda_vector dist_v;
     414         3769 :       FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
     415              :         {
     416              :           /* A distance (a,b) is at worst transformed into (a/N,b) by the
     417              :              unrolling (factor N), so the transformation is valid if
     418              :              a >= N, or b > 0, or b is zero and a > 0.  Otherwise the unroll
     419              :              factor needs to be limited so that the first condition holds.
     420              :              That may limit the factor down to zero in the worst case.  */
     421         2450 :           lambda_int dist = dist_v[0];
     422         2450 :           if (dist < 0)
     423            0 :             gcc_unreachable ();
     424         2450 :           else if (dist >= (lambda_int)*unroll)
     425              :             ;
     426         7152 :           else if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
     427              :             {
     428              :               /* We have (a,0) with a < N, so this will be transformed into
     429              :                  (0,0) after unrolling by N.  This might potentially be a
     430              :                  problem, if it's not a read-read dependency.  */
     431         2320 :               if (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))
     432              :                 ;
     433              :               else
     434              :                 {
     435              :                   /* So, at least one is a write, and we might reduce the
     436              :                      distance vector to (0,0).  This is still no problem
     437              :                      if both data-refs are affine with respect to the inner
     438              :                      loops.  But if one of them is invariant with respect
     439              :                      to an inner loop our reordering implicit in loop fusion
     440              :                      corrupts the program, as our data dependences don't
     441              :                      capture this.  E.g. for:
     442              :                        for (0 <= i < n)
     443              :                          for (0 <= j < m)
     444              :                            a[i][0] = a[i+1][0] + 2;    // (1)
     445              :                            b[i][j] = b[i+1][j] + 2;    // (2)
     446              :                      the distance vector for both statements is (-1,0),
     447              :                      but exchanging the order for (2) is okay, while
     448              :                      for (1) it is not.  To see this, write out the original
     449              :                      accesses (assume m is 2):
     450              :                        a i j original
     451              :                        0 0 0 r a[1][0] b[1][0]
     452              :                        1 0 0 w a[0][0] b[0][0]
     453              :                        2 0 1 r a[1][0] b[1][1]
     454              :                        3 0 1 w a[0][0] b[0][1]
     455              :                        4 1 0 r a[2][0] b[2][0]
     456              :                        5 1 0 w a[1][0] b[1][0]
     457              :                      after unroll-by-2 and fusion the accesses are done in
     458              :                      this order (from column a): 0,1, 4,5, 2,3, i.e. this:
     459              :                        a i j transformed
     460              :                        0 0 0 r a[1][0] b[1][0]
     461              :                        1 0 0 w a[0][0] b[0][0]
     462              :                        4 1 0 r a[2][0] b[2][0]
     463              :                        5 1 0 w a[1][0] b[1][0]
     464              :                        2 0 1 r a[1][0] b[1][1]
     465              :                        3 0 1 w a[0][0] b[0][1]
     466              :                      Note how access 2 accesses the same element as access 5
     467              :                      for array 'a' but not for array 'b'.  */
     468         1876 :                   if (any_access_function_variant_p (DDR_A (ddr), inner)
     469         1876 :                       && any_access_function_variant_p (DDR_B (ddr), inner))
     470              :                     ;
     471              :                   else
     472              :                     /* And if any dataref of this pair is invariant with
     473              :                        respect to the inner loop, we have no chance than
     474              :                        to reduce the unroll factor.  */
     475           14 :                     *unroll = dist;
     476              :                 }
     477              :             }
     478         2496 :           else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
     479              :             ;
     480              :           else
     481           46 :             *unroll = dist;
     482              : 
     483              :           /* With a distance (a,0) it's always profitable to unroll-and-jam
     484              :              (by a+1), because one memory reference will go away.  With
     485              :              (a,b) and b != 0 that's less clear.  We will increase the
     486              :              number of streams without lowering the number of mem refs.
     487              :              So for now only handle the first situation.  */
     488         7350 :           if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
     489              :             {
     490         2336 :               *profit_unroll = MAX (*profit_unroll, (unsigned)dist + 1);
     491         2336 :               (*removed)++;
     492              :             }
     493              : 
     494         2450 :           ret = true;
     495              :         }
     496              :     }
     497              :   return ret;
     498              : }
     499              : 
     500              : /* Main entry point for the unroll-and-jam transformation
     501              :    described above.  */
     502              : 
     503              : static unsigned int
     504        28047 : tree_loop_unroll_and_jam (void)
     505              : {
     506        28047 :   unsigned int todo = 0;
     507              : 
     508        28047 :   gcc_assert (scev_initialized_p ());
     509              : 
     510              :   /* Go through all innermost loops.  */
     511       153520 :   for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
     512              :     {
     513        69379 :       class loop *outer = loop_outer (loop);
     514              : 
     515        69379 :       if (loop_depth (loop) < 2
     516        69379 :           || optimize_loop_nest_for_size_p (outer))
     517        68086 :         continue;
     518              : 
     519        16254 :       if (!unroll_jam_possible_p (outer, loop))
     520        14196 :         continue;
     521              : 
     522         2058 :       vec<data_reference_p> datarefs = vNULL;
     523         2058 :       vec<ddr_p> dependences = vNULL;
     524         2058 :       unsigned unroll_factor, profit_unroll, removed;
     525         2058 :       class tree_niter_desc desc;
     526         2058 :       bool unroll = false;
     527              : 
     528         2058 :       auto_vec<loop_p, 3> loop_nest;
     529         2058 :       if (!compute_data_dependences_for_loop (outer, true, &loop_nest,
     530              :                                               &datarefs, &dependences))
     531              :         {
     532          345 :           if (dump_file && (dump_flags & TDF_DETAILS))
     533            0 :             fprintf (dump_file, "Cannot analyze data dependencies\n");
     534          345 :           free_data_refs (datarefs);
     535          345 :           free_dependence_relations (dependences);
     536          345 :           continue;
     537              :         }
     538         1713 :       if (!datarefs.length ())
     539          420 :         continue;
     540              : 
     541         1293 :       if (dump_file && (dump_flags & TDF_DETAILS))
     542           14 :         dump_data_dependence_relations (dump_file, dependences);
     543              : 
     544         1293 :       unroll_factor = (unsigned)-1;
     545         1293 :       profit_unroll = 1;
     546         1293 :       removed = 0;
     547              : 
     548              :       /* Check all dependencies.  */
     549         1293 :       unsigned i;
     550         1293 :       struct data_dependence_relation *ddr;
     551        40693 :       FOR_EACH_VEC_ELT (dependences, i, ddr)
     552              :         {
     553        39792 :           struct data_reference *dra, *drb;
     554              : 
     555              :           /* If the refs are independend there's nothing to do.  */
     556        39792 :           if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
     557        32086 :             continue;
     558              : 
     559         7706 :           dra = DDR_A (ddr);
     560         7706 :           drb = DDR_B (ddr);
     561              : 
     562              :           /* Nothing interesting for the self dependencies, except for WAW if
     563              :              the access function is not affine or constant because we may end
     564              :              up reordering writes to the same location.  */
     565         7706 :           if (dra == drb)
     566              :             {
     567         7802 :               if (DR_IS_WRITE (dra)
     568         2122 :                   && !DR_ACCESS_FNS (dra).is_empty ()
     569         6094 :                   && DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
     570              :                 {
     571          172 :                   unroll_factor = 0;
     572          172 :                   break;
     573              :                 }
     574              :               else
     575         3815 :                 continue;
     576              :             }
     577              : 
     578              :           /* Now check the distance vector, for determining a sensible
     579              :              outer unroll factor, and for validity of merging the inner
     580              :              loop copies.  */
     581         3719 :           if (!adjust_unroll_factor (loop, ddr, &unroll_factor, &profit_unroll,
     582              :                                      &removed))
     583              :             {
     584              :               /* Couldn't get the distance vector.  For two reads that's
     585              :                  harmless (we assume we should unroll).  For at least
     586              :                  one write this means we can't check the dependence direction
     587              :                  and hence can't determine safety.  */
     588              : 
     589         2400 :               if (DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
     590              :                 {
     591          220 :                   unroll_factor = 0;
     592          220 :                   break;
     593              :                 }
     594              :             }
     595              :         }
     596              : 
     597              :       /* We regard a user-specified minimum percentage of zero as a request
     598              :          to ignore all profitability concerns and apply the transformation
     599              :          always.  */
     600         1293 :       if (!param_unroll_jam_min_percent)
     601           27 :         profit_unroll = MAX(2, profit_unroll);
     602         1266 :       else if (removed * 100 / datarefs.length ()
     603         1266 :           < (unsigned)param_unroll_jam_min_percent)
     604         1006 :         profit_unroll = 1;
     605         1293 :       if (unroll_factor > profit_unroll)
     606          853 :         unroll_factor = profit_unroll;
     607         1293 :       if (unroll_factor > (unsigned)param_unroll_jam_max_unroll)
     608            0 :         unroll_factor = param_unroll_jam_max_unroll;
     609         2644 :       unroll = (unroll_factor > 1
     610         1293 :                 && can_unroll_loop_p (outer, unroll_factor, &desc));
     611              : 
     612           58 :       if (unroll)
     613              :         {
     614           58 :           if (dump_enabled_p ())
     615           10 :             dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
     616           10 :                              find_loop_location (outer),
     617              :                              "applying unroll and jam with factor %d\n",
     618              :                              unroll_factor);
     619           58 :           initialize_original_copy_tables ();
     620           58 :           tree_unroll_loop (outer, unroll_factor, &desc);
     621           58 :           free_original_copy_tables ();
     622           58 :           fuse_loops (outer->inner);
     623           58 :           todo |= TODO_cleanup_cfg;
     624              : 
     625           58 :           auto_bitmap exit_bbs;
     626           58 :           bitmap_set_bit (exit_bbs, single_exit (outer)->dest->index);
     627           58 :           todo |= do_rpo_vn (cfun, loop_preheader_edge (outer), exit_bbs);
     628           58 :         }
     629              : 
     630         1293 :       loop_nest.release ();
     631         1293 :       free_dependence_relations (dependences);
     632         1293 :       free_data_refs (datarefs);
     633         2058 :     }
     634              : 
     635        28047 :   if (todo)
     636              :     {
     637           52 :       free_dominance_info (CDI_DOMINATORS);
     638              :       /* We need to cleanup the CFG first since otherwise SSA form can
     639              :          be not up-to-date from the VN run.  */
     640           52 :       if (todo & TODO_cleanup_cfg)
     641              :         {
     642           52 :           cleanup_tree_cfg ();
     643           52 :           todo &= ~TODO_cleanup_cfg;
     644           52 :           todo |= loop_invariant_motion_in_fun (cfun, false);
     645              :         }
     646           52 :       rewrite_into_loop_closed_ssa (NULL, 0);
     647           52 :       scev_reset ();
     648              :     }
     649        28047 :   return todo;
     650              : }
     651              : 
     652              : /* Pass boilerplate */
     653              : 
     654              : namespace {
     655              : 
     656              : const pass_data pass_data_loop_jam =
     657              : {
     658              :   GIMPLE_PASS, /* type */
     659              :   "unrolljam", /* name */
     660              :   OPTGROUP_LOOP, /* optinfo_flags */
     661              :   TV_LOOP_JAM, /* tv_id */
     662              :   PROP_cfg, /* properties_required */
     663              :   0, /* properties_provided */
     664              :   0, /* properties_destroyed */
     665              :   0, /* todo_flags_start */
     666              :   0, /* todo_flags_finish */
     667              : };
     668              : 
     669              : class pass_loop_jam : public gimple_opt_pass
     670              : {
     671              : public:
     672       285722 :   pass_loop_jam (gcc::context *ctxt)
     673       571444 :     : gimple_opt_pass (pass_data_loop_jam, ctxt)
     674              :   {}
     675              : 
     676              :   /* opt_pass methods: */
     677       241458 :   bool gate (function *) final override { return flag_unroll_jam != 0; }
     678              :   unsigned int execute (function *) final override;
     679              : 
     680              : };
     681              : 
     682              : unsigned int
     683        28047 : pass_loop_jam::execute (function *fun)
     684              : {
     685        56094 :   if (number_of_loops (fun) <= 1)
     686              :     return 0;
     687              : 
     688        28047 :   return tree_loop_unroll_and_jam ();
     689              : }
     690              : 
     691              : }
     692              : 
     693              : gimple_opt_pass *
     694       285722 : make_pass_loop_jam (gcc::context *ctxt)
     695              : {
     696       285722 :   return new pass_loop_jam (ctxt);
     697              : }

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.