LCOV - code coverage report
Current view: top level - gcc - gimple-loop-interchange.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 96.8 % 864 836
Test Date: 2026-02-28 14:20:25 Functions: 97.3 % 37 36
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Loop interchange.
       2              :    Copyright (C) 2017-2026 Free Software Foundation, Inc.
       3              :    Contributed by ARM Ltd.
       4              : 
       5              : This file is part of GCC.
       6              : 
       7              : GCC is free software; you can redistribute it and/or modify it
       8              : under the terms of the GNU General Public License as published by the
       9              : Free Software Foundation; either version 3, or (at your option) any
      10              : later version.
      11              : 
      12              : GCC is distributed in the hope that it will be useful, but WITHOUT
      13              : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15              : for more details.
      16              : 
      17              : You should have received a copy of the GNU General Public License
      18              : along with GCC; see the file COPYING3.  If not see
      19              : <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "is-a.h"
      26              : #include "tree.h"
      27              : #include "gimple.h"
      28              : #include "tree-pass.h"
      29              : #include "ssa.h"
      30              : #include "gimple-pretty-print.h"
      31              : #include "fold-const.h"
      32              : #include "gimplify.h"
      33              : #include "gimple-iterator.h"
      34              : #include "gimplify-me.h"
      35              : #include "cfgloop.h"
      36              : #include "tree-ssa.h"
      37              : #include "tree-scalar-evolution.h"
      38              : #include "tree-ssa-loop-manip.h"
      39              : #include "tree-ssa-loop-niter.h"
      40              : #include "tree-ssa-loop-ivopts.h"
      41              : #include "tree-ssa-dce.h"
      42              : #include "tree-data-ref.h"
      43              : #include "tree-vectorizer.h"
      44              : 
      45              : /* This pass performs loop interchange: for example, the loop nest
      46              : 
      47              :    for (int j = 0; j < N; j++)
      48              :      for (int k = 0; k < N; k++)
      49              :        for (int i = 0; i < N; i++)
      50              :          c[i][j] = c[i][j] + a[i][k]*b[k][j];
      51              : 
      52              :    is transformed to
      53              : 
      54              :    for (int i = 0; i < N; i++)
      55              :      for (int j = 0; j < N; j++)
      56              :        for (int k = 0; k < N; k++)
      57              :          c[i][j] = c[i][j] + a[i][k]*b[k][j];
      58              : 
      59              :    This pass implements loop interchange in the following steps:
      60              : 
      61              :      1) Find perfect loop nest for each innermost loop and compute data
      62              :         dependence relations for it.  For above example, loop nest is
      63              :         <loop_j, loop_k, loop_i>.
      64              :      2) From innermost to outermost loop, this pass tries to interchange
      65              :         each loop pair.  For above case, it firstly tries to interchange
      66              :         <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
      67              :         Then it tries to interchange <loop_j, loop_i> and loop nest becomes
      68              :         <loop_i, loop_j, loop_k>.  The overall effect is to move innermost
      69              :         loop to the outermost position.  For loop pair <loop_i, loop_j>
      70              :         to be interchanged, we:
      71              :      3) Check if data dependence relations are valid for loop interchange.
      72              :      4) Check if both loops can be interchanged in terms of transformation.
      73              :      5) Check if interchanging the two loops is profitable.
      74              :      6) Interchange the two loops by mapping induction variables.
      75              : 
      76              :    This pass also handles reductions in loop nest.  So far we only support
      77              :    simple reduction of inner loop and double reduction of the loop nest.  */
      78              : 
      79              : /* Maximum number of stmts in each loop that should be interchanged.  */
      80              : #define MAX_NUM_STMT    (param_loop_interchange_max_num_stmts)
      81              : /* Maximum number of data references in loop nest.  */
      82              : #define MAX_DATAREFS    (param_loop_max_datarefs_for_datadeps)
      83              : 
      84              : /* Comparison ratio of access stride between inner/outer loops to be
      85              :    interchanged.  This is the minimum stride ratio for loop interchange
      86              :    to be profitable.  */
      87              : #define OUTER_STRIDE_RATIO  (param_loop_interchange_stride_ratio)
      88              : /* The same as above, but we require higher ratio for interchanging the
      89              :    innermost two loops.  */
      90              : #define INNER_STRIDE_RATIO  ((OUTER_STRIDE_RATIO) + 1)
      91              : 
      92              : /* Comparison ratio of stmt cost between inner/outer loops.  Loops won't
      93              :    be interchanged if outer loop has too many stmts.  */
      94              : #define STMT_COST_RATIO     (3)
      95              : 
      96              : /* Vector of strides that DR accesses in each level loop of a loop nest.  */
      97              : #define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
      98              : 
      99              : /* Structure recording loop induction variable.  */
     100              : typedef struct induction
     101              : {
     102              :   /* IV itself.  */
     103              :   tree var;
     104              :   /* IV's initializing value, which is the init arg of the IV PHI node.  */
     105              :   tree init_val;
     106              :   /* IV's initializing expr, which is (the expanded result of) init_val.  */
     107              :   tree init_expr;
     108              :   /* IV's step.  */
     109              :   tree step;
     110              : } *induction_p;
     111              : 
     112              : /* Enum type for loop reduction variable.  */
     113              : enum reduction_type
     114              : {
     115              :   UNKNOWN_RTYPE = 0,
     116              :   SIMPLE_RTYPE,
     117              :   DOUBLE_RTYPE
     118              : };
     119              : 
     120              : /* Structure recording loop reduction variable.  */
     121              : typedef struct reduction
     122              : {
     123              :   /* Reduction itself.  */
     124              :   tree var;
     125              :   /* PHI node defining reduction variable.  */
     126              :   gphi *phi;
     127              :   /* Init and next variables of the reduction.  */
     128              :   tree init;
     129              :   tree next;
     130              :   /* Lcssa PHI node if reduction is used outside of its definition loop.  */
     131              :   gphi *lcssa_phi;
     132              :   /* Stmts defining init and next.  */
     133              :   gimple *producer;
     134              :   gimple *consumer;
     135              :   /* If init is loaded from memory, this is the loading memory reference.  */
     136              :   tree init_ref;
     137              :   /* If reduction is finally stored to memory, this is the stored memory
     138              :      reference.  */
     139              :   tree fini_ref;
     140              :   enum reduction_type type;
     141              : } *reduction_p;
     142              : 
     143              : 
     144              : /* Dump reduction RE.  */
     145              : 
     146              : static void
     147           10 : dump_reduction (reduction_p re)
     148              : {
     149           10 :   if (re->type == SIMPLE_RTYPE)
     150            6 :     fprintf (dump_file, "  Simple reduction:  ");
     151            4 :   else if (re->type == DOUBLE_RTYPE)
     152            2 :     fprintf (dump_file, "  Double reduction:  ");
     153              :   else
     154            2 :     fprintf (dump_file, "  Unknown reduction:  ");
     155              : 
     156           10 :   print_gimple_stmt (dump_file, re->phi, 0);
     157           10 : }
     158              : 
     159              : /* Dump LOOP's induction IV.  */
     160              : static void
     161           51 : dump_induction (class loop *loop, induction_p iv)
     162              : {
     163           51 :   fprintf (dump_file, "  Induction:  ");
     164           51 :   print_generic_expr (dump_file, iv->var, TDF_SLIM);
     165           51 :   fprintf (dump_file, " = {");
     166           51 :   print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
     167           51 :   fprintf (dump_file, ", ");
     168           51 :   print_generic_expr (dump_file, iv->step, TDF_SLIM);
     169           51 :   fprintf (dump_file, "}_%d\n", loop->num);
     170           51 : }
     171              : 
     172              : /* Loop candidate for interchange.  */
     173              : 
     174              : class loop_cand
     175              : {
     176              : public:
     177              :   loop_cand (class loop *, class loop *);
     178              :   ~loop_cand ();
     179              : 
     180              :   reduction_p find_reduction_by_stmt (gimple *);
     181              :   void classify_simple_reduction (reduction_p);
     182              :   bool analyze_iloop_reduction_var (tree);
     183              :   bool analyze_oloop_reduction_var (loop_cand *, tree);
     184              :   bool analyze_induction_var (tree, tree);
     185              :   bool analyze_carried_vars (loop_cand *);
     186              :   bool analyze_lcssa_phis (void);
     187              :   bool can_interchange_p (loop_cand *);
     188              :   void undo_simple_reduction (reduction_p, bitmap);
     189              : 
     190              :   /* The loop itself.  */
     191              :   class loop *m_loop;
     192              :   /* The outer loop for interchange.  It equals to loop if this loop cand
     193              :      itself represents the outer loop.  */
     194              :   class loop *m_outer;
     195              :   /* Vector of induction variables in loop.  */
     196              :   vec<induction_p> m_inductions;
     197              :   /* Vector of reduction variables in loop.  */
     198              :   vec<reduction_p> m_reductions;
     199              :   /* Lcssa PHI nodes of this loop.  */
     200              :   vec<gphi *> m_lcssa_nodes;
     201              :   /* Single exit edge of this loop.  */
     202              :   edge m_exit;
     203              :   /* Basic blocks of this loop.  */
     204              :   basic_block *m_bbs;
     205              :   /* Number of stmts of this loop.  Inner loops' stmts are not included.  */
     206              :   int m_num_stmts;
     207              :   /* Number of constant initialized simple reduction.  */
     208              :   int m_const_init_reduc;
     209              : };
     210              : 
     211              : /* Constructor.  */
     212              : 
     213          176 : loop_cand::loop_cand (class loop *loop, class loop *outer)
     214          176 :   : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
     215          176 :     m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
     216              : {
     217          176 :     m_inductions.create (3);
     218          176 :     m_reductions.create (3);
     219          176 :     m_lcssa_nodes.create (3);
     220          176 : }
     221              : 
     222              : /* Destructor.  */
     223              : 
     224          176 : loop_cand::~loop_cand ()
     225              : {
     226          176 :   induction_p iv;
     227          482 :   for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
     228          306 :     free (iv);
     229              : 
     230              :   reduction_p re;
     231          211 :   for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
     232           35 :     free (re);
     233              : 
     234          176 :   m_inductions.release ();
     235          176 :   m_reductions.release ();
     236          176 :   m_lcssa_nodes.release ();
     237          176 :   free (m_bbs);
     238          176 : }
     239              : 
     240              : /* Return single use stmt of VAR in LOOP, otherwise return NULL.  */
     241              : 
     242              : static gimple *
     243           13 : single_use_in_loop (tree var, class loop *loop)
     244              : {
     245           13 :   gimple *stmt, *res = NULL;
     246           13 :   use_operand_p use_p;
     247           13 :   imm_use_iterator iterator;
     248              : 
     249           39 :   FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
     250              :     {
     251           13 :       stmt = USE_STMT (use_p);
     252           13 :       if (is_gimple_debug (stmt))
     253            0 :         continue;
     254              : 
     255           13 :       if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
     256            0 :         continue;
     257              : 
     258           13 :       if (res)
     259            0 :         return NULL;
     260              : 
     261              :       res = stmt;
     262            0 :     }
     263           13 :   return res;
     264              : }
     265              : 
     266              : /* Return true if E is unsupported in loop interchange, i.e, E is a complex
     267              :    edge or part of irreducible loop.  */
     268              : 
     269              : static inline bool
     270       126013 : unsupported_edge (edge e)
     271              : {
     272       126013 :   return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
     273              : }
     274              : 
     275              : /* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
     276              :    stmt.  */
     277              : 
     278              : reduction_p
     279           53 : loop_cand::find_reduction_by_stmt (gimple *stmt)
     280              : {
     281           53 :   gphi *phi = dyn_cast <gphi *> (stmt);
     282           53 :   reduction_p re;
     283              : 
     284           53 :   for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
     285           43 :     if ((phi != NULL && phi == re->lcssa_phi)
     286           10 :         || (stmt == re->producer || stmt == re->consumer))
     287              :       return re;
     288              : 
     289              :   return NULL;
     290              : }
     291              : 
     292              : /* Return true if current loop_cand be interchanged.  ILOOP is not NULL if
     293              :    current loop_cand is outer loop in loop nest.  */
     294              : 
     295              : bool
     296          141 : loop_cand::can_interchange_p (loop_cand *iloop)
     297              : {
     298              :   /* For now we only support at most one reduction.  */
     299          141 :   unsigned allowed_reduction_num = 1;
     300              : 
     301              :   /* Only support reduction if the loop nest to be interchanged is the
     302              :      innermostin two loops.  */
     303          141 :   if ((iloop == NULL && m_loop->inner != NULL)
     304           70 :        || (iloop != NULL && iloop->m_loop->inner != NULL))
     305          141 :     allowed_reduction_num = 0;
     306              : 
     307          141 :   if (m_reductions.length () > allowed_reduction_num
     308          141 :       || (m_reductions.length () == 1
     309           30 :           && m_reductions[0]->type == UNKNOWN_RTYPE))
     310              :     return false;
     311              : 
     312              :   /* Only support lcssa PHI node which is for reduction.  */
     313          282 :   if (m_lcssa_nodes.length () > allowed_reduction_num)
     314              :     return false;
     315              : 
     316              :   /* Check if basic block has any unsupported operation.  Note basic blocks
     317              :      of inner loops are not checked here.  */
     318          754 :   for (unsigned i = 0; i < m_loop->num_nodes; i++)
     319              :     {
     320          615 :       basic_block bb = m_bbs[i];
     321          615 :       gphi_iterator psi;
     322          615 :       gimple_stmt_iterator gsi;
     323              : 
     324              :       /* Skip basic blocks of inner loops.  */
     325          615 :       if (bb->loop_father != m_loop)
     326          543 :        continue;
     327              : 
     328         2323 :       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     329              :         {
     330         1586 :           gimple *stmt = gsi_stmt (gsi);
     331         1586 :           if (is_gimple_debug (stmt))
     332          358 :             continue;
     333              : 
     334         1228 :           if (gimple_has_side_effects (stmt))
     335            2 :             return false;
     336              : 
     337         1228 :           m_num_stmts++;
     338         1228 :           if (gcall *call = dyn_cast <gcall *> (stmt))
     339              :             {
     340              :               /* In basic block of outer loop, the call should be cheap since
     341              :                  it will be moved to inner loop.  */
     342            1 :               if (iloop != NULL
     343            1 :                   && !gimple_inexpensive_call_p (call))
     344              :                 return false;
     345            1 :               continue;
     346              :             }
     347              : 
     348         1496 :           if (!iloop || !gimple_vuse (stmt))
     349         1210 :             continue;
     350              : 
     351              :           /* Support stmt accessing memory in outer loop only if it is for
     352              :              inner loop's reduction.  */
     353           17 :           if (iloop->find_reduction_by_stmt (stmt))
     354           10 :             continue;
     355              : 
     356            7 :           tree lhs;
     357              :           /* Support loop invariant memory reference if it's only used once by
     358              :              inner loop.  */
     359              :           /* ???  How's this checking for invariantness?  */
     360            7 :           if (gimple_assign_single_p (stmt)
     361            7 :               && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
     362            7 :               && TREE_CODE (lhs) == SSA_NAME
     363           13 :               && single_use_in_loop (lhs, iloop->m_loop))
     364            6 :             continue;
     365              : 
     366              :           return false;
     367              :         }
     368              :       /* Check if loop has too many stmts.  */
     369          368 :       if (m_num_stmts > MAX_NUM_STMT)
     370              :         return false;
     371              : 
     372              :       /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
     373              :          loop's header, or PHI nodes in dest bb of inner loop's exit edge.  */
     374          367 :       if (!iloop || bb == m_loop->header
     375          139 :           || bb == iloop->m_exit->dest)
     376          297 :         continue;
     377              : 
     378              :       /* Don't allow any other PHI nodes.  */
     379           70 :       for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
     380            2 :         if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
     381              :           return false;
     382              :     }
     383              : 
     384              :   return true;
     385              : }
     386              : 
     387              : /* Programmers and optimizers (like loop store motion) may optimize code:
     388              : 
     389              :      for (int i = 0; i < N; i++)
     390              :        for (int j = 0; j < N; j++)
     391              :          a[i] += b[j][i] * c[j][i];
     392              : 
     393              :    into reduction:
     394              : 
     395              :      for (int i = 0; i < N; i++)
     396              :        {
     397              :          // producer.  Note sum can be intitialized to a constant.
     398              :          int sum = a[i];
     399              :          for (int j = 0; j < N; j++)
     400              :            {
     401              :              sum += b[j][i] * c[j][i];
     402              :            }
     403              :          // consumer.
     404              :          a[i] = sum;
     405              :        }
     406              : 
     407              :    The result code can't be interchanged without undoing the optimization.
     408              :    This function classifies this kind reduction and records information so
     409              :    that we can undo the store motion during interchange.  */
     410              : 
     411              : void
     412           21 : loop_cand::classify_simple_reduction (reduction_p re)
     413              : {
     414           21 :   gimple *producer, *consumer;
     415              : 
     416              :   /* Check init variable of reduction and how it is initialized.  */
     417           21 :   if (TREE_CODE (re->init) == SSA_NAME)
     418              :     {
     419           17 :       producer = SSA_NAME_DEF_STMT (re->init);
     420           17 :       re->producer = producer;
     421           17 :       basic_block bb = gimple_bb (producer);
     422           17 :       if (!bb || bb->loop_father != m_outer)
     423              :         return;
     424              : 
     425           17 :       if (!gimple_assign_load_p (producer))
     426              :         return;
     427              : 
     428            3 :       re->init_ref = gimple_assign_rhs1 (producer);
     429              :     }
     430            4 :   else if (CONSTANT_CLASS_P (re->init))
     431            4 :     m_const_init_reduc++;
     432              :   else
     433              :     return;
     434              : 
     435              :   /* Check how reduction variable is used.  */
     436            7 :   consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
     437            7 :   if (!consumer
     438            7 :       || !gimple_store_p (consumer))
     439            0 :     return;
     440              : 
     441            7 :   re->fini_ref = gimple_get_lhs (consumer);
     442            7 :   re->consumer = consumer;
     443              : 
     444              :   /* Simple reduction with constant initializer.  */
     445            7 :   if (!re->init_ref)
     446              :     {
     447            4 :       gcc_assert (CONSTANT_CLASS_P (re->init));
     448            4 :       re->init_ref = unshare_expr (re->fini_ref);
     449              :     }
     450              : 
     451              :   /* Require memory references in producer and consumer are the same so
     452              :      that we can undo reduction during interchange.  */
     453            7 :   if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
     454              :     return;
     455              : 
     456            7 :   re->type = SIMPLE_RTYPE;
     457              : }
     458              : 
     459              : /* Analyze reduction variable VAR for inner loop of the loop nest to be
     460              :    interchanged.  Return true if analysis succeeds.  */
     461              : 
     462              : bool
     463           31 : loop_cand::analyze_iloop_reduction_var (tree var)
     464              : {
     465           31 :   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
     466           31 :   gphi *lcssa_phi = NULL, *use_phi;
     467           31 :   tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
     468           31 :   tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
     469           31 :   reduction_p re;
     470           31 :   gimple *stmt, *next_def, *single_use = NULL;
     471           31 :   use_operand_p use_p;
     472           31 :   imm_use_iterator iterator;
     473              : 
     474           31 :   if (TREE_CODE (next) != SSA_NAME)
     475              :     return false;
     476              : 
     477           31 :   next_def = SSA_NAME_DEF_STMT (next);
     478           31 :   basic_block bb = gimple_bb (next_def);
     479           31 :   if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
     480            0 :     return false;
     481              : 
     482              :   /* In restricted reduction, the var is (and must be) used in defining
     483              :      the updated var.  The process can be depicted as below:
     484              : 
     485              :                 var ;; = PHI<init, next>
     486              :                  |
     487              :                  |
     488              :                  v
     489              :       +---------------------+
     490              :       | reduction operators | <-- other operands
     491              :       +---------------------+
     492              :                  |
     493              :                  |
     494              :                  v
     495              :                 next
     496              : 
     497              :      In terms loop interchange, we don't change how NEXT is computed based
     498              :      on VAR and OTHER OPERANDS.  In case of double reduction in loop nest
     499              :      to be interchanged, we don't changed it at all.  In the case of simple
     500              :      reduction in inner loop, we only make change how VAR/NEXT is loaded or
     501              :      stored.  With these conditions, we can relax restrictions on reduction
     502              :      in a way that reduction operation is seen as black box.  In general,
     503              :      we can ignore reassociation of reduction operator; we can handle fake
     504              :      reductions in which VAR is not even used to compute NEXT.  */
     505           31 :   if (! single_imm_use (var, &use_p, &single_use)
     506           31 :       || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
     507            2 :     return false;
     508              : 
     509              :   /* Check the reduction operation.  We require a left-associative operation.
     510              :      For FP math we also need to be allowed to associate operations.  */
     511           39 :   if (gassign *ass = dyn_cast <gassign *> (single_use))
     512              :     {
     513           29 :       enum tree_code code = gimple_assign_rhs_code (ass);
     514           32 :       if (! (associative_tree_code (code)
     515              :              || (code == MINUS_EXPR
     516            1 :                  && use_p->use == gimple_assign_rhs1_ptr (ass)))
     517           29 :           || (FLOAT_TYPE_P (TREE_TYPE (var))
     518           12 :               && ! flag_associative_math))
     519              :         return false;
     520              :     }
     521              :   else
     522              :     return false;
     523              : 
     524              :   /* Handle and verify a series of stmts feeding the reduction op.  */
     525           21 :   if (single_use != next_def
     526           21 :       && !check_reduction_path (dump_user_location_t (), m_loop, phi, next,
     527              :                                 gimple_assign_rhs_code (single_use)))
     528            0 :     return false;
     529              : 
     530              :   /* Only support cases in which INIT is used in inner loop.  */
     531           21 :   if (TREE_CODE (init) == SSA_NAME)
     532           36 :     FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
     533              :       {
     534           19 :         stmt = USE_STMT (use_p);
     535           19 :         if (is_gimple_debug (stmt))
     536            2 :           continue;
     537              : 
     538           17 :         if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
     539            0 :           return false;
     540           17 :       }
     541              : 
     542           67 :   FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
     543              :     {
     544           46 :       stmt = USE_STMT (use_p);
     545           46 :       if (is_gimple_debug (stmt))
     546            4 :         continue;
     547              : 
     548              :       /* Or else it's used in PHI itself.  */
     549           42 :       use_phi = dyn_cast <gphi *> (stmt);
     550           42 :       if (use_phi == phi)
     551           21 :         continue;
     552              : 
     553           21 :       if (use_phi != NULL
     554           21 :           && lcssa_phi == NULL
     555           21 :           && gimple_bb (stmt) == m_exit->dest
     556           42 :           && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
     557              :         lcssa_phi = use_phi;
     558              :       else
     559            0 :         return false;
     560            0 :     }
     561           21 :   if (!lcssa_phi)
     562              :     return false;
     563              : 
     564           21 :   re = XCNEW (struct reduction);
     565           21 :   re->var = var;
     566           21 :   re->init = init;
     567           21 :   re->next = next;
     568           21 :   re->phi = phi;
     569           21 :   re->lcssa_phi = lcssa_phi;
     570              : 
     571           21 :   classify_simple_reduction (re);
     572              : 
     573           21 :   if (dump_file && (dump_flags & TDF_DETAILS))
     574            8 :     dump_reduction (re);
     575              : 
     576           21 :   m_reductions.safe_push (re);
     577           21 :   return true;
     578              : }
     579              : 
     580              : /* Analyze reduction variable VAR for outer loop of the loop nest to be
     581              :    interchanged.  ILOOP is not NULL and points to inner loop.  For the
     582              :    moment, we only support double reduction for outer loop, like:
     583              : 
     584              :      for (int i = 0; i < n; i++)
     585              :        {
     586              :          int sum = 0;
     587              : 
     588              :          for (int j = 0; j < n; j++)    // outer loop
     589              :            for (int k = 0; k < n; k++)  // inner loop
     590              :              sum += a[i][k]*b[k][j];
     591              : 
     592              :          s[i] = sum;
     593              :        }
     594              : 
     595              :    Note the innermost two loops are the loop nest to be interchanged.
     596              :    Return true if analysis succeeds.  */
     597              : 
     598              : bool
     599           18 : loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
     600              : {
     601           18 :   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
     602           18 :   gphi *lcssa_phi = NULL, *use_phi;
     603           18 :   tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
     604           18 :   tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
     605           18 :   reduction_p re;
     606           18 :   gimple *stmt, *next_def;
     607           18 :   use_operand_p use_p;
     608           18 :   imm_use_iterator iterator;
     609              : 
     610           18 :   if (TREE_CODE (next) != SSA_NAME)
     611              :     return false;
     612              : 
     613           18 :   next_def = SSA_NAME_DEF_STMT (next);
     614           18 :   basic_block bb = gimple_bb (next_def);
     615           18 :   if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
     616            0 :     return false;
     617              : 
     618              :   /* Find inner loop's simple reduction that uses var as initializer.  */
     619           20 :   reduction_p inner_re = NULL;
     620           20 :   for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
     621           16 :     if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
     622              :       break;
     623              : 
     624           18 :   if (inner_re == NULL
     625           14 :       || inner_re->type != UNKNOWN_RTYPE
     626           14 :       || inner_re->producer != phi)
     627              :     return false;
     628              : 
     629              :   /* In case of double reduction, outer loop's reduction should be updated
     630              :      by inner loop's simple reduction.  */
     631           14 :   if (next_def != inner_re->lcssa_phi)
     632              :     return false;
     633              : 
     634              :   /* Outer loop's reduction should only be used to initialize inner loop's
     635              :      simple reduction.  */
     636           14 :   if (! single_imm_use (var, &use_p, &stmt)
     637           14 :       || stmt != inner_re->phi)
     638              :     return false;
     639              : 
     640              :   /* Check this reduction is correctly used outside of loop via lcssa phi.  */
     641           44 :   FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
     642              :     {
     643           30 :       stmt = USE_STMT (use_p);
     644           30 :       if (is_gimple_debug (stmt))
     645            2 :         continue;
     646              : 
     647              :       /* Or else it's used in PHI itself.  */
     648           28 :       use_phi = dyn_cast <gphi *> (stmt);
     649           28 :       if (use_phi == phi)
     650           14 :         continue;
     651              : 
     652           14 :       if (lcssa_phi == NULL
     653           14 :           && use_phi != NULL
     654           14 :           && gimple_bb (stmt) == m_exit->dest
     655           28 :           && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
     656              :         lcssa_phi = use_phi;
     657              :       else
     658            0 :         return false;
     659            0 :     }
     660           14 :   if (!lcssa_phi)
     661              :     return false;
     662              : 
     663           14 :   re = XCNEW (struct reduction);
     664           14 :   re->var = var;
     665           14 :   re->init = init;
     666           14 :   re->next = next;
     667           14 :   re->phi = phi;
     668           14 :   re->lcssa_phi = lcssa_phi;
     669           14 :   re->type = DOUBLE_RTYPE;
     670           14 :   inner_re->type = DOUBLE_RTYPE;
     671              : 
     672           14 :   if (dump_file && (dump_flags & TDF_DETAILS))
     673            2 :     dump_reduction (re);
     674              : 
     675           14 :   m_reductions.safe_push (re);
     676           14 :   return true;
     677              : }
     678              : 
     679              : /* Return true if VAR is induction variable of current loop whose scev is
     680              :    specified by CHREC.  */
     681              : 
     682              : bool
     683          306 : loop_cand::analyze_induction_var (tree var, tree chrec)
     684              : {
     685          306 :   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
     686          306 :   tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
     687              : 
     688              :   /* Var is loop invariant, though it's unlikely to happen.  */
     689          306 :   if (tree_does_not_contain_chrecs (chrec))
     690              :     {
     691              :       /* Punt on floating point invariants if honoring signed zeros,
     692              :          representing that as + 0.0 would change the result if init
     693              :          is -0.0.  Similarly for SNaNs it can raise exception.  */
     694            0 :       if (HONOR_SIGNED_ZEROS (chrec) || HONOR_SNANS (chrec))
     695            0 :         return false;
     696            0 :       struct induction *iv = XCNEW (struct induction);
     697            0 :       iv->var = var;
     698            0 :       iv->init_val = init;
     699            0 :       iv->init_expr = chrec;
     700            0 :       iv->step = build_zero_cst (TREE_TYPE (chrec));
     701            0 :       m_inductions.safe_push (iv);
     702            0 :       return true;
     703              :     }
     704              : 
     705          306 :   if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
     706          306 :       || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
     707          306 :       || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
     708          612 :       || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
     709            0 :     return false;
     710              : 
     711          306 :   struct induction *iv = XCNEW (struct induction);
     712          306 :   iv->var = var;
     713          306 :   iv->init_val = init;
     714          306 :   iv->init_expr = CHREC_LEFT (chrec);
     715          306 :   iv->step = CHREC_RIGHT (chrec);
     716              : 
     717          306 :   if (dump_file && (dump_flags & TDF_DETAILS))
     718           51 :     dump_induction (m_loop, iv);
     719              : 
     720          306 :   m_inductions.safe_push (iv);
     721          306 :   return true;
     722              : }
     723              : 
     724              : /* Return true if all loop carried variables defined in loop header can
     725              :    be successfully analyzed.  */
     726              : 
     727              : bool
     728          166 : loop_cand::analyze_carried_vars (loop_cand *iloop)
     729              : {
     730          166 :   edge e = loop_preheader_edge (m_outer);
     731          166 :   gphi_iterator gsi;
     732              : 
     733          166 :   if (dump_file && (dump_flags & TDF_DETAILS))
     734           38 :     fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
     735              : 
     736          659 :   for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
     737              :     {
     738          507 :       gphi *phi = gsi.phi ();
     739              : 
     740          507 :       tree var = PHI_RESULT (phi);
     741         1014 :       if (virtual_operand_p (var))
     742          152 :         continue;
     743              : 
     744          355 :       tree chrec = analyze_scalar_evolution (m_loop, var);
     745          355 :       chrec = instantiate_scev (e, m_loop, chrec);
     746              : 
     747              :       /* Analyze var as reduction variable.  */
     748          355 :       if (chrec_contains_undetermined (chrec)
     749          355 :           || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
     750              :         {
     751           49 :           if (iloop && !analyze_oloop_reduction_var (iloop, var))
     752              :             return false;
     753           45 :           if (!iloop && !analyze_iloop_reduction_var (var))
     754              :             return false;
     755              :         }
     756              :       /* Analyze var as induction variable.  */
     757          306 :       else if (!analyze_induction_var (var, chrec))
     758              :         return false;
     759              :     }
     760              : 
     761              :   return true;
     762              : }
     763              : 
     764              : /* Return TRUE if loop closed PHI nodes can be analyzed successfully.  */
     765              : 
     766              : bool
     767          152 : loop_cand::analyze_lcssa_phis (void)
     768              : {
     769          152 :   gphi_iterator gsi;
     770          328 :   for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
     771              :     {
     772          179 :       gphi *phi = gsi.phi ();
     773              : 
     774          358 :       if (virtual_operand_p (PHI_RESULT (phi)))
     775          143 :         continue;
     776              : 
     777              :       /* TODO: We only support lcssa phi for reduction for now.  */
     778           36 :       if (!find_reduction_by_stmt (phi))
     779              :         return false;
     780              :     }
     781              : 
     782              :   return true;
     783              : }
     784              : 
     785              : /* CONSUMER is a stmt in BB storing reduction result into memory object.
     786              :    When the reduction is intialized from constant value, we need to add
     787              :    a stmt loading from the memory object to target basic block in inner
     788              :    loop during undoing the reduction.  Problem is that memory reference
     789              :    may use ssa variables not dominating the target basic block.  This
     790              :    function finds all stmts on which CONSUMER depends in basic block BB,
     791              :    records and returns them via STMTS.  */
     792              : 
     793              : static void
     794            4 : find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
     795              : {
     796            4 :   auto_vec<gimple *, 4> worklist;
     797            4 :   use_operand_p use_p;
     798            4 :   ssa_op_iter iter;
     799            4 :   gimple *stmt, *def_stmt;
     800            4 :   gimple_stmt_iterator gsi;
     801              : 
     802              :   /* First clear flag for stmts in bb.  */
     803           21 :   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     804           13 :     gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);
     805              : 
     806              :   /* DFS search all depended stmts in bb and mark flag for these stmts.  */
     807            4 :   worklist.safe_push (consumer);
     808           13 :   while (!worklist.is_empty ())
     809              :     {
     810            5 :       stmt = worklist.pop ();
     811           19 :       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
     812              :         {
     813           14 :           def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
     814              : 
     815           14 :           if (is_a <gphi *> (def_stmt)
     816           10 :               || gimple_bb (def_stmt) != bb
     817           15 :               || gimple_plf (def_stmt, GF_PLF_1))
     818           13 :             continue;
     819              : 
     820            1 :           worklist.safe_push (def_stmt);
     821              :         }
     822            5 :       gimple_set_plf (stmt, GF_PLF_1, true);
     823              :     }
     824            4 :   for (gsi = gsi_start_nondebug_bb (bb);
     825            5 :        !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
     826              :     {
     827              :       /* Move dep stmts to sequence STMTS.  */
     828            1 :       if (gimple_plf (stmt, GF_PLF_1))
     829              :         {
     830            1 :           gsi_remove (&gsi, false);
     831            1 :           gimple_seq_add_stmt_without_update (stmts, stmt);
     832              :         }
     833              :       else
     834            0 :         gsi_next_nondebug (&gsi);
     835              :     }
     836            4 : }
     837              : 
     838              : /* User can write, optimizers can generate simple reduction RE for inner
     839              :    loop.  In order to make interchange valid, we have to undo reduction by
     840              :    moving producer and consumer stmts into the inner loop.  For example,
     841              :    below code:
     842              : 
     843              :      init = MEM_REF[idx];               //producer
     844              :      loop:
     845              :        var = phi<init, next>
     846              :        next = var op ...
     847              :      reduc_sum = phi<next>
     848              :      MEM_REF[idx] = reduc_sum           //consumer
     849              : 
     850              :    is transformed into:
     851              : 
     852              :      loop:
     853              :        new_var = MEM_REF[idx];          //producer after moving
     854              :        next = new_var op ...
     855              :        MEM_REF[idx] = next;             //consumer after moving
     856              : 
     857              :    Note if the reduction variable is initialized to constant, like:
     858              : 
     859              :      var = phi<0.0, next>
     860              : 
     861              :    we compute new_var as below:
     862              : 
     863              :      loop:
     864              :        tmp = MEM_REF[idx];
     865              :        new_var = !first_iteration ? tmp : 0.0;
     866              : 
     867              :    so that the initial const is used in the first iteration of loop.  Also
     868              :    record ssa variables for dead code elimination in DCE_SEEDS.  */
     869              : 
     870              : void
     871            7 : loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
     872              : {
     873            7 :   gimple *stmt;
     874            7 :   gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
     875            7 :   gimple_seq stmts = NULL;
     876            7 :   tree new_var;
     877              : 
     878              :   /* Prepare the initialization stmts and insert it to inner loop.  */
     879            7 :   if (re->producer != NULL)
     880              :     {
     881            3 :       gimple_set_vuse (re->producer, NULL_TREE);
     882            3 :       update_stmt (re->producer);
     883            3 :       from = gsi_for_stmt (re->producer);
     884            3 :       gsi_remove (&from, false);
     885            3 :       gimple_seq_add_stmt_without_update (&stmts, re->producer);
     886            3 :       new_var = re->init;
     887              :     }
     888              :   else
     889              :     {
     890              :       /* Find all stmts on which expression "MEM_REF[idx]" depends.  */
     891            4 :       find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
     892              :       /* Because we generate new stmt loading from the MEM_REF to TMP.  */
     893            4 :       tree cond, tmp = copy_ssa_name (re->var);
     894            4 :       stmt = gimple_build_assign (tmp, re->init_ref);
     895            4 :       gimple_seq_add_stmt_without_update (&stmts, stmt);
     896              : 
     897              :       /* Init new_var to MEM_REF or CONST depending on if it is the first
     898              :          iteration.  */
     899            4 :       induction_p iv = m_inductions[0];
     900            4 :       cond = make_ssa_name (boolean_type_node);
     901            4 :       stmt = gimple_build_assign (cond, NE_EXPR, iv->var, iv->init_val);
     902            4 :       gimple_seq_add_stmt_without_update (&stmts, stmt);
     903            4 :       new_var = copy_ssa_name (re->var);
     904            4 :       stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
     905            4 :       gimple_seq_add_stmt_without_update (&stmts, stmt);
     906              :     }
     907            7 :   gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
     908              : 
     909              :   /* Replace all uses of reduction var with new variable.  */
     910            7 :   use_operand_p use_p;
     911            7 :   imm_use_iterator iterator;
     912           21 :   FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
     913              :     {
     914           21 :       FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
     915            7 :         SET_USE (use_p, new_var);
     916              : 
     917            7 :       update_stmt (stmt);
     918            7 :     }
     919              : 
     920              :   /* Move consumer stmt into inner loop, just after reduction next's def.  */
     921            7 :   unlink_stmt_vdef (re->consumer);
     922           14 :   release_ssa_name (gimple_vdef (re->consumer));
     923            7 :   gimple_set_vdef (re->consumer, NULL_TREE);
     924            7 :   gimple_set_vuse (re->consumer, NULL_TREE);
     925            7 :   gimple_assign_set_rhs1 (re->consumer, re->next);
     926            7 :   update_stmt (re->consumer);
     927            7 :   from = gsi_for_stmt (re->consumer);
     928            7 :   to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
     929            7 :   gsi_move_after (&from, &to);
     930              : 
     931              :   /* Mark the reduction variables for DCE.  */
     932            7 :   bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
     933            7 :   bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
     934            7 : }
     935              : 
     936              : /* Free DATAREFS and its auxiliary memory.  */
     937              : 
     938              : static void
     939        55436 : free_data_refs_with_aux (vec<data_reference_p> datarefs)
     940              : {
     941        55436 :   data_reference_p dr;
     942        60440 :   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
     943         5004 :     if (dr->aux != NULL)
     944              :       {
     945         4956 :         DR_ACCESS_STRIDE (dr)->release ();
     946         4956 :         delete (vec<tree> *) dr->aux;
     947              :       }
     948              : 
     949        55436 :   free_data_refs (datarefs);
     950        55436 : }
     951              : 
     952              : /* Class for loop interchange transformation.  */
     953              : 
     954              : class tree_loop_interchange
     955              : {
     956              : public:
     957           80 :   tree_loop_interchange (vec<class loop *> loop_nest)
     958           80 :     : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
     959          160 :       m_dce_seeds (BITMAP_ALLOC (NULL)) { }
     960          160 :   ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
     961              :   bool interchange (vec<data_reference_p>, vec<ddr_p>);
     962              : 
     963              : private:
     964              :   void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
     965              :   bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
     966              :   void interchange_loops (loop_cand &, loop_cand &);
     967              :   void map_inductions_to_loop (loop_cand &, loop_cand &);
     968              :   void move_code_to_inner_loop (class loop *, class loop *, basic_block *);
     969              : 
     970              :   /* The whole loop nest in which interchange is ongoing.  */
     971              :   vec<class loop *> m_loop_nest;
     972              :   /* We create new IV which is only used in loop's exit condition check.
     973              :      In case of 3-level loop nest interchange, when we interchange the
     974              :      innermost two loops, new IV created in the middle level loop does
     975              :      not need to be preserved in interchanging the outermost two loops
     976              :      later.  We record the IV so that it can be skipped.  */
     977              :   tree m_niters_iv_var;
     978              :   /* Bitmap of seed variables for dead code elimination after interchange.  */
     979              :   bitmap m_dce_seeds;
     980              : };
     981              : 
     982              : /* Update data refs' access stride and dependence information after loop
     983              :    interchange.  I_IDX/O_IDX gives indices of interchanged loops in loop
     984              :    nest.  DATAREFS are data references.  DDRS are data dependences.  */
     985              : 
     986              : void
     987           13 : tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
     988              :                                          vec<data_reference_p> datarefs,
     989              :                                          vec<ddr_p> ddrs)
     990              : {
     991           13 :   struct data_reference *dr;
     992           13 :   struct data_dependence_relation *ddr;
     993              : 
     994              :   /* Update strides of data references.  */
     995          145 :   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
     996              :     {
     997          132 :       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
     998          132 :       gcc_assert (stride->length () > i_idx);
     999          132 :       std::swap ((*stride)[i_idx], (*stride)[o_idx]);
    1000              :     }
    1001              :   /* Update data dependences.  */
    1002         2306 :   for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
    1003         2293 :     if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
    1004              :       {
    1005         2299 :         for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
    1006              :           {
    1007            6 :             lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
    1008            6 :             std::swap (dist_vect[i_idx], dist_vect[o_idx]);
    1009              :           }
    1010              :       }
    1011           13 : }
    1012              : 
    1013              : /* Check data dependence relations, return TRUE if it's valid to interchange
    1014              :    two loops specified by I_IDX/O_IDX.  Theoretically, interchanging the two
    1015              :    loops is valid only if dist vector, after interchanging, doesn't have '>'
    1016              :    as the leftmost non-'=' direction.  Practically, this function have been
    1017              :    conservative here by not checking some valid cases.  */
    1018              : 
    1019              : bool
    1020           98 : tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
    1021              :                                                vec<ddr_p> ddrs)
    1022              : {
    1023           98 :   struct data_dependence_relation *ddr;
    1024              : 
    1025         5627 :   for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
    1026              :     {
    1027              :       /* Skip no-dependence case.  */
    1028         5539 :       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    1029         5448 :         continue;
    1030              : 
    1031         5688 :       for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
    1032              :         {
    1033          159 :           lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
    1034          318 :           unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
    1035              : 
    1036              :           /* If there is no carried dependence.  */
    1037          159 :           if (level == 0)
    1038           78 :             continue;
    1039              : 
    1040           81 :           level --;
    1041              : 
    1042              :           /* If dependence is not carried by any loop in between the two
    1043              :              loops [oloop, iloop] to interchange.  */
    1044           81 :           if (level < o_idx || level > i_idx)
    1045           16 :             continue;
    1046              : 
    1047              :           /* Be conservative, skip case if either direction at i_idx/o_idx
    1048              :              levels is not '=' or '<'.  */
    1049           65 :           if ((!DDR_REVERSED_P (ddr) && dist_vect[i_idx] < 0)
    1050           64 :               || (DDR_REVERSED_P (ddr) && dist_vect[i_idx] > 0)
    1051           55 :               || (!DDR_REVERSED_P (ddr) && dist_vect[o_idx] < 0)
    1052           55 :               || (DDR_REVERSED_P (ddr) && dist_vect[o_idx] > 0))
    1053              :             return false;
    1054              :         }
    1055              :     }
    1056              : 
    1057              :   return true;
    1058              : }
    1059              : 
    1060              : /* Interchange two loops specified by ILOOP and OLOOP.  */
    1061              : 
    1062              : void
    1063           48 : tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
    1064              : {
    1065           48 :   reduction_p re;
    1066           48 :   gimple_stmt_iterator gsi;
    1067           48 :   tree i_niters, o_niters, var_after;
    1068              : 
    1069              :   /* Undo inner loop's simple reduction.  */
    1070           66 :   for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
    1071           18 :     if (re->type != DOUBLE_RTYPE)
    1072              :       {
    1073            7 :         if (re->producer)
    1074            3 :           reset_debug_uses (re->producer);
    1075              : 
    1076            7 :         iloop.undo_simple_reduction (re, m_dce_seeds);
    1077              :       }
    1078              : 
    1079              :   /* Only need to reset debug uses for double reduction.  */
    1080           59 :   for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
    1081              :     {
    1082           11 :       gcc_assert (re->type == DOUBLE_RTYPE);
    1083           11 :       reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
    1084           11 :       reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
    1085              :     }
    1086              : 
    1087              :   /* Prepare niters for both loops.  */
    1088           48 :   class loop *loop_nest = m_loop_nest[0];
    1089           48 :   edge instantiate_below = loop_preheader_edge (loop_nest);
    1090           48 :   gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
    1091           48 :   i_niters = number_of_latch_executions (iloop.m_loop);
    1092           48 :   i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
    1093           48 :   i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
    1094              :                                i_niters);
    1095           48 :   i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
    1096              :                                        NULL_TREE, false, GSI_CONTINUE_LINKING);
    1097           48 :   o_niters = number_of_latch_executions (oloop.m_loop);
    1098           48 :   if (oloop.m_loop != loop_nest)
    1099              :     {
    1100           13 :       o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
    1101           13 :       o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
    1102              :                                    o_niters);
    1103              :     }
    1104           48 :   o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
    1105              :                                        NULL_TREE, false, GSI_CONTINUE_LINKING);
    1106              : 
    1107              :   /* Move src's code to tgt loop.  This is necessary when src is the outer
    1108              :      loop and tgt is the inner loop.  */
    1109           48 :   move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);
    1110              : 
    1111              :   /* Map outer loop's IV to inner loop, and vice versa.  */
    1112           48 :   map_inductions_to_loop (oloop, iloop);
    1113           48 :   map_inductions_to_loop (iloop, oloop);
    1114              : 
    1115              :   /* Create canonical IV for both loops.  Note canonical IV for outer/inner
    1116              :      loop is actually from inner/outer loop.  Also we record the new IV
    1117              :      created for the outer loop so that it can be skipped in later loop
    1118              :      interchange.  */
    1119           48 :   create_canonical_iv (oloop.m_loop, oloop.m_exit,
    1120              :                        i_niters, &m_niters_iv_var, &var_after);
    1121           48 :   bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
    1122           48 :   create_canonical_iv (iloop.m_loop, iloop.m_exit,
    1123              :                        o_niters, NULL, &var_after);
    1124           48 :   bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
    1125              : 
    1126              :   /* Scrap niters estimation of interchanged loops.  */
    1127           48 :   iloop.m_loop->any_upper_bound = false;
    1128           48 :   iloop.m_loop->any_likely_upper_bound = false;
    1129           48 :   free_numbers_of_iterations_estimates (iloop.m_loop);
    1130           48 :   oloop.m_loop->any_upper_bound = false;
    1131           48 :   oloop.m_loop->any_likely_upper_bound = false;
    1132           48 :   free_numbers_of_iterations_estimates (oloop.m_loop);
    1133              : 
    1134              :   /* Clear all cached scev information.  This is expensive but shouldn't be
    1135              :      a problem given we interchange in very limited times.  */
    1136           48 :   scev_reset_htab ();
    1137              : 
    1138              :   /* ???  The association between the loop data structure and the
    1139              :      CFG changed, so what was loop N at the source level is now
    1140              :      loop M.  We should think of retaining the association or breaking
    1141              :      it fully by creating a new loop instead of re-using the "wrong" one.  */
    1142           48 : }
    1143              : 
    1144              : /* Map induction variables of SRC loop to TGT loop.  The function firstly
    1145              :    creates the same IV of SRC loop in TGT loop, then deletes the original
    1146              :    IV and re-initialize it using the newly created IV.  For example, loop
    1147              :    nest:
    1148              : 
    1149              :      for (i = 0; i < N; i++)
    1150              :        for (j = 0; j < M; j++)
    1151              :          {
    1152              :            //use of i;
    1153              :            //use of j;
    1154              :          }
    1155              : 
    1156              :    will be transformed into:
    1157              : 
    1158              :      for (jj = 0; jj < M; jj++)
    1159              :        for (ii = 0; ii < N; ii++)
    1160              :          {
    1161              :            //use of ii;
    1162              :            //use of jj;
    1163              :          }
    1164              : 
    1165              :    after loop interchange.  */
    1166              : 
    1167              : void
    1168           96 : tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
    1169              : {
    1170           96 :   induction_p iv;
    1171           96 :   edge e = tgt.m_exit;
    1172           96 :   gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;
    1173              : 
    1174              :   /* Map source loop's IV to target loop.  */
    1175          266 :   for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
    1176              :     {
    1177          170 :       gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
    1178          170 :       gcc_assert (is_a <gphi *> (stmt));
    1179              : 
    1180          170 :       use_operand_p use_p;
    1181              :       /* Only map original IV to target loop.  */
    1182          170 :       if (m_niters_iv_var != iv->var)
    1183              :         {
    1184              :           /* Map the IV by creating the same one in target loop.  */
    1185          166 :           tree var_before, var_after;
    1186          166 :           tree base = unshare_expr (iv->init_expr);
    1187          166 :           tree step = unshare_expr (iv->step);
    1188          262 :           create_iv (base, PLUS_EXPR, step, SSA_NAME_VAR (iv->var),
    1189              :                      tgt.m_loop, &incr_pos, false, &var_before, &var_after);
    1190          166 :           bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
    1191          166 :           bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
    1192              : 
    1193              :           /* Replace uses of the original IV var with newly created IV var.  */
    1194          166 :           imm_use_iterator imm_iter;
    1195         1038 :           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
    1196              :             {
    1197         2310 :               FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
    1198          802 :                 SET_USE (use_p, var_before);
    1199              : 
    1200          706 :               update_stmt (use_stmt);
    1201          166 :             }
    1202              :         }
    1203              : 
    1204              :       /* Mark all uses for DCE.  */
    1205          170 :       ssa_op_iter op_iter;
    1206          680 :       FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
    1207              :         {
    1208          340 :           tree use = USE_FROM_PTR (use_p);
    1209          340 :           if (TREE_CODE (use) == SSA_NAME
    1210          340 :               && ! SSA_NAME_IS_DEFAULT_DEF (use))
    1211          176 :             bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
    1212              :         }
    1213              : 
    1214              :       /* Delete definition of the original IV in the source loop.  */
    1215          170 :       gsi = gsi_for_stmt (stmt);
    1216          170 :       remove_phi_node (&gsi, true);
    1217              :     }
    1218           96 : }
    1219              : 
    1220              : /* Move stmts of outer loop to inner loop.  */
    1221              : 
    1222              : void
    1223           48 : tree_loop_interchange::move_code_to_inner_loop (class loop *outer,
    1224              :                                                 class loop *inner,
    1225              :                                                 basic_block *outer_bbs)
    1226              : {
    1227           48 :   basic_block oloop_exit_bb = single_exit (outer)->src;
    1228           48 :   gimple_stmt_iterator gsi, to;
    1229              : 
    1230          306 :   for (unsigned i = 0; i < outer->num_nodes; i++)
    1231              :     {
    1232          258 :       basic_block bb = outer_bbs[i];
    1233              : 
    1234              :       /* Skip basic blocks of inner loop.  */
    1235          258 :       if (flow_bb_inside_loop_p (inner, bb))
    1236          114 :         continue;
    1237              : 
    1238              :       /* Move code from header/latch to header/latch.  */
    1239          144 :       if (bb == outer->header)
    1240           48 :         to = gsi_after_labels (inner->header);
    1241           96 :       else if (bb == outer->latch)
    1242           48 :         to = gsi_after_labels (inner->latch);
    1243              :       else
    1244              :         /* Otherwise, simply move to exit->src.  */
    1245           96 :         to = gsi_last_bb (single_exit (inner)->src);
    1246              : 
    1247          401 :       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
    1248              :         {
    1249          257 :           gimple *stmt = gsi_stmt (gsi);
    1250              : 
    1251          305 :           if (oloop_exit_bb == bb
    1252          423 :               && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
    1253              :             {
    1254           48 :               gsi_next (&gsi);
    1255           48 :               continue;
    1256              :             }
    1257              : 
    1258          343 :           if (gimple_vdef (stmt))
    1259              :             {
    1260            0 :               unlink_stmt_vdef (stmt);
    1261            0 :               release_ssa_name (gimple_vdef (stmt));
    1262            0 :               gimple_set_vdef (stmt, NULL_TREE);
    1263              :             }
    1264          343 :           if (gimple_vuse (stmt))
    1265              :             {
    1266            2 :               gimple_set_vuse (stmt, NULL_TREE);
    1267            2 :               update_stmt (stmt);
    1268              :             }
    1269              : 
    1270          209 :           reset_debug_uses (stmt);
    1271          209 :           gsi_move_before (&gsi, &to);
    1272              :         }
    1273              :     }
    1274           48 : }
    1275              : 
    1276              : /* Given data reference DR in LOOP_NEST, the function computes DR's access
    1277              :    stride at each level of loop from innermost LOOP to outer.  On success,
    1278              :    it saves access stride at each level loop in a vector which is pointed
    1279              :    by DR->aux.  For example:
    1280              : 
    1281              :      int arr[100][100][100];
    1282              :      for (i = 0; i < 100; i++)       ;(DR->aux)strides[0] = 40000
    1283              :        for (j = 100; j > 0; j--)     ;(DR->aux)strides[1] = 400
    1284              :          for (k = 0; k < 100; k++)   ;(DR->aux)strides[2] = 4
    1285              :            arr[i][j - 1][k] = 0;  */
    1286              : 
    1287              : static void
    1288         4958 : compute_access_stride (class loop *&loop_nest, class loop *loop,
    1289              :                        data_reference_p dr)
    1290              : {
    1291         4958 :   vec<tree> *strides = new vec<tree> ();
    1292         4958 :   dr->aux = strides;
    1293              : 
    1294         4958 :   basic_block bb = gimple_bb (DR_STMT (dr));
    1295         4958 :   if (!flow_bb_inside_loop_p (loop_nest, bb))
    1296              :     return;
    1297         5734 :   while (!flow_bb_inside_loop_p (loop, bb))
    1298              :     {
    1299          776 :       strides->safe_push (build_int_cst (sizetype, 0));
    1300          776 :       loop = loop_outer (loop);
    1301              :     }
    1302         4958 :   gcc_assert (loop == bb->loop_father);
    1303              : 
    1304         4958 :   tree ref = DR_REF (dr);
    1305         4958 :   if (TREE_CODE (ref) == COMPONENT_REF
    1306         4958 :       && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
    1307              :     {
    1308              :       /* We can't take address of bitfields.  If the bitfield is at constant
    1309              :          offset from the start of the struct, just use address of the
    1310              :          struct, for analysis of the strides that shouldn't matter.  */
    1311            4 :       if (!TREE_OPERAND (ref, 2)
    1312            4 :           || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
    1313            2 :         ref = TREE_OPERAND (ref, 0);
    1314              :       /* Otherwise, if we have a bit field representative, use that.  */
    1315            2 :       else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
    1316              :                != NULL_TREE)
    1317              :         {
    1318            2 :           tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
    1319            2 :           ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
    1320            2 :                         repr, TREE_OPERAND (ref, 2));
    1321              :         }
    1322              :       /* Otherwise punt.  */
    1323              :       else
    1324              :         return;
    1325              :     }
    1326         4958 :   tree scev_base = build_fold_addr_expr (ref);
    1327         4958 :   tree scev = analyze_scalar_evolution (loop, scev_base);
    1328         4958 :   if (chrec_contains_undetermined (scev))
    1329              :     return;
    1330              : 
    1331              :   tree orig_scev = scev;
    1332         5054 :   do
    1333              :     {
    1334         4995 :       scev = instantiate_scev (loop_preheader_edge (loop_nest),
    1335              :                                loop, orig_scev);
    1336         4995 :       if (! chrec_contains_undetermined (scev))
    1337              :         break;
    1338              : 
    1339              :       /* If we couldn't instantiate for the desired nest, shrink it.  */
    1340           67 :       if (loop_nest == loop)
    1341              :         return;
    1342           59 :       loop_nest = loop_nest->inner;
    1343              :     } while (1);
    1344              : 
    1345              :   tree sl = scev;
    1346              :   class loop *expected = loop;
    1347        15061 :   while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
    1348              :     {
    1349        10133 :       class loop *sl_loop = get_chrec_loop (sl);
    1350        20356 :       while (sl_loop != expected)
    1351              :         {
    1352           90 :           strides->safe_push (size_int (0));
    1353           90 :           expected = loop_outer (expected);
    1354              :         }
    1355        10133 :       strides->safe_push (CHREC_RIGHT (sl));
    1356        10133 :       sl = CHREC_LEFT (sl);
    1357        10133 :       expected = loop_outer (expected);
    1358              :     }
    1359         4928 :   if (! tree_contains_chrecs (sl, NULL))
    1360         5574 :     while (expected != loop_outer (loop_nest))
    1361              :       {
    1362          646 :         strides->safe_push (size_int (0));
    1363          646 :         expected = loop_outer (expected);
    1364              :       }
    1365              : }
    1366              : 
    1367              : /* Given loop nest LOOP_NEST with innermost LOOP, the function computes
    1368              :    access strides with respect to each level loop for all data refs in
    1369              :    DATAREFS from inner loop to outer loop.  On success, it returns the
    1370              :    outermost loop that access strides can be computed successfully for
    1371              :    all data references.  If access strides cannot be computed at least
    1372              :    for two levels of loop for any data reference, it returns NULL.  */
    1373              : 
    1374              : static class loop *
    1375          856 : compute_access_strides (class loop *loop_nest, class loop *loop,
    1376              :                         vec<data_reference_p> datarefs)
    1377              : {
    1378          856 :   unsigned i, j, num_loops = (unsigned) -1;
    1379          856 :   data_reference_p dr;
    1380          856 :   vec<tree> *stride;
    1381              : 
    1382          856 :   class loop *interesting_loop_nest = loop_nest;
    1383         5755 :   for (i = 0; datarefs.iterate (i, &dr); ++i)
    1384              :     {
    1385         4958 :       compute_access_stride (interesting_loop_nest, loop, dr);
    1386         4958 :       stride = DR_ACCESS_STRIDE (dr);
    1387         9886 :       if (stride->length () < num_loops)
    1388              :         {
    1389          915 :           num_loops = stride->length ();
    1390          885 :           if (num_loops < 2)
    1391              :             return NULL;
    1392              :         }
    1393              :     }
    1394              : 
    1395         5616 :   for (i = 0; datarefs.iterate (i, &dr); ++i)
    1396              :     {
    1397         4819 :       stride = DR_ACCESS_STRIDE (dr);
    1398         4819 :       if (stride->length () > num_loops)
    1399           15 :         stride->truncate (num_loops);
    1400              : 
    1401         9844 :       for (j = 0; j < (num_loops >> 1); ++j)
    1402         5025 :         std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
    1403              :     }
    1404              : 
    1405         1594 :   loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
    1406          797 :   gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
    1407              :   return loop;
    1408              : }
    1409              : 
    1410              : /* Prune access strides for data references in DATAREFS by removing strides
    1411              :    of loops that isn't in current LOOP_NEST.  */
    1412              : 
    1413              : static void
    1414            3 : prune_access_strides_not_in_loop (class loop *loop_nest,
    1415              :                                   class loop *innermost,
    1416              :                                   vec<data_reference_p> datarefs)
    1417              : {
    1418            3 :   data_reference_p dr;
    1419            6 :   unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
    1420            3 :   gcc_assert (num_loops > 1);
    1421              : 
    1422              :   /* Block remove strides of loops that is not in current loop nest.  */
    1423           18 :   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
    1424              :     {
    1425           15 :       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
    1426           30 :       if (stride->length () > num_loops)
    1427           15 :         stride->block_remove (0, stride->length () - num_loops);
    1428              :     }
    1429            3 : }
    1430              : 
    1431              : /* Dump access strides for all DATAREFS.  */
    1432              : 
    1433              : static void
    1434           18 : dump_access_strides (vec<data_reference_p> datarefs)
    1435              : {
    1436           18 :   data_reference_p dr;
    1437           18 :   fprintf (dump_file, "Access Strides for DRs:\n");
    1438          110 :   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
    1439              :     {
    1440           92 :       fprintf (dump_file, "  ");
    1441           92 :       print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
    1442           92 :       fprintf (dump_file, ":\t\t<");
    1443              : 
    1444           92 :       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
    1445           92 :       unsigned num_loops = stride->length ();
    1446          288 :       for (unsigned j = 0; j < num_loops; ++j)
    1447              :         {
    1448          196 :           print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
    1449          288 :           fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
    1450              :         }
    1451              :     }
    1452           18 : }
    1453              : 
    1454              : /* Return true if it's profitable to interchange two loops whose index
    1455              :    in whole loop nest vector are I_IDX/O_IDX respectively.  The function
    1456              :    computes and compares three types information from all DATAREFS:
    1457              :      1) Access stride for loop I_IDX and O_IDX.
    1458              :      2) Number of invariant memory references with respect to I_IDX before
    1459              :         and after loop interchange.
    1460              :      3) Flags indicating if all memory references access sequential memory
    1461              :         in ILOOP, before and after loop interchange.
    1462              :    If INNMOST_LOOP_P is true, the two loops for interchanging are the two
    1463              :    innermost loops in loop nest.  This function also dumps information if
    1464              :    DUMP_INFO_P is true.  */
    1465              : 
    1466              : static bool
    1467         1100 : should_interchange_loops (unsigned i_idx, unsigned o_idx,
    1468              :                           vec<data_reference_p> datarefs,
    1469              :                           unsigned i_stmt_cost, unsigned o_stmt_cost,
    1470              :                           bool innermost_loops_p, bool dump_info_p = true)
    1471              : {
    1472         1100 :   unsigned HOST_WIDE_INT ratio;
    1473         1100 :   unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
    1474         1100 :   struct data_reference *dr;
    1475         1100 :   bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
    1476         1100 :   widest_int iloop_strides = 0, oloop_strides = 0;
    1477         1100 :   unsigned num_unresolved_drs = 0;
    1478         1100 :   unsigned num_resolved_ok_drs = 0;
    1479         1100 :   unsigned num_resolved_not_ok_drs = 0;
    1480              : 
    1481         1100 :   if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
    1482           17 :     fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
    1483              : 
    1484         7893 :   for (i = 0; datarefs.iterate (i, &dr); ++i)
    1485              :     {
    1486         6793 :       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
    1487         6793 :       tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
    1488              : 
    1489         6793 :       bool subloop_stride_p = false;
    1490              :       /* Data ref can't be invariant or sequential access at current loop if
    1491              :          its address changes with respect to any subloops.  */
    1492         7128 :       for (j = i_idx + 1; j < stride->length (); ++j)
    1493         1731 :         if (!integer_zerop ((*stride)[j]))
    1494              :           {
    1495              :             subloop_stride_p = true;
    1496              :             break;
    1497              :           }
    1498              : 
    1499         6793 :       if (integer_zerop (iloop_stride))
    1500              :         {
    1501          865 :           if (!subloop_stride_p)
    1502          823 :             num_old_inv_drs++;
    1503              :         }
    1504         6793 :       if (integer_zerop (oloop_stride))
    1505              :         {
    1506          599 :           if (!subloop_stride_p)
    1507          538 :             num_new_inv_drs++;
    1508              :         }
    1509              : 
    1510         6793 :       if (TREE_CODE (iloop_stride) == INTEGER_CST
    1511         5851 :           && TREE_CODE (oloop_stride) == INTEGER_CST)
    1512              :         {
    1513         5341 :           iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
    1514         5341 :           oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
    1515              :         }
    1516         1452 :       else if (multiple_of_p (TREE_TYPE (iloop_stride),
    1517              :                               iloop_stride, oloop_stride))
    1518           67 :         num_resolved_ok_drs++;
    1519         1385 :       else if (multiple_of_p (TREE_TYPE (iloop_stride),
    1520              :                               oloop_stride, iloop_stride))
    1521          491 :         num_resolved_not_ok_drs++;
    1522              :       else
    1523          894 :         num_unresolved_drs++;
    1524              : 
    1525              :       /* Data ref can't be sequential access if its address changes in sub
    1526              :          loop.  */
    1527         6793 :       if (subloop_stride_p)
    1528              :         {
    1529         1396 :           all_seq_dr_before_p = false;
    1530         1396 :           all_seq_dr_after_p = false;
    1531         1396 :           continue;
    1532              :         }
    1533              :       /* Track if all data references are sequential accesses before/after loop
    1534              :          interchange.  Note invariant is considered sequential here.  */
    1535         5397 :       tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
    1536         5397 :       if (all_seq_dr_before_p
    1537         6642 :           && ! (integer_zerop (iloop_stride)
    1538         1245 :                 || operand_equal_p (access_size, iloop_stride, 0)))
    1539              :         all_seq_dr_before_p = false;
    1540         5397 :       if (all_seq_dr_after_p
    1541         6440 :           && ! (integer_zerop (oloop_stride)
    1542         1043 :                 || operand_equal_p (access_size, oloop_stride, 0)))
    1543              :         all_seq_dr_after_p = false;
    1544              :     }
    1545              : 
    1546         1100 :   if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
    1547              :     {
    1548           17 :       fprintf (dump_file, "\toverall:\t\t");
    1549           17 :       print_decu (iloop_strides, dump_file);
    1550           17 :       fprintf (dump_file, "\t");
    1551           17 :       print_decu (oloop_strides, dump_file);
    1552           17 :       fprintf (dump_file, "\n");
    1553              : 
    1554           17 :       fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
    1555              :                num_old_inv_drs, num_new_inv_drs);
    1556           38 :       fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
    1557              :                all_seq_dr_before_p ? "true" : "false",
    1558              :                all_seq_dr_after_p ? "true" : "false");
    1559           17 :       fprintf (dump_file, "OK to interchage with variable strides: %d\n",
    1560              :                num_resolved_ok_drs);
    1561           17 :       fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
    1562              :                num_resolved_not_ok_drs);
    1563           17 :       fprintf (dump_file, "Variable strides we cannot decide: %d\n",
    1564              :                num_unresolved_drs);
    1565           17 :       fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost);
    1566           17 :       fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost);
    1567              :     }
    1568              : 
    1569         1100 :   if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
    1570              :     return false;
    1571              : 
    1572              :   /* Stmts of outer loop will be moved to inner loop.  If there are two many
    1573              :      such stmts, it could make inner loop costly.  Here we compare stmt cost
    1574              :      between outer and inner loops.  */
    1575          508 :   if (i_stmt_cost && o_stmt_cost
    1576           46 :       && num_old_inv_drs + o_stmt_cost > num_new_inv_drs
    1577           41 :       && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost)
    1578              :     return false;
    1579              : 
    1580              :   /* We use different stride comparison ratio for interchanging innermost
    1581              :      two loops or not.  The idea is to be conservative in interchange for
    1582              :      the innermost loops.  */
    1583          490 :   ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
    1584              :   /* Do interchange if it gives better data locality behavior.  */
    1585          490 :   if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
    1586              :     return true;
    1587          348 :   if (wi::gtu_p (iloop_strides, oloop_strides))
    1588              :     {
    1589              :       /* Or it creates more invariant memory references.  */
    1590           15 :       if ((!all_seq_dr_before_p || all_seq_dr_after_p)
    1591           15 :           && num_new_inv_drs > num_old_inv_drs)
    1592              :         return true;
    1593              :       /* Or it makes all memory references sequential.  */
    1594           11 :       if (num_new_inv_drs >= num_old_inv_drs
    1595           11 :           && !all_seq_dr_before_p && all_seq_dr_after_p)
    1596              :         return true;
    1597              :     }
    1598              : 
    1599              :   return false;
    1600         1100 : }
    1601              : 
    1602              : /* Try to interchange inner loop of a loop nest to outer level.  */
    1603              : 
    1604              : bool
    1605           80 : tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
    1606              :                                     vec<ddr_p> ddrs)
    1607              : {
    1608           80 :   dump_user_location_t loc = find_loop_location (m_loop_nest[0]);
    1609           80 :   bool changed_p = false;
    1610              :   /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
    1611              :      The overall effect is to push inner loop to outermost level in whole
    1612              :      loop nest.  */
    1613          229 :   for (unsigned i = m_loop_nest.length (); i > 1; --i)
    1614              :     {
    1615           98 :       unsigned i_idx = i - 1, o_idx = i - 2;
    1616              : 
    1617              :       /* Check validity for loop interchange.  */
    1618           98 :       if (!valid_data_dependences (i_idx, o_idx, ddrs))
    1619              :         break;
    1620              : 
    1621           88 :       loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
    1622           88 :       loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);
    1623              : 
    1624              :       /* Check if we can do transformation for loop interchange.  */
    1625           88 :       if (!iloop.analyze_carried_vars (NULL)
    1626           78 :           || !iloop.analyze_lcssa_phis ()
    1627           78 :           || !oloop.analyze_carried_vars (&iloop)
    1628           74 :           || !oloop.analyze_lcssa_phis ()
    1629           71 :           || !iloop.can_interchange_p (NULL)
    1630          158 :           || !oloop.can_interchange_p (&iloop))
    1631              :         break;
    1632              : 
    1633              :       /* Outer loop's stmts will be moved to inner loop during interchange.
    1634              :          If there are many of them, it may make inner loop very costly.  We
    1635              :          need to check number of outer loop's stmts in profit cost model of
    1636              :          interchange.  */
    1637           69 :       int stmt_cost = oloop.m_num_stmts;
    1638              :       /* Count out the exit checking stmt of outer loop.  */
    1639           69 :       stmt_cost --;
    1640              :       /* Count out IV's increasing stmt, IVOPTs takes care if it.  */
    1641           69 :       stmt_cost -= oloop.m_inductions.length ();
    1642              :       /* Count in the additional load and cond_expr stmts caused by inner
    1643              :          loop's constant initialized reduction.  */
    1644           69 :       stmt_cost += iloop.m_const_init_reduc * 2;
    1645           69 :       if (stmt_cost < 0)
    1646              :         stmt_cost = 0;
    1647              : 
    1648              :       /* Check profitability for loop interchange.  */
    1649           69 :       if (should_interchange_loops (i_idx, o_idx, datarefs,
    1650           69 :                                     (unsigned) iloop.m_num_stmts,
    1651              :                                     (unsigned) stmt_cost,
    1652           69 :                                     iloop.m_loop->inner == NULL))
    1653              :         {
    1654           48 :           if (dump_file && (dump_flags & TDF_DETAILS))
    1655           16 :             fprintf (dump_file,
    1656              :                      "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
    1657           16 :                      oloop.m_loop->num, iloop.m_loop->num);
    1658              : 
    1659           48 :           changed_p = true;
    1660           48 :           interchange_loops (iloop, oloop);
    1661              :           /* No need to update if there is no further loop interchange.  */
    1662           48 :           if (o_idx > 0)
    1663           13 :             update_data_info (i_idx, o_idx, datarefs, ddrs);
    1664              :         }
    1665              :       else
    1666              :         {
    1667           21 :           if (dump_file && (dump_flags & TDF_DETAILS))
    1668            1 :             fprintf (dump_file,
    1669              :                      "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
    1670            1 :                      oloop.m_loop->num, iloop.m_loop->num);
    1671              :         }
    1672           88 :     }
    1673           80 :   simple_dce_from_worklist (m_dce_seeds);
    1674              : 
    1675           80 :   if (changed_p && dump_enabled_p ())
    1676           14 :     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
    1677              :                      "loops interchanged in loop nest\n");
    1678              : 
    1679           80 :   return changed_p;
    1680              : }
    1681              : 
    1682              : 
    1683              : /* Loop interchange pass.  */
    1684              : 
    1685              : namespace {
    1686              : 
    1687              : const pass_data pass_data_linterchange =
    1688              : {
    1689              :   GIMPLE_PASS, /* type */
    1690              :   "linterchange", /* name */
    1691              :   OPTGROUP_LOOP, /* optinfo_flags */
    1692              :   TV_LINTERCHANGE, /* tv_id */
    1693              :   PROP_cfg, /* properties_required */
    1694              :   0, /* properties_provided */
    1695              :   0, /* properties_destroyed */
    1696              :   0, /* todo_flags_start */
    1697              :   0, /* todo_flags_finish */
    1698              : };
    1699              : 
    1700              : class pass_linterchange : public gimple_opt_pass
    1701              : {
    1702              : public:
    1703       285722 :   pass_linterchange (gcc::context *ctxt)
    1704       571444 :     : gimple_opt_pass (pass_data_linterchange, ctxt)
    1705              :   {}
    1706              : 
    1707              :   /* opt_pass methods: */
    1708            0 :   opt_pass * clone () final override { return new pass_linterchange (m_ctxt); }
    1709       241458 :   bool gate (function *) final override { return flag_loop_interchange; }
    1710              :   unsigned int execute (function *) final override;
    1711              : 
    1712              : }; // class pass_linterchange
    1713              : 
    1714              : 
    1715              : /* Return true if LOOP has proper form for interchange.  We check three
    1716              :    conditions in the function:
    1717              :      1) In general, a loop can be interchanged only if it doesn't have
    1718              :         basic blocks other than header, exit and latch besides possible
    1719              :         inner loop nest.  This basically restricts loop interchange to
    1720              :         below form loop nests:
    1721              : 
    1722              :           header<---+
    1723              :             |       |
    1724              :             v       |
    1725              :         INNER_LOOP  |
    1726              :             |       |
    1727              :             v       |
    1728              :           exit--->latch
    1729              : 
    1730              :      2) Data reference in basic block that executes in different times
    1731              :         than loop head/exit is not allowed.
    1732              :      3) Record the innermost outer loop that doesn't form rectangle loop
    1733              :         nest with LOOP.  */
    1734              : 
    1735              : static bool
    1736        58237 : proper_loop_form_for_interchange (class loop *loop, class loop **min_outer)
    1737              : {
    1738        58237 :   edge e0, e1, exit;
    1739              : 
    1740              :   /* Don't interchange if loop has unsupported information for the moment.  */
    1741        58237 :   if (loop->safelen > 0
    1742        57736 :       || loop->constraints != 0
    1743        57736 :       || loop->can_be_parallel
    1744        57736 :       || loop->dont_vectorize
    1745        57484 :       || loop->force_vectorize
    1746        57484 :       || loop->in_oacc_kernels_region
    1747        57418 :       || loop->orig_loop_num != 0
    1748        57332 :       || loop->simduid != NULL_TREE)
    1749              :     return false;
    1750              : 
    1751              :   /* Don't interchange if outer loop has basic block other than header, exit
    1752              :      and latch.  */
    1753        57330 :   if (loop->inner != NULL
    1754         2779 :       && loop->num_nodes != loop->inner->num_nodes + 3)
    1755              :     return false;
    1756              : 
    1757        56913 :   if ((exit = single_dom_exit (loop)) == NULL)
    1758              :     return false;
    1759              : 
    1760              :   /* Check control flow on loop header/exit blocks.  */
    1761        37908 :   if (loop->header == exit->src
    1762        37908 :       && (EDGE_COUNT (loop->header->preds) != 2
    1763        28005 :           || EDGE_COUNT (loop->header->succs) != 2))
    1764              :     return false;
    1765        37887 :   else if (loop->header != exit->src
    1766        37887 :            && (EDGE_COUNT (loop->header->preds) != 2
    1767        43493 :                || !single_succ_p (loop->header)
    1768         2356 :                || unsupported_edge (single_succ_edge (loop->header))
    1769         2356 :                || EDGE_COUNT (exit->src->succs) != 2
    1770        43491 :                || !single_pred_p (exit->src)
    1771         2354 :                || unsupported_edge (single_pred_edge (exit->src))))
    1772              :     return false;
    1773              : 
    1774        30338 :   e0 = EDGE_PRED (loop->header, 0);
    1775        30338 :   e1 = EDGE_PRED (loop->header, 1);
    1776        30329 :   if (unsupported_edge (e0) || unsupported_edge (e1)
    1777        30318 :       || (e0->src != loop->latch && e1->src != loop->latch)
    1778        60656 :       || (e0->src->loop_father == loop && e1->src->loop_father == loop))
    1779              :     return false;
    1780              : 
    1781        30318 :   e0 = EDGE_SUCC (exit->src, 0);
    1782        30318 :   e1 = EDGE_SUCC (exit->src, 1);
    1783        30318 :   if (unsupported_edge (e0) || unsupported_edge (e1)
    1784        30312 :       || (e0->dest != loop->latch && e1->dest != loop->latch)
    1785        60496 :       || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
    1786              :     return false;
    1787              : 
    1788              :   /* Don't interchange if any reference is in basic block that doesn't
    1789              :      dominate exit block.  */
    1790        30178 :   basic_block *bbs = get_loop_body (loop);
    1791        97460 :   for (unsigned i = 0; i < loop->num_nodes; i++)
    1792              :     {
    1793        69172 :       basic_block bb = bbs[i];
    1794              : 
    1795       108166 :       if (bb->loop_father != loop
    1796        62708 :           || bb == loop->header || bb == exit->src
    1797        99350 :           || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
    1798        38994 :         continue;
    1799              : 
    1800        30178 :       for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
    1801        30626 :            !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
    1802         4640 :         if (gimple_vuse (gsi_stmt (gsi)))
    1803              :           {
    1804         1890 :             free (bbs);
    1805         1890 :             return false;
    1806              :           }
    1807              :     }
    1808        28288 :   free (bbs);
    1809              : 
    1810        28288 :   tree niters = number_of_latch_executions (loop);
    1811        28288 :   niters = analyze_scalar_evolution (loop_outer (loop), niters);
    1812        28288 :   if (!niters || chrec_contains_undetermined (niters))
    1813        11188 :     return false;
    1814              : 
    1815              :   /* Record the innermost outer loop that doesn't form rectangle loop nest.  */
    1816        17100 :   for (loop_p loop2 = loop_outer (loop);
    1817        21302 :        loop2 && flow_loop_nested_p (*min_outer, loop2);
    1818         4202 :        loop2 = loop_outer (loop2))
    1819              :     {
    1820         4433 :       niters = instantiate_scev (loop_preheader_edge (loop2),
    1821              :                                  loop_outer (loop), niters);
    1822         4433 :       if (!evolution_function_is_invariant_p (niters, loop2->num))
    1823              :         {
    1824          231 :           *min_outer = loop2;
    1825          231 :           break;
    1826              :         }
    1827              :     }
    1828              :   return true;
    1829              : }
    1830              : 
    1831              : /* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
    1832              :    should be interchanged by looking into all DATAREFS.  */
    1833              : 
    1834              : static bool
    1835          797 : should_interchange_loop_nest (class loop *loop_nest, class loop *innermost,
    1836              :                               vec<data_reference_p> datarefs)
    1837              : {
    1838         1594 :   unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
    1839          797 :   gcc_assert (idx > 0);
    1840              : 
    1841              :   /* Check if any two adjacent loops should be interchanged.  */
    1842              :   for (class loop *loop = innermost;
    1843         1728 :        loop != loop_nest; loop = loop_outer (loop), idx--)
    1844         1031 :     if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
    1845              :                                   loop == innermost, false))
    1846              :       return true;
    1847              : 
    1848              :   return false;
    1849              : }
    1850              : 
    1851              : /* Given loop nest LOOP_NEST and data references DATAREFS, compute data
    1852              :    dependences for loop interchange and store it in DDRS.  Note we compute
    1853              :    dependences directly rather than call generic interface so that we can
    1854              :    return on unknown dependence instantly.  */
    1855              : 
    1856              : static bool
    1857          107 : tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
    1858              :                                     vec<data_reference_p> datarefs,
    1859              :                                     vec<ddr_p> *ddrs)
    1860              : {
    1861          107 :   struct data_reference *a, *b;
    1862          107 :   class loop *innermost = loop_nest.last ();
    1863              : 
    1864          107 :   for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
    1865              :     {
    1866          438 :       bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
    1867         4765 :       for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
    1868         4247 :         if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
    1869              :           {
    1870         3262 :             bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
    1871              :             /* Don't support multiple write references in outer loop.  */
    1872         3262 :             if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
    1873           27 :               return false;
    1874              : 
    1875         3259 :             ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
    1876         3259 :             ddrs->safe_push (ddr);
    1877         3259 :             compute_affine_dependence (ddr, loop_nest[0]);
    1878              : 
    1879              :             /* Give up if ddr is unknown dependence or classic direct vector
    1880              :                is not available.  */
    1881         3259 :             if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
    1882         3259 :                 || (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
    1883          112 :                     && DDR_NUM_DIR_VECTS (ddr) == 0))
    1884              :               return false;
    1885              : 
    1886              :             /* If either data references is in outer loop of nest, we require
    1887              :                no dependence here because the data reference need to be moved
    1888              :                into inner loop during interchange.  */
    1889         3246 :             if (a_outer_p && b_outer_p
    1890         3237 :                 && operand_equal_p (DR_REF (a), DR_REF (b), 0))
    1891            9 :               continue;
    1892         3228 :             if (DDR_ARE_DEPENDENT (ddr) != chrec_known
    1893           76 :                 && (a_outer_p || b_outer_p))
    1894              :               return false;
    1895              :         }
    1896              :     }
    1897              : 
    1898              :   return true;
    1899              : }
    1900              : 
    1901              : /* Prune DATAREFS by removing any data reference not inside of LOOP.  */
    1902              : 
    1903              : static inline void
    1904            7 : prune_datarefs_not_in_loop (class loop *loop, vec<data_reference_p> datarefs)
    1905              : {
    1906            7 :   unsigned i, j;
    1907            7 :   struct data_reference *dr;
    1908              : 
    1909           44 :   for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
    1910              :     {
    1911           37 :       if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
    1912           35 :         datarefs[j++] = dr;
    1913              :       else
    1914              :         {
    1915            2 :           if (dr->aux)
    1916              :             {
    1917            2 :               DR_ACCESS_STRIDE (dr)->release ();
    1918            2 :               delete (vec<tree> *) dr->aux;
    1919              :             }
    1920            2 :           free_data_ref (dr);
    1921              :         }
    1922              :     }
    1923            7 :   datarefs.truncate (j);
    1924            7 : }
    1925              : 
    1926              : /* Find and store data references in DATAREFS for LOOP nest.  If there's
    1927              :    difficult data reference in a basic block, we shrink the loop nest to
    1928              :    inner loop of that basic block's father loop.  On success, return the
    1929              :    outer loop of the result loop nest.  */
    1930              : 
    1931              : static class loop *
    1932         1240 : prepare_data_references (class loop *loop, vec<data_reference_p> *datarefs)
    1933              : {
    1934         1240 :   class loop *loop_nest = loop;
    1935         1240 :   vec<data_reference_p> *bb_refs;
    1936         1240 :   basic_block bb, *bbs = get_loop_body_in_dom_order (loop);
    1937              : 
    1938         8580 :   for (unsigned i = 0; i < loop->num_nodes; i++)
    1939         7340 :     bbs[i]->aux = NULL;
    1940              : 
    1941              :   /* Find data references for all basic blocks.  Shrink loop nest on difficult
    1942              :      data reference.  */
    1943         7316 :   for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
    1944              :     {
    1945         6076 :       bb = bbs[i];
    1946         6076 :       if (!flow_bb_inside_loop_p (loop_nest, bb))
    1947            4 :         continue;
    1948              : 
    1949         6072 :       bb_refs = new vec<data_reference_p> ();
    1950         6072 :       if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
    1951              :         {
    1952          363 :           loop_nest = bb->loop_father->inner;
    1953          363 :           if (loop_nest && !loop_nest->inner)
    1954          361 :             loop_nest = NULL;
    1955              : 
    1956          363 :           free_data_refs (*bb_refs);
    1957          363 :           delete bb_refs;
    1958              :         }
    1959         5709 :       else if (bb_refs->is_empty ())
    1960              :         {
    1961         4604 :           bb_refs->release ();
    1962         4604 :           delete bb_refs;
    1963              :         }
    1964              :       else
    1965         1105 :         bb->aux = bb_refs;
    1966              :     }
    1967              : 
    1968              :   /* Collect all data references in loop nest.  */
    1969         8580 :   for (unsigned i = 0; i < loop->num_nodes; i++)
    1970              :     {
    1971         7340 :       bb = bbs[i];
    1972         7340 :       if (!bb->aux)
    1973         6235 :         continue;
    1974              : 
    1975         1105 :       bb_refs = (vec<data_reference_p> *) bb->aux;
    1976         1105 :       if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
    1977              :         {
    1978         1043 :           datarefs->safe_splice (*bb_refs);
    1979         1043 :           bb_refs->release ();
    1980              :         }
    1981              :       else
    1982           62 :         free_data_refs (*bb_refs);
    1983              : 
    1984         1105 :       delete bb_refs;
    1985         1105 :       bb->aux = NULL;
    1986              :     }
    1987         1240 :   free (bbs);
    1988              : 
    1989         1240 :   return loop_nest;
    1990              : }
    1991              : 
    1992              : /* Given innermost LOOP, return true if perfect loop nest can be found and
    1993              :    data dependences can be computed.  If succeed, record the perfect loop
    1994              :    nest in LOOP_NEST; record all data references in DATAREFS and record all
    1995              :    data dependence relations in DDRS.
    1996              : 
    1997              :    We do support a restricted form of imperfect loop nest, i.e, loop nest
    1998              :    with load/store in outer loop initializing/finalizing simple reduction
    1999              :    of the innermost loop.  For such outer loop reference, we require that
    2000              :    it has no dependence with others sinve it will be moved to inner loop
    2001              :    in interchange.  */
    2002              : 
    2003              : static bool
    2004        55436 : prepare_perfect_loop_nest (class loop *loop, vec<loop_p> *loop_nest,
    2005              :                            vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
    2006              : {
    2007        55436 :   class loop *start_loop = NULL, *innermost = loop;
    2008        55436 :   class loop *outermost = loops_for_fn (cfun)->tree_root;
    2009              : 
    2010              :   /* Find loop nest from the innermost loop.  The outermost is the innermost
    2011              :      outer*/
    2012       113910 :   while (loop->num != 0 && loop->inner == start_loop
    2013       120787 :          && flow_loop_nested_p (outermost, loop))
    2014              :     {
    2015        58237 :       if (!proper_loop_form_for_interchange (loop, &outermost))
    2016              :         break;
    2017              : 
    2018        17100 :       start_loop = loop;
    2019              :       /* If this loop has sibling loop, the father loop won't be in perfect
    2020              :          loop nest.  */
    2021        17100 :       if (loop->next != NULL)
    2022              :         break;
    2023              : 
    2024         7007 :       loop = loop_outer (loop);
    2025              :     }
    2026        55436 :   if (!start_loop || !start_loop->inner)
    2027              :     return false;
    2028              : 
    2029              :   /* Prepare the data reference vector for the loop nest, pruning outer
    2030              :      loops we cannot handle.  */
    2031         1240 :   start_loop = prepare_data_references (start_loop, datarefs);
    2032         1240 :   if (!start_loop
    2033              :       /* Check if there is no data reference.  */
    2034        56212 :       || datarefs->is_empty ()
    2035              :       /* Check if there are too many of data references.  */
    2036         2096 :       || (int) datarefs->length () > MAX_DATAREFS)
    2037              :     return false;
    2038              : 
    2039              :   /* Compute access strides for all data references, pruning outer
    2040              :      loops we cannot analyze refs in.  */
    2041          856 :   start_loop = compute_access_strides (start_loop, innermost, *datarefs);
    2042          856 :   if (!start_loop)
    2043              :     return false;
    2044              : 
    2045              :   /* Check if any interchange is profitable in the loop nest.  */
    2046          797 :   if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
    2047              :     return false;
    2048              : 
    2049              :   /* Check if data dependences can be computed for loop nest starting from
    2050              :      start_loop.  */
    2051              :   loop = start_loop;
    2052          107 :   do {
    2053          107 :     loop_nest->truncate (0);
    2054              : 
    2055          107 :     if (loop != start_loop)
    2056            7 :       prune_datarefs_not_in_loop (start_loop, *datarefs);
    2057              : 
    2058          107 :     if (find_loop_nest (start_loop, loop_nest)
    2059          107 :         && tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
    2060              :       {
    2061           80 :         if (dump_file && (dump_flags & TDF_DETAILS))
    2062           18 :           fprintf (dump_file,
    2063              :                    "\nConsider loop interchange for loop_nest<%d - %d>\n",
    2064              :                    start_loop->num, innermost->num);
    2065              : 
    2066           80 :         if (loop != start_loop)
    2067            3 :           prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);
    2068              : 
    2069           80 :         if (dump_file && (dump_flags & TDF_DETAILS))
    2070           18 :           dump_access_strides (*datarefs);
    2071              : 
    2072           80 :         return true;
    2073              :       }
    2074              : 
    2075           27 :     free_dependence_relations (*ddrs);
    2076           27 :     *ddrs = vNULL;
    2077              :     /* Try to compute data dependences with the outermost loop stripped.  */
    2078           27 :     loop = start_loop;
    2079           27 :     start_loop = start_loop->inner;
    2080           27 :   } while (start_loop && start_loop->inner);
    2081              : 
    2082              :   return false;
    2083              : }
    2084              : 
    2085              : /* Main entry for loop interchange pass.  */
    2086              : 
    2087              : unsigned int
    2088        28067 : pass_linterchange::execute (function *fun)
    2089              : {
    2090        71853 :   if (number_of_loops (fun) <= 2)
    2091              :     return 0;
    2092              : 
    2093        15762 :   bool changed_p = false;
    2094       102722 :   for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
    2095              :     {
    2096        55436 :       vec<loop_p> loop_nest = vNULL;
    2097        55436 :       vec<data_reference_p> datarefs = vNULL;
    2098        55436 :       vec<ddr_p> ddrs = vNULL;
    2099        55436 :       if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
    2100              :         {
    2101           80 :           tree_loop_interchange loop_interchange (loop_nest);
    2102           80 :           changed_p |= loop_interchange.interchange (datarefs, ddrs);
    2103           80 :         }
    2104        55436 :       free_dependence_relations (ddrs);
    2105        55436 :       free_data_refs_with_aux (datarefs);
    2106        55436 :       loop_nest.release ();
    2107        15762 :     }
    2108              : 
    2109        15762 :   if (changed_p)
    2110              :     {
    2111           43 :       unsigned todo = TODO_update_ssa_only_virtuals;
    2112           43 :       todo |= loop_invariant_motion_in_fun (cfun, false);
    2113           43 :       scev_reset ();
    2114           43 :       return todo;
    2115              :     }
    2116              :   return 0;
    2117              : }
    2118              : 
    2119              : } // anon namespace
    2120              : 
    2121              : gimple_opt_pass *
    2122       285722 : make_pass_linterchange (gcc::context *ctxt)
    2123              : {
    2124       285722 :   return new pass_linterchange (ctxt);
    2125              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.