LCOV - code coverage report
Current view: top level - gcc - modulo-sched.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 9.8 % 1427 140
Test Date: 2026-02-28 14:20:25 Functions: 14.3 % 63 9
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Swing Modulo Scheduling implementation.
       2              :    Copyright (C) 2004-2026 Free Software Foundation, Inc.
       3              :    Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
       4              : 
       5              : This file is part of GCC.
       6              : 
       7              : GCC is free software; you can redistribute it and/or modify it under
       8              : the terms of the GNU General Public License as published by the Free
       9              : Software Foundation; either version 3, or (at your option) any later
      10              : version.
      11              : 
      12              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15              : for more details.
      16              : 
      17              : You should have received a copy of the GNU General Public License
      18              : along with GCC; see the file COPYING3.  If not see
      19              : <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : 
      22              : #include "config.h"
      23              : #include "system.h"
      24              : #include "coretypes.h"
      25              : #include "backend.h"
      26              : #include "target.h"
      27              : #include "rtl.h"
      28              : #include "tree.h"
      29              : #include "cfghooks.h"
      30              : #include "df.h"
      31              : #include "memmodel.h"
      32              : #include "optabs.h"
      33              : #include "regs.h"
      34              : #include "emit-rtl.h"
      35              : #include "gcov-io.h"
      36              : #include "profile.h"
      37              : #include "insn-attr.h"
      38              : #include "cfgrtl.h"
      39              : #include "sched-int.h"
      40              : #include "cfgloop.h"
      41              : #include "expr.h"
      42              : #include "ddg.h"
      43              : #include "tree-pass.h"
      44              : #include "dbgcnt.h"
      45              : #include "loop-unroll.h"
      46              : #include "hard-reg-set.h"
      47              : 
      48              : #ifdef INSN_SCHEDULING
      49              : 
      50              : /* This file contains the implementation of the Swing Modulo Scheduler,
      51              :    described in the following references:
      52              :    [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
      53              :        Lifetime--sensitive modulo scheduling in a production environment.
      54              :        IEEE Trans. on Comps., 50(3), March 2001
      55              :    [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
      56              :        Swing Modulo Scheduling: A Lifetime Sensitive Approach.
      57              :        PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
      58              : 
      59              :    The basic structure is:
      60              :    1. Build a data-dependence graph (DDG) for each loop.
      61              :    2. Use the DDG to order the insns of a loop (not in topological order
      62              :       necessarily, but rather) trying to place each insn after all its
      63              :       predecessors _or_ after all its successors.
      64              :    3. Compute MII: a lower bound on the number of cycles to schedule the loop.
      65              :    4. Use the ordering to perform list-scheduling of the loop:
      66              :       1. Set II = MII.  We will try to schedule the loop within II cycles.
      67              :       2. Try to schedule the insns one by one according to the ordering.
      68              :          For each insn compute an interval of cycles by considering already-
      69              :          scheduled preds and succs (and associated latencies); try to place
      70              :          the insn in the cycles of this window checking for potential
      71              :          resource conflicts (using the DFA interface).
      72              :          Note: this is different from the cycle-scheduling of schedule_insns;
      73              :          here the insns are not scheduled monotonically top-down (nor bottom-
      74              :          up).
      75              :       3. If failed in scheduling all insns - bump II++ and try again, unless
      76              :          II reaches an upper bound MaxII, in which case report failure.
      77              :    5. If we succeeded in scheduling the loop within II cycles, we now
      78              :       generate prolog and epilog, decrease the counter of the loop, and
      79              :       perform modulo variable expansion for live ranges that span more than
      80              :       II cycles (i.e. use register copies to prevent a def from overwriting
      81              :       itself before reaching the use).
      82              : 
      83              :     SMS works with countable loops (1) whose control part can be easily
      84              :     decoupled from the rest of the loop and (2) whose loop count can
      85              :     be easily adjusted.  This is because we peel a constant number of
      86              :     iterations into a prologue and epilogue for which we want to avoid
      87              :     emitting the control part, and a kernel which is to iterate that
      88              :     constant number of iterations less than the original loop.  So the
      89              :     control part should be a set of insns clearly identified and having
      90              :     its own iv, not otherwise used in the loop (at-least for now), which
      91              :     initializes a register before the loop to the number of iterations.
      92              :     Currently SMS relies on the do-loop pattern to recognize such loops,
      93              :     where (1) the control part comprises of all insns defining and/or
      94              :     using a certain 'count' register and (2) the loop count can be
      95              :     adjusted by modifying this register prior to the loop.
      96              :     TODO: Rely on cfgloop analysis instead.  */
      97              : 
      98              : /* This page defines partial-schedule structures and functions for
      99              :    modulo scheduling.  */
     100              : 
     101              : typedef struct partial_schedule *partial_schedule_ptr;
     102              : typedef struct ps_insn *ps_insn_ptr;
     103              : 
     104              : /* The minimum (absolute) cycle that a node of ps was scheduled in.  */
     105              : #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
     106              : 
     107              : /* The maximum (absolute) cycle that a node of ps was scheduled in.  */
     108              : #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
     109              : 
     110              : /* Perform signed modulo, always returning a non-negative value.  */
     111              : #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
     112              : 
     113              : /* The number of different iterations the nodes in ps span, assuming
     114              :    the stage boundaries are placed efficiently.  */
     115              : #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
     116              :                          + 1 + ii - 1) / ii)
     117              : /* The stage count of ps.  */
     118              : #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
     119              : 
     120              : /* A single instruction in the partial schedule.  */
     121              : struct ps_insn
     122              : {
     123              :   /* Identifies the instruction to be scheduled.  Values smaller than
     124              :      the ddg's num_nodes refer directly to ddg nodes.  A value of
     125              :      X - num_nodes refers to register move X.  */
     126              :   int id;
     127              : 
     128              :   /* The (absolute) cycle in which the PS instruction is scheduled.
     129              :      Same as SCHED_TIME (node).  */
     130              :   int cycle;
     131              : 
     132              :   /* The next/prev PS_INSN in the same row.  */
     133              :   ps_insn_ptr next_in_row,
     134              :               prev_in_row;
     135              : 
     136              : };
     137              : 
     138              : /* Information about a register move that has been added to a partial
     139              :    schedule.  */
     140              : struct ps_reg_move_info
     141              : {
     142              :   /* The source of the move is defined by the ps_insn with id DEF.
     143              :      The destination is used by the ps_insns with the ids in USES.  */
     144              :   int def;
     145              :   sbitmap uses;
     146              : 
     147              :   /* The original form of USES' instructions used OLD_REG, but they
     148              :      should now use NEW_REG.  */
     149              :   rtx old_reg;
     150              :   rtx new_reg;
     151              : 
     152              :   /* The number of consecutive stages that the move occupies.  */
     153              :   int num_consecutive_stages;
     154              : 
     155              :   /* An instruction that sets NEW_REG to the correct value.  The first
     156              :      move associated with DEF will have an rhs of OLD_REG; later moves
     157              :      use the result of the previous move.  */
     158              :   rtx_insn *insn;
     159              : };
     160              : 
     161              : /* Holds the partial schedule as an array of II rows.  Each entry of the
     162              :    array points to a linked list of PS_INSNs, which represents the
     163              :    instructions that are scheduled for that row.  */
     164              : struct partial_schedule
     165              : {
     166              :   int ii;       /* Number of rows in the partial schedule.  */
     167              :   int history;  /* Threshold for conflict checking using DFA.  */
     168              : 
     169              :   /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
     170              :   ps_insn_ptr *rows;
     171              : 
     172              :   /* All the moves added for this partial schedule.  Index X has
     173              :      a ps_insn id of X + g->num_nodes.  */
     174              :   vec<ps_reg_move_info> reg_moves;
     175              : 
     176              :   /*  rows_length[i] holds the number of instructions in the row.
     177              :       It is used only (as an optimization) to back off quickly from
     178              :       trying to schedule a node in a full row; that is, to avoid running
     179              :       through futile DFA state transitions.  */
     180              :   int *rows_length;
     181              : 
     182              :   /* The earliest absolute cycle of an insn in the partial schedule.  */
     183              :   int min_cycle;
     184              : 
     185              :   /* The latest absolute cycle of an insn in the partial schedule.  */
     186              :   int max_cycle;
     187              : 
     188              :   ddg_ptr g;    /* The DDG of the insns in the partial schedule.  */
     189              : 
     190              :   int stage_count;  /* The stage count of the partial schedule.  */
     191              : };
     192              : 
     193              : 
     194              : static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
     195              : static void free_partial_schedule (partial_schedule_ptr);
     196              : static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
     197              : void print_partial_schedule (partial_schedule_ptr, FILE *);
     198              : static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
     199              : static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
     200              :                                                 int, int, sbitmap, sbitmap);
     201              : static void rotate_partial_schedule (partial_schedule_ptr, int);
     202              : void set_row_column_for_ps (partial_schedule_ptr);
     203              : static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
     204              : static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
     205              : 
     206              : 
     207              : /* This page defines constants and structures for the modulo scheduling
     208              :    driver.  */
     209              : 
     210              : static int sms_order_nodes (ddg_ptr, int, int *, int *);
     211              : static void set_node_sched_params (ddg_ptr);
     212              : static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
     213              : static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
     214              : static int calculate_stage_count (partial_schedule_ptr, int);
     215              : static void calculate_must_precede_follow (ddg_node_ptr, int, int,
     216              :                                            int, int, sbitmap, sbitmap, sbitmap);
     217              : static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
     218              :                              sbitmap, int, int *, int *, int *);
     219              : static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
     220              :                                           sbitmap, int *, sbitmap, sbitmap);
     221              : static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
     222              : 
     223              : #define NODE_ASAP(node) ((node)->aux.count)
     224              : 
     225              : #define SCHED_PARAMS(x) (&node_sched_param_vec[x])
     226              : #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
     227              : #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
     228              : #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
     229              : #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
     230              : 
     231              : /* The scheduling parameters held for each node.  */
     232              : typedef struct node_sched_params
     233              : {
     234              :   int time;     /* The absolute scheduling cycle.  */
     235              : 
     236              :   int row;    /* Holds time % ii.  */
     237              :   int stage;  /* Holds time / ii.  */
     238              : 
     239              :   /* The column of a node inside the ps.  If nodes u, v are on the same row,
     240              :      u will precede v if column (u) < column (v).  */
     241              :   int column;
     242              : } *node_sched_params_ptr;
     243              : 
     244              : /* The following three functions are copied from the current scheduler
     245              :    code in order to use sched_analyze() for computing the dependencies.
     246              :    They are used when initializing the sched_info structure.  */
     247              : static const char *
     248            0 : sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
     249              : {
     250            0 :   static char tmp[80];
     251              : 
     252            0 :   sprintf (tmp, "i%4d", INSN_UID (insn));
     253            0 :   return tmp;
     254              : }
     255              : 
     256              : static void
     257            0 : compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
     258              :                                regset used ATTRIBUTE_UNUSED)
     259              : {
     260            0 : }
     261              : 
     262              : static struct common_sched_info_def sms_common_sched_info;
     263              : 
     264              : static struct sched_deps_info_def sms_sched_deps_info =
     265              :   {
     266              :     compute_jump_reg_dependencies,
     267              :     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     268              :     NULL,
     269              :     0, 0, 0
     270              :   };
     271              : 
     272              : static struct haifa_sched_info sms_sched_info =
     273              : {
     274              :   NULL,
     275              :   NULL,
     276              :   NULL,
     277              :   NULL,
     278              :   NULL,
     279              :   sms_print_insn,
     280              :   NULL,
     281              :   NULL, /* insn_finishes_block_p */
     282              :   NULL, NULL,
     283              :   NULL, NULL,
     284              :   0, 0,
     285              : 
     286              :   NULL, NULL, NULL, NULL,
     287              :   NULL, NULL,
     288              :   0
     289              : };
     290              : 
     291              : /* Partial schedule instruction ID in PS is a register move.  Return
     292              :    information about it.  */
     293              : static struct ps_reg_move_info *
     294            0 : ps_reg_move (partial_schedule_ptr ps, int id)
     295              : {
     296            0 :   gcc_checking_assert (id >= ps->g->num_nodes);
     297            0 :   return &ps->reg_moves[id - ps->g->num_nodes];
     298              : }
     299              : 
     300              : /* Return the rtl instruction that is being scheduled by partial schedule
     301              :    instruction ID, which belongs to schedule PS.  */
     302              : static rtx_insn *
     303            0 : ps_rtl_insn (partial_schedule_ptr ps, int id)
     304              : {
     305            0 :   if (id < ps->g->num_nodes)
     306            0 :     return ps->g->nodes[id].insn;
     307              :   else
     308            0 :     return ps_reg_move (ps, id)->insn;
     309              : }
     310              : 
     311              : /* Partial schedule instruction ID, which belongs to PS, occurred in
     312              :    the original (unscheduled) loop.  Return the first instruction
     313              :    in the loop that was associated with ps_rtl_insn (PS, ID).
     314              :    If the instruction had some notes before it, this is the first
     315              :    of those notes.  */
     316              : static rtx_insn *
     317            0 : ps_first_note (partial_schedule_ptr ps, int id)
     318              : {
     319            0 :   gcc_assert (id < ps->g->num_nodes);
     320            0 :   return ps->g->nodes[id].first_note;
     321              : }
     322              : 
     323              : /* Return the number of consecutive stages that are occupied by
     324              :    partial schedule instruction ID in PS.  */
     325              : static int
     326            0 : ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
     327              : {
     328            0 :   if (id < ps->g->num_nodes)
     329              :     return 1;
     330              :   else
     331            0 :     return ps_reg_move (ps, id)->num_consecutive_stages;
     332              : }
     333              : 
     334              : /* Given HEAD and TAIL which are the first and last insns in a loop;
     335              :    return the register which controls the loop.  Return zero if it has
     336              :    more than one occurrence in the loop besides the control part or the
     337              :    do-loop pattern is not of the form we expect.  */
     338              : static rtx
     339          156 : doloop_register_get (rtx_insn *head, rtx_insn *tail)
     340              : {
     341          156 :   rtx reg, condition;
     342          156 :   rtx_insn *insn, *first_insn_not_to_check;
     343              : 
     344          156 :   if (!JUMP_P (tail))
     345              :     return NULL_RTX;
     346              : 
     347          156 :   if (!targetm.code_for_doloop_end)
     348              :     return NULL_RTX;
     349              : 
     350              :   /* TODO: Free SMS's dependence on doloop_condition_get.  */
     351            0 :   condition = doloop_condition_get (tail);
     352            0 :   if (! condition)
     353              :     return NULL_RTX;
     354              : 
     355            0 :   if (REG_P (XEXP (condition, 0)))
     356              :     reg = XEXP (condition, 0);
     357            0 :   else if (GET_CODE (XEXP (condition, 0)) == PLUS
     358            0 :            && REG_P (XEXP (XEXP (condition, 0), 0)))
     359              :     {
     360            0 :       if (CONST_INT_P (XEXP (condition, 1))
     361            0 :           && INTVAL (XEXP (condition, 1)) == -1)
     362              :         reg = XEXP (XEXP (condition, 0), 0);
     363              :       else
     364              :         return NULL_RTX;
     365              :     }
     366              :   else
     367            0 :     gcc_unreachable ();
     368              : 
     369              :   /* Check that the COUNT_REG has no other occurrences in the loop
     370              :      until the decrement.  We assume the control part consists of
     371              :      either a single (parallel) branch-on-count or a (non-parallel)
     372              :      branch immediately preceded by a single (decrement) insn.  */
     373            0 :   first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
     374            0 :                              : prev_nondebug_insn (tail));
     375              : 
     376            0 :   for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
     377            0 :     if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
     378              :       {
     379            0 :         if (dump_file)
     380              :         {
     381            0 :           fprintf (dump_file, "SMS count_reg found ");
     382            0 :           print_rtl_single (dump_file, reg);
     383            0 :           fprintf (dump_file, " outside control in insn:\n");
     384            0 :           print_rtl_single (dump_file, insn);
     385              :         }
     386              : 
     387            0 :         return NULL_RTX;
     388              :       }
     389              : 
     390              :   return reg;
     391              : }
     392              : 
     393              : /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
     394              :    that the number of iterations is a compile-time constant.  If so,
     395              :    return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
     396              :    this constant.  Otherwise return 0.  */
     397              : static rtx_insn *
     398            0 : const_iteration_count (rtx count_reg, basic_block pre_header,
     399              :                        int64_t *count, bool* adjust_inplace)
     400              : {
     401            0 :   rtx_insn *insn;
     402            0 :   rtx_insn *head, *tail;
     403              : 
     404            0 :   *adjust_inplace = false;
     405            0 :   bool read_after = false;
     406              : 
     407            0 :   if (! pre_header)
     408              :     return NULL;
     409              : 
     410            0 :   get_ebb_head_tail (pre_header, pre_header, &head, &tail);
     411              : 
     412            0 :   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
     413            0 :     if (single_set (insn) && rtx_equal_p (count_reg,
     414            0 :                                           SET_DEST (single_set (insn))))
     415              :       {
     416            0 :         rtx pat = single_set (insn);
     417              : 
     418            0 :         if (CONST_INT_P (SET_SRC (pat)))
     419              :           {
     420            0 :             *count = INTVAL (SET_SRC (pat));
     421            0 :             *adjust_inplace = !read_after;
     422            0 :             return insn;
     423              :           }
     424              : 
     425              :         return NULL;
     426              :       }
     427            0 :     else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
     428              :       {
     429            0 :         read_after = true;
     430            0 :         if (reg_set_p (count_reg, insn))
     431              :            break;
     432              :       }
     433              : 
     434              :   return NULL;
     435              : }
     436              : 
     437              : /* A very simple resource-based lower bound on the initiation interval.
     438              :    ??? Improve the accuracy of this bound by considering the
     439              :    utilization of various units.  */
     440              : static int
     441            0 : res_MII (ddg_ptr g)
     442              : {
     443            0 :   if (targetm.sched.sms_res_mii)
     444            0 :     return targetm.sched.sms_res_mii (g);
     445              : 
     446            0 :   return g->num_nodes / issue_rate;
     447              : }
     448              : 
     449              : 
     450              : /* A vector that contains the sched data for each ps_insn.  */
     451              : static vec<node_sched_params> node_sched_param_vec;
     452              : 
     453              : /* Allocate sched_params for each node and initialize it.  */
     454              : static void
     455            0 : set_node_sched_params (ddg_ptr g)
     456              : {
     457            0 :   node_sched_param_vec.truncate (0);
     458            0 :   node_sched_param_vec.safe_grow_cleared (g->num_nodes, true);
     459            0 : }
     460              : 
     461              : /* Make sure that node_sched_param_vec has an entry for every move in PS.  */
     462              : static void
     463            0 : extend_node_sched_params (partial_schedule_ptr ps)
     464              : {
     465            0 :   node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
     466            0 :                                           + ps->reg_moves.length (), true);
     467            0 : }
     468              : 
     469              : /* Update the sched_params (time, row and stage) for node U using the II,
     470              :    the CYCLE of U and MIN_CYCLE.
     471              :    We're not simply taking the following
     472              :    SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
     473              :    because the stages may not be aligned on cycle 0.  */
     474              : static void
     475            0 : update_node_sched_params (int u, int ii, int cycle, int min_cycle)
     476              : {
     477            0 :   int sc_until_cycle_zero;
     478            0 :   int stage;
     479              : 
     480            0 :   SCHED_TIME (u) = cycle;
     481            0 :   SCHED_ROW (u) = SMODULO (cycle, ii);
     482              : 
     483              :   /* The calculation of stage count is done adding the number
     484              :      of stages before cycle zero and after cycle zero.  */
     485            0 :   sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
     486              : 
     487            0 :   if (SCHED_TIME (u) < 0)
     488              :     {
     489            0 :       stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
     490            0 :       SCHED_STAGE (u) = sc_until_cycle_zero - stage;
     491              :     }
     492              :   else
     493              :     {
     494            0 :       stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
     495            0 :       SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
     496              :     }
     497            0 : }
     498              : 
     499              : static void
     500            0 : print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
     501              : {
     502            0 :   int i;
     503              : 
     504            0 :   if (! file)
     505              :     return;
     506            0 :   for (i = 0; i < num_nodes; i++)
     507              :     {
     508            0 :       node_sched_params_ptr nsp = SCHED_PARAMS (i);
     509              : 
     510            0 :       fprintf (file, "Node = %d; INSN = %d\n", i,
     511            0 :                INSN_UID (ps_rtl_insn (ps, i)));
     512            0 :       fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
     513            0 :       fprintf (file, " time = %d:\n", nsp->time);
     514            0 :       fprintf (file, " stage = %d:\n", nsp->stage);
     515              :     }
     516              : }
     517              : 
     518              : /* Set SCHED_COLUMN for each instruction in row ROW of PS.  */
     519              : static void
     520            0 : set_columns_for_row (partial_schedule_ptr ps, int row)
     521              : {
     522            0 :   ps_insn_ptr cur_insn;
     523            0 :   int column;
     524              : 
     525            0 :   column = 0;
     526            0 :   for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
     527            0 :     SCHED_COLUMN (cur_insn->id) = column++;
     528            0 : }
     529              : 
     530              : /* Set SCHED_COLUMN for each instruction in PS.  */
     531              : static void
     532            0 : set_columns_for_ps (partial_schedule_ptr ps)
     533              : {
     534            0 :   int row;
     535              : 
     536            0 :   for (row = 0; row < ps->ii; row++)
     537            0 :     set_columns_for_row (ps, row);
     538            0 : }
     539              : 
     540              : /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
     541              :    Its single predecessor has already been scheduled, as has its
     542              :    ddg node successors.  (The move may have also another move as its
     543              :    successor, in which case that successor will be scheduled later.)
     544              : 
     545              :    The move is part of a chain that satisfies register dependencies
     546              :    between a producing ddg node and various consuming ddg nodes.
     547              :    If some of these dependencies have a distance of 1 (meaning that
     548              :    the use is upward-exposed) then DISTANCE1_USES is nonnull and
     549              :    contains the set of uses with distance-1 dependencies.
     550              :    DISTANCE1_USES is null otherwise.
     551              : 
     552              :    MUST_FOLLOW is a scratch bitmap that is big enough to hold
     553              :    all current ps_insn ids.
     554              : 
     555              :    Return true on success.  */
     556              : static bool
     557            0 : schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
     558              :                    sbitmap distance1_uses, sbitmap must_follow)
     559              : {
     560            0 :   unsigned int u;
     561            0 :   int this_time, this_distance, this_start, this_end, this_latency;
     562            0 :   int start, end, c, ii;
     563            0 :   sbitmap_iterator sbi;
     564            0 :   ps_reg_move_info *move;
     565            0 :   rtx_insn *this_insn;
     566            0 :   ps_insn_ptr psi;
     567              : 
     568            0 :   move = ps_reg_move (ps, i_reg_move);
     569            0 :   ii = ps->ii;
     570            0 :   if (dump_file)
     571              :     {
     572            0 :       fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
     573            0 :                ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
     574              :                PS_MIN_CYCLE (ps));
     575            0 :       print_rtl_single (dump_file, move->insn);
     576            0 :       fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
     577            0 :       fprintf (dump_file, "=========== =========== =====\n");
     578              :     }
     579              : 
     580            0 :   start = INT_MIN;
     581            0 :   end = INT_MAX;
     582              : 
     583              :   /* For dependencies of distance 1 between a producer ddg node A
     584              :      and consumer ddg node B, we have a chain of dependencies:
     585              : 
     586              :         A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
     587              : 
     588              :      where Mi is the ith move.  For dependencies of distance 0 between
     589              :      a producer ddg node A and consumer ddg node C, we have a chain of
     590              :      dependencies:
     591              : 
     592              :         A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
     593              : 
     594              :      where Mi' occupies the same position as Mi but occurs a stage later.
     595              :      We can only schedule each move once, so if we have both types of
     596              :      chain, we model the second as:
     597              : 
     598              :         A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
     599              : 
     600              :      First handle the dependencies between the previously-scheduled
     601              :      predecessor and the move.  */
     602            0 :   this_insn = ps_rtl_insn (ps, move->def);
     603            0 :   this_latency = insn_latency (this_insn, move->insn);
     604            0 :   this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
     605            0 :   this_time = SCHED_TIME (move->def) - this_distance * ii;
     606            0 :   this_start = this_time + this_latency;
     607            0 :   this_end = this_time + ii;
     608            0 :   if (dump_file)
     609            0 :     fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
     610            0 :              this_start, this_end, SCHED_TIME (move->def),
     611            0 :              INSN_UID (this_insn), this_latency, this_distance,
     612            0 :              INSN_UID (move->insn));
     613              : 
     614            0 :   if (start < this_start)
     615              :     start = this_start;
     616            0 :   if (end > this_end)
     617              :     end = this_end;
     618              : 
     619              :   /* Handle the dependencies between the move and previously-scheduled
     620              :      successors.  */
     621            0 :   EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
     622              :     {
     623            0 :       this_insn = ps_rtl_insn (ps, u);
     624            0 :       this_latency = insn_latency (move->insn, this_insn);
     625            0 :       if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
     626              :         this_distance = -1;
     627              :       else
     628              :         this_distance = 0;
     629            0 :       this_time = SCHED_TIME (u) + this_distance * ii;
     630            0 :       this_start = this_time - ii;
     631            0 :       this_end = this_time - this_latency;
     632            0 :       if (dump_file)
     633            0 :         fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
     634            0 :                  this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
     635            0 :                  this_latency, this_distance, INSN_UID (this_insn));
     636              : 
     637            0 :       if (start < this_start)
     638              :         start = this_start;
     639            0 :       if (end > this_end)
     640              :         end = this_end;
     641              :     }
     642              : 
     643            0 :   if (dump_file)
     644              :     {
     645            0 :       fprintf (dump_file, "----------- ----------- -----\n");
     646            0 :       fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
     647              :     }
     648              : 
     649            0 :   bitmap_clear (must_follow);
     650            0 :   bitmap_set_bit (must_follow, move->def);
     651              : 
     652            0 :   start = MAX (start, end - (ii - 1));
     653            0 :   for (c = end; c >= start; c--)
     654              :     {
     655            0 :       psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
     656              :                                          move->uses, must_follow);
     657            0 :       if (psi)
     658              :         {
     659            0 :           update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
     660            0 :           if (dump_file)
     661            0 :             fprintf (dump_file, "\nScheduled register move INSN %d at"
     662            0 :                      " time %d, row %d\n\n", INSN_UID (move->insn), c,
     663            0 :                      SCHED_ROW (i_reg_move));
     664            0 :           return true;
     665              :         }
     666              :     }
     667              : 
     668            0 :   if (dump_file)
     669            0 :     fprintf (dump_file, "\nNo available slot\n\n");
     670              : 
     671              :   return false;
     672              : }
     673              : 
     674              : /*
     675              :    Breaking intra-loop register anti-dependences:
     676              :    Each intra-loop register anti-dependence implies a cross-iteration true
     677              :    dependence of distance 1. Therefore, we can remove such false dependencies
     678              :    and figure out if the partial schedule broke them by checking if (for a
     679              :    true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
     680              :    if so generate a register move.   The number of such moves is equal to:
     681              :               SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
     682              :    nreg_moves = ----------------------------------- + 1 - {   dependence.
     683              :                             ii                          { 1 if not.
     684              : */
     685              : static bool
     686            0 : schedule_reg_moves (partial_schedule_ptr ps)
     687              : {
     688            0 :   ddg_ptr g = ps->g;
     689            0 :   int ii = ps->ii;
     690            0 :   int i;
     691              : 
     692            0 :   for (i = 0; i < g->num_nodes; i++)
     693              :     {
     694            0 :       ddg_node_ptr u = &g->nodes[i];
     695            0 :       ddg_edge_ptr e;
     696            0 :       int nreg_moves = 0, i_reg_move;
     697            0 :       rtx prev_reg, old_reg;
     698            0 :       int first_move;
     699            0 :       int distances[2];
     700            0 :       sbitmap distance1_uses;
     701            0 :       rtx set = single_set (u->insn);
     702              : 
     703              :       /* Skip instructions that do not set a register.  */
     704            0 :       if (set && !REG_P (SET_DEST (set)))
     705            0 :         continue;
     706              : 
     707              :       /* Compute the number of reg_moves needed for u, by looking at life
     708              :          ranges started at u (excluding self-loops).  */
     709            0 :       distances[0] = distances[1] = false;
     710            0 :       for (e = u->out; e; e = e->next_out)
     711            0 :         if (e->type == TRUE_DEP && e->dest != e->src)
     712              :           {
     713            0 :             int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
     714            0 :                                 - SCHED_TIME (e->src->cuid)) / ii;
     715              : 
     716            0 :             if (e->distance == 1)
     717            0 :               nreg_moves4e = (SCHED_TIME (e->dest->cuid)
     718            0 :                               - SCHED_TIME (e->src->cuid) + ii) / ii;
     719              : 
     720              :             /* If dest precedes src in the schedule of the kernel, then dest
     721              :                will read before src writes and we can save one reg_copy.  */
     722            0 :             if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
     723            0 :                 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
     724            0 :               nreg_moves4e--;
     725              : 
     726            0 :             if (nreg_moves4e >= 1)
     727              :               {
     728              :                 /* !single_set instructions are not supported yet and
     729              :                    thus we do not except to encounter them in the loop
     730              :                    except from the doloop part.  For the latter case
     731              :                    we assume no regmoves are generated as the doloop
     732              :                    instructions are tied to the branch with an edge.  */
     733            0 :                 gcc_assert (set);
     734              :                 /* If the instruction contains auto-inc register then
     735              :                    validate that the regmov is being generated for the
     736              :                    target regsiter rather then the inc'ed register.     */
     737            0 :                 gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
     738              :               }
     739              : 
     740            0 :             if (nreg_moves4e)
     741              :               {
     742            0 :                 gcc_assert (e->distance < 2);
     743            0 :                 distances[e->distance] = true;
     744              :               }
     745            0 :             nreg_moves = MAX (nreg_moves, nreg_moves4e);
     746              :           }
     747              : 
     748            0 :       if (nreg_moves == 0)
     749            0 :         continue;
     750              : 
     751              :       /* Create NREG_MOVES register moves.  */
     752            0 :       first_move = ps->reg_moves.length ();
     753            0 :       ps->reg_moves.safe_grow_cleared (first_move + nreg_moves, true);
     754            0 :       extend_node_sched_params (ps);
     755              : 
     756              :       /* Record the moves associated with this node.  */
     757            0 :       first_move += ps->g->num_nodes;
     758              : 
     759              :       /* Generate each move.  */
     760            0 :       old_reg = prev_reg = SET_DEST (set);
     761            0 :       if (HARD_REGISTER_P (old_reg))
     762            0 :         return false;
     763              : 
     764            0 :       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
     765              :         {
     766            0 :           ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
     767              : 
     768            0 :           move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
     769            0 :           move->uses = sbitmap_alloc (first_move + nreg_moves);
     770            0 :           move->old_reg = old_reg;
     771            0 :           move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
     772            0 :           move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
     773            0 :           move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
     774            0 :           bitmap_clear (move->uses);
     775              : 
     776            0 :           prev_reg = move->new_reg;
     777              :         }
     778              : 
     779            0 :       distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
     780              : 
     781            0 :       if (distance1_uses)
     782            0 :         bitmap_clear (distance1_uses);
     783              : 
     784              :       /* Every use of the register defined by node may require a different
     785              :          copy of this register, depending on the time the use is scheduled.
     786              :          Record which uses require which move results.  */
     787            0 :       for (e = u->out; e; e = e->next_out)
     788            0 :         if (e->type == TRUE_DEP && e->dest != e->src)
     789              :           {
     790            0 :             int dest_copy = (SCHED_TIME (e->dest->cuid)
     791            0 :                              - SCHED_TIME (e->src->cuid)) / ii;
     792              : 
     793            0 :             if (e->distance == 1)
     794            0 :               dest_copy = (SCHED_TIME (e->dest->cuid)
     795            0 :                            - SCHED_TIME (e->src->cuid) + ii) / ii;
     796              : 
     797            0 :             if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
     798            0 :                 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
     799            0 :               dest_copy--;
     800              : 
     801            0 :             if (dest_copy)
     802              :               {
     803            0 :                 ps_reg_move_info *move;
     804              : 
     805            0 :                 move = ps_reg_move (ps, first_move + dest_copy - 1);
     806            0 :                 bitmap_set_bit (move->uses, e->dest->cuid);
     807            0 :                 if (e->distance == 1)
     808            0 :                   bitmap_set_bit (distance1_uses, e->dest->cuid);
     809              :               }
     810              :           }
     811              : 
     812            0 :       auto_sbitmap must_follow (first_move + nreg_moves);
     813            0 :       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
     814            0 :         if (!schedule_reg_move (ps, first_move + i_reg_move,
     815              :                                 distance1_uses, must_follow))
     816              :           break;
     817            0 :       if (distance1_uses)
     818            0 :         sbitmap_free (distance1_uses);
     819            0 :       if (i_reg_move < nreg_moves)
     820            0 :         return false;
     821            0 :     }
     822              :   return true;
     823              : }
     824              : 
     825              : /* Emit the moves associated with PS.  Apply the substitutions
     826              :    associated with them.  */
     827              : static void
     828            0 : apply_reg_moves (partial_schedule_ptr ps)
     829              : {
     830            0 :   ps_reg_move_info *move;
     831            0 :   int i;
     832              : 
     833            0 :   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
     834              :     {
     835            0 :       unsigned int i_use;
     836            0 :       sbitmap_iterator sbi;
     837              : 
     838            0 :       EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
     839              :         {
     840            0 :           replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
     841            0 :           df_insn_rescan (ps->g->nodes[i_use].insn);
     842              :         }
     843              :     }
     844            0 : }
     845              : 
     846              : /* Bump the SCHED_TIMEs of all nodes by AMOUNT.  Set the values of
     847              :    SCHED_ROW and SCHED_STAGE.  Instruction scheduled on cycle AMOUNT
     848              :    will move to cycle zero.  */
     849              : static void
     850            0 : reset_sched_times (partial_schedule_ptr ps, int amount)
     851              : {
     852            0 :   int row;
     853            0 :   int ii = ps->ii;
     854            0 :   ps_insn_ptr crr_insn;
     855              : 
     856            0 :   for (row = 0; row < ii; row++)
     857            0 :     for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
     858              :       {
     859            0 :         int u = crr_insn->id;
     860            0 :         int normalized_time = SCHED_TIME (u) - amount;
     861            0 :         int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
     862              : 
     863            0 :         if (dump_file)
     864              :           {
     865              :             /* Print the scheduling times after the rotation.  */
     866            0 :             rtx_insn *insn = ps_rtl_insn (ps, u);
     867              : 
     868            0 :             fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
     869              :                      "crr_insn->cycle=%d, min_cycle=%d", u,
     870            0 :                      INSN_UID (insn), normalized_time, new_min_cycle);
     871            0 :             if (JUMP_P (insn))
     872            0 :               fprintf (dump_file, " (branch)");
     873            0 :             fprintf (dump_file, "\n");
     874              :           }
     875              : 
     876            0 :         gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
     877            0 :         gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
     878              : 
     879            0 :         crr_insn->cycle = normalized_time;
     880            0 :         update_node_sched_params (u, ii, normalized_time, new_min_cycle);
     881              :       }
     882            0 : }
     883              : 
     884              : /* Permute the insns according to their order in PS, from row 0 to
     885              :    row ii-1, and position them right before LAST.  This schedules
     886              :    the insns of the loop kernel.  */
     887              : static void
     888            0 : permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
     889              : {
     890            0 :   int ii = ps->ii;
     891            0 :   int row;
     892            0 :   ps_insn_ptr ps_ij;
     893              : 
     894            0 :   for (row = 0; row < ii ; row++)
     895            0 :     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
     896              :       {
     897            0 :         rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
     898              : 
     899            0 :         if (PREV_INSN (last) != insn)
     900              :           {
     901            0 :             if (ps_ij->id < ps->g->num_nodes)
     902            0 :               reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
     903              :                                   PREV_INSN (last));
     904              :             else
     905            0 :               add_insn_before (insn, last, NULL);
     906              :           }
     907              :       }
     908            0 : }
     909              : 
     910              : /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
     911              :    respectively only if cycle C falls on the border of the scheduling
     912              :    window boundaries marked by START and END cycles.  STEP is the
     913              :    direction of the window.  */
     914              : static inline void
     915            0 : set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
     916              :                          sbitmap *tmp_precede, sbitmap must_precede, int c,
     917              :                          int start, int end, int step)
     918              : {
     919            0 :   *tmp_precede = NULL;
     920            0 :   *tmp_follow = NULL;
     921              : 
     922            0 :   if (c == start)
     923              :     {
     924            0 :       if (step == 1)
     925              :         *tmp_precede = must_precede;
     926              :       else                      /* step == -1.  */
     927            0 :         *tmp_follow = must_follow;
     928              :     }
     929            0 :   if (c == end - step)
     930              :     {
     931            0 :       if (step == 1)
     932              :         *tmp_follow = must_follow;
     933              :       else                      /* step == -1.  */
     934            0 :         *tmp_precede = must_precede;
     935              :     }
     936              : 
     937              : }
     938              : 
     939              : /* Return True if the branch can be moved to row ii-1 while
     940              :    normalizing the partial schedule PS to start from cycle zero and thus
     941              :    optimize the SC.  Otherwise return False.  */
     942              : static bool
     943            0 : optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
     944              : {
     945            0 :   int amount = PS_MIN_CYCLE (ps);
     946            0 :   int start, end, step;
     947            0 :   int ii = ps->ii;
     948            0 :   bool ok = false;
     949            0 :   int stage_count, stage_count_curr;
     950              : 
     951              :   /* Compare the SC after normalization and SC after bringing the branch
     952              :      to row ii-1.  If they are equal just bail out.  */
     953            0 :   stage_count = calculate_stage_count (ps, amount);
     954            0 :   stage_count_curr =
     955            0 :     calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
     956              : 
     957            0 :   if (stage_count == stage_count_curr)
     958              :     {
     959            0 :       if (dump_file)
     960            0 :         fprintf (dump_file, "SMS SC already optimized.\n");
     961              : 
     962            0 :       return false;
     963              :     }
     964              : 
     965            0 :   if (dump_file)
     966              :     {
     967            0 :       fprintf (dump_file, "SMS Trying to optimize branch location\n");
     968            0 :       fprintf (dump_file, "SMS partial schedule before trial:\n");
     969            0 :       print_partial_schedule (ps, dump_file);
     970              :     }
     971              : 
     972              :   /* First, normalize the partial scheduling.  */
     973            0 :   reset_sched_times (ps, amount);
     974            0 :   rotate_partial_schedule (ps, amount);
     975            0 :   if (dump_file)
     976              :     {
     977            0 :       fprintf (dump_file,
     978              :                "SMS partial schedule after normalization (ii, %d, SC %d):\n",
     979              :                ii, stage_count);
     980            0 :       print_partial_schedule (ps, dump_file);
     981              :     }
     982              : 
     983            0 :   if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
     984              :     return true;
     985              : 
     986            0 :   auto_sbitmap sched_nodes (g->num_nodes);
     987            0 :   bitmap_ones (sched_nodes);
     988              : 
     989              :   /* Calculate the new placement of the branch.  It should be in row
     990              :      ii-1 and fall into it's scheduling window.  */
     991            0 :   if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
     992              :                         &step, &end) == 0)
     993              :     {
     994            0 :       bool success;
     995            0 :       ps_insn_ptr next_ps_i;
     996            0 :       int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
     997            0 :       int row = SMODULO (branch_cycle, ps->ii);
     998            0 :       int num_splits = 0;
     999            0 :       sbitmap tmp_precede, tmp_follow;
    1000            0 :       int min_cycle, c;
    1001              : 
    1002            0 :       if (dump_file)
    1003            0 :         fprintf (dump_file, "\nTrying to schedule node %d "
    1004              :                  "INSN = %d  in (%d .. %d) step %d\n",
    1005              :                  g->closing_branch->cuid,
    1006            0 :                  (INSN_UID (g->closing_branch->insn)), start, end, step);
    1007              : 
    1008            0 :       gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
    1009            0 :       if (step == 1)
    1010              :         {
    1011            0 :           c = start + ii - SMODULO (start, ii) - 1;
    1012            0 :           gcc_assert (c >= start);
    1013            0 :           if (c >= end)
    1014              :             {
    1015            0 :               if (dump_file)
    1016            0 :                 fprintf (dump_file,
    1017              :                          "SMS failed to schedule branch at cycle: %d\n", c);
    1018            0 :               return false;
    1019              :             }
    1020              :         }
    1021              :       else
    1022              :         {
    1023            0 :           c = start - SMODULO (start, ii) - 1;
    1024            0 :           gcc_assert (c <= start);
    1025              : 
    1026            0 :           if (c <= end)
    1027              :             {
    1028            0 :               if (dump_file)
    1029            0 :                 fprintf (dump_file,
    1030              :                          "SMS failed to schedule branch at cycle: %d\n", c);
    1031            0 :               return false;
    1032              :             }
    1033              :         }
    1034              : 
    1035            0 :       auto_sbitmap must_precede (g->num_nodes);
    1036            0 :       auto_sbitmap must_follow (g->num_nodes);
    1037              : 
    1038              :       /* Try to schedule the branch is it's new cycle.  */
    1039            0 :       calculate_must_precede_follow (g->closing_branch, start, end,
    1040              :                                      step, ii, sched_nodes,
    1041              :                                      must_precede, must_follow);
    1042              : 
    1043            0 :       set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
    1044              :                                must_precede, c, start, end, step);
    1045              : 
    1046              :       /* Find the element in the partial schedule related to the closing
    1047              :          branch so we can remove it from it's current cycle.  */
    1048            0 :       for (next_ps_i = ps->rows[row];
    1049            0 :            next_ps_i; next_ps_i = next_ps_i->next_in_row)
    1050            0 :         if (next_ps_i->id == g->closing_branch->cuid)
    1051              :           break;
    1052              : 
    1053            0 :       min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
    1054            0 :       remove_node_from_ps (ps, next_ps_i);
    1055            0 :       success =
    1056            0 :         try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
    1057              :                                       sched_nodes, &num_splits,
    1058              :                                       tmp_precede, tmp_follow);
    1059            0 :       gcc_assert (num_splits == 0);
    1060            0 :       if (!success)
    1061              :         {
    1062            0 :           if (dump_file)
    1063            0 :             fprintf (dump_file,
    1064              :                      "SMS failed to schedule branch at cycle: %d, "
    1065              :                      "bringing it back to cycle %d\n", c, branch_cycle);
    1066              : 
    1067              :           /* The branch was failed to be placed in row ii - 1.
    1068              :              Put it back in it's original place in the partial
    1069              :              schedualing.  */
    1070            0 :           set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
    1071              :                                    must_precede, branch_cycle, start, end,
    1072              :                                    step);
    1073            0 :           success =
    1074            0 :             try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
    1075              :                                           branch_cycle, sched_nodes,
    1076              :                                           &num_splits, tmp_precede,
    1077              :                                           tmp_follow);
    1078            0 :           gcc_assert (success && (num_splits == 0));
    1079              :           ok = false;
    1080              :         }
    1081              :       else
    1082              :         {
    1083              :           /* The branch is placed in row ii - 1.  */
    1084            0 :           if (dump_file)
    1085            0 :             fprintf (dump_file,
    1086              :                      "SMS success in moving branch to cycle %d\n", c);
    1087              : 
    1088            0 :           update_node_sched_params (g->closing_branch->cuid, ii, c,
    1089              :                                     PS_MIN_CYCLE (ps));
    1090            0 :           ok = true;
    1091              :         }
    1092              : 
    1093              :       /* This might have been added to a new first stage.  */
    1094            0 :       if (PS_MIN_CYCLE (ps) < min_cycle)
    1095            0 :         reset_sched_times (ps, 0);
    1096            0 :     }
    1097              : 
    1098              :   return ok;
    1099            0 : }
    1100              : 
    1101              : static void
    1102            0 : duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
    1103              :                            int to_stage, rtx count_reg, class loop *loop)
    1104              : {
    1105            0 :   int row;
    1106            0 :   ps_insn_ptr ps_ij;
    1107            0 :   copy_bb_data id;
    1108              : 
    1109            0 :   for (row = 0; row < ps->ii; row++)
    1110            0 :     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
    1111              :       {
    1112            0 :         int u = ps_ij->id;
    1113            0 :         int first_u, last_u;
    1114            0 :         rtx_insn *u_insn;
    1115              : 
    1116              :         /* Do not duplicate any insn which refers to count_reg as it
    1117              :            belongs to the control part.
    1118              :            The closing branch is scheduled as well and thus should
    1119              :            be ignored.
    1120              :            TODO: This should be done by analyzing the control part of
    1121              :            the loop.  */
    1122            0 :         u_insn = ps_rtl_insn (ps, u);
    1123            0 :         if (reg_mentioned_p (count_reg, u_insn)
    1124            0 :             || JUMP_P (u_insn))
    1125            0 :           continue;
    1126              : 
    1127            0 :         first_u = SCHED_STAGE (u);
    1128            0 :         last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
    1129            0 :         if (from_stage <= last_u && to_stage >= first_u)
    1130              :           {
    1131            0 :             if (u < ps->g->num_nodes)
    1132            0 :               duplicate_insn_chain (ps_first_note (ps, u), u_insn,
    1133              :                                     loop, &id);
    1134              :             else
    1135            0 :               emit_insn (copy_rtx (PATTERN (u_insn)));
    1136              :           }
    1137              :       }
    1138            0 : }
    1139              : 
    1140              : 
    1141              : /* Generate the instructions (including reg_moves) for prolog & epilog.  */
    1142              : static void
    1143            0 : generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
    1144              :                         rtx count_reg, bool adjust_init)
    1145              : {
    1146            0 :   int i;
    1147            0 :   int last_stage = PS_STAGE_COUNT (ps) - 1;
    1148            0 :   edge e;
    1149              : 
    1150              :   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
    1151            0 :   start_sequence ();
    1152              : 
    1153            0 :   if (adjust_init)
    1154              :     {
    1155              :       /* Generate instructions at the beginning of the prolog to
    1156              :          adjust the loop count by STAGE_COUNT.  If loop count is constant
    1157              :          and it not used anywhere in prologue, this constant is adjusted by
    1158              :          STAGE_COUNT outside of generate_prolog_epilog function.  */
    1159            0 :       rtx sub_reg = NULL_RTX;
    1160              : 
    1161            0 :       sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
    1162            0 :                                      gen_int_mode (last_stage,
    1163            0 :                                                    GET_MODE (count_reg)),
    1164              :                                      count_reg, 1, OPTAB_DIRECT);
    1165            0 :       gcc_assert (REG_P (sub_reg));
    1166            0 :       if (REGNO (sub_reg) != REGNO (count_reg))
    1167            0 :         emit_move_insn (count_reg, sub_reg);
    1168              :     }
    1169              : 
    1170            0 :   for (i = 0; i < last_stage; i++)
    1171            0 :     duplicate_insns_of_cycles (ps, 0, i, count_reg, loop);
    1172              : 
    1173              :   /* Put the prolog on the entry edge.  */
    1174            0 :   e = loop_preheader_edge (loop);
    1175            0 :   split_edge_and_insert (e, get_insns ());
    1176            0 :   if (!flag_resched_modulo_sched)
    1177            0 :     e->dest->flags |= BB_DISABLE_SCHEDULE;
    1178              : 
    1179            0 :   end_sequence ();
    1180              : 
    1181              :   /* Generate the epilog, inserting its insns on the loop-exit edge.  */
    1182            0 :   start_sequence ();
    1183              : 
    1184            0 :   for (i = 0; i < last_stage; i++)
    1185            0 :     duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg, loop);
    1186              : 
    1187              :   /* Put the epilogue on the exit edge.  */
    1188            0 :   gcc_assert (single_exit (loop));
    1189            0 :   e = single_exit (loop);
    1190            0 :   split_edge_and_insert (e, get_insns ());
    1191            0 :   if (!flag_resched_modulo_sched)
    1192            0 :     e->dest->flags |= BB_DISABLE_SCHEDULE;
    1193              : 
    1194            0 :   end_sequence ();
    1195            0 : }
    1196              : 
    1197              : /* Mark LOOP as software pipelined so the later
    1198              :    scheduling passes don't touch it.  */
    1199              : static void
    1200            0 : mark_loop_unsched (class loop *loop)
    1201              : {
    1202            0 :   unsigned i;
    1203            0 :   basic_block *bbs = get_loop_body (loop);
    1204              : 
    1205            0 :   for (i = 0; i < loop->num_nodes; i++)
    1206            0 :     bbs[i]->flags |= BB_DISABLE_SCHEDULE;
    1207              : 
    1208            0 :   free (bbs);
    1209            0 : }
    1210              : 
    1211              : /* Return true if all the BBs of the loop are empty except the
    1212              :    loop header.  */
    1213              : static bool
    1214          440 : loop_single_full_bb_p (class loop *loop)
    1215              : {
    1216          440 :   unsigned i;
    1217          440 :   basic_block *bbs = get_loop_body (loop);
    1218              : 
    1219          880 :   for (i = 0; i < loop->num_nodes ; i++)
    1220              :     {
    1221          568 :       rtx_insn *head, *tail;
    1222          568 :       bool empty_bb = true;
    1223              : 
    1224          568 :       if (bbs[i] == loop->header)
    1225          440 :         continue;
    1226              : 
    1227              :       /* Make sure that basic blocks other than the header
    1228              :          have only notes labels or jumps.  */
    1229          128 :       get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
    1230          262 :       for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
    1231              :         {
    1232          134 :           if (NOTE_P (head) || LABEL_P (head)
    1233          134 :               || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
    1234            6 :             continue;
    1235              :           empty_bb = false;
    1236              :           break;
    1237              :         }
    1238              : 
    1239          128 :       if (! empty_bb)
    1240              :         {
    1241          128 :           free (bbs);
    1242          128 :           return false;
    1243              :         }
    1244              :     }
    1245          312 :   free (bbs);
    1246          312 :   return true;
    1247              : }
    1248              : 
    1249              : /* Dump file:line from INSN's location info to dump_file.  */
    1250              : 
    1251              : static void
    1252           22 : dump_insn_location (rtx_insn *insn)
    1253              : {
    1254           22 :   if (dump_file && INSN_HAS_LOCATION (insn))
    1255              :     {
    1256           16 :       expanded_location xloc = insn_location (insn);
    1257           16 :       fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
    1258              :     }
    1259           22 : }
    1260              : 
    1261              : /* A simple loop from SMS point of view; it is a loop that is composed of
    1262              :    either a single basic block or two BBs - a header and a latch.  */
    1263              : #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 )               \
    1264              :                                   && (EDGE_COUNT (loop->latch->preds) == 1) \
    1265              :                                   && (EDGE_COUNT (loop->latch->succs) == 1))
    1266              : 
    1267              : /* Return true if the loop is in its canonical form and false if not.
    1268              :    i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
    1269              : static bool
    1270          434 : loop_canon_p (class loop *loop)
    1271              : {
    1272              : 
    1273          434 :   if (loop->inner || !loop_outer (loop))
    1274              :   {
    1275          142 :     if (dump_file)
    1276            3 :       fprintf (dump_file, "SMS loop inner or !loop_outer\n");
    1277          142 :     return false;
    1278              :   }
    1279              : 
    1280          292 :   if (!single_exit (loop))
    1281              :     {
    1282            8 :       if (dump_file)
    1283              :         {
    1284            3 :           rtx_insn *insn = BB_END (loop->header);
    1285              : 
    1286            3 :           fprintf (dump_file, "SMS loop many exits");
    1287            3 :           dump_insn_location (insn);
    1288            3 :           fprintf (dump_file, "\n");
    1289              :         }
    1290            8 :       return false;
    1291              :     }
    1292              : 
    1293          284 :   if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
    1294              :     {
    1295            5 :       if (dump_file)
    1296              :         {
    1297            0 :           rtx_insn *insn = BB_END (loop->header);
    1298              : 
    1299            0 :           fprintf (dump_file, "SMS loop many BBs.");
    1300            0 :           dump_insn_location (insn);
    1301            0 :           fprintf (dump_file, "\n");
    1302              :         }
    1303            5 :       return false;
    1304              :     }
    1305              : 
    1306              :     return true;
    1307              : }
    1308              : 
    1309              : /* If there are more than one entry for the loop,
    1310              :    make it one by splitting the first entry edge and
    1311              :    redirecting the others to the new BB.  */
    1312              : static void
    1313            0 : canon_loop (class loop *loop)
    1314              : {
    1315            0 :   edge e;
    1316            0 :   edge_iterator i;
    1317              : 
    1318              :   /* Avoid annoying special cases of edges going to exit
    1319              :      block.  */
    1320            0 :   FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    1321            0 :     if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
    1322            0 :       split_edge (e);
    1323              : 
    1324            0 :   if (loop->latch == loop->header
    1325            0 :       || EDGE_COUNT (loop->latch->succs) > 1)
    1326              :     {
    1327            0 :       FOR_EACH_EDGE (e, i, loop->header->preds)
    1328            0 :         if (e->src == loop->latch)
    1329              :           break;
    1330            0 :       split_edge (e);
    1331              :     }
    1332            0 : }
    1333              : 
    1334              : /* Setup infos.  */
    1335              : static void
    1336          267 : setup_sched_infos (void)
    1337              : {
    1338          267 :   memcpy (&sms_common_sched_info, &haifa_common_sched_info,
    1339              :           sizeof (sms_common_sched_info));
    1340          267 :   sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
    1341          267 :   common_sched_info = &sms_common_sched_info;
    1342              : 
    1343          267 :   sched_deps_info = &sms_sched_deps_info;
    1344          267 :   current_sched_info = &sms_sched_info;
    1345          267 : }
    1346              : 
    1347              : /* Probability in % that the sms-ed loop rolls enough so that optimized
    1348              :    version may be entered.  Just a guess.  */
    1349              : #define PROB_SMS_ENOUGH_ITERATIONS 80
    1350              : 
    1351              : /* Main entry point, perform SMS scheduling on the loops of the function
    1352              :    that consist of single basic blocks.  */
    1353              : static void
    1354          313 : sms_schedule (void)
    1355              : {
    1356          313 :   rtx_insn *insn;
    1357          313 :   ddg_ptr *g_arr, g;
    1358          313 :   int * node_order;
    1359          313 :   int maxii, max_asap;
    1360          313 :   partial_schedule_ptr ps;
    1361          313 :   basic_block bb = NULL;
    1362          313 :   basic_block condition_bb = NULL;
    1363          313 :   edge latch_edge;
    1364          313 :   HOST_WIDE_INT trip_count, max_trip_count;
    1365          313 :   HARD_REG_SET prohibited_regs;
    1366              : 
    1367          313 :   loop_optimizer_init (LOOPS_HAVE_PREHEADERS
    1368              :                        | LOOPS_HAVE_RECORDED_EXITS);
    1369          626 :   if (number_of_loops (cfun) <= 1)
    1370              :     {
    1371           46 :       loop_optimizer_finalize ();
    1372           46 :       return;  /* There are no loops to schedule.  */
    1373              :     }
    1374              : 
    1375              :   /* Initialize issue_rate.  */
    1376          267 :   if (targetm.sched.issue_rate)
    1377              :     {
    1378          267 :       int temp = reload_completed;
    1379              : 
    1380          267 :       reload_completed = 1;
    1381          267 :       issue_rate = targetm.sched.issue_rate ();
    1382          267 :       reload_completed = temp;
    1383              :     }
    1384              :   else
    1385            0 :     issue_rate = 1;
    1386              : 
    1387              :   /* Initialize the scheduler.  */
    1388          267 :   setup_sched_infos ();
    1389          267 :   haifa_sched_init ();
    1390              : 
    1391              :   /* Allocate memory to hold the DDG array one entry for each loop.
    1392              :      We use loop->num as index into this array.  */
    1393          267 :   g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
    1394              : 
    1395          534 :   REG_SET_TO_HARD_REG_SET (prohibited_regs, &df->regular_block_artificial_uses);
    1396              : 
    1397          267 :   if (dump_file)
    1398              :   {
    1399           12 :     fprintf (dump_file, "\n\nSMS analysis phase\n");
    1400           12 :     fprintf (dump_file, "===================\n\n");
    1401              :   }
    1402              : 
    1403              :   /* Build DDGs for all the relevant loops and hold them in G_ARR
    1404              :      indexed by the loop index.  */
    1405         1235 :   for (auto loop : loops_list (cfun, 0))
    1406              :     {
    1407          434 :       rtx_insn *head, *tail;
    1408          434 :       rtx count_reg;
    1409              : 
    1410              :       /* For debugging.  */
    1411          434 :       if (dbg_cnt (sms_sched_loop) == false)
    1412              :         {
    1413            0 :           if (dump_file)
    1414            0 :             fprintf (dump_file, "SMS reached max limit... \n");
    1415              : 
    1416            0 :           break;
    1417              :         }
    1418              : 
    1419          434 :       if (dump_file)
    1420              :         {
    1421           19 :           rtx_insn *insn = BB_END (loop->header);
    1422              : 
    1423           19 :           fprintf (dump_file, "SMS loop num: %d", loop->num);
    1424           19 :           dump_insn_location (insn);
    1425           19 :           fprintf (dump_file, "\n");
    1426              :         }
    1427              : 
    1428          434 :       if (! loop_canon_p (loop))
    1429          589 :         continue;
    1430              : 
    1431          279 :       if (! loop_single_full_bb_p (loop))
    1432              :       {
    1433          123 :         if (dump_file)
    1434            0 :           fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
    1435          123 :         continue;
    1436              :       }
    1437              : 
    1438          156 :       bb = loop->header;
    1439              : 
    1440          156 :       get_ebb_head_tail (bb, bb, &head, &tail);
    1441          156 :       latch_edge = loop_latch_edge (loop);
    1442          156 :       gcc_assert (single_exit (loop));
    1443          156 :       trip_count = get_estimated_loop_iterations_int (loop);
    1444          156 :       max_trip_count = get_max_loop_iterations_int (loop);
    1445              : 
    1446              :       /* Perform SMS only on loops that their average count is above threshold.  */
    1447              : 
    1448          156 :       if (latch_edge->count () > profile_count::zero ()
    1449          312 :           && (latch_edge->count ()
    1450          312 :               < (single_exit (loop)->count ()
    1451          312 :                  * param_sms_loop_average_count_threshold)))
    1452              :         {
    1453            0 :           if (dump_file)
    1454              :             {
    1455            0 :               dump_insn_location (tail);
    1456            0 :               fprintf (dump_file, "\nSMS single-bb-loop\n");
    1457            0 :               if (profile_info && flag_branch_probabilities)
    1458              :                 {
    1459            0 :                   fprintf (dump_file, "SMS loop-count ");
    1460            0 :                   fprintf (dump_file, "%" PRId64,
    1461            0 :                            (int64_t) bb->count.to_gcov_type ());
    1462            0 :                   fprintf (dump_file, "\n");
    1463            0 :                   fprintf (dump_file, "SMS trip-count ");
    1464            0 :                   fprintf (dump_file, "%" PRId64 "max %" PRId64,
    1465              :                            (int64_t) trip_count, (int64_t) max_trip_count);
    1466            0 :                   fprintf (dump_file, "\n");
    1467              :                 }
    1468              :             }
    1469            0 :           continue;
    1470              :         }
    1471              : 
    1472              :       /* Make sure this is a doloop.  */
    1473          156 :       if (!(count_reg = doloop_register_get (head, tail)))
    1474              :         {
    1475          156 :           if (dump_file)
    1476           13 :             fprintf (dump_file, "SMS doloop_register_get failed\n");
    1477          156 :           continue;
    1478              :         }
    1479              : 
    1480              :       /* Don't handle BBs with calls or barriers
    1481              :          or !single_set with the exception of do-loop control part insns.
    1482              :          ??? Should handle insns defining subregs.  */
    1483            0 :       for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
    1484              :         {
    1485            0 :           if (INSN_P (insn))
    1486              :             {
    1487              :               HARD_REG_SET regs;
    1488            0 :               CLEAR_HARD_REG_SET (regs);
    1489            0 :               note_stores (insn, record_hard_reg_sets, &regs);
    1490            0 :               if (hard_reg_set_intersect_p (regs, prohibited_regs))
    1491              :                 break;
    1492              :             }
    1493              : 
    1494            0 :           if (CALL_P (insn)
    1495            0 :               || BARRIER_P (insn)
    1496            0 :               || (INSN_P (insn) && single_set (insn)
    1497            0 :                   && GET_CODE (SET_DEST (single_set (insn))) == SUBREG)
    1498              :               /* Not a single set.  */
    1499            0 :               || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
    1500            0 :                   && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
    1501              :                   /* But non-single-set allowed in one special case.  */
    1502            0 :                   && (insn != prev_nondebug_insn (tail)
    1503            0 :                       || !reg_mentioned_p (count_reg, insn))))
    1504              :             break;
    1505              :         }
    1506              : 
    1507            0 :       if (insn != NEXT_INSN (tail))
    1508              :         {
    1509            0 :           if (dump_file)
    1510              :             {
    1511            0 :               if (CALL_P (insn))
    1512            0 :                 fprintf (dump_file, "SMS loop-with-call\n");
    1513            0 :               else if (BARRIER_P (insn))
    1514            0 :                 fprintf (dump_file, "SMS loop-with-barrier\n");
    1515            0 :               else if (INSN_P (insn) && single_set (insn)
    1516            0 :                        && GET_CODE (SET_DEST (single_set (insn))) == SUBREG)
    1517            0 :                 fprintf (dump_file, "SMS loop with subreg in lhs\n");
    1518              :               else
    1519            0 :                 fprintf (dump_file,
    1520              :                          "SMS loop-with-not-single-set-or-prohibited-reg\n");
    1521              : 
    1522            0 :               print_rtl_single (dump_file, insn);
    1523              :             }
    1524              : 
    1525            0 :           continue;
    1526              :         }
    1527              : 
    1528              :       /* Always schedule the closing branch with the rest of the
    1529              :          instructions. The branch is rotated to be in row ii-1 at the
    1530              :          end of the scheduling procedure to make sure it's the last
    1531              :          instruction in the iteration.  */
    1532            0 :       if (! (g = create_ddg (bb, 1)))
    1533              :         {
    1534            0 :           if (dump_file)
    1535            0 :             fprintf (dump_file, "SMS create_ddg failed\n");
    1536            0 :           continue;
    1537              :         }
    1538              : 
    1539            0 :       g_arr[loop->num] = g;
    1540            0 :       if (dump_file)
    1541            0 :         fprintf (dump_file, "...OK\n");
    1542              : 
    1543          267 :     }
    1544          267 :   if (dump_file)
    1545              :   {
    1546           12 :     fprintf (dump_file, "\nSMS transformation phase\n");
    1547           12 :     fprintf (dump_file, "=========================\n\n");
    1548              :   }
    1549              : 
    1550              :   /* We don't want to perform SMS on new loops - created by versioning.  */
    1551         1235 :   for (auto loop : loops_list (cfun, 0))
    1552              :     {
    1553          434 :       rtx_insn *head, *tail;
    1554          434 :       rtx count_reg;
    1555          434 :       rtx_insn *count_init;
    1556          434 :       int mii, rec_mii, stage_count, min_cycle;
    1557          434 :       int64_t loop_count = 0;
    1558          434 :       bool opt_sc_p, adjust_inplace = false;
    1559          434 :       basic_block pre_header;
    1560              : 
    1561          434 :       if (! (g = g_arr[loop->num]))
    1562          434 :         continue;
    1563              : 
    1564            0 :       if (dump_file)
    1565              :         {
    1566            0 :           rtx_insn *insn = BB_END (loop->header);
    1567              : 
    1568            0 :           fprintf (dump_file, "SMS loop num: %d", loop->num);
    1569            0 :           dump_insn_location (insn);
    1570            0 :           fprintf (dump_file, "\n");
    1571              : 
    1572            0 :           print_ddg (dump_file, g);
    1573              :         }
    1574              : 
    1575            0 :       get_ebb_head_tail (loop->header, loop->header, &head, &tail);
    1576              : 
    1577            0 :       latch_edge = loop_latch_edge (loop);
    1578            0 :       gcc_assert (single_exit (loop));
    1579            0 :       trip_count = get_estimated_loop_iterations_int (loop);
    1580            0 :       max_trip_count = get_max_loop_iterations_int (loop);
    1581              : 
    1582            0 :       if (dump_file)
    1583              :         {
    1584            0 :           dump_insn_location (tail);
    1585            0 :           fprintf (dump_file, "\nSMS single-bb-loop\n");
    1586            0 :           if (profile_info && flag_branch_probabilities)
    1587              :             {
    1588            0 :               fprintf (dump_file, "SMS loop-count ");
    1589            0 :               fprintf (dump_file, "%" PRId64,
    1590            0 :                        (int64_t) bb->count.to_gcov_type ());
    1591            0 :               fprintf (dump_file, "\n");
    1592              :             }
    1593            0 :           fprintf (dump_file, "SMS doloop\n");
    1594            0 :           fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
    1595            0 :           fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
    1596            0 :           fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
    1597              :         }
    1598              : 
    1599              : 
    1600            0 :       count_reg = doloop_register_get (head, tail);
    1601            0 :       gcc_assert (count_reg);
    1602              : 
    1603            0 :       pre_header = loop_preheader_edge (loop)->src;
    1604            0 :       count_init = const_iteration_count (count_reg, pre_header, &loop_count,
    1605              :                                           &adjust_inplace);
    1606              : 
    1607            0 :       if (dump_file && count_init)
    1608              :         {
    1609            0 :           fprintf (dump_file, "SMS const-doloop ");
    1610            0 :           fprintf (dump_file, "%" PRId64,
    1611              :                      loop_count);
    1612            0 :           fprintf (dump_file, "\n");
    1613              :         }
    1614              : 
    1615            0 :       node_order = XNEWVEC (int, g->num_nodes);
    1616              : 
    1617            0 :       mii = 1; /* Need to pass some estimate of mii.  */
    1618            0 :       rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
    1619            0 :       mii = MAX (res_MII (g), rec_mii);
    1620            0 :       mii = MAX (mii, 1);
    1621            0 :       maxii = MAX (max_asap, param_sms_max_ii_factor * mii);
    1622              : 
    1623            0 :       if (dump_file)
    1624            0 :         fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
    1625              :                  rec_mii, mii, maxii);
    1626              : 
    1627            0 :       for (;;)
    1628              :         {
    1629            0 :           set_node_sched_params (g);
    1630              : 
    1631            0 :           stage_count = 0;
    1632            0 :           opt_sc_p = false;
    1633            0 :           ps = sms_schedule_by_order (g, mii, maxii, node_order);
    1634              : 
    1635            0 :           if (ps)
    1636              :             {
    1637              :               /* Try to achieve optimized SC by normalizing the partial
    1638              :                  schedule (having the cycles start from cycle zero).
    1639              :                  The branch location must be placed in row ii-1 in the
    1640              :                  final scheduling.      If failed, shift all instructions to
    1641              :                  position the branch in row ii-1.  */
    1642            0 :               opt_sc_p = optimize_sc (ps, g);
    1643            0 :               if (opt_sc_p)
    1644            0 :                 stage_count = calculate_stage_count (ps, 0);
    1645              :               else
    1646              :                 {
    1647              :                   /* Bring the branch to cycle ii-1.  */
    1648            0 :                   int amount = (SCHED_TIME (g->closing_branch->cuid)
    1649            0 :                                 - (ps->ii - 1));
    1650              : 
    1651            0 :                   if (dump_file)
    1652            0 :                     fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
    1653              : 
    1654            0 :                   stage_count = calculate_stage_count (ps, amount);
    1655              :                 }
    1656              : 
    1657            0 :               gcc_assert (stage_count >= 1);
    1658              :             }
    1659              : 
    1660              :           /* The default value of param_sms_min_sc is 2 as stage count of
    1661              :              1 means that there is no interleaving between iterations thus
    1662              :              we let the scheduling passes do the job in this case.  */
    1663            0 :           if (stage_count < param_sms_min_sc
    1664            0 :               || (count_init && (loop_count <= stage_count))
    1665            0 :               || (max_trip_count >= 0 && max_trip_count <= stage_count)
    1666            0 :               || (trip_count >= 0 && trip_count <= stage_count))
    1667              :             {
    1668            0 :               if (dump_file)
    1669              :                 {
    1670            0 :                   fprintf (dump_file, "SMS failed... \n");
    1671            0 :                   fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
    1672              :                            " loop-count=", stage_count);
    1673            0 :                   fprintf (dump_file, "%" PRId64, loop_count);
    1674            0 :                   fprintf (dump_file, ", trip-count=");
    1675            0 :                   fprintf (dump_file, "%" PRId64 "max %" PRId64,
    1676              :                            (int64_t) trip_count, (int64_t) max_trip_count);
    1677            0 :                   fprintf (dump_file, ")\n");
    1678              :                 }
    1679              :               break;
    1680              :             }
    1681              : 
    1682            0 :           if (!opt_sc_p)
    1683              :             {
    1684              :               /* Rotate the partial schedule to have the branch in row ii-1.  */
    1685            0 :               int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
    1686              : 
    1687            0 :               reset_sched_times (ps, amount);
    1688            0 :               rotate_partial_schedule (ps, amount);
    1689              :             }
    1690              : 
    1691            0 :           set_columns_for_ps (ps);
    1692              : 
    1693            0 :           min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
    1694            0 :           if (!schedule_reg_moves (ps))
    1695              :             {
    1696            0 :               mii = ps->ii + 1;
    1697            0 :               free_partial_schedule (ps);
    1698            0 :               continue;
    1699              :             }
    1700              : 
    1701              :           /* Moves that handle incoming values might have been added
    1702              :              to a new first stage.  Bump the stage count if so.
    1703              : 
    1704              :              ??? Perhaps we could consider rotating the schedule here
    1705              :              instead?  */
    1706            0 :           if (PS_MIN_CYCLE (ps) < min_cycle)
    1707              :             {
    1708            0 :               reset_sched_times (ps, 0);
    1709            0 :               stage_count++;
    1710              :             }
    1711              : 
    1712              :           /* The stage count should now be correct without rotation.  */
    1713            0 :           gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
    1714            0 :           PS_STAGE_COUNT (ps) = stage_count;
    1715              : 
    1716            0 :           canon_loop (loop);
    1717              : 
    1718            0 :           if (dump_file)
    1719              :             {
    1720            0 :               dump_insn_location (tail);
    1721            0 :               fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
    1722              :                        ps->ii, stage_count);
    1723            0 :               print_partial_schedule (ps, dump_file);
    1724              :             }
    1725              : 
    1726            0 :           if (count_init)
    1727              :             {
    1728            0 :                if (adjust_inplace)
    1729              :                 {
    1730              :                   /* When possible, set new iteration count of loop kernel in
    1731              :                      place.  Otherwise, generate_prolog_epilog creates an insn
    1732              :                      to adjust.  */
    1733            0 :                   SET_SRC (single_set (count_init)) = GEN_INT (loop_count
    1734              :                                                             - stage_count + 1);
    1735              :                 }
    1736              :             }
    1737              :           else
    1738              :             {
    1739              :               /* case the BCT count is not known , Do loop-versioning */
    1740            0 :               rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
    1741              :                                          gen_int_mode (stage_count,
    1742              :                                                        GET_MODE (count_reg)));
    1743            0 :               profile_probability prob = profile_probability::guessed_always ()
    1744            0 :                                 .apply_scale (PROB_SMS_ENOUGH_ITERATIONS, 100);
    1745              : 
    1746            0 :               loop_version (loop, comp_rtx, &condition_bb,
    1747              :                             prob, prob.invert (),
    1748              :                             prob, prob.invert (), true);
    1749              :             }
    1750              : 
    1751              :           /* Now apply the scheduled kernel to the RTL of the loop.  */
    1752            0 :           permute_partial_schedule (ps, g->closing_branch->first_note);
    1753              : 
    1754              :           /* Mark this loop as software pipelined so the later
    1755              :              scheduling passes don't touch it.  */
    1756            0 :           if (! flag_resched_modulo_sched)
    1757            0 :             mark_loop_unsched (loop);
    1758              : 
    1759              :           /* The life-info is not valid any more.  */
    1760            0 :           df_set_bb_dirty (g->bb);
    1761              : 
    1762            0 :           apply_reg_moves (ps);
    1763            0 :           if (dump_file)
    1764            0 :             print_node_sched_params (dump_file, g->num_nodes, ps);
    1765              :           /* Generate prolog and epilog.  */
    1766            0 :           generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
    1767            0 :           break;
    1768            0 :         }
    1769              : 
    1770            0 :       free_partial_schedule (ps);
    1771            0 :       node_sched_param_vec.release ();
    1772            0 :       free (node_order);
    1773            0 :       free_ddg (g);
    1774          267 :     }
    1775              : 
    1776          267 :   free (g_arr);
    1777              : 
    1778              :   /* Release scheduler data, needed until now because of DFA.  */
    1779          267 :   haifa_sched_finish ();
    1780          267 :   loop_optimizer_finalize ();
    1781              : }
    1782              : 
    1783              : /* The SMS scheduling algorithm itself
    1784              :    -----------------------------------
    1785              :    Input: 'O' an ordered list of insns of a loop.
    1786              :    Output: A scheduling of the loop - kernel, prolog, and epilogue.
    1787              : 
    1788              :    'Q' is the empty Set
    1789              :    'PS' is the partial schedule; it holds the currently scheduled nodes with
    1790              :         their cycle/slot.
    1791              :    'PSP' previously scheduled predecessors.
    1792              :    'PSS' previously scheduled successors.
    1793              :    't(u)' the cycle where u is scheduled.
    1794              :    'l(u)' is the latency of u.
    1795              :    'd(v,u)' is the dependence distance from v to u.
    1796              :    'ASAP(u)' the earliest time at which u could be scheduled as computed in
    1797              :              the node ordering phase.
    1798              :    'check_hardware_resources_conflicts(u, PS, c)'
    1799              :                              run a trace around cycle/slot through DFA model
    1800              :                              to check resource conflicts involving instruction u
    1801              :                              at cycle c given the partial schedule PS.
    1802              :    'add_to_partial_schedule_at_time(u, PS, c)'
    1803              :                              Add the node/instruction u to the partial schedule
    1804              :                              PS at time c.
    1805              :    'calculate_register_pressure(PS)'
    1806              :                              Given a schedule of instructions, calculate the register
    1807              :                              pressure it implies.  One implementation could be the
    1808              :                              maximum number of overlapping live ranges.
    1809              :    'maxRP' The maximum allowed register pressure, it is usually derived from the number
    1810              :            registers available in the hardware.
    1811              : 
    1812              :    1. II = MII.
    1813              :    2. PS = empty list
    1814              :    3. for each node u in O in pre-computed order
    1815              :    4.   if (PSP(u) != Q && PSS(u) == Q) then
    1816              :    5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
    1817              :    6.     start = Early_start; end = Early_start + II - 1; step = 1
    1818              :    11.  else if (PSP(u) == Q && PSS(u) != Q) then
    1819              :    12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
    1820              :    13.     start = Late_start; end = Late_start - II + 1; step = -1
    1821              :    14.  else if (PSP(u) != Q && PSS(u) != Q) then
    1822              :    15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
    1823              :    16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
    1824              :    17.     start = Early_start;
    1825              :    18.     end = min(Early_start + II - 1 , Late_start);
    1826              :    19.     step = 1
    1827              :    20.     else "if (PSP(u) == Q && PSS(u) == Q)"
    1828              :    21.    start = ASAP(u); end = start + II - 1; step = 1
    1829              :    22.  endif
    1830              : 
    1831              :    23.  success = false
    1832              :    24.  for (c = start ; c != end ; c += step)
    1833              :    25.     if check_hardware_resources_conflicts(u, PS, c) then
    1834              :    26.       add_to_partial_schedule_at_time(u, PS, c)
    1835              :    27.       success = true
    1836              :    28.       break
    1837              :    29.     endif
    1838              :    30.  endfor
    1839              :    31.  if (success == false) then
    1840              :    32.    II = II + 1
    1841              :    33.    if (II > maxII) then
    1842              :    34.       finish - failed to schedule
    1843              :    35.   endif
    1844              :    36.    goto 2.
    1845              :    37.  endif
    1846              :    38. endfor
    1847              :    39. if (calculate_register_pressure(PS) > maxRP) then
    1848              :    40.    goto 32.
    1849              :    41. endif
    1850              :    42. compute epilogue & prologue
    1851              :    43. finish - succeeded to schedule
    1852              : 
    1853              :    ??? The algorithm restricts the scheduling window to II cycles.
    1854              :    In rare cases, it may be better to allow windows of II+1 cycles.
    1855              :    The window would then start and end on the same row, but with
    1856              :    different "must precede" and "must follow" requirements.  */
    1857              : 
    1858              : /* A threshold for the number of repeated unsuccessful attempts to insert
    1859              :    an empty row, before we flush the partial schedule and start over.  */
    1860              : #define MAX_SPLIT_NUM 10
    1861              : /* Given the partial schedule PS, this function calculates and returns the
    1862              :    cycles in which we can schedule the node with the given index I.
    1863              :    NOTE: Here we do the backtracking in SMS, in some special cases. We have
    1864              :    noticed that there are several cases in which we fail    to SMS the loop
    1865              :    because the sched window of a node is empty    due to tight data-deps. In
    1866              :    such cases we want to unschedule    some of the predecessors/successors
    1867              :    until we get non-empty    scheduling window.  It returns -1 if the
    1868              :    scheduling window is empty and zero otherwise.  */
    1869              : 
    1870              : static int
    1871            0 : get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
    1872              :                   sbitmap sched_nodes, int ii, int *start_p, int *step_p,
    1873              :                   int *end_p)
    1874              : {
    1875            0 :   int start, step, end;
    1876            0 :   int early_start, late_start;
    1877            0 :   ddg_edge_ptr e;
    1878            0 :   auto_sbitmap psp (ps->g->num_nodes);
    1879            0 :   auto_sbitmap pss (ps->g->num_nodes);
    1880            0 :   sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
    1881            0 :   sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
    1882            0 :   int psp_not_empty;
    1883            0 :   int pss_not_empty;
    1884            0 :   int count_preds;
    1885            0 :   int count_succs;
    1886              : 
    1887              :   /* 1. compute sched window for u (start, end, step).  */
    1888            0 :   bitmap_clear (psp);
    1889            0 :   bitmap_clear (pss);
    1890            0 :   psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
    1891            0 :   pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
    1892              : 
    1893              :   /* We first compute a forward range (start <= end), then decide whether
    1894              :      to reverse it.  */
    1895            0 :   early_start = INT_MIN;
    1896            0 :   late_start = INT_MAX;
    1897            0 :   start = INT_MIN;
    1898            0 :   end = INT_MAX;
    1899            0 :   step = 1;
    1900              : 
    1901            0 :   count_preds = 0;
    1902            0 :   count_succs = 0;
    1903              : 
    1904            0 :   if (dump_file && (psp_not_empty || pss_not_empty))
    1905              :     {
    1906            0 :       fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
    1907            0 :                "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
    1908            0 :       fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
    1909              :                "start", "early start", "late start", "end", "time");
    1910            0 :       fprintf (dump_file, "=========== =========== =========== ==========="
    1911              :                " =====\n");
    1912              :     }
    1913              :   /* Calculate early_start and limit end.  Both bounds are inclusive.  */
    1914            0 :   if (psp_not_empty)
    1915            0 :     for (e = u_node->in; e != 0; e = e->next_in)
    1916              :       {
    1917            0 :         int v = e->src->cuid;
    1918              : 
    1919            0 :         if (bitmap_bit_p (sched_nodes, v))
    1920              :           {
    1921            0 :             int p_st = SCHED_TIME (v);
    1922            0 :             int earliest = p_st + e->latency - (e->distance * ii);
    1923            0 :             int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
    1924              : 
    1925            0 :             if (dump_file)
    1926              :               {
    1927            0 :                 fprintf (dump_file, "%11s %11d %11s %11d %5d",
    1928              :                          "", earliest, "", latest, p_st);
    1929            0 :                 print_ddg_edge (dump_file, e);
    1930            0 :                 fprintf (dump_file, "\n");
    1931              :               }
    1932              : 
    1933            0 :             early_start = MAX (early_start, earliest);
    1934            0 :             end = MIN (end, latest);
    1935              : 
    1936            0 :             if (e->type == TRUE_DEP && e->data_type == REG_DEP)
    1937            0 :               count_preds++;
    1938              :           }
    1939              :       }
    1940              : 
    1941              :   /* Calculate late_start and limit start.  Both bounds are inclusive.  */
    1942            0 :   if (pss_not_empty)
    1943            0 :     for (e = u_node->out; e != 0; e = e->next_out)
    1944              :       {
    1945            0 :         int v = e->dest->cuid;
    1946              : 
    1947            0 :         if (bitmap_bit_p (sched_nodes, v))
    1948              :           {
    1949            0 :             int s_st = SCHED_TIME (v);
    1950            0 :             int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
    1951            0 :             int latest = s_st - e->latency + (e->distance * ii);
    1952              : 
    1953            0 :             if (dump_file)
    1954              :               {
    1955            0 :                 fprintf (dump_file, "%11d %11s %11d %11s %5d",
    1956              :                          earliest, "", latest, "", s_st);
    1957            0 :                 print_ddg_edge (dump_file, e);
    1958            0 :                 fprintf (dump_file, "\n");
    1959              :               }
    1960              : 
    1961            0 :             start = MAX (start, earliest);
    1962            0 :             late_start = MIN (late_start, latest);
    1963              : 
    1964            0 :             if (e->type == TRUE_DEP && e->data_type == REG_DEP)
    1965            0 :               count_succs++;
    1966              :           }
    1967              :       }
    1968              : 
    1969            0 :   if (dump_file && (psp_not_empty || pss_not_empty))
    1970              :     {
    1971            0 :       fprintf (dump_file, "----------- ----------- ----------- -----------"
    1972              :                " -----\n");
    1973            0 :       fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
    1974              :                start, early_start, late_start, end, "",
    1975              :                "(max, max, min, min)");
    1976              :     }
    1977              : 
    1978              :   /* Get a target scheduling window no bigger than ii.  */
    1979            0 :   if (early_start == INT_MIN && late_start == INT_MAX)
    1980            0 :     early_start = NODE_ASAP (u_node);
    1981            0 :   else if (early_start == INT_MIN)
    1982            0 :     early_start = late_start - (ii - 1);
    1983            0 :   late_start = MIN (late_start, early_start + (ii - 1));
    1984              : 
    1985              :   /* Apply memory dependence limits.  */
    1986            0 :   start = MAX (start, early_start);
    1987            0 :   end = MIN (end, late_start);
    1988              : 
    1989            0 :   if (dump_file && (psp_not_empty || pss_not_empty))
    1990            0 :     fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
    1991              :              "", start, end, "", "");
    1992              : 
    1993              :   /* If there are at least as many successors as predecessors, schedule the
    1994              :      node close to its successors.  */
    1995            0 :   if (pss_not_empty && count_succs >= count_preds)
    1996              :     {
    1997            0 :       std::swap (start, end);
    1998            0 :       step = -1;
    1999              :     }
    2000              : 
    2001              :   /* Now that we've finalized the window, make END an exclusive rather
    2002              :      than an inclusive bound.  */
    2003            0 :   end += step;
    2004              : 
    2005            0 :   *start_p = start;
    2006            0 :   *step_p = step;
    2007            0 :   *end_p = end;
    2008              : 
    2009            0 :   if ((start >= end && step == 1) || (start <= end && step == -1))
    2010              :     {
    2011            0 :       if (dump_file)
    2012            0 :         fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
    2013              :                  start, end, step);
    2014            0 :       return -1;
    2015              :     }
    2016              : 
    2017              :   return 0;
    2018            0 : }
    2019              : 
    2020              : /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
    2021              :    node currently been scheduled.  At the end of the calculation
    2022              :    MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
    2023              :    U_NODE which are (1) already scheduled in the first/last row of
    2024              :    U_NODE's scheduling window, (2) whose dependence inequality with U
    2025              :    becomes an equality when U is scheduled in this same row, and (3)
    2026              :    whose dependence latency is zero.
    2027              : 
    2028              :    The first and last rows are calculated using the following parameters:
    2029              :    START/END rows - The cycles that begins/ends the traversal on the window;
    2030              :    searching for an empty cycle to schedule U_NODE.
    2031              :    STEP - The direction in which we traverse the window.
    2032              :    II - The initiation interval.  */
    2033              : 
    2034              : static void
    2035            0 : calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
    2036              :                                int step, int ii, sbitmap sched_nodes,
    2037              :                                sbitmap must_precede, sbitmap must_follow)
    2038              : {
    2039            0 :   ddg_edge_ptr e;
    2040            0 :   int first_cycle_in_window, last_cycle_in_window;
    2041              : 
    2042            0 :   gcc_assert (must_precede && must_follow);
    2043              : 
    2044              :   /* Consider the following scheduling window:
    2045              :      {first_cycle_in_window, first_cycle_in_window+1, ...,
    2046              :      last_cycle_in_window}.  If step is 1 then the following will be
    2047              :      the order we traverse the window: {start=first_cycle_in_window,
    2048              :      first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
    2049              :      or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
    2050              :      end=first_cycle_in_window-1} if step is -1.  */
    2051            0 :   first_cycle_in_window = (step == 1) ? start : end - step;
    2052            0 :   last_cycle_in_window = (step == 1) ? end - step : start;
    2053              : 
    2054            0 :   bitmap_clear (must_precede);
    2055            0 :   bitmap_clear (must_follow);
    2056              : 
    2057            0 :   if (dump_file)
    2058            0 :     fprintf (dump_file, "\nmust_precede: ");
    2059              : 
    2060              :   /* Instead of checking if:
    2061              :       (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
    2062              :       && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
    2063              :              first_cycle_in_window)
    2064              :       && e->latency == 0
    2065              :      we use the fact that latency is non-negative:
    2066              :       SCHED_TIME (e->src) - (e->distance * ii) <=
    2067              :       SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
    2068              :       first_cycle_in_window
    2069              :      and check only if
    2070              :       SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
    2071            0 :   for (e = u_node->in; e != 0; e = e->next_in)
    2072            0 :     if (bitmap_bit_p (sched_nodes, e->src->cuid)
    2073            0 :         && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
    2074              :              first_cycle_in_window))
    2075              :       {
    2076            0 :         if (dump_file)
    2077            0 :           fprintf (dump_file, "%d ", e->src->cuid);
    2078              : 
    2079            0 :         bitmap_set_bit (must_precede, e->src->cuid);
    2080              :       }
    2081              : 
    2082            0 :   if (dump_file)
    2083            0 :     fprintf (dump_file, "\nmust_follow: ");
    2084              : 
    2085              :   /* Instead of checking if:
    2086              :       (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
    2087              :       && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
    2088              :              last_cycle_in_window)
    2089              :       && e->latency == 0
    2090              :      we use the fact that latency is non-negative:
    2091              :       SCHED_TIME (e->dest) + (e->distance * ii) >=
    2092              :       SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
    2093              :       last_cycle_in_window
    2094              :      and check only if
    2095              :       SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
    2096            0 :   for (e = u_node->out; e != 0; e = e->next_out)
    2097            0 :     if (bitmap_bit_p (sched_nodes, e->dest->cuid)
    2098            0 :         && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
    2099              :              last_cycle_in_window))
    2100              :       {
    2101            0 :         if (dump_file)
    2102            0 :           fprintf (dump_file, "%d ", e->dest->cuid);
    2103              : 
    2104            0 :         bitmap_set_bit (must_follow, e->dest->cuid);
    2105              :       }
    2106              : 
    2107            0 :   if (dump_file)
    2108            0 :     fprintf (dump_file, "\n");
    2109            0 : }
    2110              : 
    2111              : /* Return 1 if U_NODE can be scheduled in CYCLE.  Use the following
    2112              :    parameters to decide if that's possible:
    2113              :    PS - The partial schedule.
    2114              :    U - The serial number of U_NODE.
    2115              :    NUM_SPLITS - The number of row splits made so far.
    2116              :    MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
    2117              :    the first row of the scheduling window)
    2118              :    MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
    2119              :    last row of the scheduling window)  */
    2120              : 
    2121              : static bool
    2122            0 : try_scheduling_node_in_cycle (partial_schedule_ptr ps,
    2123              :                               int u, int cycle, sbitmap sched_nodes,
    2124              :                               int *num_splits, sbitmap must_precede,
    2125              :                               sbitmap must_follow)
    2126              : {
    2127            0 :   ps_insn_ptr psi;
    2128            0 :   bool success = false;
    2129              : 
    2130            0 :   verify_partial_schedule (ps, sched_nodes);
    2131            0 :   psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
    2132            0 :   if (psi)
    2133              :     {
    2134            0 :       SCHED_TIME (u) = cycle;
    2135            0 :       bitmap_set_bit (sched_nodes, u);
    2136            0 :       success = true;
    2137            0 :       *num_splits = 0;
    2138            0 :       if (dump_file)
    2139            0 :         fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
    2140              : 
    2141              :     }
    2142              : 
    2143            0 :   return success;
    2144              : }
    2145              : 
    2146              : /* This function implements the scheduling algorithm for SMS according to the
    2147              :    above algorithm.  */
    2148              : static partial_schedule_ptr
    2149            0 : sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
    2150              : {
    2151            0 :   int ii = mii;
    2152            0 :   int i, c, success, num_splits = 0;
    2153            0 :   int flush_and_start_over = true;
    2154            0 :   int num_nodes = g->num_nodes;
    2155            0 :   int start, end, step; /* Place together into one struct?  */
    2156            0 :   auto_sbitmap sched_nodes (num_nodes);
    2157            0 :   auto_sbitmap must_precede (num_nodes);
    2158            0 :   auto_sbitmap must_follow (num_nodes);
    2159            0 :   auto_sbitmap tobe_scheduled (num_nodes);
    2160              : 
    2161              :   /* Value of param_sms_dfa_history is a limit on the number of cycles that
    2162              :      resource conflicts can span.  ??? Should be provided by DFA, and be
    2163              :      dependent on the type of insn scheduled.  Set to 0 by default to save
    2164              :      compile time.  */
    2165            0 :   partial_schedule_ptr ps = create_partial_schedule (ii, g,
    2166              :                                                      param_sms_dfa_history);
    2167              : 
    2168            0 :   bitmap_ones (tobe_scheduled);
    2169            0 :   bitmap_clear (sched_nodes);
    2170              : 
    2171            0 :   while (flush_and_start_over && (ii < maxii))
    2172              :     {
    2173              : 
    2174            0 :       if (dump_file)
    2175            0 :         fprintf (dump_file, "Starting with ii=%d\n", ii);
    2176            0 :       flush_and_start_over = false;
    2177            0 :       bitmap_clear (sched_nodes);
    2178              : 
    2179            0 :       for (i = 0; i < num_nodes; i++)
    2180              :         {
    2181            0 :           int u = nodes_order[i];
    2182            0 :           ddg_node_ptr u_node = &ps->g->nodes[u];
    2183            0 :           rtx_insn *insn = u_node->insn;
    2184              : 
    2185            0 :           gcc_checking_assert (NONDEBUG_INSN_P (insn));
    2186              : 
    2187            0 :           if (bitmap_bit_p (sched_nodes, u))
    2188            0 :             continue;
    2189              : 
    2190              :           /* Try to get non-empty scheduling window.  */
    2191            0 :          success = 0;
    2192            0 :          if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
    2193              :                                 &step, &end) == 0)
    2194              :             {
    2195            0 :               if (dump_file)
    2196            0 :                 fprintf (dump_file, "\nTrying to schedule node %d "
    2197              :                          "INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
    2198            0 :                         (g->nodes[u].insn)), start, end, step);
    2199              : 
    2200            0 :               gcc_assert ((step > 0 && start < end)
    2201              :                           || (step < 0 && start > end));
    2202              : 
    2203            0 :               calculate_must_precede_follow (u_node, start, end, step, ii,
    2204              :                                              sched_nodes, must_precede,
    2205              :                                              must_follow);
    2206              : 
    2207            0 :               for (c = start; c != end; c += step)
    2208              :                 {
    2209            0 :                   sbitmap tmp_precede, tmp_follow;
    2210              : 
    2211            0 :                   set_must_precede_follow (&tmp_follow, must_follow,
    2212              :                                            &tmp_precede, must_precede,
    2213              :                                            c, start, end, step);
    2214            0 :                   success =
    2215            0 :                     try_scheduling_node_in_cycle (ps, u, c,
    2216              :                                                   sched_nodes,
    2217              :                                                   &num_splits, tmp_precede,
    2218              :                                                   tmp_follow);
    2219            0 :                   if (success)
    2220              :                     break;
    2221              :                 }
    2222              : 
    2223            0 :               verify_partial_schedule (ps, sched_nodes);
    2224              :             }
    2225            0 :             if (!success)
    2226              :             {
    2227            0 :               int split_row;
    2228              : 
    2229            0 :               if (ii++ == maxii)
    2230              :                 break;
    2231              : 
    2232            0 :               if (num_splits >= MAX_SPLIT_NUM)
    2233              :                 {
    2234            0 :                   num_splits = 0;
    2235            0 :                   flush_and_start_over = true;
    2236            0 :                   verify_partial_schedule (ps, sched_nodes);
    2237            0 :                   reset_partial_schedule (ps, ii);
    2238            0 :                   verify_partial_schedule (ps, sched_nodes);
    2239            0 :                   break;
    2240              :                 }
    2241              : 
    2242            0 :               num_splits++;
    2243              :               /* The scheduling window is exclusive of 'end'
    2244              :                  whereas compute_split_window() expects an inclusive,
    2245              :                  ordered range.  */
    2246            0 :               if (step == 1)
    2247            0 :                 split_row = compute_split_row (sched_nodes, start, end - 1,
    2248              :                                                ps->ii, u_node);
    2249              :               else
    2250            0 :                 split_row = compute_split_row (sched_nodes, end + 1, start,
    2251              :                                                ps->ii, u_node);
    2252              : 
    2253            0 :               ps_insert_empty_row (ps, split_row, sched_nodes);
    2254            0 :               i--;              /* Go back and retry node i.  */
    2255              : 
    2256            0 :               if (dump_file)
    2257            0 :                 fprintf (dump_file, "num_splits=%d\n", num_splits);
    2258              :             }
    2259              : 
    2260              :           /* ??? If (success), check register pressure estimates.  */
    2261              :         }                       /* Continue with next node.  */
    2262              :     }                           /* While flush_and_start_over.  */
    2263            0 :   if (ii >= maxii)
    2264              :     {
    2265            0 :       free_partial_schedule (ps);
    2266            0 :       ps = NULL;
    2267              :     }
    2268              :   else
    2269            0 :     gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
    2270              : 
    2271            0 :   return ps;
    2272            0 : }
    2273              : 
    2274              : /* This function inserts a new empty row into PS at the position
    2275              :    according to SPLITROW, keeping all already scheduled instructions
    2276              :    intact and updating their SCHED_TIME and cycle accordingly.  */
    2277              : static void
    2278            0 : ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
    2279              :                      sbitmap sched_nodes)
    2280              : {
    2281            0 :   ps_insn_ptr crr_insn;
    2282            0 :   ps_insn_ptr *rows_new;
    2283            0 :   int ii = ps->ii;
    2284            0 :   int new_ii = ii + 1;
    2285            0 :   int row;
    2286            0 :   int *rows_length_new;
    2287              : 
    2288            0 :   verify_partial_schedule (ps, sched_nodes);
    2289              : 
    2290              :   /* We normalize sched_time and rotate ps to have only non-negative sched
    2291              :      times, for simplicity of updating cycles after inserting new row.  */
    2292            0 :   split_row -= ps->min_cycle;
    2293            0 :   split_row = SMODULO (split_row, ii);
    2294            0 :   if (dump_file)
    2295            0 :     fprintf (dump_file, "split_row=%d\n", split_row);
    2296              : 
    2297            0 :   reset_sched_times (ps, PS_MIN_CYCLE (ps));
    2298            0 :   rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
    2299              : 
    2300            0 :   rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
    2301            0 :   rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
    2302            0 :   for (row = 0; row < split_row; row++)
    2303              :     {
    2304            0 :       rows_new[row] = ps->rows[row];
    2305            0 :       rows_length_new[row] = ps->rows_length[row];
    2306            0 :       ps->rows[row] = NULL;
    2307            0 :       for (crr_insn = rows_new[row];
    2308            0 :            crr_insn; crr_insn = crr_insn->next_in_row)
    2309              :         {
    2310            0 :           int u = crr_insn->id;
    2311            0 :           int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
    2312              : 
    2313            0 :           SCHED_TIME (u) = new_time;
    2314            0 :           crr_insn->cycle = new_time;
    2315            0 :           SCHED_ROW (u) = new_time % new_ii;
    2316            0 :           SCHED_STAGE (u) = new_time / new_ii;
    2317              :         }
    2318              : 
    2319              :     }
    2320              : 
    2321            0 :   rows_new[split_row] = NULL;
    2322              : 
    2323            0 :   for (row = split_row; row < ii; row++)
    2324              :     {
    2325            0 :       rows_new[row + 1] = ps->rows[row];
    2326            0 :       rows_length_new[row + 1] = ps->rows_length[row];
    2327            0 :       ps->rows[row] = NULL;
    2328            0 :       for (crr_insn = rows_new[row + 1];
    2329            0 :            crr_insn; crr_insn = crr_insn->next_in_row)
    2330              :         {
    2331            0 :           int u = crr_insn->id;
    2332            0 :           int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
    2333              : 
    2334            0 :           SCHED_TIME (u) = new_time;
    2335            0 :           crr_insn->cycle = new_time;
    2336            0 :           SCHED_ROW (u) = new_time % new_ii;
    2337            0 :           SCHED_STAGE (u) = new_time / new_ii;
    2338              :         }
    2339              :     }
    2340              : 
    2341              :   /* Updating ps.  */
    2342            0 :   ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
    2343            0 :     + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
    2344            0 :   ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
    2345            0 :     + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
    2346            0 :   free (ps->rows);
    2347            0 :   ps->rows = rows_new;
    2348            0 :   free (ps->rows_length);
    2349            0 :   ps->rows_length = rows_length_new;
    2350            0 :   ps->ii = new_ii;
    2351            0 :   gcc_assert (ps->min_cycle >= 0);
    2352              : 
    2353            0 :   verify_partial_schedule (ps, sched_nodes);
    2354              : 
    2355            0 :   if (dump_file)
    2356            0 :     fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
    2357              :              ps->max_cycle);
    2358            0 : }
    2359              : 
    2360              : /* Given U_NODE which is the node that failed to be scheduled; LOW and
    2361              :    UP which are the boundaries of it's scheduling window; compute using
    2362              :    SCHED_NODES and II a row in the partial schedule that can be split
    2363              :    which will separate a critical predecessor from a critical successor
    2364              :    thereby expanding the window, and return it.  */
    2365              : static int
    2366            0 : compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
    2367              :                    ddg_node_ptr u_node)
    2368              : {
    2369            0 :   ddg_edge_ptr e;
    2370            0 :   int lower = INT_MIN, upper = INT_MAX;
    2371            0 :   int crit_pred = -1;
    2372            0 :   int crit_succ = -1;
    2373            0 :   int crit_cycle;
    2374              : 
    2375            0 :   for (e = u_node->in; e != 0; e = e->next_in)
    2376              :     {
    2377            0 :       int v = e->src->cuid;
    2378              : 
    2379            0 :       if (bitmap_bit_p (sched_nodes, v)
    2380            0 :           && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
    2381            0 :         if (SCHED_TIME (v) > lower)
    2382              :           {
    2383            0 :             crit_pred = v;
    2384            0 :             lower = SCHED_TIME (v);
    2385              :           }
    2386              :     }
    2387              : 
    2388            0 :   if (crit_pred >= 0)
    2389              :     {
    2390            0 :       crit_cycle = SCHED_TIME (crit_pred) + 1;
    2391            0 :       return SMODULO (crit_cycle, ii);
    2392              :     }
    2393              : 
    2394            0 :   for (e = u_node->out; e != 0; e = e->next_out)
    2395              :     {
    2396            0 :       int v = e->dest->cuid;
    2397              : 
    2398            0 :       if (bitmap_bit_p (sched_nodes, v)
    2399            0 :           && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
    2400            0 :         if (SCHED_TIME (v) < upper)
    2401              :           {
    2402            0 :             crit_succ = v;
    2403            0 :             upper = SCHED_TIME (v);
    2404              :           }
    2405              :     }
    2406              : 
    2407            0 :   if (crit_succ >= 0)
    2408              :     {
    2409            0 :       crit_cycle = SCHED_TIME (crit_succ);
    2410            0 :       return SMODULO (crit_cycle, ii);
    2411              :     }
    2412              : 
    2413            0 :   if (dump_file)
    2414            0 :     fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
    2415              : 
    2416            0 :   return SMODULO ((low + up + 1) / 2, ii);
    2417              : }
    2418              : 
    2419              : static void
    2420            0 : verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
    2421              : {
    2422            0 :   int row;
    2423            0 :   ps_insn_ptr crr_insn;
    2424              : 
    2425            0 :   for (row = 0; row < ps->ii; row++)
    2426              :     {
    2427            0 :       int length = 0;
    2428              : 
    2429            0 :       for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
    2430              :         {
    2431            0 :           int u = crr_insn->id;
    2432              : 
    2433            0 :           length++;
    2434            0 :           gcc_assert (bitmap_bit_p (sched_nodes, u));
    2435              :           /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
    2436              :              popcount (sched_nodes) == number of insns in ps.  */
    2437            0 :           gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
    2438            0 :           gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
    2439              :         }
    2440              : 
    2441            0 :       gcc_assert (ps->rows_length[row] == length);
    2442              :     }
    2443            0 : }
    2444              : 
    2445              : 
    2446              : /* This page implements the algorithm for ordering the nodes of a DDG
    2447              :    for modulo scheduling, activated through the
    2448              :    "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
    2449              : 
    2450              : #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
    2451              : #define ASAP(x) (ORDER_PARAMS ((x))->asap)
    2452              : #define ALAP(x) (ORDER_PARAMS ((x))->alap)
    2453              : #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
    2454              : #define MOB(x) (ALAP ((x)) - ASAP ((x)))
    2455              : #define DEPTH(x) (ASAP ((x)))
    2456              : 
    2457              : typedef struct node_order_params * nopa;
    2458              : 
    2459              : static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
    2460              : static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
    2461              : static nopa  calculate_order_params (ddg_ptr, int, int *);
    2462              : static int find_max_asap (ddg_ptr, sbitmap);
    2463              : static int find_max_hv_min_mob (ddg_ptr, sbitmap);
    2464              : static int find_max_dv_min_mob (ddg_ptr, sbitmap);
    2465              : 
    2466              : enum sms_direction {BOTTOMUP, TOPDOWN};
    2467              : 
    2468              : struct node_order_params
    2469              : {
    2470              :   int asap;
    2471              :   int alap;
    2472              :   int height;
    2473              : };
    2474              : 
    2475              : /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
    2476              : static void
    2477            0 : check_nodes_order (int *node_order, int num_nodes)
    2478              : {
    2479            0 :   int i;
    2480            0 :   auto_sbitmap tmp (num_nodes);
    2481              : 
    2482            0 :   bitmap_clear (tmp);
    2483              : 
    2484            0 :   if (dump_file)
    2485            0 :     fprintf (dump_file, "SMS final nodes order: \n");
    2486              : 
    2487            0 :   for (i = 0; i < num_nodes; i++)
    2488              :     {
    2489            0 :       int u = node_order[i];
    2490              : 
    2491            0 :       if (dump_file)
    2492            0 :         fprintf (dump_file, "%d ", u);
    2493            0 :       gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
    2494              : 
    2495            0 :       bitmap_set_bit (tmp, u);
    2496              :     }
    2497              : 
    2498            0 :   if (dump_file)
    2499            0 :     fprintf (dump_file, "\n");
    2500            0 : }
    2501              : 
    2502              : /* Order the nodes of G for scheduling and pass the result in
    2503              :    NODE_ORDER.  Also set aux.count of each node to ASAP.
    2504              :    Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
    2505              : static int
    2506            0 : sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
    2507              : {
    2508            0 :   int i;
    2509            0 :   int rec_mii = 0;
    2510            0 :   ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
    2511              : 
    2512            0 :   nopa nops = calculate_order_params (g, mii, pmax_asap);
    2513              : 
    2514            0 :   if (dump_file)
    2515            0 :     print_sccs (dump_file, sccs, g);
    2516              : 
    2517            0 :   order_nodes_of_sccs (sccs, node_order);
    2518              : 
    2519            0 :   if (sccs->num_sccs > 0)
    2520              :     /* First SCC has the largest recurrence_length.  */
    2521            0 :     rec_mii = sccs->sccs[0]->recurrence_length;
    2522              : 
    2523              :   /* Save ASAP before destroying node_order_params.  */
    2524            0 :   for (i = 0; i < g->num_nodes; i++)
    2525              :     {
    2526            0 :       ddg_node_ptr v = &g->nodes[i];
    2527            0 :       v->aux.count = ASAP (v);
    2528              :     }
    2529              : 
    2530            0 :   free (nops);
    2531            0 :   free_ddg_all_sccs (sccs);
    2532            0 :   check_nodes_order (node_order, g->num_nodes);
    2533              : 
    2534            0 :   return rec_mii;
    2535              : }
    2536              : 
    2537              : static void
    2538            0 : order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
    2539              : {
    2540            0 :   int i, pos = 0;
    2541            0 :   ddg_ptr g = all_sccs->ddg;
    2542            0 :   int num_nodes = g->num_nodes;
    2543            0 :   auto_sbitmap prev_sccs (num_nodes);
    2544            0 :   auto_sbitmap on_path (num_nodes);
    2545            0 :   auto_sbitmap tmp (num_nodes);
    2546            0 :   auto_sbitmap ones (num_nodes);
    2547              : 
    2548            0 :   bitmap_clear (prev_sccs);
    2549            0 :   bitmap_ones (ones);
    2550              : 
    2551              :   /* Perform the node ordering starting from the SCC with the highest recMII.
    2552              :      For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
    2553            0 :   for (i = 0; i < all_sccs->num_sccs; i++)
    2554              :     {
    2555            0 :       ddg_scc_ptr scc = all_sccs->sccs[i];
    2556              : 
    2557              :       /* Add nodes on paths from previous SCCs to the current SCC.  */
    2558            0 :       find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
    2559            0 :       bitmap_ior (tmp, scc->nodes, on_path);
    2560              : 
    2561              :       /* Add nodes on paths from the current SCC to previous SCCs.  */
    2562            0 :       find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
    2563            0 :       bitmap_ior (tmp, tmp, on_path);
    2564              : 
    2565              :       /* Remove nodes of previous SCCs from current extended SCC.  */
    2566            0 :       bitmap_and_compl (tmp, tmp, prev_sccs);
    2567              : 
    2568            0 :       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    2569              :       /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
    2570              :     }
    2571              : 
    2572              :   /* Handle the remaining nodes that do not belong to any scc.  Each call
    2573              :      to order_nodes_in_scc handles a single connected component.  */
    2574            0 :   while (pos < g->num_nodes)
    2575              :     {
    2576            0 :       bitmap_and_compl (tmp, ones, prev_sccs);
    2577            0 :       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    2578              :     }
    2579            0 : }
    2580              : 
    2581              : /* MII is needed if we consider backarcs (that do not close recursive cycles).  */
    2582              : static struct node_order_params *
    2583            0 : calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
    2584              : {
    2585            0 :   int u;
    2586            0 :   int max_asap;
    2587            0 :   int num_nodes = g->num_nodes;
    2588            0 :   ddg_edge_ptr e;
    2589              :   /* Allocate a place to hold ordering params for each node in the DDG.  */
    2590            0 :   nopa node_order_params_arr;
    2591              : 
    2592              :   /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
    2593            0 :   node_order_params_arr = (nopa) xcalloc (num_nodes,
    2594              :                                           sizeof (struct node_order_params));
    2595              : 
    2596              :   /* Set the aux pointer of each node to point to its order_params structure.  */
    2597            0 :   for (u = 0; u < num_nodes; u++)
    2598            0 :     g->nodes[u].aux.info = &node_order_params_arr[u];
    2599              : 
    2600              :   /* Disregarding a backarc from each recursive cycle to obtain a DAG,
    2601              :      calculate ASAP, ALAP, mobility, distance, and height for each node
    2602              :      in the dependence (direct acyclic) graph.  */
    2603              : 
    2604              :   /* We assume that the nodes in the array are in topological order.  */
    2605              : 
    2606              :   max_asap = 0;
    2607            0 :   for (u = 0; u < num_nodes; u++)
    2608              :     {
    2609            0 :       ddg_node_ptr u_node = &g->nodes[u];
    2610              : 
    2611            0 :       ASAP (u_node) = 0;
    2612            0 :       for (e = u_node->in; e; e = e->next_in)
    2613            0 :         if (e->distance == 0)
    2614            0 :           ASAP (u_node) = MAX (ASAP (u_node),
    2615              :                                ASAP (e->src) + e->latency);
    2616            0 :       max_asap = MAX (max_asap, ASAP (u_node));
    2617              :     }
    2618              : 
    2619            0 :   for (u = num_nodes - 1; u > -1; u--)
    2620              :     {
    2621            0 :       ddg_node_ptr u_node = &g->nodes[u];
    2622              : 
    2623            0 :       ALAP (u_node) = max_asap;
    2624            0 :       HEIGHT (u_node) = 0;
    2625            0 :       for (e = u_node->out; e; e = e->next_out)
    2626            0 :         if (e->distance == 0)
    2627              :           {
    2628            0 :             ALAP (u_node) = MIN (ALAP (u_node),
    2629              :                                  ALAP (e->dest) - e->latency);
    2630            0 :             HEIGHT (u_node) = MAX (HEIGHT (u_node),
    2631              :                                    HEIGHT (e->dest) + e->latency);
    2632              :           }
    2633              :     }
    2634            0 :   if (dump_file)
    2635              :   {
    2636            0 :     fprintf (dump_file, "\nOrder params\n");
    2637            0 :     for (u = 0; u < num_nodes; u++)
    2638              :       {
    2639            0 :         ddg_node_ptr u_node = &g->nodes[u];
    2640              : 
    2641            0 :         fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
    2642            0 :                  ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
    2643              :       }
    2644              :   }
    2645              : 
    2646            0 :   *pmax_asap = max_asap;
    2647            0 :   return node_order_params_arr;
    2648              : }
    2649              : 
    2650              : static int
    2651            0 : find_max_asap (ddg_ptr g, sbitmap nodes)
    2652              : {
    2653            0 :   unsigned int u = 0;
    2654            0 :   int max_asap = -1;
    2655            0 :   int result = -1;
    2656            0 :   sbitmap_iterator sbi;
    2657              : 
    2658            0 :   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
    2659              :     {
    2660            0 :       ddg_node_ptr u_node = &g->nodes[u];
    2661              : 
    2662            0 :       if (max_asap < ASAP (u_node))
    2663              :         {
    2664            0 :           max_asap = ASAP (u_node);
    2665            0 :           result = u;
    2666              :         }
    2667              :     }
    2668            0 :   return result;
    2669              : }
    2670              : 
    2671              : static int
    2672            0 : find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
    2673              : {
    2674            0 :   unsigned int u = 0;
    2675            0 :   int max_hv = -1;
    2676            0 :   int min_mob = INT_MAX;
    2677            0 :   int result = -1;
    2678            0 :   sbitmap_iterator sbi;
    2679              : 
    2680            0 :   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
    2681              :     {
    2682            0 :       ddg_node_ptr u_node = &g->nodes[u];
    2683              : 
    2684            0 :       if (max_hv < HEIGHT (u_node))
    2685              :         {
    2686            0 :           max_hv = HEIGHT (u_node);
    2687            0 :           min_mob = MOB (u_node);
    2688            0 :           result = u;
    2689              :         }
    2690            0 :       else if ((max_hv == HEIGHT (u_node))
    2691            0 :                && (min_mob > MOB (u_node)))
    2692              :         {
    2693            0 :           min_mob = MOB (u_node);
    2694            0 :           result = u;
    2695              :         }
    2696              :     }
    2697            0 :   return result;
    2698              : }
    2699              : 
    2700              : static int
    2701            0 : find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
    2702              : {
    2703            0 :   unsigned int u = 0;
    2704            0 :   int max_dv = -1;
    2705            0 :   int min_mob = INT_MAX;
    2706            0 :   int result = -1;
    2707            0 :   sbitmap_iterator sbi;
    2708              : 
    2709            0 :   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
    2710              :     {
    2711            0 :       ddg_node_ptr u_node = &g->nodes[u];
    2712              : 
    2713            0 :       if (max_dv < DEPTH (u_node))
    2714              :         {
    2715            0 :           max_dv = DEPTH (u_node);
    2716            0 :           min_mob = MOB (u_node);
    2717            0 :           result = u;
    2718              :         }
    2719            0 :       else if ((max_dv == DEPTH (u_node))
    2720            0 :                && (min_mob > MOB (u_node)))
    2721              :         {
    2722            0 :           min_mob = MOB (u_node);
    2723            0 :           result = u;
    2724              :         }
    2725              :     }
    2726            0 :   return result;
    2727              : }
    2728              : 
    2729              : /* Places the nodes of SCC into the NODE_ORDER array starting
    2730              :    at position POS, according to the SMS ordering algorithm.
    2731              :    NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
    2732              :    the NODE_ORDER array, starting from position zero.  */
    2733              : static int
    2734            0 : order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
    2735              :                     int * node_order, int pos)
    2736              : {
    2737            0 :   enum sms_direction dir;
    2738            0 :   int num_nodes = g->num_nodes;
    2739            0 :   auto_sbitmap workset (num_nodes);
    2740            0 :   auto_sbitmap tmp (num_nodes);
    2741            0 :   sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
    2742            0 :   auto_sbitmap predecessors (num_nodes);
    2743            0 :   auto_sbitmap successors (num_nodes);
    2744              : 
    2745            0 :   bitmap_clear (predecessors);
    2746            0 :   find_predecessors (predecessors, g, nodes_ordered);
    2747              : 
    2748            0 :   bitmap_clear (successors);
    2749            0 :   find_successors (successors, g, nodes_ordered);
    2750              : 
    2751            0 :   bitmap_clear (tmp);
    2752            0 :   if (bitmap_and (tmp, predecessors, scc))
    2753              :     {
    2754            0 :       bitmap_copy (workset, tmp);
    2755            0 :       dir = BOTTOMUP;
    2756              :     }
    2757            0 :   else if (bitmap_and (tmp, successors, scc))
    2758              :     {
    2759            0 :       bitmap_copy (workset, tmp);
    2760            0 :       dir = TOPDOWN;
    2761              :     }
    2762              :   else
    2763              :     {
    2764            0 :       int u;
    2765              : 
    2766            0 :       bitmap_clear (workset);
    2767            0 :       if ((u = find_max_asap (g, scc)) >= 0)
    2768            0 :         bitmap_set_bit (workset, u);
    2769              :       dir = BOTTOMUP;
    2770              :     }
    2771              : 
    2772            0 :   bitmap_clear (zero_bitmap);
    2773            0 :   while (!bitmap_equal_p (workset, zero_bitmap))
    2774              :     {
    2775            0 :       int v;
    2776            0 :       ddg_node_ptr v_node;
    2777            0 :       sbitmap v_node_preds;
    2778            0 :       sbitmap v_node_succs;
    2779              : 
    2780            0 :       if (dir == TOPDOWN)
    2781              :         {
    2782            0 :           while (!bitmap_equal_p (workset, zero_bitmap))
    2783              :             {
    2784            0 :               v = find_max_hv_min_mob (g, workset);
    2785            0 :               v_node = &g->nodes[v];
    2786            0 :               node_order[pos++] = v;
    2787            0 :               v_node_succs = NODE_SUCCESSORS (v_node);
    2788            0 :               bitmap_and (tmp, v_node_succs, scc);
    2789              : 
    2790              :               /* Don't consider the already ordered successors again.  */
    2791            0 :               bitmap_and_compl (tmp, tmp, nodes_ordered);
    2792            0 :               bitmap_ior (workset, workset, tmp);
    2793            0 :               bitmap_clear_bit (workset, v);
    2794            0 :               bitmap_set_bit (nodes_ordered, v);
    2795              :             }
    2796            0 :           dir = BOTTOMUP;
    2797            0 :           bitmap_clear (predecessors);
    2798            0 :           find_predecessors (predecessors, g, nodes_ordered);
    2799            0 :           bitmap_and (workset, predecessors, scc);
    2800              :         }
    2801              :       else
    2802              :         {
    2803            0 :           while (!bitmap_equal_p (workset, zero_bitmap))
    2804              :             {
    2805            0 :               v = find_max_dv_min_mob (g, workset);
    2806            0 :               v_node = &g->nodes[v];
    2807            0 :               node_order[pos++] = v;
    2808            0 :               v_node_preds = NODE_PREDECESSORS (v_node);
    2809            0 :               bitmap_and (tmp, v_node_preds, scc);
    2810              : 
    2811              :               /* Don't consider the already ordered predecessors again.  */
    2812            0 :               bitmap_and_compl (tmp, tmp, nodes_ordered);
    2813            0 :               bitmap_ior (workset, workset, tmp);
    2814            0 :               bitmap_clear_bit (workset, v);
    2815            0 :               bitmap_set_bit (nodes_ordered, v);
    2816              :             }
    2817            0 :           dir = TOPDOWN;
    2818            0 :           bitmap_clear (successors);
    2819            0 :           find_successors (successors, g, nodes_ordered);
    2820            0 :           bitmap_and (workset, successors, scc);
    2821              :         }
    2822              :     }
    2823            0 :   sbitmap_free (zero_bitmap);
    2824            0 :   return pos;
    2825            0 : }
    2826              : 
    2827              : 
    2828              : /* This page contains functions for manipulating partial-schedules during
    2829              :    modulo scheduling.  */
    2830              : 
    2831              : /* Create a partial schedule and allocate a memory to hold II rows.  */
    2832              : 
    2833              : static partial_schedule_ptr
    2834            0 : create_partial_schedule (int ii, ddg_ptr g, int history)
    2835              : {
    2836            0 :   partial_schedule_ptr ps = XNEW (struct partial_schedule);
    2837            0 :   ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
    2838            0 :   ps->rows_length = (int *) xcalloc (ii, sizeof (int));
    2839            0 :   ps->reg_moves.create (0);
    2840            0 :   ps->ii = ii;
    2841            0 :   ps->history = history;
    2842            0 :   ps->min_cycle = INT_MAX;
    2843            0 :   ps->max_cycle = INT_MIN;
    2844            0 :   ps->g = g;
    2845              : 
    2846            0 :   return ps;
    2847              : }
    2848              : 
    2849              : /* Free the PS_INSNs in rows array of the given partial schedule.
    2850              :    ??? Consider caching the PS_INSN's.  */
    2851              : static void
    2852            0 : free_ps_insns (partial_schedule_ptr ps)
    2853              : {
    2854            0 :   int i;
    2855              : 
    2856            0 :   for (i = 0; i < ps->ii; i++)
    2857              :     {
    2858            0 :       while (ps->rows[i])
    2859              :         {
    2860            0 :           ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
    2861              : 
    2862            0 :           free (ps->rows[i]);
    2863            0 :           ps->rows[i] = ps_insn;
    2864              :         }
    2865            0 :       ps->rows[i] = NULL;
    2866              :     }
    2867            0 : }
    2868              : 
    2869              : /* Free all the memory allocated to the partial schedule.  */
    2870              : 
    2871              : static void
    2872            0 : free_partial_schedule (partial_schedule_ptr ps)
    2873              : {
    2874            0 :   ps_reg_move_info *move;
    2875            0 :   unsigned int i;
    2876              : 
    2877            0 :   if (!ps)
    2878            0 :     return;
    2879              : 
    2880            0 :   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
    2881            0 :     sbitmap_free (move->uses);
    2882            0 :   ps->reg_moves.release ();
    2883              : 
    2884            0 :   free_ps_insns (ps);
    2885            0 :   free (ps->rows);
    2886            0 :   free (ps->rows_length);
    2887            0 :   free (ps);
    2888              : }
    2889              : 
    2890              : /* Clear the rows array with its PS_INSNs, and create a new one with
    2891              :    NEW_II rows.  */
    2892              : 
    2893              : static void
    2894            0 : reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
    2895              : {
    2896            0 :   if (!ps)
    2897              :     return;
    2898            0 :   free_ps_insns (ps);
    2899            0 :   if (new_ii == ps->ii)
    2900              :     return;
    2901            0 :   ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
    2902              :                                                  * sizeof (ps_insn_ptr));
    2903            0 :   memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
    2904            0 :   ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
    2905            0 :   memset (ps->rows_length, 0, new_ii * sizeof (int));
    2906            0 :   ps->ii = new_ii;
    2907            0 :   ps->min_cycle = INT_MAX;
    2908            0 :   ps->max_cycle = INT_MIN;
    2909              : }
    2910              : 
    2911              : /* Prints the partial schedule as an ii rows array, for each rows
    2912              :    print the ids of the insns in it.  */
    2913              : void
    2914            0 : print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
    2915              : {
    2916            0 :   int i;
    2917              : 
    2918            0 :   for (i = 0; i < ps->ii; i++)
    2919              :     {
    2920            0 :       ps_insn_ptr ps_i = ps->rows[i];
    2921              : 
    2922            0 :       fprintf (dump, "\n[ROW %d ]: ", i);
    2923            0 :       while (ps_i)
    2924              :         {
    2925            0 :           rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
    2926              : 
    2927            0 :           if (JUMP_P (insn))
    2928            0 :             fprintf (dump, "%d (branch), ", INSN_UID (insn));
    2929              :           else
    2930            0 :             fprintf (dump, "%d, ", INSN_UID (insn));
    2931              : 
    2932            0 :           ps_i = ps_i->next_in_row;
    2933              :         }
    2934              :     }
    2935            0 : }
    2936              : 
    2937              : /* Creates an object of PS_INSN and initializes it to the given parameters.  */
    2938              : static ps_insn_ptr
    2939            0 : create_ps_insn (int id, int cycle)
    2940              : {
    2941            0 :   ps_insn_ptr ps_i = XNEW (struct ps_insn);
    2942              : 
    2943            0 :   ps_i->id = id;
    2944            0 :   ps_i->next_in_row = NULL;
    2945            0 :   ps_i->prev_in_row = NULL;
    2946            0 :   ps_i->cycle = cycle;
    2947              : 
    2948            0 :   return ps_i;
    2949              : }
    2950              : 
    2951              : 
    2952              : /* Removes the given PS_INSN from the partial schedule.  */
    2953              : static void
    2954            0 : remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
    2955              : {
    2956            0 :   int row;
    2957              : 
    2958            0 :   gcc_assert (ps && ps_i);
    2959              : 
    2960            0 :   row = SMODULO (ps_i->cycle, ps->ii);
    2961            0 :   if (! ps_i->prev_in_row)
    2962              :     {
    2963            0 :       gcc_assert (ps_i == ps->rows[row]);
    2964            0 :       ps->rows[row] = ps_i->next_in_row;
    2965            0 :       if (ps->rows[row])
    2966            0 :         ps->rows[row]->prev_in_row = NULL;
    2967              :     }
    2968              :   else
    2969              :     {
    2970            0 :       ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
    2971            0 :       if (ps_i->next_in_row)
    2972            0 :         ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
    2973              :     }
    2974              : 
    2975            0 :   ps->rows_length[row] -= 1;
    2976            0 :   free (ps_i);
    2977            0 :   return;
    2978              : }
    2979              : 
    2980              : /* Unlike what literature describes for modulo scheduling (which focuses
    2981              :    on VLIW machines) the order of the instructions inside a cycle is
    2982              :    important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
    2983              :    where the current instruction should go relative to the already
    2984              :    scheduled instructions in the given cycle.  Go over these
    2985              :    instructions and find the first possible column to put it in.  */
    2986              : static bool
    2987            0 : ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
    2988              :                      sbitmap must_precede, sbitmap must_follow)
    2989              : {
    2990            0 :   ps_insn_ptr next_ps_i;
    2991            0 :   ps_insn_ptr first_must_follow = NULL;
    2992            0 :   ps_insn_ptr last_must_precede = NULL;
    2993            0 :   ps_insn_ptr last_in_row = NULL;
    2994            0 :   int row;
    2995              : 
    2996            0 :   if (! ps_i)
    2997              :     return false;
    2998              : 
    2999            0 :   row = SMODULO (ps_i->cycle, ps->ii);
    3000              : 
    3001              :   /* Find the first must follow and the last must precede
    3002              :      and insert the node immediately after the must precede
    3003              :      but make sure that it there is no must follow after it.  */
    3004            0 :   for (next_ps_i = ps->rows[row];
    3005            0 :        next_ps_i;
    3006            0 :        next_ps_i = next_ps_i->next_in_row)
    3007              :     {
    3008            0 :       if (must_follow
    3009            0 :           && bitmap_bit_p (must_follow, next_ps_i->id)
    3010            0 :           && ! first_must_follow)
    3011              :         first_must_follow = next_ps_i;
    3012            0 :       if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
    3013              :         {
    3014              :           /* If we have already met a node that must follow, then
    3015              :              there is no possible column.  */
    3016            0 :           if (first_must_follow)
    3017              :             return false;
    3018              :           else
    3019              :             last_must_precede = next_ps_i;
    3020              :         }
    3021              :       /* The closing branch must be the last in the row.  */
    3022            0 :       if (JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
    3023              :         return false;
    3024              : 
    3025            0 :        last_in_row = next_ps_i;
    3026              :     }
    3027              : 
    3028              :   /* The closing branch is scheduled as well.  Make sure there is no
    3029              :      dependent instruction after it as the branch should be the last
    3030              :      instruction in the row.  */
    3031            0 :   if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
    3032              :     {
    3033            0 :       if (first_must_follow)
    3034              :         return false;
    3035            0 :       if (last_in_row)
    3036              :         {
    3037              :           /* Make the branch the last in the row.  New instructions
    3038              :              will be inserted at the beginning of the row or after the
    3039              :              last must_precede instruction thus the branch is guaranteed
    3040              :              to remain the last instruction in the row.  */
    3041            0 :           last_in_row->next_in_row = ps_i;
    3042            0 :           ps_i->prev_in_row = last_in_row;
    3043            0 :           ps_i->next_in_row = NULL;
    3044              :         }
    3045              :       else
    3046            0 :         ps->rows[row] = ps_i;
    3047            0 :       return true;
    3048              :     }
    3049              : 
    3050              :   /* Now insert the node after INSERT_AFTER_PSI.  */
    3051              : 
    3052            0 :   if (! last_must_precede)
    3053              :     {
    3054            0 :       ps_i->next_in_row = ps->rows[row];
    3055            0 :       ps_i->prev_in_row = NULL;
    3056            0 :       if (ps_i->next_in_row)
    3057            0 :         ps_i->next_in_row->prev_in_row = ps_i;
    3058            0 :       ps->rows[row] = ps_i;
    3059              :     }
    3060              :   else
    3061              :     {
    3062            0 :       ps_i->next_in_row = last_must_precede->next_in_row;
    3063            0 :       last_must_precede->next_in_row = ps_i;
    3064            0 :       ps_i->prev_in_row = last_must_precede;
    3065            0 :       if (ps_i->next_in_row)
    3066            0 :         ps_i->next_in_row->prev_in_row = ps_i;
    3067              :     }
    3068              : 
    3069              :   return true;
    3070              : }
    3071              : 
    3072              : /* Advances the PS_INSN one column in its current row; returns false
    3073              :    in failure and true in success.  Bit N is set in MUST_FOLLOW if
    3074              :    the node with cuid N must be come after the node pointed to by
    3075              :    PS_I when scheduled in the same cycle.  */
    3076              : static bool
    3077            0 : ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
    3078              :                         sbitmap must_follow)
    3079              : {
    3080            0 :   ps_insn_ptr prev, next;
    3081            0 :   int row;
    3082              : 
    3083            0 :   if (!ps || !ps_i)
    3084              :     return false;
    3085              : 
    3086            0 :   row = SMODULO (ps_i->cycle, ps->ii);
    3087              : 
    3088            0 :   if (! ps_i->next_in_row)
    3089              :     return false;
    3090              : 
    3091              :   /* Check if next_in_row is dependent on ps_i, both having same sched
    3092              :      times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
    3093            0 :   if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
    3094              :     return false;
    3095              : 
    3096              :   /* Advance PS_I over its next_in_row in the doubly linked list.  */
    3097            0 :   prev = ps_i->prev_in_row;
    3098            0 :   next = ps_i->next_in_row;
    3099              : 
    3100            0 :   if (ps_i == ps->rows[row])
    3101            0 :     ps->rows[row] = next;
    3102              : 
    3103            0 :   ps_i->next_in_row = next->next_in_row;
    3104              : 
    3105            0 :   if (next->next_in_row)
    3106            0 :     next->next_in_row->prev_in_row = ps_i;
    3107              : 
    3108            0 :   next->next_in_row = ps_i;
    3109            0 :   ps_i->prev_in_row = next;
    3110              : 
    3111            0 :   next->prev_in_row = prev;
    3112            0 :   if (prev)
    3113            0 :     prev->next_in_row = next;
    3114              : 
    3115              :   return true;
    3116              : }
    3117              : 
    3118              : /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
    3119              :    Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
    3120              :    set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
    3121              :    before/after (respectively) the node pointed to by PS_I when scheduled
    3122              :    in the same cycle.  */
    3123              : static ps_insn_ptr
    3124            0 : add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
    3125              :                 sbitmap must_precede, sbitmap must_follow)
    3126              : {
    3127            0 :   ps_insn_ptr ps_i;
    3128            0 :   int row = SMODULO (cycle, ps->ii);
    3129              : 
    3130            0 :   if (ps->rows_length[row] >= issue_rate)
    3131              :     return NULL;
    3132              : 
    3133            0 :   ps_i = create_ps_insn (id, cycle);
    3134              : 
    3135              :   /* Finds and inserts PS_I according to MUST_FOLLOW and
    3136              :      MUST_PRECEDE.  */
    3137            0 :   if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
    3138              :     {
    3139            0 :       free (ps_i);
    3140            0 :       return NULL;
    3141              :     }
    3142              : 
    3143            0 :   ps->rows_length[row] += 1;
    3144            0 :   return ps_i;
    3145              : }
    3146              : 
    3147              : /* Advance time one cycle.  Assumes DFA is being used.  */
    3148              : static void
    3149            0 : advance_one_cycle (void)
    3150              : {
    3151            0 :   if (targetm.sched.dfa_pre_cycle_insn)
    3152            0 :     state_transition (curr_state,
    3153              :                       targetm.sched.dfa_pre_cycle_insn ());
    3154              : 
    3155            0 :   state_transition (curr_state, NULL);
    3156              : 
    3157            0 :   if (targetm.sched.dfa_post_cycle_insn)
    3158            0 :     state_transition (curr_state,
    3159            0 :                       targetm.sched.dfa_post_cycle_insn ());
    3160            0 : }
    3161              : 
    3162              : 
    3163              : 
    3164              : /* Checks if PS has resource conflicts according to DFA, starting from
    3165              :    FROM cycle to TO cycle; returns true if there are conflicts and false
    3166              :    if there are no conflicts.  Assumes DFA is being used.  */
    3167              : static bool
    3168            0 : ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
    3169              : {
    3170            0 :   int cycle;
    3171              : 
    3172            0 :   state_reset (curr_state);
    3173              : 
    3174            0 :   for (cycle = from; cycle <= to; cycle++)
    3175              :     {
    3176            0 :       ps_insn_ptr crr_insn;
    3177              :       /* Holds the remaining issue slots in the current row.  */
    3178            0 :       int can_issue_more = issue_rate;
    3179              : 
    3180              :       /* Walk through the DFA for the current row.  */
    3181            0 :       for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
    3182            0 :            crr_insn;
    3183            0 :            crr_insn = crr_insn->next_in_row)
    3184              :         {
    3185            0 :           rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
    3186              : 
    3187              :           /* Check if there is room for the current insn.  */
    3188            0 :           if (!can_issue_more || state_dead_lock_p (curr_state))
    3189            0 :             return true;
    3190              : 
    3191              :           /* Update the DFA state and return with failure if the DFA found
    3192              :              resource conflicts.  */
    3193            0 :           if (state_transition (curr_state, insn) >= 0)
    3194              :             return true;
    3195              : 
    3196            0 :           if (targetm.sched.variable_issue)
    3197            0 :             can_issue_more =
    3198            0 :               targetm.sched.variable_issue (sched_dump, sched_verbose,
    3199              :                                             insn, can_issue_more);
    3200              :           /* A naked CLOBBER or USE generates no instruction, so don't
    3201              :              let them consume issue slots.  */
    3202            0 :           else if (GET_CODE (PATTERN (insn)) != USE
    3203            0 :                    && GET_CODE (PATTERN (insn)) != CLOBBER)
    3204            0 :             can_issue_more--;
    3205              :         }
    3206              : 
    3207              :       /* Advance the DFA to the next cycle.  */
    3208            0 :       advance_one_cycle ();
    3209              :     }
    3210              :   return false;
    3211              : }
    3212              : 
    3213              : /* Checks if the given node causes resource conflicts when added to PS at
    3214              :    cycle C.  If not the node is added to PS and returned; otherwise zero
    3215              :    is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
    3216              :    cuid N must be come before/after (respectively) the node pointed to by
    3217              :    PS_I when scheduled in the same cycle.  */
    3218              : ps_insn_ptr
    3219            0 : ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
    3220              :                              int c, sbitmap must_precede,
    3221              :                              sbitmap must_follow)
    3222              : {
    3223            0 :   int i, first, amount;
    3224            0 :   bool has_conflicts = false;
    3225            0 :   ps_insn_ptr ps_i;
    3226              : 
    3227              :   /* First add the node to the PS, if this succeeds check for
    3228              :      conflicts, trying different issue slots in the same row.  */
    3229            0 :   if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
    3230              :     return NULL; /* Failed to insert the node at the given cycle.  */
    3231              : 
    3232            0 :   while (1)
    3233              :     {
    3234            0 :       has_conflicts = ps_has_conflicts (ps, c, c);
    3235            0 :       if (ps->history > 0 && !has_conflicts)
    3236              :         {
    3237              :           /* Check all 2h+1 intervals, starting from c-2h..c up to c..2h,
    3238              :              but not more than ii intervals.  */
    3239            0 :           first = c - ps->history;
    3240            0 :           amount = 2 * ps->history + 1;
    3241            0 :           if (amount > ps->ii)
    3242              :             amount = ps->ii;
    3243            0 :           for (i = first; i < first + amount; i++)
    3244              :             {
    3245            0 :               has_conflicts = ps_has_conflicts (ps,
    3246              :                                                 i - ps->history,
    3247            0 :                                                 i + ps->history);
    3248            0 :               if (has_conflicts)
    3249              :                 break;
    3250              :             }
    3251              :         }
    3252            0 :       if (!has_conflicts)
    3253              :         break;
    3254              :       /* Try different issue slots to find one that the given node can be
    3255              :          scheduled in without conflicts.  */
    3256            0 :       if (! ps_insn_advance_column (ps, ps_i, must_follow))
    3257              :         break;
    3258              :     }
    3259              : 
    3260            0 :   if (has_conflicts)
    3261              :     {
    3262            0 :       remove_node_from_ps (ps, ps_i);
    3263            0 :       return NULL;
    3264              :     }
    3265              : 
    3266            0 :   ps->min_cycle = MIN (ps->min_cycle, c);
    3267            0 :   ps->max_cycle = MAX (ps->max_cycle, c);
    3268            0 :   return ps_i;
    3269              : }
    3270              : 
    3271              : /* Calculate the stage count of the partial schedule PS.  The calculation
    3272              :    takes into account the rotation amount passed in ROTATION_AMOUNT.  */
    3273              : int
    3274            0 : calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
    3275              : {
    3276            0 :   int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
    3277            0 :   int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
    3278            0 :   int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
    3279              : 
    3280              :   /* The calculation of stage count is done adding the number of stages
    3281              :      before cycle zero and after cycle zero.  */
    3282            0 :   stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
    3283              : 
    3284            0 :   return stage_count;
    3285              : }
    3286              : 
    3287              : /* Rotate the rows of PS such that insns scheduled at time
    3288              :    START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
    3289              : void
    3290            0 : rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
    3291              : {
    3292            0 :   int i, row, backward_rotates;
    3293            0 :   int last_row = ps->ii - 1;
    3294              : 
    3295            0 :   if (start_cycle == 0)
    3296              :     return;
    3297              : 
    3298            0 :   backward_rotates = SMODULO (start_cycle, ps->ii);
    3299              : 
    3300              :   /* Revisit later and optimize this into a single loop.  */
    3301            0 :   for (i = 0; i < backward_rotates; i++)
    3302              :     {
    3303            0 :       ps_insn_ptr first_row = ps->rows[0];
    3304            0 :       int first_row_length = ps->rows_length[0];
    3305              : 
    3306            0 :       for (row = 0; row < last_row; row++)
    3307              :         {
    3308            0 :           ps->rows[row] = ps->rows[row + 1];
    3309            0 :           ps->rows_length[row] = ps->rows_length[row + 1];
    3310              :         }
    3311              : 
    3312            0 :       ps->rows[last_row] = first_row;
    3313            0 :       ps->rows_length[last_row] = first_row_length;
    3314              :     }
    3315              : 
    3316            0 :   ps->max_cycle -= start_cycle;
    3317            0 :   ps->min_cycle -= start_cycle;
    3318              : }
    3319              : 
    3320              : #endif /* INSN_SCHEDULING */
    3321              : 
    3322              : /* Run instruction scheduler.  */
    3323              : /* Perform SMS module scheduling.  */
    3324              : 
    3325              : namespace {
    3326              : 
    3327              : const pass_data pass_data_sms =
    3328              : {
    3329              :   RTL_PASS, /* type */
    3330              :   "sms", /* name */
    3331              :   OPTGROUP_NONE, /* optinfo_flags */
    3332              :   TV_SMS, /* tv_id */
    3333              :   0, /* properties_required */
    3334              :   0, /* properties_provided */
    3335              :   0, /* properties_destroyed */
    3336              :   0, /* todo_flags_start */
    3337              :   TODO_df_finish, /* todo_flags_finish */
    3338              : };
    3339              : 
    3340              : class pass_sms : public rtl_opt_pass
    3341              : {
    3342              : public:
    3343       285722 :   pass_sms (gcc::context *ctxt)
    3344       571444 :     : rtl_opt_pass (pass_data_sms, ctxt)
    3345              :   {}
    3346              : 
    3347              :   /* opt_pass methods: */
    3348      1471370 :   bool gate (function *) final override
    3349              : {
    3350      1471370 :   return (optimize > 0 && flag_modulo_sched);
    3351              : }
    3352              : 
    3353              :   unsigned int execute (function *) final override;
    3354              : 
    3355              : }; // class pass_sms
    3356              : 
    3357              : unsigned int
    3358          313 : pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
    3359              : {
    3360              : #ifdef INSN_SCHEDULING
    3361          313 :   basic_block bb;
    3362              : 
    3363              :   /* Collect loop information to be used in SMS.  */
    3364          313 :   cfg_layout_initialize (0);
    3365          313 :   sms_schedule ();
    3366              : 
    3367              :   /* Update the life information, because we add pseudos.  */
    3368          313 :   max_regno = max_reg_num ();
    3369              : 
    3370              :   /* Finalize layout changes.  */
    3371         2314 :   FOR_EACH_BB_FN (bb, fun)
    3372         2001 :     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
    3373         1688 :       bb->aux = bb->next_bb;
    3374          313 :   free_dominance_info (CDI_DOMINATORS);
    3375          313 :   cfg_layout_finalize ();
    3376              : #endif /* INSN_SCHEDULING */
    3377          313 :   return 0;
    3378              : }
    3379              : 
    3380              : } // anon namespace
    3381              : 
    3382              : rtl_opt_pass *
    3383       285722 : make_pass_sms (gcc::context *ctxt)
    3384              : {
    3385       285722 :   return new pass_sms (ctxt);
    3386              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.