Branch data Line data Source code
1 : : /* Swing Modulo Scheduling implementation.
2 : : Copyright (C) 2004-2024 Free Software Foundation, Inc.
3 : : Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : :
22 : : #include "config.h"
23 : : #include "system.h"
24 : : #include "coretypes.h"
25 : : #include "backend.h"
26 : : #include "target.h"
27 : : #include "rtl.h"
28 : : #include "tree.h"
29 : : #include "cfghooks.h"
30 : : #include "df.h"
31 : : #include "memmodel.h"
32 : : #include "optabs.h"
33 : : #include "regs.h"
34 : : #include "emit-rtl.h"
35 : : #include "gcov-io.h"
36 : : #include "profile.h"
37 : : #include "insn-attr.h"
38 : : #include "cfgrtl.h"
39 : : #include "sched-int.h"
40 : : #include "cfgloop.h"
41 : : #include "expr.h"
42 : : #include "ddg.h"
43 : : #include "tree-pass.h"
44 : : #include "dbgcnt.h"
45 : : #include "loop-unroll.h"
46 : : #include "hard-reg-set.h"
47 : :
48 : : #ifdef INSN_SCHEDULING
49 : :
50 : : /* This file contains the implementation of the Swing Modulo Scheduler,
51 : : described in the following references:
52 : : [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
53 : : Lifetime--sensitive modulo scheduling in a production environment.
54 : : IEEE Trans. on Comps., 50(3), March 2001
55 : : [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
56 : : Swing Modulo Scheduling: A Lifetime Sensitive Approach.
57 : : PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
58 : :
59 : : The basic structure is:
60 : : 1. Build a data-dependence graph (DDG) for each loop.
61 : : 2. Use the DDG to order the insns of a loop (not in topological order
62 : : necessarily, but rather) trying to place each insn after all its
63 : : predecessors _or_ after all its successors.
64 : : 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
65 : : 4. Use the ordering to perform list-scheduling of the loop:
66 : : 1. Set II = MII. We will try to schedule the loop within II cycles.
67 : : 2. Try to schedule the insns one by one according to the ordering.
68 : : For each insn compute an interval of cycles by considering already-
69 : : scheduled preds and succs (and associated latencies); try to place
70 : : the insn in the cycles of this window checking for potential
71 : : resource conflicts (using the DFA interface).
72 : : Note: this is different from the cycle-scheduling of schedule_insns;
73 : : here the insns are not scheduled monotonically top-down (nor bottom-
74 : : up).
75 : : 3. If failed in scheduling all insns - bump II++ and try again, unless
76 : : II reaches an upper bound MaxII, in which case report failure.
77 : : 5. If we succeeded in scheduling the loop within II cycles, we now
78 : : generate prolog and epilog, decrease the counter of the loop, and
79 : : perform modulo variable expansion for live ranges that span more than
80 : : II cycles (i.e. use register copies to prevent a def from overwriting
81 : : itself before reaching the use).
82 : :
83 : : SMS works with countable loops (1) whose control part can be easily
84 : : decoupled from the rest of the loop and (2) whose loop count can
85 : : be easily adjusted. This is because we peel a constant number of
86 : : iterations into a prologue and epilogue for which we want to avoid
87 : : emitting the control part, and a kernel which is to iterate that
88 : : constant number of iterations less than the original loop. So the
89 : : control part should be a set of insns clearly identified and having
90 : : its own iv, not otherwise used in the loop (at-least for now), which
91 : : initializes a register before the loop to the number of iterations.
92 : : Currently SMS relies on the do-loop pattern to recognize such loops,
93 : : where (1) the control part comprises of all insns defining and/or
94 : : using a certain 'count' register and (2) the loop count can be
95 : : adjusted by modifying this register prior to the loop.
96 : : TODO: Rely on cfgloop analysis instead. */
97 : :
98 : : /* This page defines partial-schedule structures and functions for
99 : : modulo scheduling. */
100 : :
101 : : typedef struct partial_schedule *partial_schedule_ptr;
102 : : typedef struct ps_insn *ps_insn_ptr;
103 : :
104 : : /* The minimum (absolute) cycle that a node of ps was scheduled in. */
105 : : #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
106 : :
107 : : /* The maximum (absolute) cycle that a node of ps was scheduled in. */
108 : : #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
109 : :
110 : : /* Perform signed modulo, always returning a non-negative value. */
111 : : #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
112 : :
113 : : /* The number of different iterations the nodes in ps span, assuming
114 : : the stage boundaries are placed efficiently. */
115 : : #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
116 : : + 1 + ii - 1) / ii)
117 : : /* The stage count of ps. */
118 : : #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
119 : :
120 : : /* A single instruction in the partial schedule. */
121 : : struct ps_insn
122 : : {
123 : : /* Identifies the instruction to be scheduled. Values smaller than
124 : : the ddg's num_nodes refer directly to ddg nodes. A value of
125 : : X - num_nodes refers to register move X. */
126 : : int id;
127 : :
128 : : /* The (absolute) cycle in which the PS instruction is scheduled.
129 : : Same as SCHED_TIME (node). */
130 : : int cycle;
131 : :
132 : : /* The next/prev PS_INSN in the same row. */
133 : : ps_insn_ptr next_in_row,
134 : : prev_in_row;
135 : :
136 : : };
137 : :
138 : : /* Information about a register move that has been added to a partial
139 : : schedule. */
140 : : struct ps_reg_move_info
141 : : {
142 : : /* The source of the move is defined by the ps_insn with id DEF.
143 : : The destination is used by the ps_insns with the ids in USES. */
144 : : int def;
145 : : sbitmap uses;
146 : :
147 : : /* The original form of USES' instructions used OLD_REG, but they
148 : : should now use NEW_REG. */
149 : : rtx old_reg;
150 : : rtx new_reg;
151 : :
152 : : /* The number of consecutive stages that the move occupies. */
153 : : int num_consecutive_stages;
154 : :
155 : : /* An instruction that sets NEW_REG to the correct value. The first
156 : : move associated with DEF will have an rhs of OLD_REG; later moves
157 : : use the result of the previous move. */
158 : : rtx_insn *insn;
159 : : };
160 : :
161 : : /* Holds the partial schedule as an array of II rows. Each entry of the
162 : : array points to a linked list of PS_INSNs, which represents the
163 : : instructions that are scheduled for that row. */
164 : : struct partial_schedule
165 : : {
166 : : int ii; /* Number of rows in the partial schedule. */
167 : : int history; /* Threshold for conflict checking using DFA. */
168 : :
169 : : /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
170 : : ps_insn_ptr *rows;
171 : :
172 : : /* All the moves added for this partial schedule. Index X has
173 : : a ps_insn id of X + g->num_nodes. */
174 : : vec<ps_reg_move_info> reg_moves;
175 : :
176 : : /* rows_length[i] holds the number of instructions in the row.
177 : : It is used only (as an optimization) to back off quickly from
178 : : trying to schedule a node in a full row; that is, to avoid running
179 : : through futile DFA state transitions. */
180 : : int *rows_length;
181 : :
182 : : /* The earliest absolute cycle of an insn in the partial schedule. */
183 : : int min_cycle;
184 : :
185 : : /* The latest absolute cycle of an insn in the partial schedule. */
186 : : int max_cycle;
187 : :
188 : : ddg_ptr g; /* The DDG of the insns in the partial schedule. */
189 : :
190 : : int stage_count; /* The stage count of the partial schedule. */
191 : : };
192 : :
193 : :
194 : : static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
195 : : static void free_partial_schedule (partial_schedule_ptr);
196 : : static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
197 : : void print_partial_schedule (partial_schedule_ptr, FILE *);
198 : : static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
199 : : static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
200 : : int, int, sbitmap, sbitmap);
201 : : static void rotate_partial_schedule (partial_schedule_ptr, int);
202 : : void set_row_column_for_ps (partial_schedule_ptr);
203 : : static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
204 : : static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
205 : :
206 : :
207 : : /* This page defines constants and structures for the modulo scheduling
208 : : driver. */
209 : :
210 : : static int sms_order_nodes (ddg_ptr, int, int *, int *);
211 : : static void set_node_sched_params (ddg_ptr);
212 : : static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
213 : : static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
214 : : static int calculate_stage_count (partial_schedule_ptr, int);
215 : : static void calculate_must_precede_follow (ddg_node_ptr, int, int,
216 : : int, int, sbitmap, sbitmap, sbitmap);
217 : : static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
218 : : sbitmap, int, int *, int *, int *);
219 : : static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
220 : : sbitmap, int *, sbitmap, sbitmap);
221 : : static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
222 : :
223 : : #define NODE_ASAP(node) ((node)->aux.count)
224 : :
225 : : #define SCHED_PARAMS(x) (&node_sched_param_vec[x])
226 : : #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
227 : : #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
228 : : #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
229 : : #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
230 : :
231 : : /* The scheduling parameters held for each node. */
232 : : typedef struct node_sched_params
233 : : {
234 : : int time; /* The absolute scheduling cycle. */
235 : :
236 : : int row; /* Holds time % ii. */
237 : : int stage; /* Holds time / ii. */
238 : :
239 : : /* The column of a node inside the ps. If nodes u, v are on the same row,
240 : : u will precede v if column (u) < column (v). */
241 : : int column;
242 : : } *node_sched_params_ptr;
243 : :
244 : : /* The following three functions are copied from the current scheduler
245 : : code in order to use sched_analyze() for computing the dependencies.
246 : : They are used when initializing the sched_info structure. */
247 : : static const char *
248 : 0 : sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
249 : : {
250 : 0 : static char tmp[80];
251 : :
252 : 0 : sprintf (tmp, "i%4d", INSN_UID (insn));
253 : 0 : return tmp;
254 : : }
255 : :
256 : : static void
257 : 0 : compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
258 : : regset used ATTRIBUTE_UNUSED)
259 : : {
260 : 0 : }
261 : :
262 : : static struct common_sched_info_def sms_common_sched_info;
263 : :
264 : : static struct sched_deps_info_def sms_sched_deps_info =
265 : : {
266 : : compute_jump_reg_dependencies,
267 : : NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
268 : : NULL,
269 : : 0, 0, 0
270 : : };
271 : :
272 : : static struct haifa_sched_info sms_sched_info =
273 : : {
274 : : NULL,
275 : : NULL,
276 : : NULL,
277 : : NULL,
278 : : NULL,
279 : : sms_print_insn,
280 : : NULL,
281 : : NULL, /* insn_finishes_block_p */
282 : : NULL, NULL,
283 : : NULL, NULL,
284 : : 0, 0,
285 : :
286 : : NULL, NULL, NULL, NULL,
287 : : NULL, NULL,
288 : : 0
289 : : };
290 : :
291 : : /* Partial schedule instruction ID in PS is a register move. Return
292 : : information about it. */
293 : : static struct ps_reg_move_info *
294 : 0 : ps_reg_move (partial_schedule_ptr ps, int id)
295 : : {
296 : 0 : gcc_checking_assert (id >= ps->g->num_nodes);
297 : 0 : return &ps->reg_moves[id - ps->g->num_nodes];
298 : : }
299 : :
300 : : /* Return the rtl instruction that is being scheduled by partial schedule
301 : : instruction ID, which belongs to schedule PS. */
302 : : static rtx_insn *
303 : 0 : ps_rtl_insn (partial_schedule_ptr ps, int id)
304 : : {
305 : 0 : if (id < ps->g->num_nodes)
306 : 0 : return ps->g->nodes[id].insn;
307 : : else
308 : 0 : return ps_reg_move (ps, id)->insn;
309 : : }
310 : :
311 : : /* Partial schedule instruction ID, which belongs to PS, occurred in
312 : : the original (unscheduled) loop. Return the first instruction
313 : : in the loop that was associated with ps_rtl_insn (PS, ID).
314 : : If the instruction had some notes before it, this is the first
315 : : of those notes. */
316 : : static rtx_insn *
317 : 0 : ps_first_note (partial_schedule_ptr ps, int id)
318 : : {
319 : 0 : gcc_assert (id < ps->g->num_nodes);
320 : 0 : return ps->g->nodes[id].first_note;
321 : : }
322 : :
323 : : /* Return the number of consecutive stages that are occupied by
324 : : partial schedule instruction ID in PS. */
325 : : static int
326 : 0 : ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
327 : : {
328 : 0 : if (id < ps->g->num_nodes)
329 : : return 1;
330 : : else
331 : 0 : return ps_reg_move (ps, id)->num_consecutive_stages;
332 : : }
333 : :
334 : : /* Given HEAD and TAIL which are the first and last insns in a loop;
335 : : return the register which controls the loop. Return zero if it has
336 : : more than one occurrence in the loop besides the control part or the
337 : : do-loop pattern is not of the form we expect. */
338 : : static rtx
339 : 100 : doloop_register_get (rtx_insn *head, rtx_insn *tail)
340 : : {
341 : 100 : rtx reg, condition;
342 : 100 : rtx_insn *insn, *first_insn_not_to_check;
343 : :
344 : 100 : if (!JUMP_P (tail))
345 : : return NULL_RTX;
346 : :
347 : 100 : if (!targetm.code_for_doloop_end)
348 : : return NULL_RTX;
349 : :
350 : : /* TODO: Free SMS's dependence on doloop_condition_get. */
351 : 0 : condition = doloop_condition_get (tail);
352 : 0 : if (! condition)
353 : : return NULL_RTX;
354 : :
355 : 0 : if (REG_P (XEXP (condition, 0)))
356 : : reg = XEXP (condition, 0);
357 : 0 : else if (GET_CODE (XEXP (condition, 0)) == PLUS
358 : 0 : && REG_P (XEXP (XEXP (condition, 0), 0)))
359 : : reg = XEXP (XEXP (condition, 0), 0);
360 : : else
361 : 0 : gcc_unreachable ();
362 : :
363 : : /* Check that the COUNT_REG has no other occurrences in the loop
364 : : until the decrement. We assume the control part consists of
365 : : either a single (parallel) branch-on-count or a (non-parallel)
366 : : branch immediately preceded by a single (decrement) insn. */
367 : 0 : first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
368 : 0 : : prev_nondebug_insn (tail));
369 : :
370 : 0 : for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
371 : 0 : if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
372 : : {
373 : 0 : if (dump_file)
374 : : {
375 : 0 : fprintf (dump_file, "SMS count_reg found ");
376 : 0 : print_rtl_single (dump_file, reg);
377 : 0 : fprintf (dump_file, " outside control in insn:\n");
378 : 0 : print_rtl_single (dump_file, insn);
379 : : }
380 : :
381 : 0 : return NULL_RTX;
382 : : }
383 : :
384 : : return reg;
385 : : }
386 : :
387 : : /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
388 : : that the number of iterations is a compile-time constant. If so,
389 : : return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
390 : : this constant. Otherwise return 0. */
391 : : static rtx_insn *
392 : 0 : const_iteration_count (rtx count_reg, basic_block pre_header,
393 : : int64_t *count, bool* adjust_inplace)
394 : : {
395 : 0 : rtx_insn *insn;
396 : 0 : rtx_insn *head, *tail;
397 : :
398 : 0 : *adjust_inplace = false;
399 : 0 : bool read_after = false;
400 : :
401 : 0 : if (! pre_header)
402 : : return NULL;
403 : :
404 : 0 : get_ebb_head_tail (pre_header, pre_header, &head, &tail);
405 : :
406 : 0 : for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
407 : 0 : if (single_set (insn) && rtx_equal_p (count_reg,
408 : 0 : SET_DEST (single_set (insn))))
409 : : {
410 : 0 : rtx pat = single_set (insn);
411 : :
412 : 0 : if (CONST_INT_P (SET_SRC (pat)))
413 : : {
414 : 0 : *count = INTVAL (SET_SRC (pat));
415 : 0 : *adjust_inplace = !read_after;
416 : 0 : return insn;
417 : : }
418 : :
419 : : return NULL;
420 : : }
421 : 0 : else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
422 : : {
423 : 0 : read_after = true;
424 : 0 : if (reg_set_p (count_reg, insn))
425 : : break;
426 : : }
427 : :
428 : : return NULL;
429 : : }
430 : :
431 : : /* A very simple resource-based lower bound on the initiation interval.
432 : : ??? Improve the accuracy of this bound by considering the
433 : : utilization of various units. */
434 : : static int
435 : 0 : res_MII (ddg_ptr g)
436 : : {
437 : 0 : if (targetm.sched.sms_res_mii)
438 : 0 : return targetm.sched.sms_res_mii (g);
439 : :
440 : 0 : return g->num_nodes / issue_rate;
441 : : }
442 : :
443 : :
444 : : /* A vector that contains the sched data for each ps_insn. */
445 : : static vec<node_sched_params> node_sched_param_vec;
446 : :
447 : : /* Allocate sched_params for each node and initialize it. */
448 : : static void
449 : 0 : set_node_sched_params (ddg_ptr g)
450 : : {
451 : 0 : node_sched_param_vec.truncate (0);
452 : 0 : node_sched_param_vec.safe_grow_cleared (g->num_nodes, true);
453 : 0 : }
454 : :
455 : : /* Make sure that node_sched_param_vec has an entry for every move in PS. */
456 : : static void
457 : 0 : extend_node_sched_params (partial_schedule_ptr ps)
458 : : {
459 : 0 : node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
460 : 0 : + ps->reg_moves.length (), true);
461 : 0 : }
462 : :
463 : : /* Update the sched_params (time, row and stage) for node U using the II,
464 : : the CYCLE of U and MIN_CYCLE.
465 : : We're not simply taking the following
466 : : SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
467 : : because the stages may not be aligned on cycle 0. */
468 : : static void
469 : 0 : update_node_sched_params (int u, int ii, int cycle, int min_cycle)
470 : : {
471 : 0 : int sc_until_cycle_zero;
472 : 0 : int stage;
473 : :
474 : 0 : SCHED_TIME (u) = cycle;
475 : 0 : SCHED_ROW (u) = SMODULO (cycle, ii);
476 : :
477 : : /* The calculation of stage count is done adding the number
478 : : of stages before cycle zero and after cycle zero. */
479 : 0 : sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
480 : :
481 : 0 : if (SCHED_TIME (u) < 0)
482 : : {
483 : 0 : stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
484 : 0 : SCHED_STAGE (u) = sc_until_cycle_zero - stage;
485 : : }
486 : : else
487 : : {
488 : 0 : stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
489 : 0 : SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
490 : : }
491 : 0 : }
492 : :
493 : : static void
494 : 0 : print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
495 : : {
496 : 0 : int i;
497 : :
498 : 0 : if (! file)
499 : : return;
500 : 0 : for (i = 0; i < num_nodes; i++)
501 : : {
502 : 0 : node_sched_params_ptr nsp = SCHED_PARAMS (i);
503 : :
504 : 0 : fprintf (file, "Node = %d; INSN = %d\n", i,
505 : 0 : INSN_UID (ps_rtl_insn (ps, i)));
506 : 0 : fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
507 : 0 : fprintf (file, " time = %d:\n", nsp->time);
508 : 0 : fprintf (file, " stage = %d:\n", nsp->stage);
509 : : }
510 : : }
511 : :
512 : : /* Set SCHED_COLUMN for each instruction in row ROW of PS. */
513 : : static void
514 : 0 : set_columns_for_row (partial_schedule_ptr ps, int row)
515 : : {
516 : 0 : ps_insn_ptr cur_insn;
517 : 0 : int column;
518 : :
519 : 0 : column = 0;
520 : 0 : for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
521 : 0 : SCHED_COLUMN (cur_insn->id) = column++;
522 : 0 : }
523 : :
524 : : /* Set SCHED_COLUMN for each instruction in PS. */
525 : : static void
526 : 0 : set_columns_for_ps (partial_schedule_ptr ps)
527 : : {
528 : 0 : int row;
529 : :
530 : 0 : for (row = 0; row < ps->ii; row++)
531 : 0 : set_columns_for_row (ps, row);
532 : 0 : }
533 : :
534 : : /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
535 : : Its single predecessor has already been scheduled, as has its
536 : : ddg node successors. (The move may have also another move as its
537 : : successor, in which case that successor will be scheduled later.)
538 : :
539 : : The move is part of a chain that satisfies register dependencies
540 : : between a producing ddg node and various consuming ddg nodes.
541 : : If some of these dependencies have a distance of 1 (meaning that
542 : : the use is upward-exposed) then DISTANCE1_USES is nonnull and
543 : : contains the set of uses with distance-1 dependencies.
544 : : DISTANCE1_USES is null otherwise.
545 : :
546 : : MUST_FOLLOW is a scratch bitmap that is big enough to hold
547 : : all current ps_insn ids.
548 : :
549 : : Return true on success. */
550 : : static bool
551 : 0 : schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
552 : : sbitmap distance1_uses, sbitmap must_follow)
553 : : {
554 : 0 : unsigned int u;
555 : 0 : int this_time, this_distance, this_start, this_end, this_latency;
556 : 0 : int start, end, c, ii;
557 : 0 : sbitmap_iterator sbi;
558 : 0 : ps_reg_move_info *move;
559 : 0 : rtx_insn *this_insn;
560 : 0 : ps_insn_ptr psi;
561 : :
562 : 0 : move = ps_reg_move (ps, i_reg_move);
563 : 0 : ii = ps->ii;
564 : 0 : if (dump_file)
565 : : {
566 : 0 : fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
567 : 0 : ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
568 : : PS_MIN_CYCLE (ps));
569 : 0 : print_rtl_single (dump_file, move->insn);
570 : 0 : fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
571 : 0 : fprintf (dump_file, "=========== =========== =====\n");
572 : : }
573 : :
574 : 0 : start = INT_MIN;
575 : 0 : end = INT_MAX;
576 : :
577 : : /* For dependencies of distance 1 between a producer ddg node A
578 : : and consumer ddg node B, we have a chain of dependencies:
579 : :
580 : : A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
581 : :
582 : : where Mi is the ith move. For dependencies of distance 0 between
583 : : a producer ddg node A and consumer ddg node C, we have a chain of
584 : : dependencies:
585 : :
586 : : A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
587 : :
588 : : where Mi' occupies the same position as Mi but occurs a stage later.
589 : : We can only schedule each move once, so if we have both types of
590 : : chain, we model the second as:
591 : :
592 : : A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
593 : :
594 : : First handle the dependencies between the previously-scheduled
595 : : predecessor and the move. */
596 : 0 : this_insn = ps_rtl_insn (ps, move->def);
597 : 0 : this_latency = insn_latency (this_insn, move->insn);
598 : 0 : this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
599 : 0 : this_time = SCHED_TIME (move->def) - this_distance * ii;
600 : 0 : this_start = this_time + this_latency;
601 : 0 : this_end = this_time + ii;
602 : 0 : if (dump_file)
603 : 0 : fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
604 : 0 : this_start, this_end, SCHED_TIME (move->def),
605 : 0 : INSN_UID (this_insn), this_latency, this_distance,
606 : 0 : INSN_UID (move->insn));
607 : :
608 : 0 : if (start < this_start)
609 : : start = this_start;
610 : 0 : if (end > this_end)
611 : : end = this_end;
612 : :
613 : : /* Handle the dependencies between the move and previously-scheduled
614 : : successors. */
615 : 0 : EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
616 : : {
617 : 0 : this_insn = ps_rtl_insn (ps, u);
618 : 0 : this_latency = insn_latency (move->insn, this_insn);
619 : 0 : if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
620 : : this_distance = -1;
621 : : else
622 : : this_distance = 0;
623 : 0 : this_time = SCHED_TIME (u) + this_distance * ii;
624 : 0 : this_start = this_time - ii;
625 : 0 : this_end = this_time - this_latency;
626 : 0 : if (dump_file)
627 : 0 : fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
628 : 0 : this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
629 : 0 : this_latency, this_distance, INSN_UID (this_insn));
630 : :
631 : 0 : if (start < this_start)
632 : : start = this_start;
633 : 0 : if (end > this_end)
634 : : end = this_end;
635 : : }
636 : :
637 : 0 : if (dump_file)
638 : : {
639 : 0 : fprintf (dump_file, "----------- ----------- -----\n");
640 : 0 : fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
641 : : }
642 : :
643 : 0 : bitmap_clear (must_follow);
644 : 0 : bitmap_set_bit (must_follow, move->def);
645 : :
646 : 0 : start = MAX (start, end - (ii - 1));
647 : 0 : for (c = end; c >= start; c--)
648 : : {
649 : 0 : psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
650 : : move->uses, must_follow);
651 : 0 : if (psi)
652 : : {
653 : 0 : update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
654 : 0 : if (dump_file)
655 : 0 : fprintf (dump_file, "\nScheduled register move INSN %d at"
656 : 0 : " time %d, row %d\n\n", INSN_UID (move->insn), c,
657 : 0 : SCHED_ROW (i_reg_move));
658 : 0 : return true;
659 : : }
660 : : }
661 : :
662 : 0 : if (dump_file)
663 : 0 : fprintf (dump_file, "\nNo available slot\n\n");
664 : :
665 : : return false;
666 : : }
667 : :
668 : : /*
669 : : Breaking intra-loop register anti-dependences:
670 : : Each intra-loop register anti-dependence implies a cross-iteration true
671 : : dependence of distance 1. Therefore, we can remove such false dependencies
672 : : and figure out if the partial schedule broke them by checking if (for a
673 : : true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
674 : : if so generate a register move. The number of such moves is equal to:
675 : : SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
676 : : nreg_moves = ----------------------------------- + 1 - { dependence.
677 : : ii { 1 if not.
678 : : */
679 : : static bool
680 : 0 : schedule_reg_moves (partial_schedule_ptr ps)
681 : : {
682 : 0 : ddg_ptr g = ps->g;
683 : 0 : int ii = ps->ii;
684 : 0 : int i;
685 : :
686 : 0 : for (i = 0; i < g->num_nodes; i++)
687 : : {
688 : 0 : ddg_node_ptr u = &g->nodes[i];
689 : 0 : ddg_edge_ptr e;
690 : 0 : int nreg_moves = 0, i_reg_move;
691 : 0 : rtx prev_reg, old_reg;
692 : 0 : int first_move;
693 : 0 : int distances[2];
694 : 0 : sbitmap distance1_uses;
695 : 0 : rtx set = single_set (u->insn);
696 : :
697 : : /* Skip instructions that do not set a register. */
698 : 0 : if (set && !REG_P (SET_DEST (set)))
699 : 0 : continue;
700 : :
701 : : /* Compute the number of reg_moves needed for u, by looking at life
702 : : ranges started at u (excluding self-loops). */
703 : 0 : distances[0] = distances[1] = false;
704 : 0 : for (e = u->out; e; e = e->next_out)
705 : 0 : if (e->type == TRUE_DEP && e->dest != e->src)
706 : : {
707 : 0 : int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
708 : 0 : - SCHED_TIME (e->src->cuid)) / ii;
709 : :
710 : 0 : if (e->distance == 1)
711 : 0 : nreg_moves4e = (SCHED_TIME (e->dest->cuid)
712 : 0 : - SCHED_TIME (e->src->cuid) + ii) / ii;
713 : :
714 : : /* If dest precedes src in the schedule of the kernel, then dest
715 : : will read before src writes and we can save one reg_copy. */
716 : 0 : if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
717 : 0 : && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
718 : 0 : nreg_moves4e--;
719 : :
720 : 0 : if (nreg_moves4e >= 1)
721 : : {
722 : : /* !single_set instructions are not supported yet and
723 : : thus we do not except to encounter them in the loop
724 : : except from the doloop part. For the latter case
725 : : we assume no regmoves are generated as the doloop
726 : : instructions are tied to the branch with an edge. */
727 : 0 : gcc_assert (set);
728 : : /* If the instruction contains auto-inc register then
729 : : validate that the regmov is being generated for the
730 : : target regsiter rather then the inc'ed register. */
731 : 0 : gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
732 : : }
733 : :
734 : 0 : if (nreg_moves4e)
735 : : {
736 : 0 : gcc_assert (e->distance < 2);
737 : 0 : distances[e->distance] = true;
738 : : }
739 : 0 : nreg_moves = MAX (nreg_moves, nreg_moves4e);
740 : : }
741 : :
742 : 0 : if (nreg_moves == 0)
743 : 0 : continue;
744 : :
745 : : /* Create NREG_MOVES register moves. */
746 : 0 : first_move = ps->reg_moves.length ();
747 : 0 : ps->reg_moves.safe_grow_cleared (first_move + nreg_moves, true);
748 : 0 : extend_node_sched_params (ps);
749 : :
750 : : /* Record the moves associated with this node. */
751 : 0 : first_move += ps->g->num_nodes;
752 : :
753 : : /* Generate each move. */
754 : 0 : old_reg = prev_reg = SET_DEST (set);
755 : 0 : if (HARD_REGISTER_P (old_reg))
756 : 0 : return false;
757 : :
758 : 0 : for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
759 : : {
760 : 0 : ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
761 : :
762 : 0 : move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
763 : 0 : move->uses = sbitmap_alloc (first_move + nreg_moves);
764 : 0 : move->old_reg = old_reg;
765 : 0 : move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
766 : 0 : move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
767 : 0 : move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
768 : 0 : bitmap_clear (move->uses);
769 : :
770 : 0 : prev_reg = move->new_reg;
771 : : }
772 : :
773 : 0 : distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
774 : :
775 : 0 : if (distance1_uses)
776 : 0 : bitmap_clear (distance1_uses);
777 : :
778 : : /* Every use of the register defined by node may require a different
779 : : copy of this register, depending on the time the use is scheduled.
780 : : Record which uses require which move results. */
781 : 0 : for (e = u->out; e; e = e->next_out)
782 : 0 : if (e->type == TRUE_DEP && e->dest != e->src)
783 : : {
784 : 0 : int dest_copy = (SCHED_TIME (e->dest->cuid)
785 : 0 : - SCHED_TIME (e->src->cuid)) / ii;
786 : :
787 : 0 : if (e->distance == 1)
788 : 0 : dest_copy = (SCHED_TIME (e->dest->cuid)
789 : 0 : - SCHED_TIME (e->src->cuid) + ii) / ii;
790 : :
791 : 0 : if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
792 : 0 : && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
793 : 0 : dest_copy--;
794 : :
795 : 0 : if (dest_copy)
796 : : {
797 : 0 : ps_reg_move_info *move;
798 : :
799 : 0 : move = ps_reg_move (ps, first_move + dest_copy - 1);
800 : 0 : bitmap_set_bit (move->uses, e->dest->cuid);
801 : 0 : if (e->distance == 1)
802 : 0 : bitmap_set_bit (distance1_uses, e->dest->cuid);
803 : : }
804 : : }
805 : :
806 : 0 : auto_sbitmap must_follow (first_move + nreg_moves);
807 : 0 : for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
808 : 0 : if (!schedule_reg_move (ps, first_move + i_reg_move,
809 : : distance1_uses, must_follow))
810 : : break;
811 : 0 : if (distance1_uses)
812 : 0 : sbitmap_free (distance1_uses);
813 : 0 : if (i_reg_move < nreg_moves)
814 : 0 : return false;
815 : 0 : }
816 : : return true;
817 : : }
818 : :
819 : : /* Emit the moves associated with PS. Apply the substitutions
820 : : associated with them. */
821 : : static void
822 : 0 : apply_reg_moves (partial_schedule_ptr ps)
823 : : {
824 : 0 : ps_reg_move_info *move;
825 : 0 : int i;
826 : :
827 : 0 : FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
828 : : {
829 : 0 : unsigned int i_use;
830 : 0 : sbitmap_iterator sbi;
831 : :
832 : 0 : EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
833 : : {
834 : 0 : replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
835 : 0 : df_insn_rescan (ps->g->nodes[i_use].insn);
836 : : }
837 : : }
838 : 0 : }
839 : :
840 : : /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
841 : : SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT
842 : : will move to cycle zero. */
843 : : static void
844 : 0 : reset_sched_times (partial_schedule_ptr ps, int amount)
845 : : {
846 : 0 : int row;
847 : 0 : int ii = ps->ii;
848 : 0 : ps_insn_ptr crr_insn;
849 : :
850 : 0 : for (row = 0; row < ii; row++)
851 : 0 : for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
852 : : {
853 : 0 : int u = crr_insn->id;
854 : 0 : int normalized_time = SCHED_TIME (u) - amount;
855 : 0 : int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
856 : :
857 : 0 : if (dump_file)
858 : : {
859 : : /* Print the scheduling times after the rotation. */
860 : 0 : rtx_insn *insn = ps_rtl_insn (ps, u);
861 : :
862 : 0 : fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
863 : : "crr_insn->cycle=%d, min_cycle=%d", u,
864 : 0 : INSN_UID (insn), normalized_time, new_min_cycle);
865 : 0 : if (JUMP_P (insn))
866 : 0 : fprintf (dump_file, " (branch)");
867 : 0 : fprintf (dump_file, "\n");
868 : : }
869 : :
870 : 0 : gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
871 : 0 : gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
872 : :
873 : 0 : crr_insn->cycle = normalized_time;
874 : 0 : update_node_sched_params (u, ii, normalized_time, new_min_cycle);
875 : : }
876 : 0 : }
877 : :
878 : : /* Permute the insns according to their order in PS, from row 0 to
879 : : row ii-1, and position them right before LAST. This schedules
880 : : the insns of the loop kernel. */
881 : : static void
882 : 0 : permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
883 : : {
884 : 0 : int ii = ps->ii;
885 : 0 : int row;
886 : 0 : ps_insn_ptr ps_ij;
887 : :
888 : 0 : for (row = 0; row < ii ; row++)
889 : 0 : for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
890 : : {
891 : 0 : rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
892 : :
893 : 0 : if (PREV_INSN (last) != insn)
894 : : {
895 : 0 : if (ps_ij->id < ps->g->num_nodes)
896 : 0 : reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
897 : : PREV_INSN (last));
898 : : else
899 : 0 : add_insn_before (insn, last, NULL);
900 : : }
901 : : }
902 : 0 : }
903 : :
904 : : /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
905 : : respectively only if cycle C falls on the border of the scheduling
906 : : window boundaries marked by START and END cycles. STEP is the
907 : : direction of the window. */
908 : : static inline void
909 : 0 : set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
910 : : sbitmap *tmp_precede, sbitmap must_precede, int c,
911 : : int start, int end, int step)
912 : : {
913 : 0 : *tmp_precede = NULL;
914 : 0 : *tmp_follow = NULL;
915 : :
916 : 0 : if (c == start)
917 : : {
918 : 0 : if (step == 1)
919 : : *tmp_precede = must_precede;
920 : : else /* step == -1. */
921 : 0 : *tmp_follow = must_follow;
922 : : }
923 : 0 : if (c == end - step)
924 : : {
925 : 0 : if (step == 1)
926 : : *tmp_follow = must_follow;
927 : : else /* step == -1. */
928 : 0 : *tmp_precede = must_precede;
929 : : }
930 : :
931 : : }
932 : :
933 : : /* Return True if the branch can be moved to row ii-1 while
934 : : normalizing the partial schedule PS to start from cycle zero and thus
935 : : optimize the SC. Otherwise return False. */
936 : : static bool
937 : 0 : optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
938 : : {
939 : 0 : int amount = PS_MIN_CYCLE (ps);
940 : 0 : int start, end, step;
941 : 0 : int ii = ps->ii;
942 : 0 : bool ok = false;
943 : 0 : int stage_count, stage_count_curr;
944 : :
945 : : /* Compare the SC after normalization and SC after bringing the branch
946 : : to row ii-1. If they are equal just bail out. */
947 : 0 : stage_count = calculate_stage_count (ps, amount);
948 : 0 : stage_count_curr =
949 : 0 : calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
950 : :
951 : 0 : if (stage_count == stage_count_curr)
952 : : {
953 : 0 : if (dump_file)
954 : 0 : fprintf (dump_file, "SMS SC already optimized.\n");
955 : :
956 : 0 : return false;
957 : : }
958 : :
959 : 0 : if (dump_file)
960 : : {
961 : 0 : fprintf (dump_file, "SMS Trying to optimize branch location\n");
962 : 0 : fprintf (dump_file, "SMS partial schedule before trial:\n");
963 : 0 : print_partial_schedule (ps, dump_file);
964 : : }
965 : :
966 : : /* First, normalize the partial scheduling. */
967 : 0 : reset_sched_times (ps, amount);
968 : 0 : rotate_partial_schedule (ps, amount);
969 : 0 : if (dump_file)
970 : : {
971 : 0 : fprintf (dump_file,
972 : : "SMS partial schedule after normalization (ii, %d, SC %d):\n",
973 : : ii, stage_count);
974 : 0 : print_partial_schedule (ps, dump_file);
975 : : }
976 : :
977 : 0 : if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
978 : : return true;
979 : :
980 : 0 : auto_sbitmap sched_nodes (g->num_nodes);
981 : 0 : bitmap_ones (sched_nodes);
982 : :
983 : : /* Calculate the new placement of the branch. It should be in row
984 : : ii-1 and fall into it's scheduling window. */
985 : 0 : if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
986 : : &step, &end) == 0)
987 : : {
988 : 0 : bool success;
989 : 0 : ps_insn_ptr next_ps_i;
990 : 0 : int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
991 : 0 : int row = SMODULO (branch_cycle, ps->ii);
992 : 0 : int num_splits = 0;
993 : 0 : sbitmap tmp_precede, tmp_follow;
994 : 0 : int min_cycle, c;
995 : :
996 : 0 : if (dump_file)
997 : 0 : fprintf (dump_file, "\nTrying to schedule node %d "
998 : : "INSN = %d in (%d .. %d) step %d\n",
999 : : g->closing_branch->cuid,
1000 : 0 : (INSN_UID (g->closing_branch->insn)), start, end, step);
1001 : :
1002 : 0 : gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
1003 : 0 : if (step == 1)
1004 : : {
1005 : 0 : c = start + ii - SMODULO (start, ii) - 1;
1006 : 0 : gcc_assert (c >= start);
1007 : 0 : if (c >= end)
1008 : : {
1009 : 0 : if (dump_file)
1010 : 0 : fprintf (dump_file,
1011 : : "SMS failed to schedule branch at cycle: %d\n", c);
1012 : 0 : return false;
1013 : : }
1014 : : }
1015 : : else
1016 : : {
1017 : 0 : c = start - SMODULO (start, ii) - 1;
1018 : 0 : gcc_assert (c <= start);
1019 : :
1020 : 0 : if (c <= end)
1021 : : {
1022 : 0 : if (dump_file)
1023 : 0 : fprintf (dump_file,
1024 : : "SMS failed to schedule branch at cycle: %d\n", c);
1025 : 0 : return false;
1026 : : }
1027 : : }
1028 : :
1029 : 0 : auto_sbitmap must_precede (g->num_nodes);
1030 : 0 : auto_sbitmap must_follow (g->num_nodes);
1031 : :
1032 : : /* Try to schedule the branch is it's new cycle. */
1033 : 0 : calculate_must_precede_follow (g->closing_branch, start, end,
1034 : : step, ii, sched_nodes,
1035 : : must_precede, must_follow);
1036 : :
1037 : 0 : set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1038 : : must_precede, c, start, end, step);
1039 : :
1040 : : /* Find the element in the partial schedule related to the closing
1041 : : branch so we can remove it from it's current cycle. */
1042 : 0 : for (next_ps_i = ps->rows[row];
1043 : 0 : next_ps_i; next_ps_i = next_ps_i->next_in_row)
1044 : 0 : if (next_ps_i->id == g->closing_branch->cuid)
1045 : : break;
1046 : :
1047 : 0 : min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1048 : 0 : remove_node_from_ps (ps, next_ps_i);
1049 : 0 : success =
1050 : 0 : try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1051 : : sched_nodes, &num_splits,
1052 : : tmp_precede, tmp_follow);
1053 : 0 : gcc_assert (num_splits == 0);
1054 : 0 : if (!success)
1055 : : {
1056 : 0 : if (dump_file)
1057 : 0 : fprintf (dump_file,
1058 : : "SMS failed to schedule branch at cycle: %d, "
1059 : : "bringing it back to cycle %d\n", c, branch_cycle);
1060 : :
1061 : : /* The branch was failed to be placed in row ii - 1.
1062 : : Put it back in it's original place in the partial
1063 : : schedualing. */
1064 : 0 : set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1065 : : must_precede, branch_cycle, start, end,
1066 : : step);
1067 : 0 : success =
1068 : 0 : try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1069 : : branch_cycle, sched_nodes,
1070 : : &num_splits, tmp_precede,
1071 : : tmp_follow);
1072 : 0 : gcc_assert (success && (num_splits == 0));
1073 : : ok = false;
1074 : : }
1075 : : else
1076 : : {
1077 : : /* The branch is placed in row ii - 1. */
1078 : 0 : if (dump_file)
1079 : 0 : fprintf (dump_file,
1080 : : "SMS success in moving branch to cycle %d\n", c);
1081 : :
1082 : 0 : update_node_sched_params (g->closing_branch->cuid, ii, c,
1083 : : PS_MIN_CYCLE (ps));
1084 : 0 : ok = true;
1085 : : }
1086 : :
1087 : : /* This might have been added to a new first stage. */
1088 : 0 : if (PS_MIN_CYCLE (ps) < min_cycle)
1089 : 0 : reset_sched_times (ps, 0);
1090 : 0 : }
1091 : :
1092 : : return ok;
1093 : 0 : }
1094 : :
1095 : : static void
1096 : 0 : duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1097 : : int to_stage, rtx count_reg, class loop *loop)
1098 : : {
1099 : 0 : int row;
1100 : 0 : ps_insn_ptr ps_ij;
1101 : 0 : copy_bb_data id;
1102 : :
1103 : 0 : for (row = 0; row < ps->ii; row++)
1104 : 0 : for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1105 : : {
1106 : 0 : int u = ps_ij->id;
1107 : 0 : int first_u, last_u;
1108 : 0 : rtx_insn *u_insn;
1109 : :
1110 : : /* Do not duplicate any insn which refers to count_reg as it
1111 : : belongs to the control part.
1112 : : The closing branch is scheduled as well and thus should
1113 : : be ignored.
1114 : : TODO: This should be done by analyzing the control part of
1115 : : the loop. */
1116 : 0 : u_insn = ps_rtl_insn (ps, u);
1117 : 0 : if (reg_mentioned_p (count_reg, u_insn)
1118 : 0 : || JUMP_P (u_insn))
1119 : 0 : continue;
1120 : :
1121 : 0 : first_u = SCHED_STAGE (u);
1122 : 0 : last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1123 : 0 : if (from_stage <= last_u && to_stage >= first_u)
1124 : : {
1125 : 0 : if (u < ps->g->num_nodes)
1126 : 0 : duplicate_insn_chain (ps_first_note (ps, u), u_insn,
1127 : : loop, &id);
1128 : : else
1129 : 0 : emit_insn (copy_rtx (PATTERN (u_insn)));
1130 : : }
1131 : : }
1132 : 0 : }
1133 : :
1134 : :
1135 : : /* Generate the instructions (including reg_moves) for prolog & epilog. */
1136 : : static void
1137 : 0 : generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
1138 : : rtx count_reg, bool adjust_init)
1139 : : {
1140 : 0 : int i;
1141 : 0 : int last_stage = PS_STAGE_COUNT (ps) - 1;
1142 : 0 : edge e;
1143 : :
1144 : : /* Generate the prolog, inserting its insns on the loop-entry edge. */
1145 : 0 : start_sequence ();
1146 : :
1147 : 0 : if (adjust_init)
1148 : : {
1149 : : /* Generate instructions at the beginning of the prolog to
1150 : : adjust the loop count by STAGE_COUNT. If loop count is constant
1151 : : and it not used anywhere in prologue, this constant is adjusted by
1152 : : STAGE_COUNT outside of generate_prolog_epilog function. */
1153 : 0 : rtx sub_reg = NULL_RTX;
1154 : :
1155 : 0 : sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
1156 : 0 : gen_int_mode (last_stage,
1157 : 0 : GET_MODE (count_reg)),
1158 : : count_reg, 1, OPTAB_DIRECT);
1159 : 0 : gcc_assert (REG_P (sub_reg));
1160 : 0 : if (REGNO (sub_reg) != REGNO (count_reg))
1161 : 0 : emit_move_insn (count_reg, sub_reg);
1162 : : }
1163 : :
1164 : 0 : for (i = 0; i < last_stage; i++)
1165 : 0 : duplicate_insns_of_cycles (ps, 0, i, count_reg, loop);
1166 : :
1167 : : /* Put the prolog on the entry edge. */
1168 : 0 : e = loop_preheader_edge (loop);
1169 : 0 : split_edge_and_insert (e, get_insns ());
1170 : 0 : if (!flag_resched_modulo_sched)
1171 : 0 : e->dest->flags |= BB_DISABLE_SCHEDULE;
1172 : :
1173 : 0 : end_sequence ();
1174 : :
1175 : : /* Generate the epilog, inserting its insns on the loop-exit edge. */
1176 : 0 : start_sequence ();
1177 : :
1178 : 0 : for (i = 0; i < last_stage; i++)
1179 : 0 : duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg, loop);
1180 : :
1181 : : /* Put the epilogue on the exit edge. */
1182 : 0 : gcc_assert (single_exit (loop));
1183 : 0 : e = single_exit (loop);
1184 : 0 : split_edge_and_insert (e, get_insns ());
1185 : 0 : if (!flag_resched_modulo_sched)
1186 : 0 : e->dest->flags |= BB_DISABLE_SCHEDULE;
1187 : :
1188 : 0 : end_sequence ();
1189 : 0 : }
1190 : :
1191 : : /* Mark LOOP as software pipelined so the later
1192 : : scheduling passes don't touch it. */
1193 : : static void
1194 : 0 : mark_loop_unsched (class loop *loop)
1195 : : {
1196 : 0 : unsigned i;
1197 : 0 : basic_block *bbs = get_loop_body (loop);
1198 : :
1199 : 0 : for (i = 0; i < loop->num_nodes; i++)
1200 : 0 : bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1201 : :
1202 : 0 : free (bbs);
1203 : 0 : }
1204 : :
1205 : : /* Return true if all the BBs of the loop are empty except the
1206 : : loop header. */
1207 : : static bool
1208 : 229 : loop_single_full_bb_p (class loop *loop)
1209 : : {
1210 : 229 : unsigned i;
1211 : 229 : basic_block *bbs = get_loop_body (loop);
1212 : :
1213 : 458 : for (i = 0; i < loop->num_nodes ; i++)
1214 : : {
1215 : 258 : rtx_insn *head, *tail;
1216 : 258 : bool empty_bb = true;
1217 : :
1218 : 258 : if (bbs[i] == loop->header)
1219 : 229 : continue;
1220 : :
1221 : : /* Make sure that basic blocks other than the header
1222 : : have only notes labels or jumps. */
1223 : 29 : get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1224 : 64 : for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1225 : : {
1226 : 35 : if (NOTE_P (head) || LABEL_P (head)
1227 : 35 : || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1228 : 6 : continue;
1229 : : empty_bb = false;
1230 : : break;
1231 : : }
1232 : :
1233 : 29 : if (! empty_bb)
1234 : : {
1235 : 29 : free (bbs);
1236 : 29 : return false;
1237 : : }
1238 : : }
1239 : 200 : free (bbs);
1240 : 200 : return true;
1241 : : }
1242 : :
1243 : : /* Dump file:line from INSN's location info to dump_file. */
1244 : :
1245 : : static void
1246 : 22 : dump_insn_location (rtx_insn *insn)
1247 : : {
1248 : 22 : if (dump_file && INSN_HAS_LOCATION (insn))
1249 : : {
1250 : 15 : expanded_location xloc = insn_location (insn);
1251 : 15 : fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
1252 : : }
1253 : 22 : }
1254 : :
1255 : : /* A simple loop from SMS point of view; it is a loop that is composed of
1256 : : either a single basic block or two BBs - a header and a latch. */
1257 : : #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
1258 : : && (EDGE_COUNT (loop->latch->preds) == 1) \
1259 : : && (EDGE_COUNT (loop->latch->succs) == 1))
1260 : :
1261 : : /* Return true if the loop is in its canonical form and false if not.
1262 : : i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
1263 : : static bool
1264 : 175 : loop_canon_p (class loop *loop)
1265 : : {
1266 : :
1267 : 175 : if (loop->inner || !loop_outer (loop))
1268 : : {
1269 : 33 : if (dump_file)
1270 : 3 : fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1271 : 33 : return false;
1272 : : }
1273 : :
1274 : 142 : if (!single_exit (loop))
1275 : : {
1276 : 13 : if (dump_file)
1277 : : {
1278 : 3 : rtx_insn *insn = BB_END (loop->header);
1279 : :
1280 : 3 : fprintf (dump_file, "SMS loop many exits");
1281 : 3 : dump_insn_location (insn);
1282 : 3 : fprintf (dump_file, "\n");
1283 : : }
1284 : 13 : return false;
1285 : : }
1286 : :
1287 : 129 : if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1288 : : {
1289 : 5 : if (dump_file)
1290 : : {
1291 : 0 : rtx_insn *insn = BB_END (loop->header);
1292 : :
1293 : 0 : fprintf (dump_file, "SMS loop many BBs.");
1294 : 0 : dump_insn_location (insn);
1295 : 0 : fprintf (dump_file, "\n");
1296 : : }
1297 : 5 : return false;
1298 : : }
1299 : :
1300 : : return true;
1301 : : }
1302 : :
1303 : : /* If there are more than one entry for the loop,
1304 : : make it one by splitting the first entry edge and
1305 : : redirecting the others to the new BB. */
1306 : : static void
1307 : 0 : canon_loop (class loop *loop)
1308 : : {
1309 : 0 : edge e;
1310 : 0 : edge_iterator i;
1311 : :
1312 : : /* Avoid annoying special cases of edges going to exit
1313 : : block. */
1314 : 0 : FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1315 : 0 : if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1316 : 0 : split_edge (e);
1317 : :
1318 : 0 : if (loop->latch == loop->header
1319 : 0 : || EDGE_COUNT (loop->latch->succs) > 1)
1320 : : {
1321 : 0 : FOR_EACH_EDGE (e, i, loop->header->preds)
1322 : 0 : if (e->src == loop->latch)
1323 : : break;
1324 : 0 : split_edge (e);
1325 : : }
1326 : 0 : }
1327 : :
1328 : : /* Setup infos. */
1329 : : static void
1330 : 104 : setup_sched_infos (void)
1331 : : {
1332 : 104 : memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1333 : : sizeof (sms_common_sched_info));
1334 : 104 : sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1335 : 104 : common_sched_info = &sms_common_sched_info;
1336 : :
1337 : 104 : sched_deps_info = &sms_sched_deps_info;
1338 : 104 : current_sched_info = &sms_sched_info;
1339 : 104 : }
1340 : :
1341 : : /* Probability in % that the sms-ed loop rolls enough so that optimized
1342 : : version may be entered. Just a guess. */
1343 : : #define PROB_SMS_ENOUGH_ITERATIONS 80
1344 : :
1345 : : /* Main entry point, perform SMS scheduling on the loops of the function
1346 : : that consist of single basic blocks. */
1347 : : static void
1348 : 154 : sms_schedule (void)
1349 : : {
1350 : 154 : rtx_insn *insn;
1351 : 154 : ddg_ptr *g_arr, g;
1352 : 154 : int * node_order;
1353 : 154 : int maxii, max_asap;
1354 : 154 : partial_schedule_ptr ps;
1355 : 154 : basic_block bb = NULL;
1356 : 154 : basic_block condition_bb = NULL;
1357 : 154 : edge latch_edge;
1358 : 154 : HOST_WIDE_INT trip_count, max_trip_count;
1359 : 154 : HARD_REG_SET prohibited_regs;
1360 : :
1361 : 154 : loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1362 : : | LOOPS_HAVE_RECORDED_EXITS);
1363 : 308 : if (number_of_loops (cfun) <= 1)
1364 : : {
1365 : 50 : loop_optimizer_finalize ();
1366 : 50 : return; /* There are no loops to schedule. */
1367 : : }
1368 : :
1369 : : /* Initialize issue_rate. */
1370 : 104 : if (targetm.sched.issue_rate)
1371 : : {
1372 : 104 : int temp = reload_completed;
1373 : :
1374 : 104 : reload_completed = 1;
1375 : 104 : issue_rate = targetm.sched.issue_rate ();
1376 : 104 : reload_completed = temp;
1377 : : }
1378 : : else
1379 : 0 : issue_rate = 1;
1380 : :
1381 : : /* Initialize the scheduler. */
1382 : 104 : setup_sched_infos ();
1383 : 104 : haifa_sched_init ();
1384 : :
1385 : : /* Allocate memory to hold the DDG array one entry for each loop.
1386 : : We use loop->num as index into this array. */
1387 : 104 : g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
1388 : :
1389 : 208 : REG_SET_TO_HARD_REG_SET (prohibited_regs, &df->regular_block_artificial_uses);
1390 : :
1391 : 104 : if (dump_file)
1392 : : {
1393 : 12 : fprintf (dump_file, "\n\nSMS analysis phase\n");
1394 : 12 : fprintf (dump_file, "===================\n\n");
1395 : : }
1396 : :
1397 : : /* Build DDGs for all the relevant loops and hold them in G_ARR
1398 : : indexed by the loop index. */
1399 : 487 : for (auto loop : loops_list (cfun, 0))
1400 : : {
1401 : 175 : rtx_insn *head, *tail;
1402 : 175 : rtx count_reg;
1403 : :
1404 : : /* For debugging. */
1405 : 175 : if (dbg_cnt (sms_sched_loop) == false)
1406 : : {
1407 : 0 : if (dump_file)
1408 : 0 : fprintf (dump_file, "SMS reached max limit... \n");
1409 : :
1410 : 0 : break;
1411 : : }
1412 : :
1413 : 175 : if (dump_file)
1414 : : {
1415 : 19 : rtx_insn *insn = BB_END (loop->header);
1416 : :
1417 : 19 : fprintf (dump_file, "SMS loop num: %d", loop->num);
1418 : 19 : dump_insn_location (insn);
1419 : 19 : fprintf (dump_file, "\n");
1420 : : }
1421 : :
1422 : 175 : if (! loop_canon_p (loop))
1423 : 226 : continue;
1424 : :
1425 : 124 : if (! loop_single_full_bb_p (loop))
1426 : : {
1427 : 24 : if (dump_file)
1428 : 0 : fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1429 : 24 : continue;
1430 : : }
1431 : :
1432 : 100 : bb = loop->header;
1433 : :
1434 : 100 : get_ebb_head_tail (bb, bb, &head, &tail);
1435 : 100 : latch_edge = loop_latch_edge (loop);
1436 : 100 : gcc_assert (single_exit (loop));
1437 : 100 : trip_count = get_estimated_loop_iterations_int (loop);
1438 : 100 : max_trip_count = get_max_loop_iterations_int (loop);
1439 : :
1440 : : /* Perform SMS only on loops that their average count is above threshold. */
1441 : :
1442 : 100 : if (latch_edge->count () > profile_count::zero ()
1443 : 200 : && (latch_edge->count ()
1444 : 200 : < (single_exit (loop)->count ()
1445 : 200 : * param_sms_loop_average_count_threshold)))
1446 : : {
1447 : 0 : if (dump_file)
1448 : : {
1449 : 0 : dump_insn_location (tail);
1450 : 0 : fprintf (dump_file, "\nSMS single-bb-loop\n");
1451 : 0 : if (profile_info && flag_branch_probabilities)
1452 : : {
1453 : 0 : fprintf (dump_file, "SMS loop-count ");
1454 : 0 : fprintf (dump_file, "%" PRId64,
1455 : 0 : (int64_t) bb->count.to_gcov_type ());
1456 : 0 : fprintf (dump_file, "\n");
1457 : 0 : fprintf (dump_file, "SMS trip-count ");
1458 : 0 : fprintf (dump_file, "%" PRId64 "max %" PRId64,
1459 : : (int64_t) trip_count, (int64_t) max_trip_count);
1460 : 0 : fprintf (dump_file, "\n");
1461 : : }
1462 : : }
1463 : 0 : continue;
1464 : : }
1465 : :
1466 : : /* Make sure this is a doloop. */
1467 : 100 : if (!(count_reg = doloop_register_get (head, tail)))
1468 : : {
1469 : 100 : if (dump_file)
1470 : 13 : fprintf (dump_file, "SMS doloop_register_get failed\n");
1471 : 100 : continue;
1472 : : }
1473 : :
1474 : : /* Don't handle BBs with calls or barriers
1475 : : or !single_set with the exception of do-loop control part insns.
1476 : : ??? Should handle insns defining subregs. */
1477 : 0 : for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1478 : : {
1479 : 0 : if (INSN_P (insn))
1480 : : {
1481 : : HARD_REG_SET regs;
1482 : 0 : CLEAR_HARD_REG_SET (regs);
1483 : 0 : note_stores (insn, record_hard_reg_sets, ®s);
1484 : 0 : if (hard_reg_set_intersect_p (regs, prohibited_regs))
1485 : : break;
1486 : : }
1487 : :
1488 : 0 : if (CALL_P (insn)
1489 : 0 : || BARRIER_P (insn)
1490 : 0 : || (INSN_P (insn) && single_set (insn)
1491 : 0 : && GET_CODE (SET_DEST (single_set (insn))) == SUBREG)
1492 : : /* Not a single set. */
1493 : 0 : || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1494 : 0 : && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1495 : : /* But non-single-set allowed in one special case. */
1496 : 0 : && (insn != prev_nondebug_insn (tail)
1497 : 0 : || !reg_mentioned_p (count_reg, insn))))
1498 : : break;
1499 : : }
1500 : :
1501 : 0 : if (insn != NEXT_INSN (tail))
1502 : : {
1503 : 0 : if (dump_file)
1504 : : {
1505 : 0 : if (CALL_P (insn))
1506 : 0 : fprintf (dump_file, "SMS loop-with-call\n");
1507 : 0 : else if (BARRIER_P (insn))
1508 : 0 : fprintf (dump_file, "SMS loop-with-barrier\n");
1509 : 0 : else if (INSN_P (insn) && single_set (insn)
1510 : 0 : && GET_CODE (SET_DEST (single_set (insn))) == SUBREG)
1511 : 0 : fprintf (dump_file, "SMS loop with subreg in lhs\n");
1512 : : else
1513 : 0 : fprintf (dump_file,
1514 : : "SMS loop-with-not-single-set-or-prohibited-reg\n");
1515 : :
1516 : 0 : print_rtl_single (dump_file, insn);
1517 : : }
1518 : :
1519 : 0 : continue;
1520 : : }
1521 : :
1522 : : /* Always schedule the closing branch with the rest of the
1523 : : instructions. The branch is rotated to be in row ii-1 at the
1524 : : end of the scheduling procedure to make sure it's the last
1525 : : instruction in the iteration. */
1526 : 0 : if (! (g = create_ddg (bb, 1)))
1527 : : {
1528 : 0 : if (dump_file)
1529 : 0 : fprintf (dump_file, "SMS create_ddg failed\n");
1530 : 0 : continue;
1531 : : }
1532 : :
1533 : 0 : g_arr[loop->num] = g;
1534 : 0 : if (dump_file)
1535 : 0 : fprintf (dump_file, "...OK\n");
1536 : :
1537 : 104 : }
1538 : 104 : if (dump_file)
1539 : : {
1540 : 12 : fprintf (dump_file, "\nSMS transformation phase\n");
1541 : 12 : fprintf (dump_file, "=========================\n\n");
1542 : : }
1543 : :
1544 : : /* We don't want to perform SMS on new loops - created by versioning. */
1545 : 487 : for (auto loop : loops_list (cfun, 0))
1546 : : {
1547 : 175 : rtx_insn *head, *tail;
1548 : 175 : rtx count_reg;
1549 : 175 : rtx_insn *count_init;
1550 : 175 : int mii, rec_mii, stage_count, min_cycle;
1551 : 175 : int64_t loop_count = 0;
1552 : 175 : bool opt_sc_p, adjust_inplace = false;
1553 : 175 : basic_block pre_header;
1554 : :
1555 : 175 : if (! (g = g_arr[loop->num]))
1556 : 175 : continue;
1557 : :
1558 : 0 : if (dump_file)
1559 : : {
1560 : 0 : rtx_insn *insn = BB_END (loop->header);
1561 : :
1562 : 0 : fprintf (dump_file, "SMS loop num: %d", loop->num);
1563 : 0 : dump_insn_location (insn);
1564 : 0 : fprintf (dump_file, "\n");
1565 : :
1566 : 0 : print_ddg (dump_file, g);
1567 : : }
1568 : :
1569 : 0 : get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1570 : :
1571 : 0 : latch_edge = loop_latch_edge (loop);
1572 : 0 : gcc_assert (single_exit (loop));
1573 : 0 : trip_count = get_estimated_loop_iterations_int (loop);
1574 : 0 : max_trip_count = get_max_loop_iterations_int (loop);
1575 : :
1576 : 0 : if (dump_file)
1577 : : {
1578 : 0 : dump_insn_location (tail);
1579 : 0 : fprintf (dump_file, "\nSMS single-bb-loop\n");
1580 : 0 : if (profile_info && flag_branch_probabilities)
1581 : : {
1582 : 0 : fprintf (dump_file, "SMS loop-count ");
1583 : 0 : fprintf (dump_file, "%" PRId64,
1584 : 0 : (int64_t) bb->count.to_gcov_type ());
1585 : 0 : fprintf (dump_file, "\n");
1586 : : }
1587 : 0 : fprintf (dump_file, "SMS doloop\n");
1588 : 0 : fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1589 : 0 : fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1590 : 0 : fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1591 : : }
1592 : :
1593 : :
1594 : 0 : count_reg = doloop_register_get (head, tail);
1595 : 0 : gcc_assert (count_reg);
1596 : :
1597 : 0 : pre_header = loop_preheader_edge (loop)->src;
1598 : 0 : count_init = const_iteration_count (count_reg, pre_header, &loop_count,
1599 : : &adjust_inplace);
1600 : :
1601 : 0 : if (dump_file && count_init)
1602 : : {
1603 : 0 : fprintf (dump_file, "SMS const-doloop ");
1604 : 0 : fprintf (dump_file, "%" PRId64,
1605 : : loop_count);
1606 : 0 : fprintf (dump_file, "\n");
1607 : : }
1608 : :
1609 : 0 : node_order = XNEWVEC (int, g->num_nodes);
1610 : :
1611 : 0 : mii = 1; /* Need to pass some estimate of mii. */
1612 : 0 : rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1613 : 0 : mii = MAX (res_MII (g), rec_mii);
1614 : 0 : mii = MAX (mii, 1);
1615 : 0 : maxii = MAX (max_asap, param_sms_max_ii_factor * mii);
1616 : :
1617 : 0 : if (dump_file)
1618 : 0 : fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1619 : : rec_mii, mii, maxii);
1620 : :
1621 : 0 : for (;;)
1622 : : {
1623 : 0 : set_node_sched_params (g);
1624 : :
1625 : 0 : stage_count = 0;
1626 : 0 : opt_sc_p = false;
1627 : 0 : ps = sms_schedule_by_order (g, mii, maxii, node_order);
1628 : :
1629 : 0 : if (ps)
1630 : : {
1631 : : /* Try to achieve optimized SC by normalizing the partial
1632 : : schedule (having the cycles start from cycle zero).
1633 : : The branch location must be placed in row ii-1 in the
1634 : : final scheduling. If failed, shift all instructions to
1635 : : position the branch in row ii-1. */
1636 : 0 : opt_sc_p = optimize_sc (ps, g);
1637 : 0 : if (opt_sc_p)
1638 : 0 : stage_count = calculate_stage_count (ps, 0);
1639 : : else
1640 : : {
1641 : : /* Bring the branch to cycle ii-1. */
1642 : 0 : int amount = (SCHED_TIME (g->closing_branch->cuid)
1643 : 0 : - (ps->ii - 1));
1644 : :
1645 : 0 : if (dump_file)
1646 : 0 : fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1647 : :
1648 : 0 : stage_count = calculate_stage_count (ps, amount);
1649 : : }
1650 : :
1651 : 0 : gcc_assert (stage_count >= 1);
1652 : : }
1653 : :
1654 : : /* The default value of param_sms_min_sc is 2 as stage count of
1655 : : 1 means that there is no interleaving between iterations thus
1656 : : we let the scheduling passes do the job in this case. */
1657 : 0 : if (stage_count < param_sms_min_sc
1658 : 0 : || (count_init && (loop_count <= stage_count))
1659 : 0 : || (max_trip_count >= 0 && max_trip_count <= stage_count)
1660 : 0 : || (trip_count >= 0 && trip_count <= stage_count))
1661 : : {
1662 : 0 : if (dump_file)
1663 : : {
1664 : 0 : fprintf (dump_file, "SMS failed... \n");
1665 : 0 : fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1666 : : " loop-count=", stage_count);
1667 : 0 : fprintf (dump_file, "%" PRId64, loop_count);
1668 : 0 : fprintf (dump_file, ", trip-count=");
1669 : 0 : fprintf (dump_file, "%" PRId64 "max %" PRId64,
1670 : : (int64_t) trip_count, (int64_t) max_trip_count);
1671 : 0 : fprintf (dump_file, ")\n");
1672 : : }
1673 : : break;
1674 : : }
1675 : :
1676 : 0 : if (!opt_sc_p)
1677 : : {
1678 : : /* Rotate the partial schedule to have the branch in row ii-1. */
1679 : 0 : int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1680 : :
1681 : 0 : reset_sched_times (ps, amount);
1682 : 0 : rotate_partial_schedule (ps, amount);
1683 : : }
1684 : :
1685 : 0 : set_columns_for_ps (ps);
1686 : :
1687 : 0 : min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1688 : 0 : if (!schedule_reg_moves (ps))
1689 : : {
1690 : 0 : mii = ps->ii + 1;
1691 : 0 : free_partial_schedule (ps);
1692 : 0 : continue;
1693 : : }
1694 : :
1695 : : /* Moves that handle incoming values might have been added
1696 : : to a new first stage. Bump the stage count if so.
1697 : :
1698 : : ??? Perhaps we could consider rotating the schedule here
1699 : : instead? */
1700 : 0 : if (PS_MIN_CYCLE (ps) < min_cycle)
1701 : : {
1702 : 0 : reset_sched_times (ps, 0);
1703 : 0 : stage_count++;
1704 : : }
1705 : :
1706 : : /* The stage count should now be correct without rotation. */
1707 : 0 : gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1708 : 0 : PS_STAGE_COUNT (ps) = stage_count;
1709 : :
1710 : 0 : canon_loop (loop);
1711 : :
1712 : 0 : if (dump_file)
1713 : : {
1714 : 0 : dump_insn_location (tail);
1715 : 0 : fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1716 : : ps->ii, stage_count);
1717 : 0 : print_partial_schedule (ps, dump_file);
1718 : : }
1719 : :
1720 : 0 : if (count_init)
1721 : : {
1722 : 0 : if (adjust_inplace)
1723 : : {
1724 : : /* When possible, set new iteration count of loop kernel in
1725 : : place. Otherwise, generate_prolog_epilog creates an insn
1726 : : to adjust. */
1727 : 0 : SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1728 : : - stage_count + 1);
1729 : : }
1730 : : }
1731 : : else
1732 : : {
1733 : : /* case the BCT count is not known , Do loop-versioning */
1734 : 0 : rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
1735 : : gen_int_mode (stage_count,
1736 : : GET_MODE (count_reg)));
1737 : 0 : profile_probability prob = profile_probability::guessed_always ()
1738 : 0 : .apply_scale (PROB_SMS_ENOUGH_ITERATIONS, 100);
1739 : :
1740 : 0 : loop_version (loop, comp_rtx, &condition_bb,
1741 : : prob, prob.invert (),
1742 : : prob, prob.invert (), true);
1743 : : }
1744 : :
1745 : : /* Now apply the scheduled kernel to the RTL of the loop. */
1746 : 0 : permute_partial_schedule (ps, g->closing_branch->first_note);
1747 : :
1748 : : /* Mark this loop as software pipelined so the later
1749 : : scheduling passes don't touch it. */
1750 : 0 : if (! flag_resched_modulo_sched)
1751 : 0 : mark_loop_unsched (loop);
1752 : :
1753 : : /* The life-info is not valid any more. */
1754 : 0 : df_set_bb_dirty (g->bb);
1755 : :
1756 : 0 : apply_reg_moves (ps);
1757 : 0 : if (dump_file)
1758 : 0 : print_node_sched_params (dump_file, g->num_nodes, ps);
1759 : : /* Generate prolog and epilog. */
1760 : 0 : generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
1761 : 0 : break;
1762 : 0 : }
1763 : :
1764 : 0 : free_partial_schedule (ps);
1765 : 0 : node_sched_param_vec.release ();
1766 : 0 : free (node_order);
1767 : 0 : free_ddg (g);
1768 : 104 : }
1769 : :
1770 : 104 : free (g_arr);
1771 : :
1772 : : /* Release scheduler data, needed until now because of DFA. */
1773 : 104 : haifa_sched_finish ();
1774 : 104 : loop_optimizer_finalize ();
1775 : : }
1776 : :
1777 : : /* The SMS scheduling algorithm itself
1778 : : -----------------------------------
1779 : : Input: 'O' an ordered list of insns of a loop.
1780 : : Output: A scheduling of the loop - kernel, prolog, and epilogue.
1781 : :
1782 : : 'Q' is the empty Set
1783 : : 'PS' is the partial schedule; it holds the currently scheduled nodes with
1784 : : their cycle/slot.
1785 : : 'PSP' previously scheduled predecessors.
1786 : : 'PSS' previously scheduled successors.
1787 : : 't(u)' the cycle where u is scheduled.
1788 : : 'l(u)' is the latency of u.
1789 : : 'd(v,u)' is the dependence distance from v to u.
1790 : : 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1791 : : the node ordering phase.
1792 : : 'check_hardware_resources_conflicts(u, PS, c)'
1793 : : run a trace around cycle/slot through DFA model
1794 : : to check resource conflicts involving instruction u
1795 : : at cycle c given the partial schedule PS.
1796 : : 'add_to_partial_schedule_at_time(u, PS, c)'
1797 : : Add the node/instruction u to the partial schedule
1798 : : PS at time c.
1799 : : 'calculate_register_pressure(PS)'
1800 : : Given a schedule of instructions, calculate the register
1801 : : pressure it implies. One implementation could be the
1802 : : maximum number of overlapping live ranges.
1803 : : 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1804 : : registers available in the hardware.
1805 : :
1806 : : 1. II = MII.
1807 : : 2. PS = empty list
1808 : : 3. for each node u in O in pre-computed order
1809 : : 4. if (PSP(u) != Q && PSS(u) == Q) then
1810 : : 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1811 : : 6. start = Early_start; end = Early_start + II - 1; step = 1
1812 : : 11. else if (PSP(u) == Q && PSS(u) != Q) then
1813 : : 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1814 : : 13. start = Late_start; end = Late_start - II + 1; step = -1
1815 : : 14. else if (PSP(u) != Q && PSS(u) != Q) then
1816 : : 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1817 : : 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1818 : : 17. start = Early_start;
1819 : : 18. end = min(Early_start + II - 1 , Late_start);
1820 : : 19. step = 1
1821 : : 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1822 : : 21. start = ASAP(u); end = start + II - 1; step = 1
1823 : : 22. endif
1824 : :
1825 : : 23. success = false
1826 : : 24. for (c = start ; c != end ; c += step)
1827 : : 25. if check_hardware_resources_conflicts(u, PS, c) then
1828 : : 26. add_to_partial_schedule_at_time(u, PS, c)
1829 : : 27. success = true
1830 : : 28. break
1831 : : 29. endif
1832 : : 30. endfor
1833 : : 31. if (success == false) then
1834 : : 32. II = II + 1
1835 : : 33. if (II > maxII) then
1836 : : 34. finish - failed to schedule
1837 : : 35. endif
1838 : : 36. goto 2.
1839 : : 37. endif
1840 : : 38. endfor
1841 : : 39. if (calculate_register_pressure(PS) > maxRP) then
1842 : : 40. goto 32.
1843 : : 41. endif
1844 : : 42. compute epilogue & prologue
1845 : : 43. finish - succeeded to schedule
1846 : :
1847 : : ??? The algorithm restricts the scheduling window to II cycles.
1848 : : In rare cases, it may be better to allow windows of II+1 cycles.
1849 : : The window would then start and end on the same row, but with
1850 : : different "must precede" and "must follow" requirements. */
1851 : :
1852 : : /* A threshold for the number of repeated unsuccessful attempts to insert
1853 : : an empty row, before we flush the partial schedule and start over. */
1854 : : #define MAX_SPLIT_NUM 10
1855 : : /* Given the partial schedule PS, this function calculates and returns the
1856 : : cycles in which we can schedule the node with the given index I.
1857 : : NOTE: Here we do the backtracking in SMS, in some special cases. We have
1858 : : noticed that there are several cases in which we fail to SMS the loop
1859 : : because the sched window of a node is empty due to tight data-deps. In
1860 : : such cases we want to unschedule some of the predecessors/successors
1861 : : until we get non-empty scheduling window. It returns -1 if the
1862 : : scheduling window is empty and zero otherwise. */
1863 : :
1864 : : static int
1865 : 0 : get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1866 : : sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1867 : : int *end_p)
1868 : : {
1869 : 0 : int start, step, end;
1870 : 0 : int early_start, late_start;
1871 : 0 : ddg_edge_ptr e;
1872 : 0 : auto_sbitmap psp (ps->g->num_nodes);
1873 : 0 : auto_sbitmap pss (ps->g->num_nodes);
1874 : 0 : sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1875 : 0 : sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1876 : 0 : int psp_not_empty;
1877 : 0 : int pss_not_empty;
1878 : 0 : int count_preds;
1879 : 0 : int count_succs;
1880 : :
1881 : : /* 1. compute sched window for u (start, end, step). */
1882 : 0 : bitmap_clear (psp);
1883 : 0 : bitmap_clear (pss);
1884 : 0 : psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
1885 : 0 : pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
1886 : :
1887 : : /* We first compute a forward range (start <= end), then decide whether
1888 : : to reverse it. */
1889 : 0 : early_start = INT_MIN;
1890 : 0 : late_start = INT_MAX;
1891 : 0 : start = INT_MIN;
1892 : 0 : end = INT_MAX;
1893 : 0 : step = 1;
1894 : :
1895 : 0 : count_preds = 0;
1896 : 0 : count_succs = 0;
1897 : :
1898 : 0 : if (dump_file && (psp_not_empty || pss_not_empty))
1899 : : {
1900 : 0 : fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1901 : 0 : "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1902 : 0 : fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1903 : : "start", "early start", "late start", "end", "time");
1904 : 0 : fprintf (dump_file, "=========== =========== =========== ==========="
1905 : : " =====\n");
1906 : : }
1907 : : /* Calculate early_start and limit end. Both bounds are inclusive. */
1908 : 0 : if (psp_not_empty)
1909 : 0 : for (e = u_node->in; e != 0; e = e->next_in)
1910 : : {
1911 : 0 : int v = e->src->cuid;
1912 : :
1913 : 0 : if (bitmap_bit_p (sched_nodes, v))
1914 : : {
1915 : 0 : int p_st = SCHED_TIME (v);
1916 : 0 : int earliest = p_st + e->latency - (e->distance * ii);
1917 : 0 : int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1918 : :
1919 : 0 : if (dump_file)
1920 : : {
1921 : 0 : fprintf (dump_file, "%11s %11d %11s %11d %5d",
1922 : : "", earliest, "", latest, p_st);
1923 : 0 : print_ddg_edge (dump_file, e);
1924 : 0 : fprintf (dump_file, "\n");
1925 : : }
1926 : :
1927 : 0 : early_start = MAX (early_start, earliest);
1928 : 0 : end = MIN (end, latest);
1929 : :
1930 : 0 : if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1931 : 0 : count_preds++;
1932 : : }
1933 : : }
1934 : :
1935 : : /* Calculate late_start and limit start. Both bounds are inclusive. */
1936 : 0 : if (pss_not_empty)
1937 : 0 : for (e = u_node->out; e != 0; e = e->next_out)
1938 : : {
1939 : 0 : int v = e->dest->cuid;
1940 : :
1941 : 0 : if (bitmap_bit_p (sched_nodes, v))
1942 : : {
1943 : 0 : int s_st = SCHED_TIME (v);
1944 : 0 : int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1945 : 0 : int latest = s_st - e->latency + (e->distance * ii);
1946 : :
1947 : 0 : if (dump_file)
1948 : : {
1949 : 0 : fprintf (dump_file, "%11d %11s %11d %11s %5d",
1950 : : earliest, "", latest, "", s_st);
1951 : 0 : print_ddg_edge (dump_file, e);
1952 : 0 : fprintf (dump_file, "\n");
1953 : : }
1954 : :
1955 : 0 : start = MAX (start, earliest);
1956 : 0 : late_start = MIN (late_start, latest);
1957 : :
1958 : 0 : if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1959 : 0 : count_succs++;
1960 : : }
1961 : : }
1962 : :
1963 : 0 : if (dump_file && (psp_not_empty || pss_not_empty))
1964 : : {
1965 : 0 : fprintf (dump_file, "----------- ----------- ----------- -----------"
1966 : : " -----\n");
1967 : 0 : fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1968 : : start, early_start, late_start, end, "",
1969 : : "(max, max, min, min)");
1970 : : }
1971 : :
1972 : : /* Get a target scheduling window no bigger than ii. */
1973 : 0 : if (early_start == INT_MIN && late_start == INT_MAX)
1974 : 0 : early_start = NODE_ASAP (u_node);
1975 : 0 : else if (early_start == INT_MIN)
1976 : 0 : early_start = late_start - (ii - 1);
1977 : 0 : late_start = MIN (late_start, early_start + (ii - 1));
1978 : :
1979 : : /* Apply memory dependence limits. */
1980 : 0 : start = MAX (start, early_start);
1981 : 0 : end = MIN (end, late_start);
1982 : :
1983 : 0 : if (dump_file && (psp_not_empty || pss_not_empty))
1984 : 0 : fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1985 : : "", start, end, "", "");
1986 : :
1987 : : /* If there are at least as many successors as predecessors, schedule the
1988 : : node close to its successors. */
1989 : 0 : if (pss_not_empty && count_succs >= count_preds)
1990 : : {
1991 : 0 : std::swap (start, end);
1992 : 0 : step = -1;
1993 : : }
1994 : :
1995 : : /* Now that we've finalized the window, make END an exclusive rather
1996 : : than an inclusive bound. */
1997 : 0 : end += step;
1998 : :
1999 : 0 : *start_p = start;
2000 : 0 : *step_p = step;
2001 : 0 : *end_p = end;
2002 : :
2003 : 0 : if ((start >= end && step == 1) || (start <= end && step == -1))
2004 : : {
2005 : 0 : if (dump_file)
2006 : 0 : fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
2007 : : start, end, step);
2008 : 0 : return -1;
2009 : : }
2010 : :
2011 : : return 0;
2012 : 0 : }
2013 : :
2014 : : /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
2015 : : node currently been scheduled. At the end of the calculation
2016 : : MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2017 : : U_NODE which are (1) already scheduled in the first/last row of
2018 : : U_NODE's scheduling window, (2) whose dependence inequality with U
2019 : : becomes an equality when U is scheduled in this same row, and (3)
2020 : : whose dependence latency is zero.
2021 : :
2022 : : The first and last rows are calculated using the following parameters:
2023 : : START/END rows - The cycles that begins/ends the traversal on the window;
2024 : : searching for an empty cycle to schedule U_NODE.
2025 : : STEP - The direction in which we traverse the window.
2026 : : II - The initiation interval. */
2027 : :
2028 : : static void
2029 : 0 : calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2030 : : int step, int ii, sbitmap sched_nodes,
2031 : : sbitmap must_precede, sbitmap must_follow)
2032 : : {
2033 : 0 : ddg_edge_ptr e;
2034 : 0 : int first_cycle_in_window, last_cycle_in_window;
2035 : :
2036 : 0 : gcc_assert (must_precede && must_follow);
2037 : :
2038 : : /* Consider the following scheduling window:
2039 : : {first_cycle_in_window, first_cycle_in_window+1, ...,
2040 : : last_cycle_in_window}. If step is 1 then the following will be
2041 : : the order we traverse the window: {start=first_cycle_in_window,
2042 : : first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2043 : : or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2044 : : end=first_cycle_in_window-1} if step is -1. */
2045 : 0 : first_cycle_in_window = (step == 1) ? start : end - step;
2046 : 0 : last_cycle_in_window = (step == 1) ? end - step : start;
2047 : :
2048 : 0 : bitmap_clear (must_precede);
2049 : 0 : bitmap_clear (must_follow);
2050 : :
2051 : 0 : if (dump_file)
2052 : 0 : fprintf (dump_file, "\nmust_precede: ");
2053 : :
2054 : : /* Instead of checking if:
2055 : : (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2056 : : && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2057 : : first_cycle_in_window)
2058 : : && e->latency == 0
2059 : : we use the fact that latency is non-negative:
2060 : : SCHED_TIME (e->src) - (e->distance * ii) <=
2061 : : SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2062 : : first_cycle_in_window
2063 : : and check only if
2064 : : SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
2065 : 0 : for (e = u_node->in; e != 0; e = e->next_in)
2066 : 0 : if (bitmap_bit_p (sched_nodes, e->src->cuid)
2067 : 0 : && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2068 : : first_cycle_in_window))
2069 : : {
2070 : 0 : if (dump_file)
2071 : 0 : fprintf (dump_file, "%d ", e->src->cuid);
2072 : :
2073 : 0 : bitmap_set_bit (must_precede, e->src->cuid);
2074 : : }
2075 : :
2076 : 0 : if (dump_file)
2077 : 0 : fprintf (dump_file, "\nmust_follow: ");
2078 : :
2079 : : /* Instead of checking if:
2080 : : (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2081 : : && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2082 : : last_cycle_in_window)
2083 : : && e->latency == 0
2084 : : we use the fact that latency is non-negative:
2085 : : SCHED_TIME (e->dest) + (e->distance * ii) >=
2086 : : SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
2087 : : last_cycle_in_window
2088 : : and check only if
2089 : : SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
2090 : 0 : for (e = u_node->out; e != 0; e = e->next_out)
2091 : 0 : if (bitmap_bit_p (sched_nodes, e->dest->cuid)
2092 : 0 : && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2093 : : last_cycle_in_window))
2094 : : {
2095 : 0 : if (dump_file)
2096 : 0 : fprintf (dump_file, "%d ", e->dest->cuid);
2097 : :
2098 : 0 : bitmap_set_bit (must_follow, e->dest->cuid);
2099 : : }
2100 : :
2101 : 0 : if (dump_file)
2102 : 0 : fprintf (dump_file, "\n");
2103 : 0 : }
2104 : :
2105 : : /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
2106 : : parameters to decide if that's possible:
2107 : : PS - The partial schedule.
2108 : : U - The serial number of U_NODE.
2109 : : NUM_SPLITS - The number of row splits made so far.
2110 : : MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2111 : : the first row of the scheduling window)
2112 : : MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2113 : : last row of the scheduling window) */
2114 : :
2115 : : static bool
2116 : 0 : try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2117 : : int u, int cycle, sbitmap sched_nodes,
2118 : : int *num_splits, sbitmap must_precede,
2119 : : sbitmap must_follow)
2120 : : {
2121 : 0 : ps_insn_ptr psi;
2122 : 0 : bool success = false;
2123 : :
2124 : 0 : verify_partial_schedule (ps, sched_nodes);
2125 : 0 : psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2126 : 0 : if (psi)
2127 : : {
2128 : 0 : SCHED_TIME (u) = cycle;
2129 : 0 : bitmap_set_bit (sched_nodes, u);
2130 : 0 : success = true;
2131 : 0 : *num_splits = 0;
2132 : 0 : if (dump_file)
2133 : 0 : fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2134 : :
2135 : : }
2136 : :
2137 : 0 : return success;
2138 : : }
2139 : :
2140 : : /* This function implements the scheduling algorithm for SMS according to the
2141 : : above algorithm. */
2142 : : static partial_schedule_ptr
2143 : 0 : sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2144 : : {
2145 : 0 : int ii = mii;
2146 : 0 : int i, c, success, num_splits = 0;
2147 : 0 : int flush_and_start_over = true;
2148 : 0 : int num_nodes = g->num_nodes;
2149 : 0 : int start, end, step; /* Place together into one struct? */
2150 : 0 : auto_sbitmap sched_nodes (num_nodes);
2151 : 0 : auto_sbitmap must_precede (num_nodes);
2152 : 0 : auto_sbitmap must_follow (num_nodes);
2153 : 0 : auto_sbitmap tobe_scheduled (num_nodes);
2154 : :
2155 : : /* Value of param_sms_dfa_history is a limit on the number of cycles that
2156 : : resource conflicts can span. ??? Should be provided by DFA, and be
2157 : : dependent on the type of insn scheduled. Set to 0 by default to save
2158 : : compile time. */
2159 : 0 : partial_schedule_ptr ps = create_partial_schedule (ii, g,
2160 : : param_sms_dfa_history);
2161 : :
2162 : 0 : bitmap_ones (tobe_scheduled);
2163 : 0 : bitmap_clear (sched_nodes);
2164 : :
2165 : 0 : while (flush_and_start_over && (ii < maxii))
2166 : : {
2167 : :
2168 : 0 : if (dump_file)
2169 : 0 : fprintf (dump_file, "Starting with ii=%d\n", ii);
2170 : 0 : flush_and_start_over = false;
2171 : 0 : bitmap_clear (sched_nodes);
2172 : :
2173 : 0 : for (i = 0; i < num_nodes; i++)
2174 : : {
2175 : 0 : int u = nodes_order[i];
2176 : 0 : ddg_node_ptr u_node = &ps->g->nodes[u];
2177 : 0 : rtx_insn *insn = u_node->insn;
2178 : :
2179 : 0 : gcc_checking_assert (NONDEBUG_INSN_P (insn));
2180 : :
2181 : 0 : if (bitmap_bit_p (sched_nodes, u))
2182 : 0 : continue;
2183 : :
2184 : : /* Try to get non-empty scheduling window. */
2185 : 0 : success = 0;
2186 : 0 : if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2187 : : &step, &end) == 0)
2188 : : {
2189 : 0 : if (dump_file)
2190 : 0 : fprintf (dump_file, "\nTrying to schedule node %d "
2191 : : "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
2192 : 0 : (g->nodes[u].insn)), start, end, step);
2193 : :
2194 : 0 : gcc_assert ((step > 0 && start < end)
2195 : : || (step < 0 && start > end));
2196 : :
2197 : 0 : calculate_must_precede_follow (u_node, start, end, step, ii,
2198 : : sched_nodes, must_precede,
2199 : : must_follow);
2200 : :
2201 : 0 : for (c = start; c != end; c += step)
2202 : : {
2203 : 0 : sbitmap tmp_precede, tmp_follow;
2204 : :
2205 : 0 : set_must_precede_follow (&tmp_follow, must_follow,
2206 : : &tmp_precede, must_precede,
2207 : : c, start, end, step);
2208 : 0 : success =
2209 : 0 : try_scheduling_node_in_cycle (ps, u, c,
2210 : : sched_nodes,
2211 : : &num_splits, tmp_precede,
2212 : : tmp_follow);
2213 : 0 : if (success)
2214 : : break;
2215 : : }
2216 : :
2217 : 0 : verify_partial_schedule (ps, sched_nodes);
2218 : : }
2219 : 0 : if (!success)
2220 : : {
2221 : 0 : int split_row;
2222 : :
2223 : 0 : if (ii++ == maxii)
2224 : : break;
2225 : :
2226 : 0 : if (num_splits >= MAX_SPLIT_NUM)
2227 : : {
2228 : 0 : num_splits = 0;
2229 : 0 : flush_and_start_over = true;
2230 : 0 : verify_partial_schedule (ps, sched_nodes);
2231 : 0 : reset_partial_schedule (ps, ii);
2232 : 0 : verify_partial_schedule (ps, sched_nodes);
2233 : 0 : break;
2234 : : }
2235 : :
2236 : 0 : num_splits++;
2237 : : /* The scheduling window is exclusive of 'end'
2238 : : whereas compute_split_window() expects an inclusive,
2239 : : ordered range. */
2240 : 0 : if (step == 1)
2241 : 0 : split_row = compute_split_row (sched_nodes, start, end - 1,
2242 : : ps->ii, u_node);
2243 : : else
2244 : 0 : split_row = compute_split_row (sched_nodes, end + 1, start,
2245 : : ps->ii, u_node);
2246 : :
2247 : 0 : ps_insert_empty_row (ps, split_row, sched_nodes);
2248 : 0 : i--; /* Go back and retry node i. */
2249 : :
2250 : 0 : if (dump_file)
2251 : 0 : fprintf (dump_file, "num_splits=%d\n", num_splits);
2252 : : }
2253 : :
2254 : : /* ??? If (success), check register pressure estimates. */
2255 : : } /* Continue with next node. */
2256 : : } /* While flush_and_start_over. */
2257 : 0 : if (ii >= maxii)
2258 : : {
2259 : 0 : free_partial_schedule (ps);
2260 : 0 : ps = NULL;
2261 : : }
2262 : : else
2263 : 0 : gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
2264 : :
2265 : 0 : return ps;
2266 : 0 : }
2267 : :
2268 : : /* This function inserts a new empty row into PS at the position
2269 : : according to SPLITROW, keeping all already scheduled instructions
2270 : : intact and updating their SCHED_TIME and cycle accordingly. */
2271 : : static void
2272 : 0 : ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2273 : : sbitmap sched_nodes)
2274 : : {
2275 : 0 : ps_insn_ptr crr_insn;
2276 : 0 : ps_insn_ptr *rows_new;
2277 : 0 : int ii = ps->ii;
2278 : 0 : int new_ii = ii + 1;
2279 : 0 : int row;
2280 : 0 : int *rows_length_new;
2281 : :
2282 : 0 : verify_partial_schedule (ps, sched_nodes);
2283 : :
2284 : : /* We normalize sched_time and rotate ps to have only non-negative sched
2285 : : times, for simplicity of updating cycles after inserting new row. */
2286 : 0 : split_row -= ps->min_cycle;
2287 : 0 : split_row = SMODULO (split_row, ii);
2288 : 0 : if (dump_file)
2289 : 0 : fprintf (dump_file, "split_row=%d\n", split_row);
2290 : :
2291 : 0 : reset_sched_times (ps, PS_MIN_CYCLE (ps));
2292 : 0 : rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2293 : :
2294 : 0 : rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2295 : 0 : rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
2296 : 0 : for (row = 0; row < split_row; row++)
2297 : : {
2298 : 0 : rows_new[row] = ps->rows[row];
2299 : 0 : rows_length_new[row] = ps->rows_length[row];
2300 : 0 : ps->rows[row] = NULL;
2301 : 0 : for (crr_insn = rows_new[row];
2302 : 0 : crr_insn; crr_insn = crr_insn->next_in_row)
2303 : : {
2304 : 0 : int u = crr_insn->id;
2305 : 0 : int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2306 : :
2307 : 0 : SCHED_TIME (u) = new_time;
2308 : 0 : crr_insn->cycle = new_time;
2309 : 0 : SCHED_ROW (u) = new_time % new_ii;
2310 : 0 : SCHED_STAGE (u) = new_time / new_ii;
2311 : : }
2312 : :
2313 : : }
2314 : :
2315 : 0 : rows_new[split_row] = NULL;
2316 : :
2317 : 0 : for (row = split_row; row < ii; row++)
2318 : : {
2319 : 0 : rows_new[row + 1] = ps->rows[row];
2320 : 0 : rows_length_new[row + 1] = ps->rows_length[row];
2321 : 0 : ps->rows[row] = NULL;
2322 : 0 : for (crr_insn = rows_new[row + 1];
2323 : 0 : crr_insn; crr_insn = crr_insn->next_in_row)
2324 : : {
2325 : 0 : int u = crr_insn->id;
2326 : 0 : int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2327 : :
2328 : 0 : SCHED_TIME (u) = new_time;
2329 : 0 : crr_insn->cycle = new_time;
2330 : 0 : SCHED_ROW (u) = new_time % new_ii;
2331 : 0 : SCHED_STAGE (u) = new_time / new_ii;
2332 : : }
2333 : : }
2334 : :
2335 : : /* Updating ps. */
2336 : 0 : ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2337 : 0 : + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2338 : 0 : ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2339 : 0 : + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2340 : 0 : free (ps->rows);
2341 : 0 : ps->rows = rows_new;
2342 : 0 : free (ps->rows_length);
2343 : 0 : ps->rows_length = rows_length_new;
2344 : 0 : ps->ii = new_ii;
2345 : 0 : gcc_assert (ps->min_cycle >= 0);
2346 : :
2347 : 0 : verify_partial_schedule (ps, sched_nodes);
2348 : :
2349 : 0 : if (dump_file)
2350 : 0 : fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2351 : : ps->max_cycle);
2352 : 0 : }
2353 : :
2354 : : /* Given U_NODE which is the node that failed to be scheduled; LOW and
2355 : : UP which are the boundaries of it's scheduling window; compute using
2356 : : SCHED_NODES and II a row in the partial schedule that can be split
2357 : : which will separate a critical predecessor from a critical successor
2358 : : thereby expanding the window, and return it. */
2359 : : static int
2360 : 0 : compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2361 : : ddg_node_ptr u_node)
2362 : : {
2363 : 0 : ddg_edge_ptr e;
2364 : 0 : int lower = INT_MIN, upper = INT_MAX;
2365 : 0 : int crit_pred = -1;
2366 : 0 : int crit_succ = -1;
2367 : 0 : int crit_cycle;
2368 : :
2369 : 0 : for (e = u_node->in; e != 0; e = e->next_in)
2370 : : {
2371 : 0 : int v = e->src->cuid;
2372 : :
2373 : 0 : if (bitmap_bit_p (sched_nodes, v)
2374 : 0 : && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2375 : 0 : if (SCHED_TIME (v) > lower)
2376 : : {
2377 : 0 : crit_pred = v;
2378 : 0 : lower = SCHED_TIME (v);
2379 : : }
2380 : : }
2381 : :
2382 : 0 : if (crit_pred >= 0)
2383 : : {
2384 : 0 : crit_cycle = SCHED_TIME (crit_pred) + 1;
2385 : 0 : return SMODULO (crit_cycle, ii);
2386 : : }
2387 : :
2388 : 0 : for (e = u_node->out; e != 0; e = e->next_out)
2389 : : {
2390 : 0 : int v = e->dest->cuid;
2391 : :
2392 : 0 : if (bitmap_bit_p (sched_nodes, v)
2393 : 0 : && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2394 : 0 : if (SCHED_TIME (v) < upper)
2395 : : {
2396 : 0 : crit_succ = v;
2397 : 0 : upper = SCHED_TIME (v);
2398 : : }
2399 : : }
2400 : :
2401 : 0 : if (crit_succ >= 0)
2402 : : {
2403 : 0 : crit_cycle = SCHED_TIME (crit_succ);
2404 : 0 : return SMODULO (crit_cycle, ii);
2405 : : }
2406 : :
2407 : 0 : if (dump_file)
2408 : 0 : fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2409 : :
2410 : 0 : return SMODULO ((low + up + 1) / 2, ii);
2411 : : }
2412 : :
2413 : : static void
2414 : 0 : verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2415 : : {
2416 : 0 : int row;
2417 : 0 : ps_insn_ptr crr_insn;
2418 : :
2419 : 0 : for (row = 0; row < ps->ii; row++)
2420 : : {
2421 : 0 : int length = 0;
2422 : :
2423 : 0 : for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2424 : : {
2425 : 0 : int u = crr_insn->id;
2426 : :
2427 : 0 : length++;
2428 : 0 : gcc_assert (bitmap_bit_p (sched_nodes, u));
2429 : : /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2430 : : popcount (sched_nodes) == number of insns in ps. */
2431 : 0 : gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2432 : 0 : gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2433 : : }
2434 : :
2435 : 0 : gcc_assert (ps->rows_length[row] == length);
2436 : : }
2437 : 0 : }
2438 : :
2439 : :
2440 : : /* This page implements the algorithm for ordering the nodes of a DDG
2441 : : for modulo scheduling, activated through the
2442 : : "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2443 : :
2444 : : #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2445 : : #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2446 : : #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2447 : : #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2448 : : #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2449 : : #define DEPTH(x) (ASAP ((x)))
2450 : :
2451 : : typedef struct node_order_params * nopa;
2452 : :
2453 : : static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2454 : : static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2455 : : static nopa calculate_order_params (ddg_ptr, int, int *);
2456 : : static int find_max_asap (ddg_ptr, sbitmap);
2457 : : static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2458 : : static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2459 : :
2460 : : enum sms_direction {BOTTOMUP, TOPDOWN};
2461 : :
2462 : : struct node_order_params
2463 : : {
2464 : : int asap;
2465 : : int alap;
2466 : : int height;
2467 : : };
2468 : :
2469 : : /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2470 : : static void
2471 : 0 : check_nodes_order (int *node_order, int num_nodes)
2472 : : {
2473 : 0 : int i;
2474 : 0 : auto_sbitmap tmp (num_nodes);
2475 : :
2476 : 0 : bitmap_clear (tmp);
2477 : :
2478 : 0 : if (dump_file)
2479 : 0 : fprintf (dump_file, "SMS final nodes order: \n");
2480 : :
2481 : 0 : for (i = 0; i < num_nodes; i++)
2482 : : {
2483 : 0 : int u = node_order[i];
2484 : :
2485 : 0 : if (dump_file)
2486 : 0 : fprintf (dump_file, "%d ", u);
2487 : 0 : gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
2488 : :
2489 : 0 : bitmap_set_bit (tmp, u);
2490 : : }
2491 : :
2492 : 0 : if (dump_file)
2493 : 0 : fprintf (dump_file, "\n");
2494 : 0 : }
2495 : :
2496 : : /* Order the nodes of G for scheduling and pass the result in
2497 : : NODE_ORDER. Also set aux.count of each node to ASAP.
2498 : : Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2499 : : static int
2500 : 0 : sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2501 : : {
2502 : 0 : int i;
2503 : 0 : int rec_mii = 0;
2504 : 0 : ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2505 : :
2506 : 0 : nopa nops = calculate_order_params (g, mii, pmax_asap);
2507 : :
2508 : 0 : if (dump_file)
2509 : 0 : print_sccs (dump_file, sccs, g);
2510 : :
2511 : 0 : order_nodes_of_sccs (sccs, node_order);
2512 : :
2513 : 0 : if (sccs->num_sccs > 0)
2514 : : /* First SCC has the largest recurrence_length. */
2515 : 0 : rec_mii = sccs->sccs[0]->recurrence_length;
2516 : :
2517 : : /* Save ASAP before destroying node_order_params. */
2518 : 0 : for (i = 0; i < g->num_nodes; i++)
2519 : : {
2520 : 0 : ddg_node_ptr v = &g->nodes[i];
2521 : 0 : v->aux.count = ASAP (v);
2522 : : }
2523 : :
2524 : 0 : free (nops);
2525 : 0 : free_ddg_all_sccs (sccs);
2526 : 0 : check_nodes_order (node_order, g->num_nodes);
2527 : :
2528 : 0 : return rec_mii;
2529 : : }
2530 : :
2531 : : static void
2532 : 0 : order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2533 : : {
2534 : 0 : int i, pos = 0;
2535 : 0 : ddg_ptr g = all_sccs->ddg;
2536 : 0 : int num_nodes = g->num_nodes;
2537 : 0 : auto_sbitmap prev_sccs (num_nodes);
2538 : 0 : auto_sbitmap on_path (num_nodes);
2539 : 0 : auto_sbitmap tmp (num_nodes);
2540 : 0 : auto_sbitmap ones (num_nodes);
2541 : :
2542 : 0 : bitmap_clear (prev_sccs);
2543 : 0 : bitmap_ones (ones);
2544 : :
2545 : : /* Perform the node ordering starting from the SCC with the highest recMII.
2546 : : For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2547 : 0 : for (i = 0; i < all_sccs->num_sccs; i++)
2548 : : {
2549 : 0 : ddg_scc_ptr scc = all_sccs->sccs[i];
2550 : :
2551 : : /* Add nodes on paths from previous SCCs to the current SCC. */
2552 : 0 : find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2553 : 0 : bitmap_ior (tmp, scc->nodes, on_path);
2554 : :
2555 : : /* Add nodes on paths from the current SCC to previous SCCs. */
2556 : 0 : find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2557 : 0 : bitmap_ior (tmp, tmp, on_path);
2558 : :
2559 : : /* Remove nodes of previous SCCs from current extended SCC. */
2560 : 0 : bitmap_and_compl (tmp, tmp, prev_sccs);
2561 : :
2562 : 0 : pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2563 : : /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2564 : : }
2565 : :
2566 : : /* Handle the remaining nodes that do not belong to any scc. Each call
2567 : : to order_nodes_in_scc handles a single connected component. */
2568 : 0 : while (pos < g->num_nodes)
2569 : : {
2570 : 0 : bitmap_and_compl (tmp, ones, prev_sccs);
2571 : 0 : pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2572 : : }
2573 : 0 : }
2574 : :
2575 : : /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2576 : : static struct node_order_params *
2577 : 0 : calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2578 : : {
2579 : 0 : int u;
2580 : 0 : int max_asap;
2581 : 0 : int num_nodes = g->num_nodes;
2582 : 0 : ddg_edge_ptr e;
2583 : : /* Allocate a place to hold ordering params for each node in the DDG. */
2584 : 0 : nopa node_order_params_arr;
2585 : :
2586 : : /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2587 : 0 : node_order_params_arr = (nopa) xcalloc (num_nodes,
2588 : : sizeof (struct node_order_params));
2589 : :
2590 : : /* Set the aux pointer of each node to point to its order_params structure. */
2591 : 0 : for (u = 0; u < num_nodes; u++)
2592 : 0 : g->nodes[u].aux.info = &node_order_params_arr[u];
2593 : :
2594 : : /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2595 : : calculate ASAP, ALAP, mobility, distance, and height for each node
2596 : : in the dependence (direct acyclic) graph. */
2597 : :
2598 : : /* We assume that the nodes in the array are in topological order. */
2599 : :
2600 : : max_asap = 0;
2601 : 0 : for (u = 0; u < num_nodes; u++)
2602 : : {
2603 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2604 : :
2605 : 0 : ASAP (u_node) = 0;
2606 : 0 : for (e = u_node->in; e; e = e->next_in)
2607 : 0 : if (e->distance == 0)
2608 : 0 : ASAP (u_node) = MAX (ASAP (u_node),
2609 : : ASAP (e->src) + e->latency);
2610 : 0 : max_asap = MAX (max_asap, ASAP (u_node));
2611 : : }
2612 : :
2613 : 0 : for (u = num_nodes - 1; u > -1; u--)
2614 : : {
2615 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2616 : :
2617 : 0 : ALAP (u_node) = max_asap;
2618 : 0 : HEIGHT (u_node) = 0;
2619 : 0 : for (e = u_node->out; e; e = e->next_out)
2620 : 0 : if (e->distance == 0)
2621 : : {
2622 : 0 : ALAP (u_node) = MIN (ALAP (u_node),
2623 : : ALAP (e->dest) - e->latency);
2624 : 0 : HEIGHT (u_node) = MAX (HEIGHT (u_node),
2625 : : HEIGHT (e->dest) + e->latency);
2626 : : }
2627 : : }
2628 : 0 : if (dump_file)
2629 : : {
2630 : 0 : fprintf (dump_file, "\nOrder params\n");
2631 : 0 : for (u = 0; u < num_nodes; u++)
2632 : : {
2633 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2634 : :
2635 : 0 : fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2636 : 0 : ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2637 : : }
2638 : : }
2639 : :
2640 : 0 : *pmax_asap = max_asap;
2641 : 0 : return node_order_params_arr;
2642 : : }
2643 : :
2644 : : static int
2645 : 0 : find_max_asap (ddg_ptr g, sbitmap nodes)
2646 : : {
2647 : 0 : unsigned int u = 0;
2648 : 0 : int max_asap = -1;
2649 : 0 : int result = -1;
2650 : 0 : sbitmap_iterator sbi;
2651 : :
2652 : 0 : EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2653 : : {
2654 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2655 : :
2656 : 0 : if (max_asap < ASAP (u_node))
2657 : : {
2658 : 0 : max_asap = ASAP (u_node);
2659 : 0 : result = u;
2660 : : }
2661 : : }
2662 : 0 : return result;
2663 : : }
2664 : :
2665 : : static int
2666 : 0 : find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2667 : : {
2668 : 0 : unsigned int u = 0;
2669 : 0 : int max_hv = -1;
2670 : 0 : int min_mob = INT_MAX;
2671 : 0 : int result = -1;
2672 : 0 : sbitmap_iterator sbi;
2673 : :
2674 : 0 : EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2675 : : {
2676 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2677 : :
2678 : 0 : if (max_hv < HEIGHT (u_node))
2679 : : {
2680 : 0 : max_hv = HEIGHT (u_node);
2681 : 0 : min_mob = MOB (u_node);
2682 : 0 : result = u;
2683 : : }
2684 : 0 : else if ((max_hv == HEIGHT (u_node))
2685 : 0 : && (min_mob > MOB (u_node)))
2686 : : {
2687 : 0 : min_mob = MOB (u_node);
2688 : 0 : result = u;
2689 : : }
2690 : : }
2691 : 0 : return result;
2692 : : }
2693 : :
2694 : : static int
2695 : 0 : find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2696 : : {
2697 : 0 : unsigned int u = 0;
2698 : 0 : int max_dv = -1;
2699 : 0 : int min_mob = INT_MAX;
2700 : 0 : int result = -1;
2701 : 0 : sbitmap_iterator sbi;
2702 : :
2703 : 0 : EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2704 : : {
2705 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2706 : :
2707 : 0 : if (max_dv < DEPTH (u_node))
2708 : : {
2709 : 0 : max_dv = DEPTH (u_node);
2710 : 0 : min_mob = MOB (u_node);
2711 : 0 : result = u;
2712 : : }
2713 : 0 : else if ((max_dv == DEPTH (u_node))
2714 : 0 : && (min_mob > MOB (u_node)))
2715 : : {
2716 : 0 : min_mob = MOB (u_node);
2717 : 0 : result = u;
2718 : : }
2719 : : }
2720 : 0 : return result;
2721 : : }
2722 : :
2723 : : /* Places the nodes of SCC into the NODE_ORDER array starting
2724 : : at position POS, according to the SMS ordering algorithm.
2725 : : NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2726 : : the NODE_ORDER array, starting from position zero. */
2727 : : static int
2728 : 0 : order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2729 : : int * node_order, int pos)
2730 : : {
2731 : 0 : enum sms_direction dir;
2732 : 0 : int num_nodes = g->num_nodes;
2733 : 0 : auto_sbitmap workset (num_nodes);
2734 : 0 : auto_sbitmap tmp (num_nodes);
2735 : 0 : sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2736 : 0 : auto_sbitmap predecessors (num_nodes);
2737 : 0 : auto_sbitmap successors (num_nodes);
2738 : :
2739 : 0 : bitmap_clear (predecessors);
2740 : 0 : find_predecessors (predecessors, g, nodes_ordered);
2741 : :
2742 : 0 : bitmap_clear (successors);
2743 : 0 : find_successors (successors, g, nodes_ordered);
2744 : :
2745 : 0 : bitmap_clear (tmp);
2746 : 0 : if (bitmap_and (tmp, predecessors, scc))
2747 : : {
2748 : 0 : bitmap_copy (workset, tmp);
2749 : 0 : dir = BOTTOMUP;
2750 : : }
2751 : 0 : else if (bitmap_and (tmp, successors, scc))
2752 : : {
2753 : 0 : bitmap_copy (workset, tmp);
2754 : 0 : dir = TOPDOWN;
2755 : : }
2756 : : else
2757 : : {
2758 : 0 : int u;
2759 : :
2760 : 0 : bitmap_clear (workset);
2761 : 0 : if ((u = find_max_asap (g, scc)) >= 0)
2762 : 0 : bitmap_set_bit (workset, u);
2763 : : dir = BOTTOMUP;
2764 : : }
2765 : :
2766 : 0 : bitmap_clear (zero_bitmap);
2767 : 0 : while (!bitmap_equal_p (workset, zero_bitmap))
2768 : : {
2769 : 0 : int v;
2770 : 0 : ddg_node_ptr v_node;
2771 : 0 : sbitmap v_node_preds;
2772 : 0 : sbitmap v_node_succs;
2773 : :
2774 : 0 : if (dir == TOPDOWN)
2775 : : {
2776 : 0 : while (!bitmap_equal_p (workset, zero_bitmap))
2777 : : {
2778 : 0 : v = find_max_hv_min_mob (g, workset);
2779 : 0 : v_node = &g->nodes[v];
2780 : 0 : node_order[pos++] = v;
2781 : 0 : v_node_succs = NODE_SUCCESSORS (v_node);
2782 : 0 : bitmap_and (tmp, v_node_succs, scc);
2783 : :
2784 : : /* Don't consider the already ordered successors again. */
2785 : 0 : bitmap_and_compl (tmp, tmp, nodes_ordered);
2786 : 0 : bitmap_ior (workset, workset, tmp);
2787 : 0 : bitmap_clear_bit (workset, v);
2788 : 0 : bitmap_set_bit (nodes_ordered, v);
2789 : : }
2790 : 0 : dir = BOTTOMUP;
2791 : 0 : bitmap_clear (predecessors);
2792 : 0 : find_predecessors (predecessors, g, nodes_ordered);
2793 : 0 : bitmap_and (workset, predecessors, scc);
2794 : : }
2795 : : else
2796 : : {
2797 : 0 : while (!bitmap_equal_p (workset, zero_bitmap))
2798 : : {
2799 : 0 : v = find_max_dv_min_mob (g, workset);
2800 : 0 : v_node = &g->nodes[v];
2801 : 0 : node_order[pos++] = v;
2802 : 0 : v_node_preds = NODE_PREDECESSORS (v_node);
2803 : 0 : bitmap_and (tmp, v_node_preds, scc);
2804 : :
2805 : : /* Don't consider the already ordered predecessors again. */
2806 : 0 : bitmap_and_compl (tmp, tmp, nodes_ordered);
2807 : 0 : bitmap_ior (workset, workset, tmp);
2808 : 0 : bitmap_clear_bit (workset, v);
2809 : 0 : bitmap_set_bit (nodes_ordered, v);
2810 : : }
2811 : 0 : dir = TOPDOWN;
2812 : 0 : bitmap_clear (successors);
2813 : 0 : find_successors (successors, g, nodes_ordered);
2814 : 0 : bitmap_and (workset, successors, scc);
2815 : : }
2816 : : }
2817 : 0 : sbitmap_free (zero_bitmap);
2818 : 0 : return pos;
2819 : 0 : }
2820 : :
2821 : :
2822 : : /* This page contains functions for manipulating partial-schedules during
2823 : : modulo scheduling. */
2824 : :
2825 : : /* Create a partial schedule and allocate a memory to hold II rows. */
2826 : :
2827 : : static partial_schedule_ptr
2828 : 0 : create_partial_schedule (int ii, ddg_ptr g, int history)
2829 : : {
2830 : 0 : partial_schedule_ptr ps = XNEW (struct partial_schedule);
2831 : 0 : ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2832 : 0 : ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2833 : 0 : ps->reg_moves.create (0);
2834 : 0 : ps->ii = ii;
2835 : 0 : ps->history = history;
2836 : 0 : ps->min_cycle = INT_MAX;
2837 : 0 : ps->max_cycle = INT_MIN;
2838 : 0 : ps->g = g;
2839 : :
2840 : 0 : return ps;
2841 : : }
2842 : :
2843 : : /* Free the PS_INSNs in rows array of the given partial schedule.
2844 : : ??? Consider caching the PS_INSN's. */
2845 : : static void
2846 : 0 : free_ps_insns (partial_schedule_ptr ps)
2847 : : {
2848 : 0 : int i;
2849 : :
2850 : 0 : for (i = 0; i < ps->ii; i++)
2851 : : {
2852 : 0 : while (ps->rows[i])
2853 : : {
2854 : 0 : ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2855 : :
2856 : 0 : free (ps->rows[i]);
2857 : 0 : ps->rows[i] = ps_insn;
2858 : : }
2859 : 0 : ps->rows[i] = NULL;
2860 : : }
2861 : 0 : }
2862 : :
2863 : : /* Free all the memory allocated to the partial schedule. */
2864 : :
2865 : : static void
2866 : 0 : free_partial_schedule (partial_schedule_ptr ps)
2867 : : {
2868 : 0 : ps_reg_move_info *move;
2869 : 0 : unsigned int i;
2870 : :
2871 : 0 : if (!ps)
2872 : 0 : return;
2873 : :
2874 : 0 : FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
2875 : 0 : sbitmap_free (move->uses);
2876 : 0 : ps->reg_moves.release ();
2877 : :
2878 : 0 : free_ps_insns (ps);
2879 : 0 : free (ps->rows);
2880 : 0 : free (ps->rows_length);
2881 : 0 : free (ps);
2882 : : }
2883 : :
2884 : : /* Clear the rows array with its PS_INSNs, and create a new one with
2885 : : NEW_II rows. */
2886 : :
2887 : : static void
2888 : 0 : reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2889 : : {
2890 : 0 : if (!ps)
2891 : : return;
2892 : 0 : free_ps_insns (ps);
2893 : 0 : if (new_ii == ps->ii)
2894 : : return;
2895 : 0 : ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2896 : : * sizeof (ps_insn_ptr));
2897 : 0 : memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2898 : 0 : ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2899 : 0 : memset (ps->rows_length, 0, new_ii * sizeof (int));
2900 : 0 : ps->ii = new_ii;
2901 : 0 : ps->min_cycle = INT_MAX;
2902 : 0 : ps->max_cycle = INT_MIN;
2903 : : }
2904 : :
2905 : : /* Prints the partial schedule as an ii rows array, for each rows
2906 : : print the ids of the insns in it. */
2907 : : void
2908 : 0 : print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2909 : : {
2910 : 0 : int i;
2911 : :
2912 : 0 : for (i = 0; i < ps->ii; i++)
2913 : : {
2914 : 0 : ps_insn_ptr ps_i = ps->rows[i];
2915 : :
2916 : 0 : fprintf (dump, "\n[ROW %d ]: ", i);
2917 : 0 : while (ps_i)
2918 : : {
2919 : 0 : rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
2920 : :
2921 : 0 : if (JUMP_P (insn))
2922 : 0 : fprintf (dump, "%d (branch), ", INSN_UID (insn));
2923 : : else
2924 : 0 : fprintf (dump, "%d, ", INSN_UID (insn));
2925 : :
2926 : 0 : ps_i = ps_i->next_in_row;
2927 : : }
2928 : : }
2929 : 0 : }
2930 : :
2931 : : /* Creates an object of PS_INSN and initializes it to the given parameters. */
2932 : : static ps_insn_ptr
2933 : 0 : create_ps_insn (int id, int cycle)
2934 : : {
2935 : 0 : ps_insn_ptr ps_i = XNEW (struct ps_insn);
2936 : :
2937 : 0 : ps_i->id = id;
2938 : 0 : ps_i->next_in_row = NULL;
2939 : 0 : ps_i->prev_in_row = NULL;
2940 : 0 : ps_i->cycle = cycle;
2941 : :
2942 : 0 : return ps_i;
2943 : : }
2944 : :
2945 : :
2946 : : /* Removes the given PS_INSN from the partial schedule. */
2947 : : static void
2948 : 0 : remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2949 : : {
2950 : 0 : int row;
2951 : :
2952 : 0 : gcc_assert (ps && ps_i);
2953 : :
2954 : 0 : row = SMODULO (ps_i->cycle, ps->ii);
2955 : 0 : if (! ps_i->prev_in_row)
2956 : : {
2957 : 0 : gcc_assert (ps_i == ps->rows[row]);
2958 : 0 : ps->rows[row] = ps_i->next_in_row;
2959 : 0 : if (ps->rows[row])
2960 : 0 : ps->rows[row]->prev_in_row = NULL;
2961 : : }
2962 : : else
2963 : : {
2964 : 0 : ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2965 : 0 : if (ps_i->next_in_row)
2966 : 0 : ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2967 : : }
2968 : :
2969 : 0 : ps->rows_length[row] -= 1;
2970 : 0 : free (ps_i);
2971 : 0 : return;
2972 : : }
2973 : :
2974 : : /* Unlike what literature describes for modulo scheduling (which focuses
2975 : : on VLIW machines) the order of the instructions inside a cycle is
2976 : : important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2977 : : where the current instruction should go relative to the already
2978 : : scheduled instructions in the given cycle. Go over these
2979 : : instructions and find the first possible column to put it in. */
2980 : : static bool
2981 : 0 : ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2982 : : sbitmap must_precede, sbitmap must_follow)
2983 : : {
2984 : 0 : ps_insn_ptr next_ps_i;
2985 : 0 : ps_insn_ptr first_must_follow = NULL;
2986 : 0 : ps_insn_ptr last_must_precede = NULL;
2987 : 0 : ps_insn_ptr last_in_row = NULL;
2988 : 0 : int row;
2989 : :
2990 : 0 : if (! ps_i)
2991 : : return false;
2992 : :
2993 : 0 : row = SMODULO (ps_i->cycle, ps->ii);
2994 : :
2995 : : /* Find the first must follow and the last must precede
2996 : : and insert the node immediately after the must precede
2997 : : but make sure that it there is no must follow after it. */
2998 : 0 : for (next_ps_i = ps->rows[row];
2999 : 0 : next_ps_i;
3000 : 0 : next_ps_i = next_ps_i->next_in_row)
3001 : : {
3002 : 0 : if (must_follow
3003 : 0 : && bitmap_bit_p (must_follow, next_ps_i->id)
3004 : 0 : && ! first_must_follow)
3005 : : first_must_follow = next_ps_i;
3006 : 0 : if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
3007 : : {
3008 : : /* If we have already met a node that must follow, then
3009 : : there is no possible column. */
3010 : 0 : if (first_must_follow)
3011 : : return false;
3012 : : else
3013 : : last_must_precede = next_ps_i;
3014 : : }
3015 : : /* The closing branch must be the last in the row. */
3016 : 0 : if (JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3017 : : return false;
3018 : :
3019 : 0 : last_in_row = next_ps_i;
3020 : : }
3021 : :
3022 : : /* The closing branch is scheduled as well. Make sure there is no
3023 : : dependent instruction after it as the branch should be the last
3024 : : instruction in the row. */
3025 : 0 : if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3026 : : {
3027 : 0 : if (first_must_follow)
3028 : : return false;
3029 : 0 : if (last_in_row)
3030 : : {
3031 : : /* Make the branch the last in the row. New instructions
3032 : : will be inserted at the beginning of the row or after the
3033 : : last must_precede instruction thus the branch is guaranteed
3034 : : to remain the last instruction in the row. */
3035 : 0 : last_in_row->next_in_row = ps_i;
3036 : 0 : ps_i->prev_in_row = last_in_row;
3037 : 0 : ps_i->next_in_row = NULL;
3038 : : }
3039 : : else
3040 : 0 : ps->rows[row] = ps_i;
3041 : 0 : return true;
3042 : : }
3043 : :
3044 : : /* Now insert the node after INSERT_AFTER_PSI. */
3045 : :
3046 : 0 : if (! last_must_precede)
3047 : : {
3048 : 0 : ps_i->next_in_row = ps->rows[row];
3049 : 0 : ps_i->prev_in_row = NULL;
3050 : 0 : if (ps_i->next_in_row)
3051 : 0 : ps_i->next_in_row->prev_in_row = ps_i;
3052 : 0 : ps->rows[row] = ps_i;
3053 : : }
3054 : : else
3055 : : {
3056 : 0 : ps_i->next_in_row = last_must_precede->next_in_row;
3057 : 0 : last_must_precede->next_in_row = ps_i;
3058 : 0 : ps_i->prev_in_row = last_must_precede;
3059 : 0 : if (ps_i->next_in_row)
3060 : 0 : ps_i->next_in_row->prev_in_row = ps_i;
3061 : : }
3062 : :
3063 : : return true;
3064 : : }
3065 : :
3066 : : /* Advances the PS_INSN one column in its current row; returns false
3067 : : in failure and true in success. Bit N is set in MUST_FOLLOW if
3068 : : the node with cuid N must be come after the node pointed to by
3069 : : PS_I when scheduled in the same cycle. */
3070 : : static bool
3071 : 0 : ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3072 : : sbitmap must_follow)
3073 : : {
3074 : 0 : ps_insn_ptr prev, next;
3075 : 0 : int row;
3076 : :
3077 : 0 : if (!ps || !ps_i)
3078 : : return false;
3079 : :
3080 : 0 : row = SMODULO (ps_i->cycle, ps->ii);
3081 : :
3082 : 0 : if (! ps_i->next_in_row)
3083 : : return false;
3084 : :
3085 : : /* Check if next_in_row is dependent on ps_i, both having same sched
3086 : : times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
3087 : 0 : if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
3088 : : return false;
3089 : :
3090 : : /* Advance PS_I over its next_in_row in the doubly linked list. */
3091 : 0 : prev = ps_i->prev_in_row;
3092 : 0 : next = ps_i->next_in_row;
3093 : :
3094 : 0 : if (ps_i == ps->rows[row])
3095 : 0 : ps->rows[row] = next;
3096 : :
3097 : 0 : ps_i->next_in_row = next->next_in_row;
3098 : :
3099 : 0 : if (next->next_in_row)
3100 : 0 : next->next_in_row->prev_in_row = ps_i;
3101 : :
3102 : 0 : next->next_in_row = ps_i;
3103 : 0 : ps_i->prev_in_row = next;
3104 : :
3105 : 0 : next->prev_in_row = prev;
3106 : 0 : if (prev)
3107 : 0 : prev->next_in_row = next;
3108 : :
3109 : : return true;
3110 : : }
3111 : :
3112 : : /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
3113 : : Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
3114 : : set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
3115 : : before/after (respectively) the node pointed to by PS_I when scheduled
3116 : : in the same cycle. */
3117 : : static ps_insn_ptr
3118 : 0 : add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3119 : : sbitmap must_precede, sbitmap must_follow)
3120 : : {
3121 : 0 : ps_insn_ptr ps_i;
3122 : 0 : int row = SMODULO (cycle, ps->ii);
3123 : :
3124 : 0 : if (ps->rows_length[row] >= issue_rate)
3125 : : return NULL;
3126 : :
3127 : 0 : ps_i = create_ps_insn (id, cycle);
3128 : :
3129 : : /* Finds and inserts PS_I according to MUST_FOLLOW and
3130 : : MUST_PRECEDE. */
3131 : 0 : if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3132 : : {
3133 : 0 : free (ps_i);
3134 : 0 : return NULL;
3135 : : }
3136 : :
3137 : 0 : ps->rows_length[row] += 1;
3138 : 0 : return ps_i;
3139 : : }
3140 : :
3141 : : /* Advance time one cycle. Assumes DFA is being used. */
3142 : : static void
3143 : 0 : advance_one_cycle (void)
3144 : : {
3145 : 0 : if (targetm.sched.dfa_pre_cycle_insn)
3146 : 0 : state_transition (curr_state,
3147 : : targetm.sched.dfa_pre_cycle_insn ());
3148 : :
3149 : 0 : state_transition (curr_state, NULL);
3150 : :
3151 : 0 : if (targetm.sched.dfa_post_cycle_insn)
3152 : 0 : state_transition (curr_state,
3153 : 0 : targetm.sched.dfa_post_cycle_insn ());
3154 : 0 : }
3155 : :
3156 : :
3157 : :
3158 : : /* Checks if PS has resource conflicts according to DFA, starting from
3159 : : FROM cycle to TO cycle; returns true if there are conflicts and false
3160 : : if there are no conflicts. Assumes DFA is being used. */
3161 : : static bool
3162 : 0 : ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3163 : : {
3164 : 0 : int cycle;
3165 : :
3166 : 0 : state_reset (curr_state);
3167 : :
3168 : 0 : for (cycle = from; cycle <= to; cycle++)
3169 : : {
3170 : 0 : ps_insn_ptr crr_insn;
3171 : : /* Holds the remaining issue slots in the current row. */
3172 : 0 : int can_issue_more = issue_rate;
3173 : :
3174 : : /* Walk through the DFA for the current row. */
3175 : 0 : for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3176 : 0 : crr_insn;
3177 : 0 : crr_insn = crr_insn->next_in_row)
3178 : : {
3179 : 0 : rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
3180 : :
3181 : : /* Check if there is room for the current insn. */
3182 : 0 : if (!can_issue_more || state_dead_lock_p (curr_state))
3183 : 0 : return true;
3184 : :
3185 : : /* Update the DFA state and return with failure if the DFA found
3186 : : resource conflicts. */
3187 : 0 : if (state_transition (curr_state, insn) >= 0)
3188 : : return true;
3189 : :
3190 : 0 : if (targetm.sched.variable_issue)
3191 : 0 : can_issue_more =
3192 : 0 : targetm.sched.variable_issue (sched_dump, sched_verbose,
3193 : : insn, can_issue_more);
3194 : : /* A naked CLOBBER or USE generates no instruction, so don't
3195 : : let them consume issue slots. */
3196 : 0 : else if (GET_CODE (PATTERN (insn)) != USE
3197 : 0 : && GET_CODE (PATTERN (insn)) != CLOBBER)
3198 : 0 : can_issue_more--;
3199 : : }
3200 : :
3201 : : /* Advance the DFA to the next cycle. */
3202 : 0 : advance_one_cycle ();
3203 : : }
3204 : : return false;
3205 : : }
3206 : :
3207 : : /* Checks if the given node causes resource conflicts when added to PS at
3208 : : cycle C. If not the node is added to PS and returned; otherwise zero
3209 : : is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3210 : : cuid N must be come before/after (respectively) the node pointed to by
3211 : : PS_I when scheduled in the same cycle. */
3212 : : ps_insn_ptr
3213 : 0 : ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3214 : : int c, sbitmap must_precede,
3215 : : sbitmap must_follow)
3216 : : {
3217 : 0 : int i, first, amount;
3218 : 0 : bool has_conflicts = false;
3219 : 0 : ps_insn_ptr ps_i;
3220 : :
3221 : : /* First add the node to the PS, if this succeeds check for
3222 : : conflicts, trying different issue slots in the same row. */
3223 : 0 : if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3224 : : return NULL; /* Failed to insert the node at the given cycle. */
3225 : :
3226 : 0 : while (1)
3227 : : {
3228 : 0 : has_conflicts = ps_has_conflicts (ps, c, c);
3229 : 0 : if (ps->history > 0 && !has_conflicts)
3230 : : {
3231 : : /* Check all 2h+1 intervals, starting from c-2h..c up to c..2h,
3232 : : but not more than ii intervals. */
3233 : 0 : first = c - ps->history;
3234 : 0 : amount = 2 * ps->history + 1;
3235 : 0 : if (amount > ps->ii)
3236 : : amount = ps->ii;
3237 : 0 : for (i = first; i < first + amount; i++)
3238 : : {
3239 : 0 : has_conflicts = ps_has_conflicts (ps,
3240 : : i - ps->history,
3241 : 0 : i + ps->history);
3242 : 0 : if (has_conflicts)
3243 : : break;
3244 : : }
3245 : : }
3246 : 0 : if (!has_conflicts)
3247 : : break;
3248 : : /* Try different issue slots to find one that the given node can be
3249 : : scheduled in without conflicts. */
3250 : 0 : if (! ps_insn_advance_column (ps, ps_i, must_follow))
3251 : : break;
3252 : : }
3253 : :
3254 : 0 : if (has_conflicts)
3255 : : {
3256 : 0 : remove_node_from_ps (ps, ps_i);
3257 : 0 : return NULL;
3258 : : }
3259 : :
3260 : 0 : ps->min_cycle = MIN (ps->min_cycle, c);
3261 : 0 : ps->max_cycle = MAX (ps->max_cycle, c);
3262 : 0 : return ps_i;
3263 : : }
3264 : :
3265 : : /* Calculate the stage count of the partial schedule PS. The calculation
3266 : : takes into account the rotation amount passed in ROTATION_AMOUNT. */
3267 : : int
3268 : 0 : calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3269 : : {
3270 : 0 : int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3271 : 0 : int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3272 : 0 : int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3273 : :
3274 : : /* The calculation of stage count is done adding the number of stages
3275 : : before cycle zero and after cycle zero. */
3276 : 0 : stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3277 : :
3278 : 0 : return stage_count;
3279 : : }
3280 : :
3281 : : /* Rotate the rows of PS such that insns scheduled at time
3282 : : START_CYCLE will appear in row 0. Updates max/min_cycles. */
3283 : : void
3284 : 0 : rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3285 : : {
3286 : 0 : int i, row, backward_rotates;
3287 : 0 : int last_row = ps->ii - 1;
3288 : :
3289 : 0 : if (start_cycle == 0)
3290 : : return;
3291 : :
3292 : 0 : backward_rotates = SMODULO (start_cycle, ps->ii);
3293 : :
3294 : : /* Revisit later and optimize this into a single loop. */
3295 : 0 : for (i = 0; i < backward_rotates; i++)
3296 : : {
3297 : 0 : ps_insn_ptr first_row = ps->rows[0];
3298 : 0 : int first_row_length = ps->rows_length[0];
3299 : :
3300 : 0 : for (row = 0; row < last_row; row++)
3301 : : {
3302 : 0 : ps->rows[row] = ps->rows[row + 1];
3303 : 0 : ps->rows_length[row] = ps->rows_length[row + 1];
3304 : : }
3305 : :
3306 : 0 : ps->rows[last_row] = first_row;
3307 : 0 : ps->rows_length[last_row] = first_row_length;
3308 : : }
3309 : :
3310 : 0 : ps->max_cycle -= start_cycle;
3311 : 0 : ps->min_cycle -= start_cycle;
3312 : : }
3313 : :
3314 : : #endif /* INSN_SCHEDULING */
3315 : :
3316 : : /* Run instruction scheduler. */
3317 : : /* Perform SMS module scheduling. */
3318 : :
3319 : : namespace {
3320 : :
3321 : : const pass_data pass_data_sms =
3322 : : {
3323 : : RTL_PASS, /* type */
3324 : : "sms", /* name */
3325 : : OPTGROUP_NONE, /* optinfo_flags */
3326 : : TV_SMS, /* tv_id */
3327 : : 0, /* properties_required */
3328 : : 0, /* properties_provided */
3329 : : 0, /* properties_destroyed */
3330 : : 0, /* todo_flags_start */
3331 : : TODO_df_finish, /* todo_flags_finish */
3332 : : };
3333 : :
3334 : : class pass_sms : public rtl_opt_pass
3335 : : {
3336 : : public:
3337 : 280114 : pass_sms (gcc::context *ctxt)
3338 : 560228 : : rtl_opt_pass (pass_data_sms, ctxt)
3339 : : {}
3340 : :
3341 : : /* opt_pass methods: */
3342 : 1415668 : bool gate (function *) final override
3343 : : {
3344 : 1415668 : return (optimize > 0 && flag_modulo_sched);
3345 : : }
3346 : :
3347 : : unsigned int execute (function *) final override;
3348 : :
3349 : : }; // class pass_sms
3350 : :
3351 : : unsigned int
3352 : 154 : pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
3353 : : {
3354 : : #ifdef INSN_SCHEDULING
3355 : 154 : basic_block bb;
3356 : :
3357 : : /* Collect loop information to be used in SMS. */
3358 : 154 : cfg_layout_initialize (0);
3359 : 154 : sms_schedule ();
3360 : :
3361 : : /* Update the life information, because we add pseudos. */
3362 : 154 : max_regno = max_reg_num ();
3363 : :
3364 : : /* Finalize layout changes. */
3365 : 1038 : FOR_EACH_BB_FN (bb, fun)
3366 : 884 : if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
3367 : 730 : bb->aux = bb->next_bb;
3368 : 154 : free_dominance_info (CDI_DOMINATORS);
3369 : 154 : cfg_layout_finalize ();
3370 : : #endif /* INSN_SCHEDULING */
3371 : 154 : return 0;
3372 : : }
3373 : :
3374 : : } // anon namespace
3375 : :
3376 : : rtl_opt_pass *
3377 : 280114 : make_pass_sms (gcc::context *ctxt)
3378 : : {
3379 : 280114 : return new pass_sms (ctxt);
3380 : : }
|