Branch data Line data Source code
1 : : /* Swing Modulo Scheduling implementation.
2 : : Copyright (C) 2004-2025 Free Software Foundation, Inc.
3 : : Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : :
22 : : #include "config.h"
23 : : #include "system.h"
24 : : #include "coretypes.h"
25 : : #include "backend.h"
26 : : #include "target.h"
27 : : #include "rtl.h"
28 : : #include "tree.h"
29 : : #include "cfghooks.h"
30 : : #include "df.h"
31 : : #include "memmodel.h"
32 : : #include "optabs.h"
33 : : #include "regs.h"
34 : : #include "emit-rtl.h"
35 : : #include "gcov-io.h"
36 : : #include "profile.h"
37 : : #include "insn-attr.h"
38 : : #include "cfgrtl.h"
39 : : #include "sched-int.h"
40 : : #include "cfgloop.h"
41 : : #include "expr.h"
42 : : #include "ddg.h"
43 : : #include "tree-pass.h"
44 : : #include "dbgcnt.h"
45 : : #include "loop-unroll.h"
46 : : #include "hard-reg-set.h"
47 : :
48 : : #ifdef INSN_SCHEDULING
49 : :
50 : : /* This file contains the implementation of the Swing Modulo Scheduler,
51 : : described in the following references:
52 : : [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
53 : : Lifetime--sensitive modulo scheduling in a production environment.
54 : : IEEE Trans. on Comps., 50(3), March 2001
55 : : [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
56 : : Swing Modulo Scheduling: A Lifetime Sensitive Approach.
57 : : PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
58 : :
59 : : The basic structure is:
60 : : 1. Build a data-dependence graph (DDG) for each loop.
61 : : 2. Use the DDG to order the insns of a loop (not in topological order
62 : : necessarily, but rather) trying to place each insn after all its
63 : : predecessors _or_ after all its successors.
64 : : 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
65 : : 4. Use the ordering to perform list-scheduling of the loop:
66 : : 1. Set II = MII. We will try to schedule the loop within II cycles.
67 : : 2. Try to schedule the insns one by one according to the ordering.
68 : : For each insn compute an interval of cycles by considering already-
69 : : scheduled preds and succs (and associated latencies); try to place
70 : : the insn in the cycles of this window checking for potential
71 : : resource conflicts (using the DFA interface).
72 : : Note: this is different from the cycle-scheduling of schedule_insns;
73 : : here the insns are not scheduled monotonically top-down (nor bottom-
74 : : up).
75 : : 3. If failed in scheduling all insns - bump II++ and try again, unless
76 : : II reaches an upper bound MaxII, in which case report failure.
77 : : 5. If we succeeded in scheduling the loop within II cycles, we now
78 : : generate prolog and epilog, decrease the counter of the loop, and
79 : : perform modulo variable expansion for live ranges that span more than
80 : : II cycles (i.e. use register copies to prevent a def from overwriting
81 : : itself before reaching the use).
82 : :
83 : : SMS works with countable loops (1) whose control part can be easily
84 : : decoupled from the rest of the loop and (2) whose loop count can
85 : : be easily adjusted. This is because we peel a constant number of
86 : : iterations into a prologue and epilogue for which we want to avoid
87 : : emitting the control part, and a kernel which is to iterate that
88 : : constant number of iterations less than the original loop. So the
89 : : control part should be a set of insns clearly identified and having
90 : : its own iv, not otherwise used in the loop (at-least for now), which
91 : : initializes a register before the loop to the number of iterations.
92 : : Currently SMS relies on the do-loop pattern to recognize such loops,
93 : : where (1) the control part comprises of all insns defining and/or
94 : : using a certain 'count' register and (2) the loop count can be
95 : : adjusted by modifying this register prior to the loop.
96 : : TODO: Rely on cfgloop analysis instead. */
97 : :
98 : : /* This page defines partial-schedule structures and functions for
99 : : modulo scheduling. */
100 : :
101 : : typedef struct partial_schedule *partial_schedule_ptr;
102 : : typedef struct ps_insn *ps_insn_ptr;
103 : :
104 : : /* The minimum (absolute) cycle that a node of ps was scheduled in. */
105 : : #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
106 : :
107 : : /* The maximum (absolute) cycle that a node of ps was scheduled in. */
108 : : #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
109 : :
110 : : /* Perform signed modulo, always returning a non-negative value. */
111 : : #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
112 : :
113 : : /* The number of different iterations the nodes in ps span, assuming
114 : : the stage boundaries are placed efficiently. */
115 : : #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
116 : : + 1 + ii - 1) / ii)
117 : : /* The stage count of ps. */
118 : : #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
119 : :
120 : : /* A single instruction in the partial schedule. */
121 : : struct ps_insn
122 : : {
123 : : /* Identifies the instruction to be scheduled. Values smaller than
124 : : the ddg's num_nodes refer directly to ddg nodes. A value of
125 : : X - num_nodes refers to register move X. */
126 : : int id;
127 : :
128 : : /* The (absolute) cycle in which the PS instruction is scheduled.
129 : : Same as SCHED_TIME (node). */
130 : : int cycle;
131 : :
132 : : /* The next/prev PS_INSN in the same row. */
133 : : ps_insn_ptr next_in_row,
134 : : prev_in_row;
135 : :
136 : : };
137 : :
138 : : /* Information about a register move that has been added to a partial
139 : : schedule. */
140 : : struct ps_reg_move_info
141 : : {
142 : : /* The source of the move is defined by the ps_insn with id DEF.
143 : : The destination is used by the ps_insns with the ids in USES. */
144 : : int def;
145 : : sbitmap uses;
146 : :
147 : : /* The original form of USES' instructions used OLD_REG, but they
148 : : should now use NEW_REG. */
149 : : rtx old_reg;
150 : : rtx new_reg;
151 : :
152 : : /* The number of consecutive stages that the move occupies. */
153 : : int num_consecutive_stages;
154 : :
155 : : /* An instruction that sets NEW_REG to the correct value. The first
156 : : move associated with DEF will have an rhs of OLD_REG; later moves
157 : : use the result of the previous move. */
158 : : rtx_insn *insn;
159 : : };
160 : :
161 : : /* Holds the partial schedule as an array of II rows. Each entry of the
162 : : array points to a linked list of PS_INSNs, which represents the
163 : : instructions that are scheduled for that row. */
164 : : struct partial_schedule
165 : : {
166 : : int ii; /* Number of rows in the partial schedule. */
167 : : int history; /* Threshold for conflict checking using DFA. */
168 : :
169 : : /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
170 : : ps_insn_ptr *rows;
171 : :
172 : : /* All the moves added for this partial schedule. Index X has
173 : : a ps_insn id of X + g->num_nodes. */
174 : : vec<ps_reg_move_info> reg_moves;
175 : :
176 : : /* rows_length[i] holds the number of instructions in the row.
177 : : It is used only (as an optimization) to back off quickly from
178 : : trying to schedule a node in a full row; that is, to avoid running
179 : : through futile DFA state transitions. */
180 : : int *rows_length;
181 : :
182 : : /* The earliest absolute cycle of an insn in the partial schedule. */
183 : : int min_cycle;
184 : :
185 : : /* The latest absolute cycle of an insn in the partial schedule. */
186 : : int max_cycle;
187 : :
188 : : ddg_ptr g; /* The DDG of the insns in the partial schedule. */
189 : :
190 : : int stage_count; /* The stage count of the partial schedule. */
191 : : };
192 : :
193 : :
194 : : static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
195 : : static void free_partial_schedule (partial_schedule_ptr);
196 : : static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
197 : : void print_partial_schedule (partial_schedule_ptr, FILE *);
198 : : static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
199 : : static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
200 : : int, int, sbitmap, sbitmap);
201 : : static void rotate_partial_schedule (partial_schedule_ptr, int);
202 : : void set_row_column_for_ps (partial_schedule_ptr);
203 : : static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
204 : : static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
205 : :
206 : :
207 : : /* This page defines constants and structures for the modulo scheduling
208 : : driver. */
209 : :
210 : : static int sms_order_nodes (ddg_ptr, int, int *, int *);
211 : : static void set_node_sched_params (ddg_ptr);
212 : : static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
213 : : static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
214 : : static int calculate_stage_count (partial_schedule_ptr, int);
215 : : static void calculate_must_precede_follow (ddg_node_ptr, int, int,
216 : : int, int, sbitmap, sbitmap, sbitmap);
217 : : static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
218 : : sbitmap, int, int *, int *, int *);
219 : : static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
220 : : sbitmap, int *, sbitmap, sbitmap);
221 : : static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
222 : :
223 : : #define NODE_ASAP(node) ((node)->aux.count)
224 : :
225 : : #define SCHED_PARAMS(x) (&node_sched_param_vec[x])
226 : : #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
227 : : #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
228 : : #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
229 : : #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
230 : :
231 : : /* The scheduling parameters held for each node. */
232 : : typedef struct node_sched_params
233 : : {
234 : : int time; /* The absolute scheduling cycle. */
235 : :
236 : : int row; /* Holds time % ii. */
237 : : int stage; /* Holds time / ii. */
238 : :
239 : : /* The column of a node inside the ps. If nodes u, v are on the same row,
240 : : u will precede v if column (u) < column (v). */
241 : : int column;
242 : : } *node_sched_params_ptr;
243 : :
244 : : /* The following three functions are copied from the current scheduler
245 : : code in order to use sched_analyze() for computing the dependencies.
246 : : They are used when initializing the sched_info structure. */
247 : : static const char *
248 : 0 : sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
249 : : {
250 : 0 : static char tmp[80];
251 : :
252 : 0 : sprintf (tmp, "i%4d", INSN_UID (insn));
253 : 0 : return tmp;
254 : : }
255 : :
256 : : static void
257 : 0 : compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
258 : : regset used ATTRIBUTE_UNUSED)
259 : : {
260 : 0 : }
261 : :
262 : : static struct common_sched_info_def sms_common_sched_info;
263 : :
264 : : static struct sched_deps_info_def sms_sched_deps_info =
265 : : {
266 : : compute_jump_reg_dependencies,
267 : : NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
268 : : NULL,
269 : : 0, 0, 0
270 : : };
271 : :
272 : : static struct haifa_sched_info sms_sched_info =
273 : : {
274 : : NULL,
275 : : NULL,
276 : : NULL,
277 : : NULL,
278 : : NULL,
279 : : sms_print_insn,
280 : : NULL,
281 : : NULL, /* insn_finishes_block_p */
282 : : NULL, NULL,
283 : : NULL, NULL,
284 : : 0, 0,
285 : :
286 : : NULL, NULL, NULL, NULL,
287 : : NULL, NULL,
288 : : 0
289 : : };
290 : :
291 : : /* Partial schedule instruction ID in PS is a register move. Return
292 : : information about it. */
293 : : static struct ps_reg_move_info *
294 : 0 : ps_reg_move (partial_schedule_ptr ps, int id)
295 : : {
296 : 0 : gcc_checking_assert (id >= ps->g->num_nodes);
297 : 0 : return &ps->reg_moves[id - ps->g->num_nodes];
298 : : }
299 : :
300 : : /* Return the rtl instruction that is being scheduled by partial schedule
301 : : instruction ID, which belongs to schedule PS. */
302 : : static rtx_insn *
303 : 0 : ps_rtl_insn (partial_schedule_ptr ps, int id)
304 : : {
305 : 0 : if (id < ps->g->num_nodes)
306 : 0 : return ps->g->nodes[id].insn;
307 : : else
308 : 0 : return ps_reg_move (ps, id)->insn;
309 : : }
310 : :
311 : : /* Partial schedule instruction ID, which belongs to PS, occurred in
312 : : the original (unscheduled) loop. Return the first instruction
313 : : in the loop that was associated with ps_rtl_insn (PS, ID).
314 : : If the instruction had some notes before it, this is the first
315 : : of those notes. */
316 : : static rtx_insn *
317 : 0 : ps_first_note (partial_schedule_ptr ps, int id)
318 : : {
319 : 0 : gcc_assert (id < ps->g->num_nodes);
320 : 0 : return ps->g->nodes[id].first_note;
321 : : }
322 : :
323 : : /* Return the number of consecutive stages that are occupied by
324 : : partial schedule instruction ID in PS. */
325 : : static int
326 : 0 : ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
327 : : {
328 : 0 : if (id < ps->g->num_nodes)
329 : : return 1;
330 : : else
331 : 0 : return ps_reg_move (ps, id)->num_consecutive_stages;
332 : : }
333 : :
334 : : /* Given HEAD and TAIL which are the first and last insns in a loop;
335 : : return the register which controls the loop. Return zero if it has
336 : : more than one occurrence in the loop besides the control part or the
337 : : do-loop pattern is not of the form we expect. */
338 : : static rtx
339 : 79 : doloop_register_get (rtx_insn *head, rtx_insn *tail)
340 : : {
341 : 79 : rtx reg, condition;
342 : 79 : rtx_insn *insn, *first_insn_not_to_check;
343 : :
344 : 79 : if (!JUMP_P (tail))
345 : : return NULL_RTX;
346 : :
347 : 79 : if (!targetm.code_for_doloop_end)
348 : : return NULL_RTX;
349 : :
350 : : /* TODO: Free SMS's dependence on doloop_condition_get. */
351 : 0 : condition = doloop_condition_get (tail);
352 : 0 : if (! condition)
353 : : return NULL_RTX;
354 : :
355 : 0 : if (REG_P (XEXP (condition, 0)))
356 : : reg = XEXP (condition, 0);
357 : 0 : else if (GET_CODE (XEXP (condition, 0)) == PLUS
358 : 0 : && REG_P (XEXP (XEXP (condition, 0), 0)))
359 : : {
360 : 0 : if (CONST_INT_P (XEXP (condition, 1))
361 : 0 : && INTVAL (XEXP (condition, 1)) == -1)
362 : : reg = XEXP (XEXP (condition, 0), 0);
363 : : else
364 : : return NULL_RTX;
365 : : }
366 : : else
367 : 0 : gcc_unreachable ();
368 : :
369 : : /* Check that the COUNT_REG has no other occurrences in the loop
370 : : until the decrement. We assume the control part consists of
371 : : either a single (parallel) branch-on-count or a (non-parallel)
372 : : branch immediately preceded by a single (decrement) insn. */
373 : 0 : first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
374 : 0 : : prev_nondebug_insn (tail));
375 : :
376 : 0 : for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
377 : 0 : if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
378 : : {
379 : 0 : if (dump_file)
380 : : {
381 : 0 : fprintf (dump_file, "SMS count_reg found ");
382 : 0 : print_rtl_single (dump_file, reg);
383 : 0 : fprintf (dump_file, " outside control in insn:\n");
384 : 0 : print_rtl_single (dump_file, insn);
385 : : }
386 : :
387 : 0 : return NULL_RTX;
388 : : }
389 : :
390 : : return reg;
391 : : }
392 : :
393 : : /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
394 : : that the number of iterations is a compile-time constant. If so,
395 : : return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
396 : : this constant. Otherwise return 0. */
397 : : static rtx_insn *
398 : 0 : const_iteration_count (rtx count_reg, basic_block pre_header,
399 : : int64_t *count, bool* adjust_inplace)
400 : : {
401 : 0 : rtx_insn *insn;
402 : 0 : rtx_insn *head, *tail;
403 : :
404 : 0 : *adjust_inplace = false;
405 : 0 : bool read_after = false;
406 : :
407 : 0 : if (! pre_header)
408 : : return NULL;
409 : :
410 : 0 : get_ebb_head_tail (pre_header, pre_header, &head, &tail);
411 : :
412 : 0 : for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
413 : 0 : if (single_set (insn) && rtx_equal_p (count_reg,
414 : 0 : SET_DEST (single_set (insn))))
415 : : {
416 : 0 : rtx pat = single_set (insn);
417 : :
418 : 0 : if (CONST_INT_P (SET_SRC (pat)))
419 : : {
420 : 0 : *count = INTVAL (SET_SRC (pat));
421 : 0 : *adjust_inplace = !read_after;
422 : 0 : return insn;
423 : : }
424 : :
425 : : return NULL;
426 : : }
427 : 0 : else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
428 : : {
429 : 0 : read_after = true;
430 : 0 : if (reg_set_p (count_reg, insn))
431 : : break;
432 : : }
433 : :
434 : : return NULL;
435 : : }
436 : :
437 : : /* A very simple resource-based lower bound on the initiation interval.
438 : : ??? Improve the accuracy of this bound by considering the
439 : : utilization of various units. */
440 : : static int
441 : 0 : res_MII (ddg_ptr g)
442 : : {
443 : 0 : if (targetm.sched.sms_res_mii)
444 : 0 : return targetm.sched.sms_res_mii (g);
445 : :
446 : 0 : return g->num_nodes / issue_rate;
447 : : }
448 : :
449 : :
450 : : /* A vector that contains the sched data for each ps_insn. */
451 : : static vec<node_sched_params> node_sched_param_vec;
452 : :
453 : : /* Allocate sched_params for each node and initialize it. */
454 : : static void
455 : 0 : set_node_sched_params (ddg_ptr g)
456 : : {
457 : 0 : node_sched_param_vec.truncate (0);
458 : 0 : node_sched_param_vec.safe_grow_cleared (g->num_nodes, true);
459 : 0 : }
460 : :
461 : : /* Make sure that node_sched_param_vec has an entry for every move in PS. */
462 : : static void
463 : 0 : extend_node_sched_params (partial_schedule_ptr ps)
464 : : {
465 : 0 : node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
466 : 0 : + ps->reg_moves.length (), true);
467 : 0 : }
468 : :
469 : : /* Update the sched_params (time, row and stage) for node U using the II,
470 : : the CYCLE of U and MIN_CYCLE.
471 : : We're not simply taking the following
472 : : SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
473 : : because the stages may not be aligned on cycle 0. */
474 : : static void
475 : 0 : update_node_sched_params (int u, int ii, int cycle, int min_cycle)
476 : : {
477 : 0 : int sc_until_cycle_zero;
478 : 0 : int stage;
479 : :
480 : 0 : SCHED_TIME (u) = cycle;
481 : 0 : SCHED_ROW (u) = SMODULO (cycle, ii);
482 : :
483 : : /* The calculation of stage count is done adding the number
484 : : of stages before cycle zero and after cycle zero. */
485 : 0 : sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
486 : :
487 : 0 : if (SCHED_TIME (u) < 0)
488 : : {
489 : 0 : stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
490 : 0 : SCHED_STAGE (u) = sc_until_cycle_zero - stage;
491 : : }
492 : : else
493 : : {
494 : 0 : stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
495 : 0 : SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
496 : : }
497 : 0 : }
498 : :
499 : : static void
500 : 0 : print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
501 : : {
502 : 0 : int i;
503 : :
504 : 0 : if (! file)
505 : : return;
506 : 0 : for (i = 0; i < num_nodes; i++)
507 : : {
508 : 0 : node_sched_params_ptr nsp = SCHED_PARAMS (i);
509 : :
510 : 0 : fprintf (file, "Node = %d; INSN = %d\n", i,
511 : 0 : INSN_UID (ps_rtl_insn (ps, i)));
512 : 0 : fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
513 : 0 : fprintf (file, " time = %d:\n", nsp->time);
514 : 0 : fprintf (file, " stage = %d:\n", nsp->stage);
515 : : }
516 : : }
517 : :
518 : : /* Set SCHED_COLUMN for each instruction in row ROW of PS. */
519 : : static void
520 : 0 : set_columns_for_row (partial_schedule_ptr ps, int row)
521 : : {
522 : 0 : ps_insn_ptr cur_insn;
523 : 0 : int column;
524 : :
525 : 0 : column = 0;
526 : 0 : for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
527 : 0 : SCHED_COLUMN (cur_insn->id) = column++;
528 : 0 : }
529 : :
530 : : /* Set SCHED_COLUMN for each instruction in PS. */
531 : : static void
532 : 0 : set_columns_for_ps (partial_schedule_ptr ps)
533 : : {
534 : 0 : int row;
535 : :
536 : 0 : for (row = 0; row < ps->ii; row++)
537 : 0 : set_columns_for_row (ps, row);
538 : 0 : }
539 : :
540 : : /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
541 : : Its single predecessor has already been scheduled, as has its
542 : : ddg node successors. (The move may have also another move as its
543 : : successor, in which case that successor will be scheduled later.)
544 : :
545 : : The move is part of a chain that satisfies register dependencies
546 : : between a producing ddg node and various consuming ddg nodes.
547 : : If some of these dependencies have a distance of 1 (meaning that
548 : : the use is upward-exposed) then DISTANCE1_USES is nonnull and
549 : : contains the set of uses with distance-1 dependencies.
550 : : DISTANCE1_USES is null otherwise.
551 : :
552 : : MUST_FOLLOW is a scratch bitmap that is big enough to hold
553 : : all current ps_insn ids.
554 : :
555 : : Return true on success. */
556 : : static bool
557 : 0 : schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
558 : : sbitmap distance1_uses, sbitmap must_follow)
559 : : {
560 : 0 : unsigned int u;
561 : 0 : int this_time, this_distance, this_start, this_end, this_latency;
562 : 0 : int start, end, c, ii;
563 : 0 : sbitmap_iterator sbi;
564 : 0 : ps_reg_move_info *move;
565 : 0 : rtx_insn *this_insn;
566 : 0 : ps_insn_ptr psi;
567 : :
568 : 0 : move = ps_reg_move (ps, i_reg_move);
569 : 0 : ii = ps->ii;
570 : 0 : if (dump_file)
571 : : {
572 : 0 : fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
573 : 0 : ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
574 : : PS_MIN_CYCLE (ps));
575 : 0 : print_rtl_single (dump_file, move->insn);
576 : 0 : fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
577 : 0 : fprintf (dump_file, "=========== =========== =====\n");
578 : : }
579 : :
580 : 0 : start = INT_MIN;
581 : 0 : end = INT_MAX;
582 : :
583 : : /* For dependencies of distance 1 between a producer ddg node A
584 : : and consumer ddg node B, we have a chain of dependencies:
585 : :
586 : : A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
587 : :
588 : : where Mi is the ith move. For dependencies of distance 0 between
589 : : a producer ddg node A and consumer ddg node C, we have a chain of
590 : : dependencies:
591 : :
592 : : A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
593 : :
594 : : where Mi' occupies the same position as Mi but occurs a stage later.
595 : : We can only schedule each move once, so if we have both types of
596 : : chain, we model the second as:
597 : :
598 : : A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
599 : :
600 : : First handle the dependencies between the previously-scheduled
601 : : predecessor and the move. */
602 : 0 : this_insn = ps_rtl_insn (ps, move->def);
603 : 0 : this_latency = insn_latency (this_insn, move->insn);
604 : 0 : this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
605 : 0 : this_time = SCHED_TIME (move->def) - this_distance * ii;
606 : 0 : this_start = this_time + this_latency;
607 : 0 : this_end = this_time + ii;
608 : 0 : if (dump_file)
609 : 0 : fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
610 : 0 : this_start, this_end, SCHED_TIME (move->def),
611 : 0 : INSN_UID (this_insn), this_latency, this_distance,
612 : 0 : INSN_UID (move->insn));
613 : :
614 : 0 : if (start < this_start)
615 : : start = this_start;
616 : 0 : if (end > this_end)
617 : : end = this_end;
618 : :
619 : : /* Handle the dependencies between the move and previously-scheduled
620 : : successors. */
621 : 0 : EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
622 : : {
623 : 0 : this_insn = ps_rtl_insn (ps, u);
624 : 0 : this_latency = insn_latency (move->insn, this_insn);
625 : 0 : if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
626 : : this_distance = -1;
627 : : else
628 : : this_distance = 0;
629 : 0 : this_time = SCHED_TIME (u) + this_distance * ii;
630 : 0 : this_start = this_time - ii;
631 : 0 : this_end = this_time - this_latency;
632 : 0 : if (dump_file)
633 : 0 : fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
634 : 0 : this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
635 : 0 : this_latency, this_distance, INSN_UID (this_insn));
636 : :
637 : 0 : if (start < this_start)
638 : : start = this_start;
639 : 0 : if (end > this_end)
640 : : end = this_end;
641 : : }
642 : :
643 : 0 : if (dump_file)
644 : : {
645 : 0 : fprintf (dump_file, "----------- ----------- -----\n");
646 : 0 : fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
647 : : }
648 : :
649 : 0 : bitmap_clear (must_follow);
650 : 0 : bitmap_set_bit (must_follow, move->def);
651 : :
652 : 0 : start = MAX (start, end - (ii - 1));
653 : 0 : for (c = end; c >= start; c--)
654 : : {
655 : 0 : psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
656 : : move->uses, must_follow);
657 : 0 : if (psi)
658 : : {
659 : 0 : update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
660 : 0 : if (dump_file)
661 : 0 : fprintf (dump_file, "\nScheduled register move INSN %d at"
662 : 0 : " time %d, row %d\n\n", INSN_UID (move->insn), c,
663 : 0 : SCHED_ROW (i_reg_move));
664 : 0 : return true;
665 : : }
666 : : }
667 : :
668 : 0 : if (dump_file)
669 : 0 : fprintf (dump_file, "\nNo available slot\n\n");
670 : :
671 : : return false;
672 : : }
673 : :
674 : : /*
675 : : Breaking intra-loop register anti-dependences:
676 : : Each intra-loop register anti-dependence implies a cross-iteration true
677 : : dependence of distance 1. Therefore, we can remove such false dependencies
678 : : and figure out if the partial schedule broke them by checking if (for a
679 : : true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
680 : : if so generate a register move. The number of such moves is equal to:
681 : : SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
682 : : nreg_moves = ----------------------------------- + 1 - { dependence.
683 : : ii { 1 if not.
684 : : */
685 : : static bool
686 : 0 : schedule_reg_moves (partial_schedule_ptr ps)
687 : : {
688 : 0 : ddg_ptr g = ps->g;
689 : 0 : int ii = ps->ii;
690 : 0 : int i;
691 : :
692 : 0 : for (i = 0; i < g->num_nodes; i++)
693 : : {
694 : 0 : ddg_node_ptr u = &g->nodes[i];
695 : 0 : ddg_edge_ptr e;
696 : 0 : int nreg_moves = 0, i_reg_move;
697 : 0 : rtx prev_reg, old_reg;
698 : 0 : int first_move;
699 : 0 : int distances[2];
700 : 0 : sbitmap distance1_uses;
701 : 0 : rtx set = single_set (u->insn);
702 : :
703 : : /* Skip instructions that do not set a register. */
704 : 0 : if (set && !REG_P (SET_DEST (set)))
705 : 0 : continue;
706 : :
707 : : /* Compute the number of reg_moves needed for u, by looking at life
708 : : ranges started at u (excluding self-loops). */
709 : 0 : distances[0] = distances[1] = false;
710 : 0 : for (e = u->out; e; e = e->next_out)
711 : 0 : if (e->type == TRUE_DEP && e->dest != e->src)
712 : : {
713 : 0 : int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
714 : 0 : - SCHED_TIME (e->src->cuid)) / ii;
715 : :
716 : 0 : if (e->distance == 1)
717 : 0 : nreg_moves4e = (SCHED_TIME (e->dest->cuid)
718 : 0 : - SCHED_TIME (e->src->cuid) + ii) / ii;
719 : :
720 : : /* If dest precedes src in the schedule of the kernel, then dest
721 : : will read before src writes and we can save one reg_copy. */
722 : 0 : if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
723 : 0 : && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
724 : 0 : nreg_moves4e--;
725 : :
726 : 0 : if (nreg_moves4e >= 1)
727 : : {
728 : : /* !single_set instructions are not supported yet and
729 : : thus we do not except to encounter them in the loop
730 : : except from the doloop part. For the latter case
731 : : we assume no regmoves are generated as the doloop
732 : : instructions are tied to the branch with an edge. */
733 : 0 : gcc_assert (set);
734 : : /* If the instruction contains auto-inc register then
735 : : validate that the regmov is being generated for the
736 : : target regsiter rather then the inc'ed register. */
737 : 0 : gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
738 : : }
739 : :
740 : 0 : if (nreg_moves4e)
741 : : {
742 : 0 : gcc_assert (e->distance < 2);
743 : 0 : distances[e->distance] = true;
744 : : }
745 : 0 : nreg_moves = MAX (nreg_moves, nreg_moves4e);
746 : : }
747 : :
748 : 0 : if (nreg_moves == 0)
749 : 0 : continue;
750 : :
751 : : /* Create NREG_MOVES register moves. */
752 : 0 : first_move = ps->reg_moves.length ();
753 : 0 : ps->reg_moves.safe_grow_cleared (first_move + nreg_moves, true);
754 : 0 : extend_node_sched_params (ps);
755 : :
756 : : /* Record the moves associated with this node. */
757 : 0 : first_move += ps->g->num_nodes;
758 : :
759 : : /* Generate each move. */
760 : 0 : old_reg = prev_reg = SET_DEST (set);
761 : 0 : if (HARD_REGISTER_P (old_reg))
762 : 0 : return false;
763 : :
764 : 0 : for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
765 : : {
766 : 0 : ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
767 : :
768 : 0 : move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
769 : 0 : move->uses = sbitmap_alloc (first_move + nreg_moves);
770 : 0 : move->old_reg = old_reg;
771 : 0 : move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
772 : 0 : move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
773 : 0 : move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
774 : 0 : bitmap_clear (move->uses);
775 : :
776 : 0 : prev_reg = move->new_reg;
777 : : }
778 : :
779 : 0 : distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
780 : :
781 : 0 : if (distance1_uses)
782 : 0 : bitmap_clear (distance1_uses);
783 : :
784 : : /* Every use of the register defined by node may require a different
785 : : copy of this register, depending on the time the use is scheduled.
786 : : Record which uses require which move results. */
787 : 0 : for (e = u->out; e; e = e->next_out)
788 : 0 : if (e->type == TRUE_DEP && e->dest != e->src)
789 : : {
790 : 0 : int dest_copy = (SCHED_TIME (e->dest->cuid)
791 : 0 : - SCHED_TIME (e->src->cuid)) / ii;
792 : :
793 : 0 : if (e->distance == 1)
794 : 0 : dest_copy = (SCHED_TIME (e->dest->cuid)
795 : 0 : - SCHED_TIME (e->src->cuid) + ii) / ii;
796 : :
797 : 0 : if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
798 : 0 : && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
799 : 0 : dest_copy--;
800 : :
801 : 0 : if (dest_copy)
802 : : {
803 : 0 : ps_reg_move_info *move;
804 : :
805 : 0 : move = ps_reg_move (ps, first_move + dest_copy - 1);
806 : 0 : bitmap_set_bit (move->uses, e->dest->cuid);
807 : 0 : if (e->distance == 1)
808 : 0 : bitmap_set_bit (distance1_uses, e->dest->cuid);
809 : : }
810 : : }
811 : :
812 : 0 : auto_sbitmap must_follow (first_move + nreg_moves);
813 : 0 : for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
814 : 0 : if (!schedule_reg_move (ps, first_move + i_reg_move,
815 : : distance1_uses, must_follow))
816 : : break;
817 : 0 : if (distance1_uses)
818 : 0 : sbitmap_free (distance1_uses);
819 : 0 : if (i_reg_move < nreg_moves)
820 : 0 : return false;
821 : 0 : }
822 : : return true;
823 : : }
824 : :
825 : : /* Emit the moves associated with PS. Apply the substitutions
826 : : associated with them. */
827 : : static void
828 : 0 : apply_reg_moves (partial_schedule_ptr ps)
829 : : {
830 : 0 : ps_reg_move_info *move;
831 : 0 : int i;
832 : :
833 : 0 : FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
834 : : {
835 : 0 : unsigned int i_use;
836 : 0 : sbitmap_iterator sbi;
837 : :
838 : 0 : EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
839 : : {
840 : 0 : replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
841 : 0 : df_insn_rescan (ps->g->nodes[i_use].insn);
842 : : }
843 : : }
844 : 0 : }
845 : :
846 : : /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
847 : : SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT
848 : : will move to cycle zero. */
849 : : static void
850 : 0 : reset_sched_times (partial_schedule_ptr ps, int amount)
851 : : {
852 : 0 : int row;
853 : 0 : int ii = ps->ii;
854 : 0 : ps_insn_ptr crr_insn;
855 : :
856 : 0 : for (row = 0; row < ii; row++)
857 : 0 : for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
858 : : {
859 : 0 : int u = crr_insn->id;
860 : 0 : int normalized_time = SCHED_TIME (u) - amount;
861 : 0 : int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
862 : :
863 : 0 : if (dump_file)
864 : : {
865 : : /* Print the scheduling times after the rotation. */
866 : 0 : rtx_insn *insn = ps_rtl_insn (ps, u);
867 : :
868 : 0 : fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
869 : : "crr_insn->cycle=%d, min_cycle=%d", u,
870 : 0 : INSN_UID (insn), normalized_time, new_min_cycle);
871 : 0 : if (JUMP_P (insn))
872 : 0 : fprintf (dump_file, " (branch)");
873 : 0 : fprintf (dump_file, "\n");
874 : : }
875 : :
876 : 0 : gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
877 : 0 : gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
878 : :
879 : 0 : crr_insn->cycle = normalized_time;
880 : 0 : update_node_sched_params (u, ii, normalized_time, new_min_cycle);
881 : : }
882 : 0 : }
883 : :
884 : : /* Permute the insns according to their order in PS, from row 0 to
885 : : row ii-1, and position them right before LAST. This schedules
886 : : the insns of the loop kernel. */
887 : : static void
888 : 0 : permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
889 : : {
890 : 0 : int ii = ps->ii;
891 : 0 : int row;
892 : 0 : ps_insn_ptr ps_ij;
893 : :
894 : 0 : for (row = 0; row < ii ; row++)
895 : 0 : for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
896 : : {
897 : 0 : rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
898 : :
899 : 0 : if (PREV_INSN (last) != insn)
900 : : {
901 : 0 : if (ps_ij->id < ps->g->num_nodes)
902 : 0 : reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
903 : : PREV_INSN (last));
904 : : else
905 : 0 : add_insn_before (insn, last, NULL);
906 : : }
907 : : }
908 : 0 : }
909 : :
910 : : /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
911 : : respectively only if cycle C falls on the border of the scheduling
912 : : window boundaries marked by START and END cycles. STEP is the
913 : : direction of the window. */
914 : : static inline void
915 : 0 : set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
916 : : sbitmap *tmp_precede, sbitmap must_precede, int c,
917 : : int start, int end, int step)
918 : : {
919 : 0 : *tmp_precede = NULL;
920 : 0 : *tmp_follow = NULL;
921 : :
922 : 0 : if (c == start)
923 : : {
924 : 0 : if (step == 1)
925 : : *tmp_precede = must_precede;
926 : : else /* step == -1. */
927 : 0 : *tmp_follow = must_follow;
928 : : }
929 : 0 : if (c == end - step)
930 : : {
931 : 0 : if (step == 1)
932 : : *tmp_follow = must_follow;
933 : : else /* step == -1. */
934 : 0 : *tmp_precede = must_precede;
935 : : }
936 : :
937 : : }
938 : :
939 : : /* Return True if the branch can be moved to row ii-1 while
940 : : normalizing the partial schedule PS to start from cycle zero and thus
941 : : optimize the SC. Otherwise return False. */
942 : : static bool
943 : 0 : optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
944 : : {
945 : 0 : int amount = PS_MIN_CYCLE (ps);
946 : 0 : int start, end, step;
947 : 0 : int ii = ps->ii;
948 : 0 : bool ok = false;
949 : 0 : int stage_count, stage_count_curr;
950 : :
951 : : /* Compare the SC after normalization and SC after bringing the branch
952 : : to row ii-1. If they are equal just bail out. */
953 : 0 : stage_count = calculate_stage_count (ps, amount);
954 : 0 : stage_count_curr =
955 : 0 : calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
956 : :
957 : 0 : if (stage_count == stage_count_curr)
958 : : {
959 : 0 : if (dump_file)
960 : 0 : fprintf (dump_file, "SMS SC already optimized.\n");
961 : :
962 : 0 : return false;
963 : : }
964 : :
965 : 0 : if (dump_file)
966 : : {
967 : 0 : fprintf (dump_file, "SMS Trying to optimize branch location\n");
968 : 0 : fprintf (dump_file, "SMS partial schedule before trial:\n");
969 : 0 : print_partial_schedule (ps, dump_file);
970 : : }
971 : :
972 : : /* First, normalize the partial scheduling. */
973 : 0 : reset_sched_times (ps, amount);
974 : 0 : rotate_partial_schedule (ps, amount);
975 : 0 : if (dump_file)
976 : : {
977 : 0 : fprintf (dump_file,
978 : : "SMS partial schedule after normalization (ii, %d, SC %d):\n",
979 : : ii, stage_count);
980 : 0 : print_partial_schedule (ps, dump_file);
981 : : }
982 : :
983 : 0 : if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
984 : : return true;
985 : :
986 : 0 : auto_sbitmap sched_nodes (g->num_nodes);
987 : 0 : bitmap_ones (sched_nodes);
988 : :
989 : : /* Calculate the new placement of the branch. It should be in row
990 : : ii-1 and fall into it's scheduling window. */
991 : 0 : if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
992 : : &step, &end) == 0)
993 : : {
994 : 0 : bool success;
995 : 0 : ps_insn_ptr next_ps_i;
996 : 0 : int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
997 : 0 : int row = SMODULO (branch_cycle, ps->ii);
998 : 0 : int num_splits = 0;
999 : 0 : sbitmap tmp_precede, tmp_follow;
1000 : 0 : int min_cycle, c;
1001 : :
1002 : 0 : if (dump_file)
1003 : 0 : fprintf (dump_file, "\nTrying to schedule node %d "
1004 : : "INSN = %d in (%d .. %d) step %d\n",
1005 : : g->closing_branch->cuid,
1006 : 0 : (INSN_UID (g->closing_branch->insn)), start, end, step);
1007 : :
1008 : 0 : gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
1009 : 0 : if (step == 1)
1010 : : {
1011 : 0 : c = start + ii - SMODULO (start, ii) - 1;
1012 : 0 : gcc_assert (c >= start);
1013 : 0 : if (c >= end)
1014 : : {
1015 : 0 : if (dump_file)
1016 : 0 : fprintf (dump_file,
1017 : : "SMS failed to schedule branch at cycle: %d\n", c);
1018 : 0 : return false;
1019 : : }
1020 : : }
1021 : : else
1022 : : {
1023 : 0 : c = start - SMODULO (start, ii) - 1;
1024 : 0 : gcc_assert (c <= start);
1025 : :
1026 : 0 : if (c <= end)
1027 : : {
1028 : 0 : if (dump_file)
1029 : 0 : fprintf (dump_file,
1030 : : "SMS failed to schedule branch at cycle: %d\n", c);
1031 : 0 : return false;
1032 : : }
1033 : : }
1034 : :
1035 : 0 : auto_sbitmap must_precede (g->num_nodes);
1036 : 0 : auto_sbitmap must_follow (g->num_nodes);
1037 : :
1038 : : /* Try to schedule the branch is it's new cycle. */
1039 : 0 : calculate_must_precede_follow (g->closing_branch, start, end,
1040 : : step, ii, sched_nodes,
1041 : : must_precede, must_follow);
1042 : :
1043 : 0 : set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1044 : : must_precede, c, start, end, step);
1045 : :
1046 : : /* Find the element in the partial schedule related to the closing
1047 : : branch so we can remove it from it's current cycle. */
1048 : 0 : for (next_ps_i = ps->rows[row];
1049 : 0 : next_ps_i; next_ps_i = next_ps_i->next_in_row)
1050 : 0 : if (next_ps_i->id == g->closing_branch->cuid)
1051 : : break;
1052 : :
1053 : 0 : min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1054 : 0 : remove_node_from_ps (ps, next_ps_i);
1055 : 0 : success =
1056 : 0 : try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1057 : : sched_nodes, &num_splits,
1058 : : tmp_precede, tmp_follow);
1059 : 0 : gcc_assert (num_splits == 0);
1060 : 0 : if (!success)
1061 : : {
1062 : 0 : if (dump_file)
1063 : 0 : fprintf (dump_file,
1064 : : "SMS failed to schedule branch at cycle: %d, "
1065 : : "bringing it back to cycle %d\n", c, branch_cycle);
1066 : :
1067 : : /* The branch was failed to be placed in row ii - 1.
1068 : : Put it back in it's original place in the partial
1069 : : schedualing. */
1070 : 0 : set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1071 : : must_precede, branch_cycle, start, end,
1072 : : step);
1073 : 0 : success =
1074 : 0 : try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1075 : : branch_cycle, sched_nodes,
1076 : : &num_splits, tmp_precede,
1077 : : tmp_follow);
1078 : 0 : gcc_assert (success && (num_splits == 0));
1079 : : ok = false;
1080 : : }
1081 : : else
1082 : : {
1083 : : /* The branch is placed in row ii - 1. */
1084 : 0 : if (dump_file)
1085 : 0 : fprintf (dump_file,
1086 : : "SMS success in moving branch to cycle %d\n", c);
1087 : :
1088 : 0 : update_node_sched_params (g->closing_branch->cuid, ii, c,
1089 : : PS_MIN_CYCLE (ps));
1090 : 0 : ok = true;
1091 : : }
1092 : :
1093 : : /* This might have been added to a new first stage. */
1094 : 0 : if (PS_MIN_CYCLE (ps) < min_cycle)
1095 : 0 : reset_sched_times (ps, 0);
1096 : 0 : }
1097 : :
1098 : : return ok;
1099 : 0 : }
1100 : :
1101 : : static void
1102 : 0 : duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1103 : : int to_stage, rtx count_reg, class loop *loop)
1104 : : {
1105 : 0 : int row;
1106 : 0 : ps_insn_ptr ps_ij;
1107 : 0 : copy_bb_data id;
1108 : :
1109 : 0 : for (row = 0; row < ps->ii; row++)
1110 : 0 : for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1111 : : {
1112 : 0 : int u = ps_ij->id;
1113 : 0 : int first_u, last_u;
1114 : 0 : rtx_insn *u_insn;
1115 : :
1116 : : /* Do not duplicate any insn which refers to count_reg as it
1117 : : belongs to the control part.
1118 : : The closing branch is scheduled as well and thus should
1119 : : be ignored.
1120 : : TODO: This should be done by analyzing the control part of
1121 : : the loop. */
1122 : 0 : u_insn = ps_rtl_insn (ps, u);
1123 : 0 : if (reg_mentioned_p (count_reg, u_insn)
1124 : 0 : || JUMP_P (u_insn))
1125 : 0 : continue;
1126 : :
1127 : 0 : first_u = SCHED_STAGE (u);
1128 : 0 : last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1129 : 0 : if (from_stage <= last_u && to_stage >= first_u)
1130 : : {
1131 : 0 : if (u < ps->g->num_nodes)
1132 : 0 : duplicate_insn_chain (ps_first_note (ps, u), u_insn,
1133 : : loop, &id);
1134 : : else
1135 : 0 : emit_insn (copy_rtx (PATTERN (u_insn)));
1136 : : }
1137 : : }
1138 : 0 : }
1139 : :
1140 : :
1141 : : /* Generate the instructions (including reg_moves) for prolog & epilog. */
1142 : : static void
1143 : 0 : generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
1144 : : rtx count_reg, bool adjust_init)
1145 : : {
1146 : 0 : int i;
1147 : 0 : int last_stage = PS_STAGE_COUNT (ps) - 1;
1148 : 0 : edge e;
1149 : :
1150 : : /* Generate the prolog, inserting its insns on the loop-entry edge. */
1151 : 0 : start_sequence ();
1152 : :
1153 : 0 : if (adjust_init)
1154 : : {
1155 : : /* Generate instructions at the beginning of the prolog to
1156 : : adjust the loop count by STAGE_COUNT. If loop count is constant
1157 : : and it not used anywhere in prologue, this constant is adjusted by
1158 : : STAGE_COUNT outside of generate_prolog_epilog function. */
1159 : 0 : rtx sub_reg = NULL_RTX;
1160 : :
1161 : 0 : sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
1162 : 0 : gen_int_mode (last_stage,
1163 : 0 : GET_MODE (count_reg)),
1164 : : count_reg, 1, OPTAB_DIRECT);
1165 : 0 : gcc_assert (REG_P (sub_reg));
1166 : 0 : if (REGNO (sub_reg) != REGNO (count_reg))
1167 : 0 : emit_move_insn (count_reg, sub_reg);
1168 : : }
1169 : :
1170 : 0 : for (i = 0; i < last_stage; i++)
1171 : 0 : duplicate_insns_of_cycles (ps, 0, i, count_reg, loop);
1172 : :
1173 : : /* Put the prolog on the entry edge. */
1174 : 0 : e = loop_preheader_edge (loop);
1175 : 0 : split_edge_and_insert (e, get_insns ());
1176 : 0 : if (!flag_resched_modulo_sched)
1177 : 0 : e->dest->flags |= BB_DISABLE_SCHEDULE;
1178 : :
1179 : 0 : end_sequence ();
1180 : :
1181 : : /* Generate the epilog, inserting its insns on the loop-exit edge. */
1182 : 0 : start_sequence ();
1183 : :
1184 : 0 : for (i = 0; i < last_stage; i++)
1185 : 0 : duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg, loop);
1186 : :
1187 : : /* Put the epilogue on the exit edge. */
1188 : 0 : gcc_assert (single_exit (loop));
1189 : 0 : e = single_exit (loop);
1190 : 0 : split_edge_and_insert (e, get_insns ());
1191 : 0 : if (!flag_resched_modulo_sched)
1192 : 0 : e->dest->flags |= BB_DISABLE_SCHEDULE;
1193 : :
1194 : 0 : end_sequence ();
1195 : 0 : }
1196 : :
1197 : : /* Mark LOOP as software pipelined so the later
1198 : : scheduling passes don't touch it. */
1199 : : static void
1200 : 0 : mark_loop_unsched (class loop *loop)
1201 : : {
1202 : 0 : unsigned i;
1203 : 0 : basic_block *bbs = get_loop_body (loop);
1204 : :
1205 : 0 : for (i = 0; i < loop->num_nodes; i++)
1206 : 0 : bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1207 : :
1208 : 0 : free (bbs);
1209 : 0 : }
1210 : :
1211 : : /* Return true if all the BBs of the loop are empty except the
1212 : : loop header. */
1213 : : static bool
1214 : 182 : loop_single_full_bb_p (class loop *loop)
1215 : : {
1216 : 182 : unsigned i;
1217 : 182 : basic_block *bbs = get_loop_body (loop);
1218 : :
1219 : 364 : for (i = 0; i < loop->num_nodes ; i++)
1220 : : {
1221 : 206 : rtx_insn *head, *tail;
1222 : 206 : bool empty_bb = true;
1223 : :
1224 : 206 : if (bbs[i] == loop->header)
1225 : 182 : continue;
1226 : :
1227 : : /* Make sure that basic blocks other than the header
1228 : : have only notes labels or jumps. */
1229 : 24 : get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1230 : 54 : for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1231 : : {
1232 : 30 : if (NOTE_P (head) || LABEL_P (head)
1233 : 30 : || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1234 : 6 : continue;
1235 : : empty_bb = false;
1236 : : break;
1237 : : }
1238 : :
1239 : 24 : if (! empty_bb)
1240 : : {
1241 : 24 : free (bbs);
1242 : 24 : return false;
1243 : : }
1244 : : }
1245 : 158 : free (bbs);
1246 : 158 : return true;
1247 : : }
1248 : :
1249 : : /* Dump file:line from INSN's location info to dump_file. */
1250 : :
1251 : : static void
1252 : 22 : dump_insn_location (rtx_insn *insn)
1253 : : {
1254 : 22 : if (dump_file && INSN_HAS_LOCATION (insn))
1255 : : {
1256 : 16 : expanded_location xloc = insn_location (insn);
1257 : 16 : fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
1258 : : }
1259 : 22 : }
1260 : :
1261 : : /* A simple loop from SMS point of view; it is a loop that is composed of
1262 : : either a single basic block or two BBs - a header and a latch. */
1263 : : #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
1264 : : && (EDGE_COUNT (loop->latch->preds) == 1) \
1265 : : && (EDGE_COUNT (loop->latch->succs) == 1))
1266 : :
1267 : : /* Return true if the loop is in its canonical form and false if not.
1268 : : i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
1269 : : static bool
1270 : 127 : loop_canon_p (class loop *loop)
1271 : : {
1272 : :
1273 : 127 : if (loop->inner || !loop_outer (loop))
1274 : : {
1275 : 16 : if (dump_file)
1276 : 3 : fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1277 : 16 : return false;
1278 : : }
1279 : :
1280 : 111 : if (!single_exit (loop))
1281 : : {
1282 : 8 : if (dump_file)
1283 : : {
1284 : 3 : rtx_insn *insn = BB_END (loop->header);
1285 : :
1286 : 3 : fprintf (dump_file, "SMS loop many exits");
1287 : 3 : dump_insn_location (insn);
1288 : 3 : fprintf (dump_file, "\n");
1289 : : }
1290 : 8 : return false;
1291 : : }
1292 : :
1293 : 103 : if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1294 : : {
1295 : 1 : if (dump_file)
1296 : : {
1297 : 0 : rtx_insn *insn = BB_END (loop->header);
1298 : :
1299 : 0 : fprintf (dump_file, "SMS loop many BBs.");
1300 : 0 : dump_insn_location (insn);
1301 : 0 : fprintf (dump_file, "\n");
1302 : : }
1303 : 1 : return false;
1304 : : }
1305 : :
1306 : : return true;
1307 : : }
1308 : :
1309 : : /* If there are more than one entry for the loop,
1310 : : make it one by splitting the first entry edge and
1311 : : redirecting the others to the new BB. */
1312 : : static void
1313 : 0 : canon_loop (class loop *loop)
1314 : : {
1315 : 0 : edge e;
1316 : 0 : edge_iterator i;
1317 : :
1318 : : /* Avoid annoying special cases of edges going to exit
1319 : : block. */
1320 : 0 : FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1321 : 0 : if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1322 : 0 : split_edge (e);
1323 : :
1324 : 0 : if (loop->latch == loop->header
1325 : 0 : || EDGE_COUNT (loop->latch->succs) > 1)
1326 : : {
1327 : 0 : FOR_EACH_EDGE (e, i, loop->header->preds)
1328 : 0 : if (e->src == loop->latch)
1329 : : break;
1330 : 0 : split_edge (e);
1331 : : }
1332 : 0 : }
1333 : :
1334 : : /* Setup infos. */
1335 : : static void
1336 : 94 : setup_sched_infos (void)
1337 : : {
1338 : 94 : memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1339 : : sizeof (sms_common_sched_info));
1340 : 94 : sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1341 : 94 : common_sched_info = &sms_common_sched_info;
1342 : :
1343 : 94 : sched_deps_info = &sms_sched_deps_info;
1344 : 94 : current_sched_info = &sms_sched_info;
1345 : 94 : }
1346 : :
1347 : : /* Probability in % that the sms-ed loop rolls enough so that optimized
1348 : : version may be entered. Just a guess. */
1349 : : #define PROB_SMS_ENOUGH_ITERATIONS 80
1350 : :
1351 : : /* Main entry point, perform SMS scheduling on the loops of the function
1352 : : that consist of single basic blocks. */
1353 : : static void
1354 : 141 : sms_schedule (void)
1355 : : {
1356 : 141 : rtx_insn *insn;
1357 : 141 : ddg_ptr *g_arr, g;
1358 : 141 : int * node_order;
1359 : 141 : int maxii, max_asap;
1360 : 141 : partial_schedule_ptr ps;
1361 : 141 : basic_block bb = NULL;
1362 : 141 : basic_block condition_bb = NULL;
1363 : 141 : edge latch_edge;
1364 : 141 : HOST_WIDE_INT trip_count, max_trip_count;
1365 : 141 : HARD_REG_SET prohibited_regs;
1366 : :
1367 : 141 : loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1368 : : | LOOPS_HAVE_RECORDED_EXITS);
1369 : 282 : if (number_of_loops (cfun) <= 1)
1370 : : {
1371 : 47 : loop_optimizer_finalize ();
1372 : 47 : return; /* There are no loops to schedule. */
1373 : : }
1374 : :
1375 : : /* Initialize issue_rate. */
1376 : 94 : if (targetm.sched.issue_rate)
1377 : : {
1378 : 94 : int temp = reload_completed;
1379 : :
1380 : 94 : reload_completed = 1;
1381 : 94 : issue_rate = targetm.sched.issue_rate ();
1382 : 94 : reload_completed = temp;
1383 : : }
1384 : : else
1385 : 0 : issue_rate = 1;
1386 : :
1387 : : /* Initialize the scheduler. */
1388 : 94 : setup_sched_infos ();
1389 : 94 : haifa_sched_init ();
1390 : :
1391 : : /* Allocate memory to hold the DDG array one entry for each loop.
1392 : : We use loop->num as index into this array. */
1393 : 94 : g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
1394 : :
1395 : 188 : REG_SET_TO_HARD_REG_SET (prohibited_regs, &df->regular_block_artificial_uses);
1396 : :
1397 : 94 : if (dump_file)
1398 : : {
1399 : 12 : fprintf (dump_file, "\n\nSMS analysis phase\n");
1400 : 12 : fprintf (dump_file, "===================\n\n");
1401 : : }
1402 : :
1403 : : /* Build DDGs for all the relevant loops and hold them in G_ARR
1404 : : indexed by the loop index. */
1405 : 409 : for (auto loop : loops_list (cfun, 0))
1406 : : {
1407 : 127 : rtx_insn *head, *tail;
1408 : 127 : rtx count_reg;
1409 : :
1410 : : /* For debugging. */
1411 : 127 : if (dbg_cnt (sms_sched_loop) == false)
1412 : : {
1413 : 0 : if (dump_file)
1414 : 0 : fprintf (dump_file, "SMS reached max limit... \n");
1415 : :
1416 : 0 : break;
1417 : : }
1418 : :
1419 : 127 : if (dump_file)
1420 : : {
1421 : 19 : rtx_insn *insn = BB_END (loop->header);
1422 : :
1423 : 19 : fprintf (dump_file, "SMS loop num: %d", loop->num);
1424 : 19 : dump_insn_location (insn);
1425 : 19 : fprintf (dump_file, "\n");
1426 : : }
1427 : :
1428 : 127 : if (! loop_canon_p (loop))
1429 : 152 : continue;
1430 : :
1431 : 102 : if (! loop_single_full_bb_p (loop))
1432 : : {
1433 : 23 : if (dump_file)
1434 : 0 : fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1435 : 23 : continue;
1436 : : }
1437 : :
1438 : 79 : bb = loop->header;
1439 : :
1440 : 79 : get_ebb_head_tail (bb, bb, &head, &tail);
1441 : 79 : latch_edge = loop_latch_edge (loop);
1442 : 79 : gcc_assert (single_exit (loop));
1443 : 79 : trip_count = get_estimated_loop_iterations_int (loop);
1444 : 79 : max_trip_count = get_max_loop_iterations_int (loop);
1445 : :
1446 : : /* Perform SMS only on loops that their average count is above threshold. */
1447 : :
1448 : 79 : if (latch_edge->count () > profile_count::zero ()
1449 : 158 : && (latch_edge->count ()
1450 : 158 : < (single_exit (loop)->count ()
1451 : 158 : * param_sms_loop_average_count_threshold)))
1452 : : {
1453 : 0 : if (dump_file)
1454 : : {
1455 : 0 : dump_insn_location (tail);
1456 : 0 : fprintf (dump_file, "\nSMS single-bb-loop\n");
1457 : 0 : if (profile_info && flag_branch_probabilities)
1458 : : {
1459 : 0 : fprintf (dump_file, "SMS loop-count ");
1460 : 0 : fprintf (dump_file, "%" PRId64,
1461 : 0 : (int64_t) bb->count.to_gcov_type ());
1462 : 0 : fprintf (dump_file, "\n");
1463 : 0 : fprintf (dump_file, "SMS trip-count ");
1464 : 0 : fprintf (dump_file, "%" PRId64 "max %" PRId64,
1465 : : (int64_t) trip_count, (int64_t) max_trip_count);
1466 : 0 : fprintf (dump_file, "\n");
1467 : : }
1468 : : }
1469 : 0 : continue;
1470 : : }
1471 : :
1472 : : /* Make sure this is a doloop. */
1473 : 79 : if (!(count_reg = doloop_register_get (head, tail)))
1474 : : {
1475 : 79 : if (dump_file)
1476 : 13 : fprintf (dump_file, "SMS doloop_register_get failed\n");
1477 : 79 : continue;
1478 : : }
1479 : :
1480 : : /* Don't handle BBs with calls or barriers
1481 : : or !single_set with the exception of do-loop control part insns.
1482 : : ??? Should handle insns defining subregs. */
1483 : 0 : for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1484 : : {
1485 : 0 : if (INSN_P (insn))
1486 : : {
1487 : : HARD_REG_SET regs;
1488 : 0 : CLEAR_HARD_REG_SET (regs);
1489 : 0 : note_stores (insn, record_hard_reg_sets, ®s);
1490 : 0 : if (hard_reg_set_intersect_p (regs, prohibited_regs))
1491 : : break;
1492 : : }
1493 : :
1494 : 0 : if (CALL_P (insn)
1495 : 0 : || BARRIER_P (insn)
1496 : 0 : || (INSN_P (insn) && single_set (insn)
1497 : 0 : && GET_CODE (SET_DEST (single_set (insn))) == SUBREG)
1498 : : /* Not a single set. */
1499 : 0 : || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1500 : 0 : && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1501 : : /* But non-single-set allowed in one special case. */
1502 : 0 : && (insn != prev_nondebug_insn (tail)
1503 : 0 : || !reg_mentioned_p (count_reg, insn))))
1504 : : break;
1505 : : }
1506 : :
1507 : 0 : if (insn != NEXT_INSN (tail))
1508 : : {
1509 : 0 : if (dump_file)
1510 : : {
1511 : 0 : if (CALL_P (insn))
1512 : 0 : fprintf (dump_file, "SMS loop-with-call\n");
1513 : 0 : else if (BARRIER_P (insn))
1514 : 0 : fprintf (dump_file, "SMS loop-with-barrier\n");
1515 : 0 : else if (INSN_P (insn) && single_set (insn)
1516 : 0 : && GET_CODE (SET_DEST (single_set (insn))) == SUBREG)
1517 : 0 : fprintf (dump_file, "SMS loop with subreg in lhs\n");
1518 : : else
1519 : 0 : fprintf (dump_file,
1520 : : "SMS loop-with-not-single-set-or-prohibited-reg\n");
1521 : :
1522 : 0 : print_rtl_single (dump_file, insn);
1523 : : }
1524 : :
1525 : 0 : continue;
1526 : : }
1527 : :
1528 : : /* Always schedule the closing branch with the rest of the
1529 : : instructions. The branch is rotated to be in row ii-1 at the
1530 : : end of the scheduling procedure to make sure it's the last
1531 : : instruction in the iteration. */
1532 : 0 : if (! (g = create_ddg (bb, 1)))
1533 : : {
1534 : 0 : if (dump_file)
1535 : 0 : fprintf (dump_file, "SMS create_ddg failed\n");
1536 : 0 : continue;
1537 : : }
1538 : :
1539 : 0 : g_arr[loop->num] = g;
1540 : 0 : if (dump_file)
1541 : 0 : fprintf (dump_file, "...OK\n");
1542 : :
1543 : 94 : }
1544 : 94 : if (dump_file)
1545 : : {
1546 : 12 : fprintf (dump_file, "\nSMS transformation phase\n");
1547 : 12 : fprintf (dump_file, "=========================\n\n");
1548 : : }
1549 : :
1550 : : /* We don't want to perform SMS on new loops - created by versioning. */
1551 : 409 : for (auto loop : loops_list (cfun, 0))
1552 : : {
1553 : 127 : rtx_insn *head, *tail;
1554 : 127 : rtx count_reg;
1555 : 127 : rtx_insn *count_init;
1556 : 127 : int mii, rec_mii, stage_count, min_cycle;
1557 : 127 : int64_t loop_count = 0;
1558 : 127 : bool opt_sc_p, adjust_inplace = false;
1559 : 127 : basic_block pre_header;
1560 : :
1561 : 127 : if (! (g = g_arr[loop->num]))
1562 : 127 : continue;
1563 : :
1564 : 0 : if (dump_file)
1565 : : {
1566 : 0 : rtx_insn *insn = BB_END (loop->header);
1567 : :
1568 : 0 : fprintf (dump_file, "SMS loop num: %d", loop->num);
1569 : 0 : dump_insn_location (insn);
1570 : 0 : fprintf (dump_file, "\n");
1571 : :
1572 : 0 : print_ddg (dump_file, g);
1573 : : }
1574 : :
1575 : 0 : get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1576 : :
1577 : 0 : latch_edge = loop_latch_edge (loop);
1578 : 0 : gcc_assert (single_exit (loop));
1579 : 0 : trip_count = get_estimated_loop_iterations_int (loop);
1580 : 0 : max_trip_count = get_max_loop_iterations_int (loop);
1581 : :
1582 : 0 : if (dump_file)
1583 : : {
1584 : 0 : dump_insn_location (tail);
1585 : 0 : fprintf (dump_file, "\nSMS single-bb-loop\n");
1586 : 0 : if (profile_info && flag_branch_probabilities)
1587 : : {
1588 : 0 : fprintf (dump_file, "SMS loop-count ");
1589 : 0 : fprintf (dump_file, "%" PRId64,
1590 : 0 : (int64_t) bb->count.to_gcov_type ());
1591 : 0 : fprintf (dump_file, "\n");
1592 : : }
1593 : 0 : fprintf (dump_file, "SMS doloop\n");
1594 : 0 : fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1595 : 0 : fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1596 : 0 : fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1597 : : }
1598 : :
1599 : :
1600 : 0 : count_reg = doloop_register_get (head, tail);
1601 : 0 : gcc_assert (count_reg);
1602 : :
1603 : 0 : pre_header = loop_preheader_edge (loop)->src;
1604 : 0 : count_init = const_iteration_count (count_reg, pre_header, &loop_count,
1605 : : &adjust_inplace);
1606 : :
1607 : 0 : if (dump_file && count_init)
1608 : : {
1609 : 0 : fprintf (dump_file, "SMS const-doloop ");
1610 : 0 : fprintf (dump_file, "%" PRId64,
1611 : : loop_count);
1612 : 0 : fprintf (dump_file, "\n");
1613 : : }
1614 : :
1615 : 0 : node_order = XNEWVEC (int, g->num_nodes);
1616 : :
1617 : 0 : mii = 1; /* Need to pass some estimate of mii. */
1618 : 0 : rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1619 : 0 : mii = MAX (res_MII (g), rec_mii);
1620 : 0 : mii = MAX (mii, 1);
1621 : 0 : maxii = MAX (max_asap, param_sms_max_ii_factor * mii);
1622 : :
1623 : 0 : if (dump_file)
1624 : 0 : fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1625 : : rec_mii, mii, maxii);
1626 : :
1627 : 0 : for (;;)
1628 : : {
1629 : 0 : set_node_sched_params (g);
1630 : :
1631 : 0 : stage_count = 0;
1632 : 0 : opt_sc_p = false;
1633 : 0 : ps = sms_schedule_by_order (g, mii, maxii, node_order);
1634 : :
1635 : 0 : if (ps)
1636 : : {
1637 : : /* Try to achieve optimized SC by normalizing the partial
1638 : : schedule (having the cycles start from cycle zero).
1639 : : The branch location must be placed in row ii-1 in the
1640 : : final scheduling. If failed, shift all instructions to
1641 : : position the branch in row ii-1. */
1642 : 0 : opt_sc_p = optimize_sc (ps, g);
1643 : 0 : if (opt_sc_p)
1644 : 0 : stage_count = calculate_stage_count (ps, 0);
1645 : : else
1646 : : {
1647 : : /* Bring the branch to cycle ii-1. */
1648 : 0 : int amount = (SCHED_TIME (g->closing_branch->cuid)
1649 : 0 : - (ps->ii - 1));
1650 : :
1651 : 0 : if (dump_file)
1652 : 0 : fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1653 : :
1654 : 0 : stage_count = calculate_stage_count (ps, amount);
1655 : : }
1656 : :
1657 : 0 : gcc_assert (stage_count >= 1);
1658 : : }
1659 : :
1660 : : /* The default value of param_sms_min_sc is 2 as stage count of
1661 : : 1 means that there is no interleaving between iterations thus
1662 : : we let the scheduling passes do the job in this case. */
1663 : 0 : if (stage_count < param_sms_min_sc
1664 : 0 : || (count_init && (loop_count <= stage_count))
1665 : 0 : || (max_trip_count >= 0 && max_trip_count <= stage_count)
1666 : 0 : || (trip_count >= 0 && trip_count <= stage_count))
1667 : : {
1668 : 0 : if (dump_file)
1669 : : {
1670 : 0 : fprintf (dump_file, "SMS failed... \n");
1671 : 0 : fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1672 : : " loop-count=", stage_count);
1673 : 0 : fprintf (dump_file, "%" PRId64, loop_count);
1674 : 0 : fprintf (dump_file, ", trip-count=");
1675 : 0 : fprintf (dump_file, "%" PRId64 "max %" PRId64,
1676 : : (int64_t) trip_count, (int64_t) max_trip_count);
1677 : 0 : fprintf (dump_file, ")\n");
1678 : : }
1679 : : break;
1680 : : }
1681 : :
1682 : 0 : if (!opt_sc_p)
1683 : : {
1684 : : /* Rotate the partial schedule to have the branch in row ii-1. */
1685 : 0 : int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1686 : :
1687 : 0 : reset_sched_times (ps, amount);
1688 : 0 : rotate_partial_schedule (ps, amount);
1689 : : }
1690 : :
1691 : 0 : set_columns_for_ps (ps);
1692 : :
1693 : 0 : min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1694 : 0 : if (!schedule_reg_moves (ps))
1695 : : {
1696 : 0 : mii = ps->ii + 1;
1697 : 0 : free_partial_schedule (ps);
1698 : 0 : continue;
1699 : : }
1700 : :
1701 : : /* Moves that handle incoming values might have been added
1702 : : to a new first stage. Bump the stage count if so.
1703 : :
1704 : : ??? Perhaps we could consider rotating the schedule here
1705 : : instead? */
1706 : 0 : if (PS_MIN_CYCLE (ps) < min_cycle)
1707 : : {
1708 : 0 : reset_sched_times (ps, 0);
1709 : 0 : stage_count++;
1710 : : }
1711 : :
1712 : : /* The stage count should now be correct without rotation. */
1713 : 0 : gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1714 : 0 : PS_STAGE_COUNT (ps) = stage_count;
1715 : :
1716 : 0 : canon_loop (loop);
1717 : :
1718 : 0 : if (dump_file)
1719 : : {
1720 : 0 : dump_insn_location (tail);
1721 : 0 : fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1722 : : ps->ii, stage_count);
1723 : 0 : print_partial_schedule (ps, dump_file);
1724 : : }
1725 : :
1726 : 0 : if (count_init)
1727 : : {
1728 : 0 : if (adjust_inplace)
1729 : : {
1730 : : /* When possible, set new iteration count of loop kernel in
1731 : : place. Otherwise, generate_prolog_epilog creates an insn
1732 : : to adjust. */
1733 : 0 : SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1734 : : - stage_count + 1);
1735 : : }
1736 : : }
1737 : : else
1738 : : {
1739 : : /* case the BCT count is not known , Do loop-versioning */
1740 : 0 : rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
1741 : : gen_int_mode (stage_count,
1742 : : GET_MODE (count_reg)));
1743 : 0 : profile_probability prob = profile_probability::guessed_always ()
1744 : 0 : .apply_scale (PROB_SMS_ENOUGH_ITERATIONS, 100);
1745 : :
1746 : 0 : loop_version (loop, comp_rtx, &condition_bb,
1747 : : prob, prob.invert (),
1748 : : prob, prob.invert (), true);
1749 : : }
1750 : :
1751 : : /* Now apply the scheduled kernel to the RTL of the loop. */
1752 : 0 : permute_partial_schedule (ps, g->closing_branch->first_note);
1753 : :
1754 : : /* Mark this loop as software pipelined so the later
1755 : : scheduling passes don't touch it. */
1756 : 0 : if (! flag_resched_modulo_sched)
1757 : 0 : mark_loop_unsched (loop);
1758 : :
1759 : : /* The life-info is not valid any more. */
1760 : 0 : df_set_bb_dirty (g->bb);
1761 : :
1762 : 0 : apply_reg_moves (ps);
1763 : 0 : if (dump_file)
1764 : 0 : print_node_sched_params (dump_file, g->num_nodes, ps);
1765 : : /* Generate prolog and epilog. */
1766 : 0 : generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
1767 : 0 : break;
1768 : 0 : }
1769 : :
1770 : 0 : free_partial_schedule (ps);
1771 : 0 : node_sched_param_vec.release ();
1772 : 0 : free (node_order);
1773 : 0 : free_ddg (g);
1774 : 94 : }
1775 : :
1776 : 94 : free (g_arr);
1777 : :
1778 : : /* Release scheduler data, needed until now because of DFA. */
1779 : 94 : haifa_sched_finish ();
1780 : 94 : loop_optimizer_finalize ();
1781 : : }
1782 : :
1783 : : /* The SMS scheduling algorithm itself
1784 : : -----------------------------------
1785 : : Input: 'O' an ordered list of insns of a loop.
1786 : : Output: A scheduling of the loop - kernel, prolog, and epilogue.
1787 : :
1788 : : 'Q' is the empty Set
1789 : : 'PS' is the partial schedule; it holds the currently scheduled nodes with
1790 : : their cycle/slot.
1791 : : 'PSP' previously scheduled predecessors.
1792 : : 'PSS' previously scheduled successors.
1793 : : 't(u)' the cycle where u is scheduled.
1794 : : 'l(u)' is the latency of u.
1795 : : 'd(v,u)' is the dependence distance from v to u.
1796 : : 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1797 : : the node ordering phase.
1798 : : 'check_hardware_resources_conflicts(u, PS, c)'
1799 : : run a trace around cycle/slot through DFA model
1800 : : to check resource conflicts involving instruction u
1801 : : at cycle c given the partial schedule PS.
1802 : : 'add_to_partial_schedule_at_time(u, PS, c)'
1803 : : Add the node/instruction u to the partial schedule
1804 : : PS at time c.
1805 : : 'calculate_register_pressure(PS)'
1806 : : Given a schedule of instructions, calculate the register
1807 : : pressure it implies. One implementation could be the
1808 : : maximum number of overlapping live ranges.
1809 : : 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1810 : : registers available in the hardware.
1811 : :
1812 : : 1. II = MII.
1813 : : 2. PS = empty list
1814 : : 3. for each node u in O in pre-computed order
1815 : : 4. if (PSP(u) != Q && PSS(u) == Q) then
1816 : : 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1817 : : 6. start = Early_start; end = Early_start + II - 1; step = 1
1818 : : 11. else if (PSP(u) == Q && PSS(u) != Q) then
1819 : : 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1820 : : 13. start = Late_start; end = Late_start - II + 1; step = -1
1821 : : 14. else if (PSP(u) != Q && PSS(u) != Q) then
1822 : : 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1823 : : 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1824 : : 17. start = Early_start;
1825 : : 18. end = min(Early_start + II - 1 , Late_start);
1826 : : 19. step = 1
1827 : : 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1828 : : 21. start = ASAP(u); end = start + II - 1; step = 1
1829 : : 22. endif
1830 : :
1831 : : 23. success = false
1832 : : 24. for (c = start ; c != end ; c += step)
1833 : : 25. if check_hardware_resources_conflicts(u, PS, c) then
1834 : : 26. add_to_partial_schedule_at_time(u, PS, c)
1835 : : 27. success = true
1836 : : 28. break
1837 : : 29. endif
1838 : : 30. endfor
1839 : : 31. if (success == false) then
1840 : : 32. II = II + 1
1841 : : 33. if (II > maxII) then
1842 : : 34. finish - failed to schedule
1843 : : 35. endif
1844 : : 36. goto 2.
1845 : : 37. endif
1846 : : 38. endfor
1847 : : 39. if (calculate_register_pressure(PS) > maxRP) then
1848 : : 40. goto 32.
1849 : : 41. endif
1850 : : 42. compute epilogue & prologue
1851 : : 43. finish - succeeded to schedule
1852 : :
1853 : : ??? The algorithm restricts the scheduling window to II cycles.
1854 : : In rare cases, it may be better to allow windows of II+1 cycles.
1855 : : The window would then start and end on the same row, but with
1856 : : different "must precede" and "must follow" requirements. */
1857 : :
1858 : : /* A threshold for the number of repeated unsuccessful attempts to insert
1859 : : an empty row, before we flush the partial schedule and start over. */
1860 : : #define MAX_SPLIT_NUM 10
1861 : : /* Given the partial schedule PS, this function calculates and returns the
1862 : : cycles in which we can schedule the node with the given index I.
1863 : : NOTE: Here we do the backtracking in SMS, in some special cases. We have
1864 : : noticed that there are several cases in which we fail to SMS the loop
1865 : : because the sched window of a node is empty due to tight data-deps. In
1866 : : such cases we want to unschedule some of the predecessors/successors
1867 : : until we get non-empty scheduling window. It returns -1 if the
1868 : : scheduling window is empty and zero otherwise. */
1869 : :
1870 : : static int
1871 : 0 : get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1872 : : sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1873 : : int *end_p)
1874 : : {
1875 : 0 : int start, step, end;
1876 : 0 : int early_start, late_start;
1877 : 0 : ddg_edge_ptr e;
1878 : 0 : auto_sbitmap psp (ps->g->num_nodes);
1879 : 0 : auto_sbitmap pss (ps->g->num_nodes);
1880 : 0 : sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1881 : 0 : sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1882 : 0 : int psp_not_empty;
1883 : 0 : int pss_not_empty;
1884 : 0 : int count_preds;
1885 : 0 : int count_succs;
1886 : :
1887 : : /* 1. compute sched window for u (start, end, step). */
1888 : 0 : bitmap_clear (psp);
1889 : 0 : bitmap_clear (pss);
1890 : 0 : psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
1891 : 0 : pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
1892 : :
1893 : : /* We first compute a forward range (start <= end), then decide whether
1894 : : to reverse it. */
1895 : 0 : early_start = INT_MIN;
1896 : 0 : late_start = INT_MAX;
1897 : 0 : start = INT_MIN;
1898 : 0 : end = INT_MAX;
1899 : 0 : step = 1;
1900 : :
1901 : 0 : count_preds = 0;
1902 : 0 : count_succs = 0;
1903 : :
1904 : 0 : if (dump_file && (psp_not_empty || pss_not_empty))
1905 : : {
1906 : 0 : fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1907 : 0 : "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1908 : 0 : fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1909 : : "start", "early start", "late start", "end", "time");
1910 : 0 : fprintf (dump_file, "=========== =========== =========== ==========="
1911 : : " =====\n");
1912 : : }
1913 : : /* Calculate early_start and limit end. Both bounds are inclusive. */
1914 : 0 : if (psp_not_empty)
1915 : 0 : for (e = u_node->in; e != 0; e = e->next_in)
1916 : : {
1917 : 0 : int v = e->src->cuid;
1918 : :
1919 : 0 : if (bitmap_bit_p (sched_nodes, v))
1920 : : {
1921 : 0 : int p_st = SCHED_TIME (v);
1922 : 0 : int earliest = p_st + e->latency - (e->distance * ii);
1923 : 0 : int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1924 : :
1925 : 0 : if (dump_file)
1926 : : {
1927 : 0 : fprintf (dump_file, "%11s %11d %11s %11d %5d",
1928 : : "", earliest, "", latest, p_st);
1929 : 0 : print_ddg_edge (dump_file, e);
1930 : 0 : fprintf (dump_file, "\n");
1931 : : }
1932 : :
1933 : 0 : early_start = MAX (early_start, earliest);
1934 : 0 : end = MIN (end, latest);
1935 : :
1936 : 0 : if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1937 : 0 : count_preds++;
1938 : : }
1939 : : }
1940 : :
1941 : : /* Calculate late_start and limit start. Both bounds are inclusive. */
1942 : 0 : if (pss_not_empty)
1943 : 0 : for (e = u_node->out; e != 0; e = e->next_out)
1944 : : {
1945 : 0 : int v = e->dest->cuid;
1946 : :
1947 : 0 : if (bitmap_bit_p (sched_nodes, v))
1948 : : {
1949 : 0 : int s_st = SCHED_TIME (v);
1950 : 0 : int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1951 : 0 : int latest = s_st - e->latency + (e->distance * ii);
1952 : :
1953 : 0 : if (dump_file)
1954 : : {
1955 : 0 : fprintf (dump_file, "%11d %11s %11d %11s %5d",
1956 : : earliest, "", latest, "", s_st);
1957 : 0 : print_ddg_edge (dump_file, e);
1958 : 0 : fprintf (dump_file, "\n");
1959 : : }
1960 : :
1961 : 0 : start = MAX (start, earliest);
1962 : 0 : late_start = MIN (late_start, latest);
1963 : :
1964 : 0 : if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1965 : 0 : count_succs++;
1966 : : }
1967 : : }
1968 : :
1969 : 0 : if (dump_file && (psp_not_empty || pss_not_empty))
1970 : : {
1971 : 0 : fprintf (dump_file, "----------- ----------- ----------- -----------"
1972 : : " -----\n");
1973 : 0 : fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1974 : : start, early_start, late_start, end, "",
1975 : : "(max, max, min, min)");
1976 : : }
1977 : :
1978 : : /* Get a target scheduling window no bigger than ii. */
1979 : 0 : if (early_start == INT_MIN && late_start == INT_MAX)
1980 : 0 : early_start = NODE_ASAP (u_node);
1981 : 0 : else if (early_start == INT_MIN)
1982 : 0 : early_start = late_start - (ii - 1);
1983 : 0 : late_start = MIN (late_start, early_start + (ii - 1));
1984 : :
1985 : : /* Apply memory dependence limits. */
1986 : 0 : start = MAX (start, early_start);
1987 : 0 : end = MIN (end, late_start);
1988 : :
1989 : 0 : if (dump_file && (psp_not_empty || pss_not_empty))
1990 : 0 : fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1991 : : "", start, end, "", "");
1992 : :
1993 : : /* If there are at least as many successors as predecessors, schedule the
1994 : : node close to its successors. */
1995 : 0 : if (pss_not_empty && count_succs >= count_preds)
1996 : : {
1997 : 0 : std::swap (start, end);
1998 : 0 : step = -1;
1999 : : }
2000 : :
2001 : : /* Now that we've finalized the window, make END an exclusive rather
2002 : : than an inclusive bound. */
2003 : 0 : end += step;
2004 : :
2005 : 0 : *start_p = start;
2006 : 0 : *step_p = step;
2007 : 0 : *end_p = end;
2008 : :
2009 : 0 : if ((start >= end && step == 1) || (start <= end && step == -1))
2010 : : {
2011 : 0 : if (dump_file)
2012 : 0 : fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
2013 : : start, end, step);
2014 : 0 : return -1;
2015 : : }
2016 : :
2017 : : return 0;
2018 : 0 : }
2019 : :
2020 : : /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
2021 : : node currently been scheduled. At the end of the calculation
2022 : : MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2023 : : U_NODE which are (1) already scheduled in the first/last row of
2024 : : U_NODE's scheduling window, (2) whose dependence inequality with U
2025 : : becomes an equality when U is scheduled in this same row, and (3)
2026 : : whose dependence latency is zero.
2027 : :
2028 : : The first and last rows are calculated using the following parameters:
2029 : : START/END rows - The cycles that begins/ends the traversal on the window;
2030 : : searching for an empty cycle to schedule U_NODE.
2031 : : STEP - The direction in which we traverse the window.
2032 : : II - The initiation interval. */
2033 : :
2034 : : static void
2035 : 0 : calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2036 : : int step, int ii, sbitmap sched_nodes,
2037 : : sbitmap must_precede, sbitmap must_follow)
2038 : : {
2039 : 0 : ddg_edge_ptr e;
2040 : 0 : int first_cycle_in_window, last_cycle_in_window;
2041 : :
2042 : 0 : gcc_assert (must_precede && must_follow);
2043 : :
2044 : : /* Consider the following scheduling window:
2045 : : {first_cycle_in_window, first_cycle_in_window+1, ...,
2046 : : last_cycle_in_window}. If step is 1 then the following will be
2047 : : the order we traverse the window: {start=first_cycle_in_window,
2048 : : first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2049 : : or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2050 : : end=first_cycle_in_window-1} if step is -1. */
2051 : 0 : first_cycle_in_window = (step == 1) ? start : end - step;
2052 : 0 : last_cycle_in_window = (step == 1) ? end - step : start;
2053 : :
2054 : 0 : bitmap_clear (must_precede);
2055 : 0 : bitmap_clear (must_follow);
2056 : :
2057 : 0 : if (dump_file)
2058 : 0 : fprintf (dump_file, "\nmust_precede: ");
2059 : :
2060 : : /* Instead of checking if:
2061 : : (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2062 : : && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2063 : : first_cycle_in_window)
2064 : : && e->latency == 0
2065 : : we use the fact that latency is non-negative:
2066 : : SCHED_TIME (e->src) - (e->distance * ii) <=
2067 : : SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2068 : : first_cycle_in_window
2069 : : and check only if
2070 : : SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
2071 : 0 : for (e = u_node->in; e != 0; e = e->next_in)
2072 : 0 : if (bitmap_bit_p (sched_nodes, e->src->cuid)
2073 : 0 : && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2074 : : first_cycle_in_window))
2075 : : {
2076 : 0 : if (dump_file)
2077 : 0 : fprintf (dump_file, "%d ", e->src->cuid);
2078 : :
2079 : 0 : bitmap_set_bit (must_precede, e->src->cuid);
2080 : : }
2081 : :
2082 : 0 : if (dump_file)
2083 : 0 : fprintf (dump_file, "\nmust_follow: ");
2084 : :
2085 : : /* Instead of checking if:
2086 : : (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2087 : : && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2088 : : last_cycle_in_window)
2089 : : && e->latency == 0
2090 : : we use the fact that latency is non-negative:
2091 : : SCHED_TIME (e->dest) + (e->distance * ii) >=
2092 : : SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
2093 : : last_cycle_in_window
2094 : : and check only if
2095 : : SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
2096 : 0 : for (e = u_node->out; e != 0; e = e->next_out)
2097 : 0 : if (bitmap_bit_p (sched_nodes, e->dest->cuid)
2098 : 0 : && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2099 : : last_cycle_in_window))
2100 : : {
2101 : 0 : if (dump_file)
2102 : 0 : fprintf (dump_file, "%d ", e->dest->cuid);
2103 : :
2104 : 0 : bitmap_set_bit (must_follow, e->dest->cuid);
2105 : : }
2106 : :
2107 : 0 : if (dump_file)
2108 : 0 : fprintf (dump_file, "\n");
2109 : 0 : }
2110 : :
2111 : : /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
2112 : : parameters to decide if that's possible:
2113 : : PS - The partial schedule.
2114 : : U - The serial number of U_NODE.
2115 : : NUM_SPLITS - The number of row splits made so far.
2116 : : MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2117 : : the first row of the scheduling window)
2118 : : MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2119 : : last row of the scheduling window) */
2120 : :
2121 : : static bool
2122 : 0 : try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2123 : : int u, int cycle, sbitmap sched_nodes,
2124 : : int *num_splits, sbitmap must_precede,
2125 : : sbitmap must_follow)
2126 : : {
2127 : 0 : ps_insn_ptr psi;
2128 : 0 : bool success = false;
2129 : :
2130 : 0 : verify_partial_schedule (ps, sched_nodes);
2131 : 0 : psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2132 : 0 : if (psi)
2133 : : {
2134 : 0 : SCHED_TIME (u) = cycle;
2135 : 0 : bitmap_set_bit (sched_nodes, u);
2136 : 0 : success = true;
2137 : 0 : *num_splits = 0;
2138 : 0 : if (dump_file)
2139 : 0 : fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2140 : :
2141 : : }
2142 : :
2143 : 0 : return success;
2144 : : }
2145 : :
2146 : : /* This function implements the scheduling algorithm for SMS according to the
2147 : : above algorithm. */
2148 : : static partial_schedule_ptr
2149 : 0 : sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2150 : : {
2151 : 0 : int ii = mii;
2152 : 0 : int i, c, success, num_splits = 0;
2153 : 0 : int flush_and_start_over = true;
2154 : 0 : int num_nodes = g->num_nodes;
2155 : 0 : int start, end, step; /* Place together into one struct? */
2156 : 0 : auto_sbitmap sched_nodes (num_nodes);
2157 : 0 : auto_sbitmap must_precede (num_nodes);
2158 : 0 : auto_sbitmap must_follow (num_nodes);
2159 : 0 : auto_sbitmap tobe_scheduled (num_nodes);
2160 : :
2161 : : /* Value of param_sms_dfa_history is a limit on the number of cycles that
2162 : : resource conflicts can span. ??? Should be provided by DFA, and be
2163 : : dependent on the type of insn scheduled. Set to 0 by default to save
2164 : : compile time. */
2165 : 0 : partial_schedule_ptr ps = create_partial_schedule (ii, g,
2166 : : param_sms_dfa_history);
2167 : :
2168 : 0 : bitmap_ones (tobe_scheduled);
2169 : 0 : bitmap_clear (sched_nodes);
2170 : :
2171 : 0 : while (flush_and_start_over && (ii < maxii))
2172 : : {
2173 : :
2174 : 0 : if (dump_file)
2175 : 0 : fprintf (dump_file, "Starting with ii=%d\n", ii);
2176 : 0 : flush_and_start_over = false;
2177 : 0 : bitmap_clear (sched_nodes);
2178 : :
2179 : 0 : for (i = 0; i < num_nodes; i++)
2180 : : {
2181 : 0 : int u = nodes_order[i];
2182 : 0 : ddg_node_ptr u_node = &ps->g->nodes[u];
2183 : 0 : rtx_insn *insn = u_node->insn;
2184 : :
2185 : 0 : gcc_checking_assert (NONDEBUG_INSN_P (insn));
2186 : :
2187 : 0 : if (bitmap_bit_p (sched_nodes, u))
2188 : 0 : continue;
2189 : :
2190 : : /* Try to get non-empty scheduling window. */
2191 : 0 : success = 0;
2192 : 0 : if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2193 : : &step, &end) == 0)
2194 : : {
2195 : 0 : if (dump_file)
2196 : 0 : fprintf (dump_file, "\nTrying to schedule node %d "
2197 : : "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
2198 : 0 : (g->nodes[u].insn)), start, end, step);
2199 : :
2200 : 0 : gcc_assert ((step > 0 && start < end)
2201 : : || (step < 0 && start > end));
2202 : :
2203 : 0 : calculate_must_precede_follow (u_node, start, end, step, ii,
2204 : : sched_nodes, must_precede,
2205 : : must_follow);
2206 : :
2207 : 0 : for (c = start; c != end; c += step)
2208 : : {
2209 : 0 : sbitmap tmp_precede, tmp_follow;
2210 : :
2211 : 0 : set_must_precede_follow (&tmp_follow, must_follow,
2212 : : &tmp_precede, must_precede,
2213 : : c, start, end, step);
2214 : 0 : success =
2215 : 0 : try_scheduling_node_in_cycle (ps, u, c,
2216 : : sched_nodes,
2217 : : &num_splits, tmp_precede,
2218 : : tmp_follow);
2219 : 0 : if (success)
2220 : : break;
2221 : : }
2222 : :
2223 : 0 : verify_partial_schedule (ps, sched_nodes);
2224 : : }
2225 : 0 : if (!success)
2226 : : {
2227 : 0 : int split_row;
2228 : :
2229 : 0 : if (ii++ == maxii)
2230 : : break;
2231 : :
2232 : 0 : if (num_splits >= MAX_SPLIT_NUM)
2233 : : {
2234 : 0 : num_splits = 0;
2235 : 0 : flush_and_start_over = true;
2236 : 0 : verify_partial_schedule (ps, sched_nodes);
2237 : 0 : reset_partial_schedule (ps, ii);
2238 : 0 : verify_partial_schedule (ps, sched_nodes);
2239 : 0 : break;
2240 : : }
2241 : :
2242 : 0 : num_splits++;
2243 : : /* The scheduling window is exclusive of 'end'
2244 : : whereas compute_split_window() expects an inclusive,
2245 : : ordered range. */
2246 : 0 : if (step == 1)
2247 : 0 : split_row = compute_split_row (sched_nodes, start, end - 1,
2248 : : ps->ii, u_node);
2249 : : else
2250 : 0 : split_row = compute_split_row (sched_nodes, end + 1, start,
2251 : : ps->ii, u_node);
2252 : :
2253 : 0 : ps_insert_empty_row (ps, split_row, sched_nodes);
2254 : 0 : i--; /* Go back and retry node i. */
2255 : :
2256 : 0 : if (dump_file)
2257 : 0 : fprintf (dump_file, "num_splits=%d\n", num_splits);
2258 : : }
2259 : :
2260 : : /* ??? If (success), check register pressure estimates. */
2261 : : } /* Continue with next node. */
2262 : : } /* While flush_and_start_over. */
2263 : 0 : if (ii >= maxii)
2264 : : {
2265 : 0 : free_partial_schedule (ps);
2266 : 0 : ps = NULL;
2267 : : }
2268 : : else
2269 : 0 : gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
2270 : :
2271 : 0 : return ps;
2272 : 0 : }
2273 : :
2274 : : /* This function inserts a new empty row into PS at the position
2275 : : according to SPLITROW, keeping all already scheduled instructions
2276 : : intact and updating their SCHED_TIME and cycle accordingly. */
2277 : : static void
2278 : 0 : ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2279 : : sbitmap sched_nodes)
2280 : : {
2281 : 0 : ps_insn_ptr crr_insn;
2282 : 0 : ps_insn_ptr *rows_new;
2283 : 0 : int ii = ps->ii;
2284 : 0 : int new_ii = ii + 1;
2285 : 0 : int row;
2286 : 0 : int *rows_length_new;
2287 : :
2288 : 0 : verify_partial_schedule (ps, sched_nodes);
2289 : :
2290 : : /* We normalize sched_time and rotate ps to have only non-negative sched
2291 : : times, for simplicity of updating cycles after inserting new row. */
2292 : 0 : split_row -= ps->min_cycle;
2293 : 0 : split_row = SMODULO (split_row, ii);
2294 : 0 : if (dump_file)
2295 : 0 : fprintf (dump_file, "split_row=%d\n", split_row);
2296 : :
2297 : 0 : reset_sched_times (ps, PS_MIN_CYCLE (ps));
2298 : 0 : rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2299 : :
2300 : 0 : rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2301 : 0 : rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
2302 : 0 : for (row = 0; row < split_row; row++)
2303 : : {
2304 : 0 : rows_new[row] = ps->rows[row];
2305 : 0 : rows_length_new[row] = ps->rows_length[row];
2306 : 0 : ps->rows[row] = NULL;
2307 : 0 : for (crr_insn = rows_new[row];
2308 : 0 : crr_insn; crr_insn = crr_insn->next_in_row)
2309 : : {
2310 : 0 : int u = crr_insn->id;
2311 : 0 : int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2312 : :
2313 : 0 : SCHED_TIME (u) = new_time;
2314 : 0 : crr_insn->cycle = new_time;
2315 : 0 : SCHED_ROW (u) = new_time % new_ii;
2316 : 0 : SCHED_STAGE (u) = new_time / new_ii;
2317 : : }
2318 : :
2319 : : }
2320 : :
2321 : 0 : rows_new[split_row] = NULL;
2322 : :
2323 : 0 : for (row = split_row; row < ii; row++)
2324 : : {
2325 : 0 : rows_new[row + 1] = ps->rows[row];
2326 : 0 : rows_length_new[row + 1] = ps->rows_length[row];
2327 : 0 : ps->rows[row] = NULL;
2328 : 0 : for (crr_insn = rows_new[row + 1];
2329 : 0 : crr_insn; crr_insn = crr_insn->next_in_row)
2330 : : {
2331 : 0 : int u = crr_insn->id;
2332 : 0 : int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2333 : :
2334 : 0 : SCHED_TIME (u) = new_time;
2335 : 0 : crr_insn->cycle = new_time;
2336 : 0 : SCHED_ROW (u) = new_time % new_ii;
2337 : 0 : SCHED_STAGE (u) = new_time / new_ii;
2338 : : }
2339 : : }
2340 : :
2341 : : /* Updating ps. */
2342 : 0 : ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2343 : 0 : + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2344 : 0 : ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2345 : 0 : + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2346 : 0 : free (ps->rows);
2347 : 0 : ps->rows = rows_new;
2348 : 0 : free (ps->rows_length);
2349 : 0 : ps->rows_length = rows_length_new;
2350 : 0 : ps->ii = new_ii;
2351 : 0 : gcc_assert (ps->min_cycle >= 0);
2352 : :
2353 : 0 : verify_partial_schedule (ps, sched_nodes);
2354 : :
2355 : 0 : if (dump_file)
2356 : 0 : fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2357 : : ps->max_cycle);
2358 : 0 : }
2359 : :
2360 : : /* Given U_NODE which is the node that failed to be scheduled; LOW and
2361 : : UP which are the boundaries of it's scheduling window; compute using
2362 : : SCHED_NODES and II a row in the partial schedule that can be split
2363 : : which will separate a critical predecessor from a critical successor
2364 : : thereby expanding the window, and return it. */
2365 : : static int
2366 : 0 : compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2367 : : ddg_node_ptr u_node)
2368 : : {
2369 : 0 : ddg_edge_ptr e;
2370 : 0 : int lower = INT_MIN, upper = INT_MAX;
2371 : 0 : int crit_pred = -1;
2372 : 0 : int crit_succ = -1;
2373 : 0 : int crit_cycle;
2374 : :
2375 : 0 : for (e = u_node->in; e != 0; e = e->next_in)
2376 : : {
2377 : 0 : int v = e->src->cuid;
2378 : :
2379 : 0 : if (bitmap_bit_p (sched_nodes, v)
2380 : 0 : && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2381 : 0 : if (SCHED_TIME (v) > lower)
2382 : : {
2383 : 0 : crit_pred = v;
2384 : 0 : lower = SCHED_TIME (v);
2385 : : }
2386 : : }
2387 : :
2388 : 0 : if (crit_pred >= 0)
2389 : : {
2390 : 0 : crit_cycle = SCHED_TIME (crit_pred) + 1;
2391 : 0 : return SMODULO (crit_cycle, ii);
2392 : : }
2393 : :
2394 : 0 : for (e = u_node->out; e != 0; e = e->next_out)
2395 : : {
2396 : 0 : int v = e->dest->cuid;
2397 : :
2398 : 0 : if (bitmap_bit_p (sched_nodes, v)
2399 : 0 : && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2400 : 0 : if (SCHED_TIME (v) < upper)
2401 : : {
2402 : 0 : crit_succ = v;
2403 : 0 : upper = SCHED_TIME (v);
2404 : : }
2405 : : }
2406 : :
2407 : 0 : if (crit_succ >= 0)
2408 : : {
2409 : 0 : crit_cycle = SCHED_TIME (crit_succ);
2410 : 0 : return SMODULO (crit_cycle, ii);
2411 : : }
2412 : :
2413 : 0 : if (dump_file)
2414 : 0 : fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2415 : :
2416 : 0 : return SMODULO ((low + up + 1) / 2, ii);
2417 : : }
2418 : :
2419 : : static void
2420 : 0 : verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2421 : : {
2422 : 0 : int row;
2423 : 0 : ps_insn_ptr crr_insn;
2424 : :
2425 : 0 : for (row = 0; row < ps->ii; row++)
2426 : : {
2427 : 0 : int length = 0;
2428 : :
2429 : 0 : for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2430 : : {
2431 : 0 : int u = crr_insn->id;
2432 : :
2433 : 0 : length++;
2434 : 0 : gcc_assert (bitmap_bit_p (sched_nodes, u));
2435 : : /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2436 : : popcount (sched_nodes) == number of insns in ps. */
2437 : 0 : gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2438 : 0 : gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2439 : : }
2440 : :
2441 : 0 : gcc_assert (ps->rows_length[row] == length);
2442 : : }
2443 : 0 : }
2444 : :
2445 : :
2446 : : /* This page implements the algorithm for ordering the nodes of a DDG
2447 : : for modulo scheduling, activated through the
2448 : : "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2449 : :
2450 : : #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2451 : : #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2452 : : #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2453 : : #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2454 : : #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2455 : : #define DEPTH(x) (ASAP ((x)))
2456 : :
2457 : : typedef struct node_order_params * nopa;
2458 : :
2459 : : static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2460 : : static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2461 : : static nopa calculate_order_params (ddg_ptr, int, int *);
2462 : : static int find_max_asap (ddg_ptr, sbitmap);
2463 : : static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2464 : : static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2465 : :
2466 : : enum sms_direction {BOTTOMUP, TOPDOWN};
2467 : :
2468 : : struct node_order_params
2469 : : {
2470 : : int asap;
2471 : : int alap;
2472 : : int height;
2473 : : };
2474 : :
2475 : : /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2476 : : static void
2477 : 0 : check_nodes_order (int *node_order, int num_nodes)
2478 : : {
2479 : 0 : int i;
2480 : 0 : auto_sbitmap tmp (num_nodes);
2481 : :
2482 : 0 : bitmap_clear (tmp);
2483 : :
2484 : 0 : if (dump_file)
2485 : 0 : fprintf (dump_file, "SMS final nodes order: \n");
2486 : :
2487 : 0 : for (i = 0; i < num_nodes; i++)
2488 : : {
2489 : 0 : int u = node_order[i];
2490 : :
2491 : 0 : if (dump_file)
2492 : 0 : fprintf (dump_file, "%d ", u);
2493 : 0 : gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
2494 : :
2495 : 0 : bitmap_set_bit (tmp, u);
2496 : : }
2497 : :
2498 : 0 : if (dump_file)
2499 : 0 : fprintf (dump_file, "\n");
2500 : 0 : }
2501 : :
2502 : : /* Order the nodes of G for scheduling and pass the result in
2503 : : NODE_ORDER. Also set aux.count of each node to ASAP.
2504 : : Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2505 : : static int
2506 : 0 : sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2507 : : {
2508 : 0 : int i;
2509 : 0 : int rec_mii = 0;
2510 : 0 : ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2511 : :
2512 : 0 : nopa nops = calculate_order_params (g, mii, pmax_asap);
2513 : :
2514 : 0 : if (dump_file)
2515 : 0 : print_sccs (dump_file, sccs, g);
2516 : :
2517 : 0 : order_nodes_of_sccs (sccs, node_order);
2518 : :
2519 : 0 : if (sccs->num_sccs > 0)
2520 : : /* First SCC has the largest recurrence_length. */
2521 : 0 : rec_mii = sccs->sccs[0]->recurrence_length;
2522 : :
2523 : : /* Save ASAP before destroying node_order_params. */
2524 : 0 : for (i = 0; i < g->num_nodes; i++)
2525 : : {
2526 : 0 : ddg_node_ptr v = &g->nodes[i];
2527 : 0 : v->aux.count = ASAP (v);
2528 : : }
2529 : :
2530 : 0 : free (nops);
2531 : 0 : free_ddg_all_sccs (sccs);
2532 : 0 : check_nodes_order (node_order, g->num_nodes);
2533 : :
2534 : 0 : return rec_mii;
2535 : : }
2536 : :
2537 : : static void
2538 : 0 : order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2539 : : {
2540 : 0 : int i, pos = 0;
2541 : 0 : ddg_ptr g = all_sccs->ddg;
2542 : 0 : int num_nodes = g->num_nodes;
2543 : 0 : auto_sbitmap prev_sccs (num_nodes);
2544 : 0 : auto_sbitmap on_path (num_nodes);
2545 : 0 : auto_sbitmap tmp (num_nodes);
2546 : 0 : auto_sbitmap ones (num_nodes);
2547 : :
2548 : 0 : bitmap_clear (prev_sccs);
2549 : 0 : bitmap_ones (ones);
2550 : :
2551 : : /* Perform the node ordering starting from the SCC with the highest recMII.
2552 : : For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2553 : 0 : for (i = 0; i < all_sccs->num_sccs; i++)
2554 : : {
2555 : 0 : ddg_scc_ptr scc = all_sccs->sccs[i];
2556 : :
2557 : : /* Add nodes on paths from previous SCCs to the current SCC. */
2558 : 0 : find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2559 : 0 : bitmap_ior (tmp, scc->nodes, on_path);
2560 : :
2561 : : /* Add nodes on paths from the current SCC to previous SCCs. */
2562 : 0 : find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2563 : 0 : bitmap_ior (tmp, tmp, on_path);
2564 : :
2565 : : /* Remove nodes of previous SCCs from current extended SCC. */
2566 : 0 : bitmap_and_compl (tmp, tmp, prev_sccs);
2567 : :
2568 : 0 : pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2569 : : /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2570 : : }
2571 : :
2572 : : /* Handle the remaining nodes that do not belong to any scc. Each call
2573 : : to order_nodes_in_scc handles a single connected component. */
2574 : 0 : while (pos < g->num_nodes)
2575 : : {
2576 : 0 : bitmap_and_compl (tmp, ones, prev_sccs);
2577 : 0 : pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2578 : : }
2579 : 0 : }
2580 : :
2581 : : /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2582 : : static struct node_order_params *
2583 : 0 : calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2584 : : {
2585 : 0 : int u;
2586 : 0 : int max_asap;
2587 : 0 : int num_nodes = g->num_nodes;
2588 : 0 : ddg_edge_ptr e;
2589 : : /* Allocate a place to hold ordering params for each node in the DDG. */
2590 : 0 : nopa node_order_params_arr;
2591 : :
2592 : : /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2593 : 0 : node_order_params_arr = (nopa) xcalloc (num_nodes,
2594 : : sizeof (struct node_order_params));
2595 : :
2596 : : /* Set the aux pointer of each node to point to its order_params structure. */
2597 : 0 : for (u = 0; u < num_nodes; u++)
2598 : 0 : g->nodes[u].aux.info = &node_order_params_arr[u];
2599 : :
2600 : : /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2601 : : calculate ASAP, ALAP, mobility, distance, and height for each node
2602 : : in the dependence (direct acyclic) graph. */
2603 : :
2604 : : /* We assume that the nodes in the array are in topological order. */
2605 : :
2606 : : max_asap = 0;
2607 : 0 : for (u = 0; u < num_nodes; u++)
2608 : : {
2609 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2610 : :
2611 : 0 : ASAP (u_node) = 0;
2612 : 0 : for (e = u_node->in; e; e = e->next_in)
2613 : 0 : if (e->distance == 0)
2614 : 0 : ASAP (u_node) = MAX (ASAP (u_node),
2615 : : ASAP (e->src) + e->latency);
2616 : 0 : max_asap = MAX (max_asap, ASAP (u_node));
2617 : : }
2618 : :
2619 : 0 : for (u = num_nodes - 1; u > -1; u--)
2620 : : {
2621 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2622 : :
2623 : 0 : ALAP (u_node) = max_asap;
2624 : 0 : HEIGHT (u_node) = 0;
2625 : 0 : for (e = u_node->out; e; e = e->next_out)
2626 : 0 : if (e->distance == 0)
2627 : : {
2628 : 0 : ALAP (u_node) = MIN (ALAP (u_node),
2629 : : ALAP (e->dest) - e->latency);
2630 : 0 : HEIGHT (u_node) = MAX (HEIGHT (u_node),
2631 : : HEIGHT (e->dest) + e->latency);
2632 : : }
2633 : : }
2634 : 0 : if (dump_file)
2635 : : {
2636 : 0 : fprintf (dump_file, "\nOrder params\n");
2637 : 0 : for (u = 0; u < num_nodes; u++)
2638 : : {
2639 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2640 : :
2641 : 0 : fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2642 : 0 : ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2643 : : }
2644 : : }
2645 : :
2646 : 0 : *pmax_asap = max_asap;
2647 : 0 : return node_order_params_arr;
2648 : : }
2649 : :
2650 : : static int
2651 : 0 : find_max_asap (ddg_ptr g, sbitmap nodes)
2652 : : {
2653 : 0 : unsigned int u = 0;
2654 : 0 : int max_asap = -1;
2655 : 0 : int result = -1;
2656 : 0 : sbitmap_iterator sbi;
2657 : :
2658 : 0 : EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2659 : : {
2660 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2661 : :
2662 : 0 : if (max_asap < ASAP (u_node))
2663 : : {
2664 : 0 : max_asap = ASAP (u_node);
2665 : 0 : result = u;
2666 : : }
2667 : : }
2668 : 0 : return result;
2669 : : }
2670 : :
2671 : : static int
2672 : 0 : find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2673 : : {
2674 : 0 : unsigned int u = 0;
2675 : 0 : int max_hv = -1;
2676 : 0 : int min_mob = INT_MAX;
2677 : 0 : int result = -1;
2678 : 0 : sbitmap_iterator sbi;
2679 : :
2680 : 0 : EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2681 : : {
2682 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2683 : :
2684 : 0 : if (max_hv < HEIGHT (u_node))
2685 : : {
2686 : 0 : max_hv = HEIGHT (u_node);
2687 : 0 : min_mob = MOB (u_node);
2688 : 0 : result = u;
2689 : : }
2690 : 0 : else if ((max_hv == HEIGHT (u_node))
2691 : 0 : && (min_mob > MOB (u_node)))
2692 : : {
2693 : 0 : min_mob = MOB (u_node);
2694 : 0 : result = u;
2695 : : }
2696 : : }
2697 : 0 : return result;
2698 : : }
2699 : :
2700 : : static int
2701 : 0 : find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2702 : : {
2703 : 0 : unsigned int u = 0;
2704 : 0 : int max_dv = -1;
2705 : 0 : int min_mob = INT_MAX;
2706 : 0 : int result = -1;
2707 : 0 : sbitmap_iterator sbi;
2708 : :
2709 : 0 : EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2710 : : {
2711 : 0 : ddg_node_ptr u_node = &g->nodes[u];
2712 : :
2713 : 0 : if (max_dv < DEPTH (u_node))
2714 : : {
2715 : 0 : max_dv = DEPTH (u_node);
2716 : 0 : min_mob = MOB (u_node);
2717 : 0 : result = u;
2718 : : }
2719 : 0 : else if ((max_dv == DEPTH (u_node))
2720 : 0 : && (min_mob > MOB (u_node)))
2721 : : {
2722 : 0 : min_mob = MOB (u_node);
2723 : 0 : result = u;
2724 : : }
2725 : : }
2726 : 0 : return result;
2727 : : }
2728 : :
2729 : : /* Places the nodes of SCC into the NODE_ORDER array starting
2730 : : at position POS, according to the SMS ordering algorithm.
2731 : : NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2732 : : the NODE_ORDER array, starting from position zero. */
2733 : : static int
2734 : 0 : order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2735 : : int * node_order, int pos)
2736 : : {
2737 : 0 : enum sms_direction dir;
2738 : 0 : int num_nodes = g->num_nodes;
2739 : 0 : auto_sbitmap workset (num_nodes);
2740 : 0 : auto_sbitmap tmp (num_nodes);
2741 : 0 : sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2742 : 0 : auto_sbitmap predecessors (num_nodes);
2743 : 0 : auto_sbitmap successors (num_nodes);
2744 : :
2745 : 0 : bitmap_clear (predecessors);
2746 : 0 : find_predecessors (predecessors, g, nodes_ordered);
2747 : :
2748 : 0 : bitmap_clear (successors);
2749 : 0 : find_successors (successors, g, nodes_ordered);
2750 : :
2751 : 0 : bitmap_clear (tmp);
2752 : 0 : if (bitmap_and (tmp, predecessors, scc))
2753 : : {
2754 : 0 : bitmap_copy (workset, tmp);
2755 : 0 : dir = BOTTOMUP;
2756 : : }
2757 : 0 : else if (bitmap_and (tmp, successors, scc))
2758 : : {
2759 : 0 : bitmap_copy (workset, tmp);
2760 : 0 : dir = TOPDOWN;
2761 : : }
2762 : : else
2763 : : {
2764 : 0 : int u;
2765 : :
2766 : 0 : bitmap_clear (workset);
2767 : 0 : if ((u = find_max_asap (g, scc)) >= 0)
2768 : 0 : bitmap_set_bit (workset, u);
2769 : : dir = BOTTOMUP;
2770 : : }
2771 : :
2772 : 0 : bitmap_clear (zero_bitmap);
2773 : 0 : while (!bitmap_equal_p (workset, zero_bitmap))
2774 : : {
2775 : 0 : int v;
2776 : 0 : ddg_node_ptr v_node;
2777 : 0 : sbitmap v_node_preds;
2778 : 0 : sbitmap v_node_succs;
2779 : :
2780 : 0 : if (dir == TOPDOWN)
2781 : : {
2782 : 0 : while (!bitmap_equal_p (workset, zero_bitmap))
2783 : : {
2784 : 0 : v = find_max_hv_min_mob (g, workset);
2785 : 0 : v_node = &g->nodes[v];
2786 : 0 : node_order[pos++] = v;
2787 : 0 : v_node_succs = NODE_SUCCESSORS (v_node);
2788 : 0 : bitmap_and (tmp, v_node_succs, scc);
2789 : :
2790 : : /* Don't consider the already ordered successors again. */
2791 : 0 : bitmap_and_compl (tmp, tmp, nodes_ordered);
2792 : 0 : bitmap_ior (workset, workset, tmp);
2793 : 0 : bitmap_clear_bit (workset, v);
2794 : 0 : bitmap_set_bit (nodes_ordered, v);
2795 : : }
2796 : 0 : dir = BOTTOMUP;
2797 : 0 : bitmap_clear (predecessors);
2798 : 0 : find_predecessors (predecessors, g, nodes_ordered);
2799 : 0 : bitmap_and (workset, predecessors, scc);
2800 : : }
2801 : : else
2802 : : {
2803 : 0 : while (!bitmap_equal_p (workset, zero_bitmap))
2804 : : {
2805 : 0 : v = find_max_dv_min_mob (g, workset);
2806 : 0 : v_node = &g->nodes[v];
2807 : 0 : node_order[pos++] = v;
2808 : 0 : v_node_preds = NODE_PREDECESSORS (v_node);
2809 : 0 : bitmap_and (tmp, v_node_preds, scc);
2810 : :
2811 : : /* Don't consider the already ordered predecessors again. */
2812 : 0 : bitmap_and_compl (tmp, tmp, nodes_ordered);
2813 : 0 : bitmap_ior (workset, workset, tmp);
2814 : 0 : bitmap_clear_bit (workset, v);
2815 : 0 : bitmap_set_bit (nodes_ordered, v);
2816 : : }
2817 : 0 : dir = TOPDOWN;
2818 : 0 : bitmap_clear (successors);
2819 : 0 : find_successors (successors, g, nodes_ordered);
2820 : 0 : bitmap_and (workset, successors, scc);
2821 : : }
2822 : : }
2823 : 0 : sbitmap_free (zero_bitmap);
2824 : 0 : return pos;
2825 : 0 : }
2826 : :
2827 : :
2828 : : /* This page contains functions for manipulating partial-schedules during
2829 : : modulo scheduling. */
2830 : :
2831 : : /* Create a partial schedule and allocate a memory to hold II rows. */
2832 : :
2833 : : static partial_schedule_ptr
2834 : 0 : create_partial_schedule (int ii, ddg_ptr g, int history)
2835 : : {
2836 : 0 : partial_schedule_ptr ps = XNEW (struct partial_schedule);
2837 : 0 : ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2838 : 0 : ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2839 : 0 : ps->reg_moves.create (0);
2840 : 0 : ps->ii = ii;
2841 : 0 : ps->history = history;
2842 : 0 : ps->min_cycle = INT_MAX;
2843 : 0 : ps->max_cycle = INT_MIN;
2844 : 0 : ps->g = g;
2845 : :
2846 : 0 : return ps;
2847 : : }
2848 : :
2849 : : /* Free the PS_INSNs in rows array of the given partial schedule.
2850 : : ??? Consider caching the PS_INSN's. */
2851 : : static void
2852 : 0 : free_ps_insns (partial_schedule_ptr ps)
2853 : : {
2854 : 0 : int i;
2855 : :
2856 : 0 : for (i = 0; i < ps->ii; i++)
2857 : : {
2858 : 0 : while (ps->rows[i])
2859 : : {
2860 : 0 : ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2861 : :
2862 : 0 : free (ps->rows[i]);
2863 : 0 : ps->rows[i] = ps_insn;
2864 : : }
2865 : 0 : ps->rows[i] = NULL;
2866 : : }
2867 : 0 : }
2868 : :
2869 : : /* Free all the memory allocated to the partial schedule. */
2870 : :
2871 : : static void
2872 : 0 : free_partial_schedule (partial_schedule_ptr ps)
2873 : : {
2874 : 0 : ps_reg_move_info *move;
2875 : 0 : unsigned int i;
2876 : :
2877 : 0 : if (!ps)
2878 : 0 : return;
2879 : :
2880 : 0 : FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
2881 : 0 : sbitmap_free (move->uses);
2882 : 0 : ps->reg_moves.release ();
2883 : :
2884 : 0 : free_ps_insns (ps);
2885 : 0 : free (ps->rows);
2886 : 0 : free (ps->rows_length);
2887 : 0 : free (ps);
2888 : : }
2889 : :
2890 : : /* Clear the rows array with its PS_INSNs, and create a new one with
2891 : : NEW_II rows. */
2892 : :
2893 : : static void
2894 : 0 : reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2895 : : {
2896 : 0 : if (!ps)
2897 : : return;
2898 : 0 : free_ps_insns (ps);
2899 : 0 : if (new_ii == ps->ii)
2900 : : return;
2901 : 0 : ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2902 : : * sizeof (ps_insn_ptr));
2903 : 0 : memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2904 : 0 : ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2905 : 0 : memset (ps->rows_length, 0, new_ii * sizeof (int));
2906 : 0 : ps->ii = new_ii;
2907 : 0 : ps->min_cycle = INT_MAX;
2908 : 0 : ps->max_cycle = INT_MIN;
2909 : : }
2910 : :
2911 : : /* Prints the partial schedule as an ii rows array, for each rows
2912 : : print the ids of the insns in it. */
2913 : : void
2914 : 0 : print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2915 : : {
2916 : 0 : int i;
2917 : :
2918 : 0 : for (i = 0; i < ps->ii; i++)
2919 : : {
2920 : 0 : ps_insn_ptr ps_i = ps->rows[i];
2921 : :
2922 : 0 : fprintf (dump, "\n[ROW %d ]: ", i);
2923 : 0 : while (ps_i)
2924 : : {
2925 : 0 : rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
2926 : :
2927 : 0 : if (JUMP_P (insn))
2928 : 0 : fprintf (dump, "%d (branch), ", INSN_UID (insn));
2929 : : else
2930 : 0 : fprintf (dump, "%d, ", INSN_UID (insn));
2931 : :
2932 : 0 : ps_i = ps_i->next_in_row;
2933 : : }
2934 : : }
2935 : 0 : }
2936 : :
2937 : : /* Creates an object of PS_INSN and initializes it to the given parameters. */
2938 : : static ps_insn_ptr
2939 : 0 : create_ps_insn (int id, int cycle)
2940 : : {
2941 : 0 : ps_insn_ptr ps_i = XNEW (struct ps_insn);
2942 : :
2943 : 0 : ps_i->id = id;
2944 : 0 : ps_i->next_in_row = NULL;
2945 : 0 : ps_i->prev_in_row = NULL;
2946 : 0 : ps_i->cycle = cycle;
2947 : :
2948 : 0 : return ps_i;
2949 : : }
2950 : :
2951 : :
2952 : : /* Removes the given PS_INSN from the partial schedule. */
2953 : : static void
2954 : 0 : remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2955 : : {
2956 : 0 : int row;
2957 : :
2958 : 0 : gcc_assert (ps && ps_i);
2959 : :
2960 : 0 : row = SMODULO (ps_i->cycle, ps->ii);
2961 : 0 : if (! ps_i->prev_in_row)
2962 : : {
2963 : 0 : gcc_assert (ps_i == ps->rows[row]);
2964 : 0 : ps->rows[row] = ps_i->next_in_row;
2965 : 0 : if (ps->rows[row])
2966 : 0 : ps->rows[row]->prev_in_row = NULL;
2967 : : }
2968 : : else
2969 : : {
2970 : 0 : ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2971 : 0 : if (ps_i->next_in_row)
2972 : 0 : ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2973 : : }
2974 : :
2975 : 0 : ps->rows_length[row] -= 1;
2976 : 0 : free (ps_i);
2977 : 0 : return;
2978 : : }
2979 : :
2980 : : /* Unlike what literature describes for modulo scheduling (which focuses
2981 : : on VLIW machines) the order of the instructions inside a cycle is
2982 : : important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2983 : : where the current instruction should go relative to the already
2984 : : scheduled instructions in the given cycle. Go over these
2985 : : instructions and find the first possible column to put it in. */
2986 : : static bool
2987 : 0 : ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2988 : : sbitmap must_precede, sbitmap must_follow)
2989 : : {
2990 : 0 : ps_insn_ptr next_ps_i;
2991 : 0 : ps_insn_ptr first_must_follow = NULL;
2992 : 0 : ps_insn_ptr last_must_precede = NULL;
2993 : 0 : ps_insn_ptr last_in_row = NULL;
2994 : 0 : int row;
2995 : :
2996 : 0 : if (! ps_i)
2997 : : return false;
2998 : :
2999 : 0 : row = SMODULO (ps_i->cycle, ps->ii);
3000 : :
3001 : : /* Find the first must follow and the last must precede
3002 : : and insert the node immediately after the must precede
3003 : : but make sure that it there is no must follow after it. */
3004 : 0 : for (next_ps_i = ps->rows[row];
3005 : 0 : next_ps_i;
3006 : 0 : next_ps_i = next_ps_i->next_in_row)
3007 : : {
3008 : 0 : if (must_follow
3009 : 0 : && bitmap_bit_p (must_follow, next_ps_i->id)
3010 : 0 : && ! first_must_follow)
3011 : : first_must_follow = next_ps_i;
3012 : 0 : if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
3013 : : {
3014 : : /* If we have already met a node that must follow, then
3015 : : there is no possible column. */
3016 : 0 : if (first_must_follow)
3017 : : return false;
3018 : : else
3019 : : last_must_precede = next_ps_i;
3020 : : }
3021 : : /* The closing branch must be the last in the row. */
3022 : 0 : if (JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3023 : : return false;
3024 : :
3025 : 0 : last_in_row = next_ps_i;
3026 : : }
3027 : :
3028 : : /* The closing branch is scheduled as well. Make sure there is no
3029 : : dependent instruction after it as the branch should be the last
3030 : : instruction in the row. */
3031 : 0 : if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3032 : : {
3033 : 0 : if (first_must_follow)
3034 : : return false;
3035 : 0 : if (last_in_row)
3036 : : {
3037 : : /* Make the branch the last in the row. New instructions
3038 : : will be inserted at the beginning of the row or after the
3039 : : last must_precede instruction thus the branch is guaranteed
3040 : : to remain the last instruction in the row. */
3041 : 0 : last_in_row->next_in_row = ps_i;
3042 : 0 : ps_i->prev_in_row = last_in_row;
3043 : 0 : ps_i->next_in_row = NULL;
3044 : : }
3045 : : else
3046 : 0 : ps->rows[row] = ps_i;
3047 : 0 : return true;
3048 : : }
3049 : :
3050 : : /* Now insert the node after INSERT_AFTER_PSI. */
3051 : :
3052 : 0 : if (! last_must_precede)
3053 : : {
3054 : 0 : ps_i->next_in_row = ps->rows[row];
3055 : 0 : ps_i->prev_in_row = NULL;
3056 : 0 : if (ps_i->next_in_row)
3057 : 0 : ps_i->next_in_row->prev_in_row = ps_i;
3058 : 0 : ps->rows[row] = ps_i;
3059 : : }
3060 : : else
3061 : : {
3062 : 0 : ps_i->next_in_row = last_must_precede->next_in_row;
3063 : 0 : last_must_precede->next_in_row = ps_i;
3064 : 0 : ps_i->prev_in_row = last_must_precede;
3065 : 0 : if (ps_i->next_in_row)
3066 : 0 : ps_i->next_in_row->prev_in_row = ps_i;
3067 : : }
3068 : :
3069 : : return true;
3070 : : }
3071 : :
3072 : : /* Advances the PS_INSN one column in its current row; returns false
3073 : : in failure and true in success. Bit N is set in MUST_FOLLOW if
3074 : : the node with cuid N must be come after the node pointed to by
3075 : : PS_I when scheduled in the same cycle. */
3076 : : static bool
3077 : 0 : ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3078 : : sbitmap must_follow)
3079 : : {
3080 : 0 : ps_insn_ptr prev, next;
3081 : 0 : int row;
3082 : :
3083 : 0 : if (!ps || !ps_i)
3084 : : return false;
3085 : :
3086 : 0 : row = SMODULO (ps_i->cycle, ps->ii);
3087 : :
3088 : 0 : if (! ps_i->next_in_row)
3089 : : return false;
3090 : :
3091 : : /* Check if next_in_row is dependent on ps_i, both having same sched
3092 : : times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
3093 : 0 : if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
3094 : : return false;
3095 : :
3096 : : /* Advance PS_I over its next_in_row in the doubly linked list. */
3097 : 0 : prev = ps_i->prev_in_row;
3098 : 0 : next = ps_i->next_in_row;
3099 : :
3100 : 0 : if (ps_i == ps->rows[row])
3101 : 0 : ps->rows[row] = next;
3102 : :
3103 : 0 : ps_i->next_in_row = next->next_in_row;
3104 : :
3105 : 0 : if (next->next_in_row)
3106 : 0 : next->next_in_row->prev_in_row = ps_i;
3107 : :
3108 : 0 : next->next_in_row = ps_i;
3109 : 0 : ps_i->prev_in_row = next;
3110 : :
3111 : 0 : next->prev_in_row = prev;
3112 : 0 : if (prev)
3113 : 0 : prev->next_in_row = next;
3114 : :
3115 : : return true;
3116 : : }
3117 : :
3118 : : /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
3119 : : Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
3120 : : set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
3121 : : before/after (respectively) the node pointed to by PS_I when scheduled
3122 : : in the same cycle. */
3123 : : static ps_insn_ptr
3124 : 0 : add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3125 : : sbitmap must_precede, sbitmap must_follow)
3126 : : {
3127 : 0 : ps_insn_ptr ps_i;
3128 : 0 : int row = SMODULO (cycle, ps->ii);
3129 : :
3130 : 0 : if (ps->rows_length[row] >= issue_rate)
3131 : : return NULL;
3132 : :
3133 : 0 : ps_i = create_ps_insn (id, cycle);
3134 : :
3135 : : /* Finds and inserts PS_I according to MUST_FOLLOW and
3136 : : MUST_PRECEDE. */
3137 : 0 : if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3138 : : {
3139 : 0 : free (ps_i);
3140 : 0 : return NULL;
3141 : : }
3142 : :
3143 : 0 : ps->rows_length[row] += 1;
3144 : 0 : return ps_i;
3145 : : }
3146 : :
3147 : : /* Advance time one cycle. Assumes DFA is being used. */
3148 : : static void
3149 : 0 : advance_one_cycle (void)
3150 : : {
3151 : 0 : if (targetm.sched.dfa_pre_cycle_insn)
3152 : 0 : state_transition (curr_state,
3153 : : targetm.sched.dfa_pre_cycle_insn ());
3154 : :
3155 : 0 : state_transition (curr_state, NULL);
3156 : :
3157 : 0 : if (targetm.sched.dfa_post_cycle_insn)
3158 : 0 : state_transition (curr_state,
3159 : 0 : targetm.sched.dfa_post_cycle_insn ());
3160 : 0 : }
3161 : :
3162 : :
3163 : :
3164 : : /* Checks if PS has resource conflicts according to DFA, starting from
3165 : : FROM cycle to TO cycle; returns true if there are conflicts and false
3166 : : if there are no conflicts. Assumes DFA is being used. */
3167 : : static bool
3168 : 0 : ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3169 : : {
3170 : 0 : int cycle;
3171 : :
3172 : 0 : state_reset (curr_state);
3173 : :
3174 : 0 : for (cycle = from; cycle <= to; cycle++)
3175 : : {
3176 : 0 : ps_insn_ptr crr_insn;
3177 : : /* Holds the remaining issue slots in the current row. */
3178 : 0 : int can_issue_more = issue_rate;
3179 : :
3180 : : /* Walk through the DFA for the current row. */
3181 : 0 : for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3182 : 0 : crr_insn;
3183 : 0 : crr_insn = crr_insn->next_in_row)
3184 : : {
3185 : 0 : rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
3186 : :
3187 : : /* Check if there is room for the current insn. */
3188 : 0 : if (!can_issue_more || state_dead_lock_p (curr_state))
3189 : 0 : return true;
3190 : :
3191 : : /* Update the DFA state and return with failure if the DFA found
3192 : : resource conflicts. */
3193 : 0 : if (state_transition (curr_state, insn) >= 0)
3194 : : return true;
3195 : :
3196 : 0 : if (targetm.sched.variable_issue)
3197 : 0 : can_issue_more =
3198 : 0 : targetm.sched.variable_issue (sched_dump, sched_verbose,
3199 : : insn, can_issue_more);
3200 : : /* A naked CLOBBER or USE generates no instruction, so don't
3201 : : let them consume issue slots. */
3202 : 0 : else if (GET_CODE (PATTERN (insn)) != USE
3203 : 0 : && GET_CODE (PATTERN (insn)) != CLOBBER)
3204 : 0 : can_issue_more--;
3205 : : }
3206 : :
3207 : : /* Advance the DFA to the next cycle. */
3208 : 0 : advance_one_cycle ();
3209 : : }
3210 : : return false;
3211 : : }
3212 : :
3213 : : /* Checks if the given node causes resource conflicts when added to PS at
3214 : : cycle C. If not the node is added to PS and returned; otherwise zero
3215 : : is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3216 : : cuid N must be come before/after (respectively) the node pointed to by
3217 : : PS_I when scheduled in the same cycle. */
3218 : : ps_insn_ptr
3219 : 0 : ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3220 : : int c, sbitmap must_precede,
3221 : : sbitmap must_follow)
3222 : : {
3223 : 0 : int i, first, amount;
3224 : 0 : bool has_conflicts = false;
3225 : 0 : ps_insn_ptr ps_i;
3226 : :
3227 : : /* First add the node to the PS, if this succeeds check for
3228 : : conflicts, trying different issue slots in the same row. */
3229 : 0 : if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3230 : : return NULL; /* Failed to insert the node at the given cycle. */
3231 : :
3232 : 0 : while (1)
3233 : : {
3234 : 0 : has_conflicts = ps_has_conflicts (ps, c, c);
3235 : 0 : if (ps->history > 0 && !has_conflicts)
3236 : : {
3237 : : /* Check all 2h+1 intervals, starting from c-2h..c up to c..2h,
3238 : : but not more than ii intervals. */
3239 : 0 : first = c - ps->history;
3240 : 0 : amount = 2 * ps->history + 1;
3241 : 0 : if (amount > ps->ii)
3242 : : amount = ps->ii;
3243 : 0 : for (i = first; i < first + amount; i++)
3244 : : {
3245 : 0 : has_conflicts = ps_has_conflicts (ps,
3246 : : i - ps->history,
3247 : 0 : i + ps->history);
3248 : 0 : if (has_conflicts)
3249 : : break;
3250 : : }
3251 : : }
3252 : 0 : if (!has_conflicts)
3253 : : break;
3254 : : /* Try different issue slots to find one that the given node can be
3255 : : scheduled in without conflicts. */
3256 : 0 : if (! ps_insn_advance_column (ps, ps_i, must_follow))
3257 : : break;
3258 : : }
3259 : :
3260 : 0 : if (has_conflicts)
3261 : : {
3262 : 0 : remove_node_from_ps (ps, ps_i);
3263 : 0 : return NULL;
3264 : : }
3265 : :
3266 : 0 : ps->min_cycle = MIN (ps->min_cycle, c);
3267 : 0 : ps->max_cycle = MAX (ps->max_cycle, c);
3268 : 0 : return ps_i;
3269 : : }
3270 : :
3271 : : /* Calculate the stage count of the partial schedule PS. The calculation
3272 : : takes into account the rotation amount passed in ROTATION_AMOUNT. */
3273 : : int
3274 : 0 : calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3275 : : {
3276 : 0 : int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3277 : 0 : int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3278 : 0 : int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3279 : :
3280 : : /* The calculation of stage count is done adding the number of stages
3281 : : before cycle zero and after cycle zero. */
3282 : 0 : stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3283 : :
3284 : 0 : return stage_count;
3285 : : }
3286 : :
3287 : : /* Rotate the rows of PS such that insns scheduled at time
3288 : : START_CYCLE will appear in row 0. Updates max/min_cycles. */
3289 : : void
3290 : 0 : rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3291 : : {
3292 : 0 : int i, row, backward_rotates;
3293 : 0 : int last_row = ps->ii - 1;
3294 : :
3295 : 0 : if (start_cycle == 0)
3296 : : return;
3297 : :
3298 : 0 : backward_rotates = SMODULO (start_cycle, ps->ii);
3299 : :
3300 : : /* Revisit later and optimize this into a single loop. */
3301 : 0 : for (i = 0; i < backward_rotates; i++)
3302 : : {
3303 : 0 : ps_insn_ptr first_row = ps->rows[0];
3304 : 0 : int first_row_length = ps->rows_length[0];
3305 : :
3306 : 0 : for (row = 0; row < last_row; row++)
3307 : : {
3308 : 0 : ps->rows[row] = ps->rows[row + 1];
3309 : 0 : ps->rows_length[row] = ps->rows_length[row + 1];
3310 : : }
3311 : :
3312 : 0 : ps->rows[last_row] = first_row;
3313 : 0 : ps->rows_length[last_row] = first_row_length;
3314 : : }
3315 : :
3316 : 0 : ps->max_cycle -= start_cycle;
3317 : 0 : ps->min_cycle -= start_cycle;
3318 : : }
3319 : :
3320 : : #endif /* INSN_SCHEDULING */
3321 : :
3322 : : /* Run instruction scheduler. */
3323 : : /* Perform SMS module scheduling. */
3324 : :
3325 : : namespace {
3326 : :
3327 : : const pass_data pass_data_sms =
3328 : : {
3329 : : RTL_PASS, /* type */
3330 : : "sms", /* name */
3331 : : OPTGROUP_NONE, /* optinfo_flags */
3332 : : TV_SMS, /* tv_id */
3333 : : 0, /* properties_required */
3334 : : 0, /* properties_provided */
3335 : : 0, /* properties_destroyed */
3336 : : 0, /* todo_flags_start */
3337 : : TODO_df_finish, /* todo_flags_finish */
3338 : : };
3339 : :
3340 : : class pass_sms : public rtl_opt_pass
3341 : : {
3342 : : public:
3343 : 285081 : pass_sms (gcc::context *ctxt)
3344 : 570162 : : rtl_opt_pass (pass_data_sms, ctxt)
3345 : : {}
3346 : :
3347 : : /* opt_pass methods: */
3348 : 1449863 : bool gate (function *) final override
3349 : : {
3350 : 1449863 : return (optimize > 0 && flag_modulo_sched);
3351 : : }
3352 : :
3353 : : unsigned int execute (function *) final override;
3354 : :
3355 : : }; // class pass_sms
3356 : :
3357 : : unsigned int
3358 : 141 : pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
3359 : : {
3360 : : #ifdef INSN_SCHEDULING
3361 : 141 : basic_block bb;
3362 : :
3363 : : /* Collect loop information to be used in SMS. */
3364 : 141 : cfg_layout_initialize (0);
3365 : 141 : sms_schedule ();
3366 : :
3367 : : /* Update the life information, because we add pseudos. */
3368 : 141 : max_regno = max_reg_num ();
3369 : :
3370 : : /* Finalize layout changes. */
3371 : 848 : FOR_EACH_BB_FN (bb, fun)
3372 : 707 : if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
3373 : 566 : bb->aux = bb->next_bb;
3374 : 141 : free_dominance_info (CDI_DOMINATORS);
3375 : 141 : cfg_layout_finalize ();
3376 : : #endif /* INSN_SCHEDULING */
3377 : 141 : return 0;
3378 : : }
3379 : :
3380 : : } // anon namespace
3381 : :
3382 : : rtl_opt_pass *
3383 : 285081 : make_pass_sms (gcc::context *ctxt)
3384 : : {
3385 : 285081 : return new pass_sms (ctxt);
3386 : : }
|