Line data Source code
1 : /* Swing Modulo Scheduling implementation.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 : Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 :
22 : #include "config.h"
23 : #include "system.h"
24 : #include "coretypes.h"
25 : #include "backend.h"
26 : #include "target.h"
27 : #include "rtl.h"
28 : #include "tree.h"
29 : #include "cfghooks.h"
30 : #include "df.h"
31 : #include "memmodel.h"
32 : #include "optabs.h"
33 : #include "regs.h"
34 : #include "emit-rtl.h"
35 : #include "gcov-io.h"
36 : #include "profile.h"
37 : #include "insn-attr.h"
38 : #include "cfgrtl.h"
39 : #include "sched-int.h"
40 : #include "cfgloop.h"
41 : #include "expr.h"
42 : #include "ddg.h"
43 : #include "tree-pass.h"
44 : #include "dbgcnt.h"
45 : #include "loop-unroll.h"
46 : #include "hard-reg-set.h"
47 :
48 : #ifdef INSN_SCHEDULING
49 :
50 : /* This file contains the implementation of the Swing Modulo Scheduler,
51 : described in the following references:
52 : [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
53 : Lifetime--sensitive modulo scheduling in a production environment.
54 : IEEE Trans. on Comps., 50(3), March 2001
55 : [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
56 : Swing Modulo Scheduling: A Lifetime Sensitive Approach.
57 : PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
58 :
59 : The basic structure is:
60 : 1. Build a data-dependence graph (DDG) for each loop.
61 : 2. Use the DDG to order the insns of a loop (not in topological order
62 : necessarily, but rather) trying to place each insn after all its
63 : predecessors _or_ after all its successors.
64 : 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
65 : 4. Use the ordering to perform list-scheduling of the loop:
66 : 1. Set II = MII. We will try to schedule the loop within II cycles.
67 : 2. Try to schedule the insns one by one according to the ordering.
68 : For each insn compute an interval of cycles by considering already-
69 : scheduled preds and succs (and associated latencies); try to place
70 : the insn in the cycles of this window checking for potential
71 : resource conflicts (using the DFA interface).
72 : Note: this is different from the cycle-scheduling of schedule_insns;
73 : here the insns are not scheduled monotonically top-down (nor bottom-
74 : up).
75 : 3. If failed in scheduling all insns - bump II++ and try again, unless
76 : II reaches an upper bound MaxII, in which case report failure.
77 : 5. If we succeeded in scheduling the loop within II cycles, we now
78 : generate prolog and epilog, decrease the counter of the loop, and
79 : perform modulo variable expansion for live ranges that span more than
80 : II cycles (i.e. use register copies to prevent a def from overwriting
81 : itself before reaching the use).
82 :
83 : SMS works with countable loops (1) whose control part can be easily
84 : decoupled from the rest of the loop and (2) whose loop count can
85 : be easily adjusted. This is because we peel a constant number of
86 : iterations into a prologue and epilogue for which we want to avoid
87 : emitting the control part, and a kernel which is to iterate that
88 : constant number of iterations less than the original loop. So the
89 : control part should be a set of insns clearly identified and having
90 : its own iv, not otherwise used in the loop (at-least for now), which
91 : initializes a register before the loop to the number of iterations.
92 : Currently SMS relies on the do-loop pattern to recognize such loops,
93 : where (1) the control part comprises of all insns defining and/or
94 : using a certain 'count' register and (2) the loop count can be
95 : adjusted by modifying this register prior to the loop.
96 : TODO: Rely on cfgloop analysis instead. */
97 :
98 : /* This page defines partial-schedule structures and functions for
99 : modulo scheduling. */
100 :
101 : typedef struct partial_schedule *partial_schedule_ptr;
102 : typedef struct ps_insn *ps_insn_ptr;
103 :
104 : /* The minimum (absolute) cycle that a node of ps was scheduled in. */
105 : #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
106 :
107 : /* The maximum (absolute) cycle that a node of ps was scheduled in. */
108 : #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
109 :
110 : /* Perform signed modulo, always returning a non-negative value. */
111 : #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
112 :
113 : /* The number of different iterations the nodes in ps span, assuming
114 : the stage boundaries are placed efficiently. */
115 : #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
116 : + 1 + ii - 1) / ii)
117 : /* The stage count of ps. */
118 : #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
119 :
120 : /* A single instruction in the partial schedule. */
121 : struct ps_insn
122 : {
123 : /* Identifies the instruction to be scheduled. Values smaller than
124 : the ddg's num_nodes refer directly to ddg nodes. A value of
125 : X - num_nodes refers to register move X. */
126 : int id;
127 :
128 : /* The (absolute) cycle in which the PS instruction is scheduled.
129 : Same as SCHED_TIME (node). */
130 : int cycle;
131 :
132 : /* The next/prev PS_INSN in the same row. */
133 : ps_insn_ptr next_in_row,
134 : prev_in_row;
135 :
136 : };
137 :
138 : /* Information about a register move that has been added to a partial
139 : schedule. */
140 : struct ps_reg_move_info
141 : {
142 : /* The source of the move is defined by the ps_insn with id DEF.
143 : The destination is used by the ps_insns with the ids in USES. */
144 : int def;
145 : sbitmap uses;
146 :
147 : /* The original form of USES' instructions used OLD_REG, but they
148 : should now use NEW_REG. */
149 : rtx old_reg;
150 : rtx new_reg;
151 :
152 : /* The number of consecutive stages that the move occupies. */
153 : int num_consecutive_stages;
154 :
155 : /* An instruction that sets NEW_REG to the correct value. The first
156 : move associated with DEF will have an rhs of OLD_REG; later moves
157 : use the result of the previous move. */
158 : rtx_insn *insn;
159 : };
160 :
161 : /* Holds the partial schedule as an array of II rows. Each entry of the
162 : array points to a linked list of PS_INSNs, which represents the
163 : instructions that are scheduled for that row. */
164 : struct partial_schedule
165 : {
166 : int ii; /* Number of rows in the partial schedule. */
167 : int history; /* Threshold for conflict checking using DFA. */
168 :
169 : /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
170 : ps_insn_ptr *rows;
171 :
172 : /* All the moves added for this partial schedule. Index X has
173 : a ps_insn id of X + g->num_nodes. */
174 : vec<ps_reg_move_info> reg_moves;
175 :
176 : /* rows_length[i] holds the number of instructions in the row.
177 : It is used only (as an optimization) to back off quickly from
178 : trying to schedule a node in a full row; that is, to avoid running
179 : through futile DFA state transitions. */
180 : int *rows_length;
181 :
182 : /* The earliest absolute cycle of an insn in the partial schedule. */
183 : int min_cycle;
184 :
185 : /* The latest absolute cycle of an insn in the partial schedule. */
186 : int max_cycle;
187 :
188 : ddg_ptr g; /* The DDG of the insns in the partial schedule. */
189 :
190 : int stage_count; /* The stage count of the partial schedule. */
191 : };
192 :
193 :
194 : static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
195 : static void free_partial_schedule (partial_schedule_ptr);
196 : static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
197 : void print_partial_schedule (partial_schedule_ptr, FILE *);
198 : static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
199 : static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
200 : int, int, sbitmap, sbitmap);
201 : static void rotate_partial_schedule (partial_schedule_ptr, int);
202 : void set_row_column_for_ps (partial_schedule_ptr);
203 : static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
204 : static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
205 :
206 :
207 : /* This page defines constants and structures for the modulo scheduling
208 : driver. */
209 :
210 : static int sms_order_nodes (ddg_ptr, int, int *, int *);
211 : static void set_node_sched_params (ddg_ptr);
212 : static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
213 : static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
214 : static int calculate_stage_count (partial_schedule_ptr, int);
215 : static void calculate_must_precede_follow (ddg_node_ptr, int, int,
216 : int, int, sbitmap, sbitmap, sbitmap);
217 : static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
218 : sbitmap, int, int *, int *, int *);
219 : static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
220 : sbitmap, int *, sbitmap, sbitmap);
221 : static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
222 :
223 : #define NODE_ASAP(node) ((node)->aux.count)
224 :
225 : #define SCHED_PARAMS(x) (&node_sched_param_vec[x])
226 : #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
227 : #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
228 : #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
229 : #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
230 :
231 : /* The scheduling parameters held for each node. */
232 : typedef struct node_sched_params
233 : {
234 : int time; /* The absolute scheduling cycle. */
235 :
236 : int row; /* Holds time % ii. */
237 : int stage; /* Holds time / ii. */
238 :
239 : /* The column of a node inside the ps. If nodes u, v are on the same row,
240 : u will precede v if column (u) < column (v). */
241 : int column;
242 : } *node_sched_params_ptr;
243 :
244 : /* The following three functions are copied from the current scheduler
245 : code in order to use sched_analyze() for computing the dependencies.
246 : They are used when initializing the sched_info structure. */
247 : static const char *
248 0 : sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
249 : {
250 0 : static char tmp[80];
251 :
252 0 : sprintf (tmp, "i%4d", INSN_UID (insn));
253 0 : return tmp;
254 : }
255 :
256 : static void
257 0 : compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
258 : regset used ATTRIBUTE_UNUSED)
259 : {
260 0 : }
261 :
262 : static struct common_sched_info_def sms_common_sched_info;
263 :
264 : static struct sched_deps_info_def sms_sched_deps_info =
265 : {
266 : compute_jump_reg_dependencies,
267 : NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
268 : NULL,
269 : 0, 0, 0
270 : };
271 :
272 : static struct haifa_sched_info sms_sched_info =
273 : {
274 : NULL,
275 : NULL,
276 : NULL,
277 : NULL,
278 : NULL,
279 : sms_print_insn,
280 : NULL,
281 : NULL, /* insn_finishes_block_p */
282 : NULL, NULL,
283 : NULL, NULL,
284 : 0, 0,
285 :
286 : NULL, NULL, NULL, NULL,
287 : NULL, NULL,
288 : 0
289 : };
290 :
291 : /* Partial schedule instruction ID in PS is a register move. Return
292 : information about it. */
293 : static struct ps_reg_move_info *
294 0 : ps_reg_move (partial_schedule_ptr ps, int id)
295 : {
296 0 : gcc_checking_assert (id >= ps->g->num_nodes);
297 0 : return &ps->reg_moves[id - ps->g->num_nodes];
298 : }
299 :
300 : /* Return the rtl instruction that is being scheduled by partial schedule
301 : instruction ID, which belongs to schedule PS. */
302 : static rtx_insn *
303 0 : ps_rtl_insn (partial_schedule_ptr ps, int id)
304 : {
305 0 : if (id < ps->g->num_nodes)
306 0 : return ps->g->nodes[id].insn;
307 : else
308 0 : return ps_reg_move (ps, id)->insn;
309 : }
310 :
311 : /* Partial schedule instruction ID, which belongs to PS, occurred in
312 : the original (unscheduled) loop. Return the first instruction
313 : in the loop that was associated with ps_rtl_insn (PS, ID).
314 : If the instruction had some notes before it, this is the first
315 : of those notes. */
316 : static rtx_insn *
317 0 : ps_first_note (partial_schedule_ptr ps, int id)
318 : {
319 0 : gcc_assert (id < ps->g->num_nodes);
320 0 : return ps->g->nodes[id].first_note;
321 : }
322 :
323 : /* Return the number of consecutive stages that are occupied by
324 : partial schedule instruction ID in PS. */
325 : static int
326 0 : ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
327 : {
328 0 : if (id < ps->g->num_nodes)
329 : return 1;
330 : else
331 0 : return ps_reg_move (ps, id)->num_consecutive_stages;
332 : }
333 :
334 : /* Given HEAD and TAIL which are the first and last insns in a loop;
335 : return the register which controls the loop. Return zero if it has
336 : more than one occurrence in the loop besides the control part or the
337 : do-loop pattern is not of the form we expect. */
338 : static rtx
339 156 : doloop_register_get (rtx_insn *head, rtx_insn *tail)
340 : {
341 156 : rtx reg, condition;
342 156 : rtx_insn *insn, *first_insn_not_to_check;
343 :
344 156 : if (!JUMP_P (tail))
345 : return NULL_RTX;
346 :
347 156 : if (!targetm.code_for_doloop_end)
348 : return NULL_RTX;
349 :
350 : /* TODO: Free SMS's dependence on doloop_condition_get. */
351 0 : condition = doloop_condition_get (tail);
352 0 : if (! condition)
353 : return NULL_RTX;
354 :
355 0 : if (REG_P (XEXP (condition, 0)))
356 : reg = XEXP (condition, 0);
357 0 : else if (GET_CODE (XEXP (condition, 0)) == PLUS
358 0 : && REG_P (XEXP (XEXP (condition, 0), 0)))
359 : {
360 0 : if (CONST_INT_P (XEXP (condition, 1))
361 0 : && INTVAL (XEXP (condition, 1)) == -1)
362 : reg = XEXP (XEXP (condition, 0), 0);
363 : else
364 : return NULL_RTX;
365 : }
366 : else
367 0 : gcc_unreachable ();
368 :
369 : /* Check that the COUNT_REG has no other occurrences in the loop
370 : until the decrement. We assume the control part consists of
371 : either a single (parallel) branch-on-count or a (non-parallel)
372 : branch immediately preceded by a single (decrement) insn. */
373 0 : first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
374 0 : : prev_nondebug_insn (tail));
375 :
376 0 : for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
377 0 : if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
378 : {
379 0 : if (dump_file)
380 : {
381 0 : fprintf (dump_file, "SMS count_reg found ");
382 0 : print_rtl_single (dump_file, reg);
383 0 : fprintf (dump_file, " outside control in insn:\n");
384 0 : print_rtl_single (dump_file, insn);
385 : }
386 :
387 0 : return NULL_RTX;
388 : }
389 :
390 : return reg;
391 : }
392 :
393 : /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
394 : that the number of iterations is a compile-time constant. If so,
395 : return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
396 : this constant. Otherwise return 0. */
397 : static rtx_insn *
398 0 : const_iteration_count (rtx count_reg, basic_block pre_header,
399 : int64_t *count, bool* adjust_inplace)
400 : {
401 0 : rtx_insn *insn;
402 0 : rtx_insn *head, *tail;
403 :
404 0 : *adjust_inplace = false;
405 0 : bool read_after = false;
406 :
407 0 : if (! pre_header)
408 : return NULL;
409 :
410 0 : get_ebb_head_tail (pre_header, pre_header, &head, &tail);
411 :
412 0 : for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
413 0 : if (single_set (insn) && rtx_equal_p (count_reg,
414 0 : SET_DEST (single_set (insn))))
415 : {
416 0 : rtx pat = single_set (insn);
417 :
418 0 : if (CONST_INT_P (SET_SRC (pat)))
419 : {
420 0 : *count = INTVAL (SET_SRC (pat));
421 0 : *adjust_inplace = !read_after;
422 0 : return insn;
423 : }
424 :
425 : return NULL;
426 : }
427 0 : else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
428 : {
429 0 : read_after = true;
430 0 : if (reg_set_p (count_reg, insn))
431 : break;
432 : }
433 :
434 : return NULL;
435 : }
436 :
437 : /* A very simple resource-based lower bound on the initiation interval.
438 : ??? Improve the accuracy of this bound by considering the
439 : utilization of various units. */
440 : static int
441 0 : res_MII (ddg_ptr g)
442 : {
443 0 : if (targetm.sched.sms_res_mii)
444 0 : return targetm.sched.sms_res_mii (g);
445 :
446 0 : return g->num_nodes / issue_rate;
447 : }
448 :
449 :
450 : /* A vector that contains the sched data for each ps_insn. */
451 : static vec<node_sched_params> node_sched_param_vec;
452 :
453 : /* Allocate sched_params for each node and initialize it. */
454 : static void
455 0 : set_node_sched_params (ddg_ptr g)
456 : {
457 0 : node_sched_param_vec.truncate (0);
458 0 : node_sched_param_vec.safe_grow_cleared (g->num_nodes, true);
459 0 : }
460 :
461 : /* Make sure that node_sched_param_vec has an entry for every move in PS. */
462 : static void
463 0 : extend_node_sched_params (partial_schedule_ptr ps)
464 : {
465 0 : node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
466 0 : + ps->reg_moves.length (), true);
467 0 : }
468 :
469 : /* Update the sched_params (time, row and stage) for node U using the II,
470 : the CYCLE of U and MIN_CYCLE.
471 : We're not simply taking the following
472 : SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
473 : because the stages may not be aligned on cycle 0. */
474 : static void
475 0 : update_node_sched_params (int u, int ii, int cycle, int min_cycle)
476 : {
477 0 : int sc_until_cycle_zero;
478 0 : int stage;
479 :
480 0 : SCHED_TIME (u) = cycle;
481 0 : SCHED_ROW (u) = SMODULO (cycle, ii);
482 :
483 : /* The calculation of stage count is done adding the number
484 : of stages before cycle zero and after cycle zero. */
485 0 : sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
486 :
487 0 : if (SCHED_TIME (u) < 0)
488 : {
489 0 : stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
490 0 : SCHED_STAGE (u) = sc_until_cycle_zero - stage;
491 : }
492 : else
493 : {
494 0 : stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
495 0 : SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
496 : }
497 0 : }
498 :
499 : static void
500 0 : print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
501 : {
502 0 : int i;
503 :
504 0 : if (! file)
505 : return;
506 0 : for (i = 0; i < num_nodes; i++)
507 : {
508 0 : node_sched_params_ptr nsp = SCHED_PARAMS (i);
509 :
510 0 : fprintf (file, "Node = %d; INSN = %d\n", i,
511 0 : INSN_UID (ps_rtl_insn (ps, i)));
512 0 : fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
513 0 : fprintf (file, " time = %d:\n", nsp->time);
514 0 : fprintf (file, " stage = %d:\n", nsp->stage);
515 : }
516 : }
517 :
518 : /* Set SCHED_COLUMN for each instruction in row ROW of PS. */
519 : static void
520 0 : set_columns_for_row (partial_schedule_ptr ps, int row)
521 : {
522 0 : ps_insn_ptr cur_insn;
523 0 : int column;
524 :
525 0 : column = 0;
526 0 : for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
527 0 : SCHED_COLUMN (cur_insn->id) = column++;
528 0 : }
529 :
530 : /* Set SCHED_COLUMN for each instruction in PS. */
531 : static void
532 0 : set_columns_for_ps (partial_schedule_ptr ps)
533 : {
534 0 : int row;
535 :
536 0 : for (row = 0; row < ps->ii; row++)
537 0 : set_columns_for_row (ps, row);
538 0 : }
539 :
540 : /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
541 : Its single predecessor has already been scheduled, as has its
542 : ddg node successors. (The move may have also another move as its
543 : successor, in which case that successor will be scheduled later.)
544 :
545 : The move is part of a chain that satisfies register dependencies
546 : between a producing ddg node and various consuming ddg nodes.
547 : If some of these dependencies have a distance of 1 (meaning that
548 : the use is upward-exposed) then DISTANCE1_USES is nonnull and
549 : contains the set of uses with distance-1 dependencies.
550 : DISTANCE1_USES is null otherwise.
551 :
552 : MUST_FOLLOW is a scratch bitmap that is big enough to hold
553 : all current ps_insn ids.
554 :
555 : Return true on success. */
556 : static bool
557 0 : schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
558 : sbitmap distance1_uses, sbitmap must_follow)
559 : {
560 0 : unsigned int u;
561 0 : int this_time, this_distance, this_start, this_end, this_latency;
562 0 : int start, end, c, ii;
563 0 : sbitmap_iterator sbi;
564 0 : ps_reg_move_info *move;
565 0 : rtx_insn *this_insn;
566 0 : ps_insn_ptr psi;
567 :
568 0 : move = ps_reg_move (ps, i_reg_move);
569 0 : ii = ps->ii;
570 0 : if (dump_file)
571 : {
572 0 : fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
573 0 : ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
574 : PS_MIN_CYCLE (ps));
575 0 : print_rtl_single (dump_file, move->insn);
576 0 : fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
577 0 : fprintf (dump_file, "=========== =========== =====\n");
578 : }
579 :
580 0 : start = INT_MIN;
581 0 : end = INT_MAX;
582 :
583 : /* For dependencies of distance 1 between a producer ddg node A
584 : and consumer ddg node B, we have a chain of dependencies:
585 :
586 : A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
587 :
588 : where Mi is the ith move. For dependencies of distance 0 between
589 : a producer ddg node A and consumer ddg node C, we have a chain of
590 : dependencies:
591 :
592 : A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
593 :
594 : where Mi' occupies the same position as Mi but occurs a stage later.
595 : We can only schedule each move once, so if we have both types of
596 : chain, we model the second as:
597 :
598 : A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
599 :
600 : First handle the dependencies between the previously-scheduled
601 : predecessor and the move. */
602 0 : this_insn = ps_rtl_insn (ps, move->def);
603 0 : this_latency = insn_latency (this_insn, move->insn);
604 0 : this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
605 0 : this_time = SCHED_TIME (move->def) - this_distance * ii;
606 0 : this_start = this_time + this_latency;
607 0 : this_end = this_time + ii;
608 0 : if (dump_file)
609 0 : fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
610 0 : this_start, this_end, SCHED_TIME (move->def),
611 0 : INSN_UID (this_insn), this_latency, this_distance,
612 0 : INSN_UID (move->insn));
613 :
614 0 : if (start < this_start)
615 : start = this_start;
616 0 : if (end > this_end)
617 : end = this_end;
618 :
619 : /* Handle the dependencies between the move and previously-scheduled
620 : successors. */
621 0 : EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
622 : {
623 0 : this_insn = ps_rtl_insn (ps, u);
624 0 : this_latency = insn_latency (move->insn, this_insn);
625 0 : if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
626 : this_distance = -1;
627 : else
628 : this_distance = 0;
629 0 : this_time = SCHED_TIME (u) + this_distance * ii;
630 0 : this_start = this_time - ii;
631 0 : this_end = this_time - this_latency;
632 0 : if (dump_file)
633 0 : fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
634 0 : this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
635 0 : this_latency, this_distance, INSN_UID (this_insn));
636 :
637 0 : if (start < this_start)
638 : start = this_start;
639 0 : if (end > this_end)
640 : end = this_end;
641 : }
642 :
643 0 : if (dump_file)
644 : {
645 0 : fprintf (dump_file, "----------- ----------- -----\n");
646 0 : fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
647 : }
648 :
649 0 : bitmap_clear (must_follow);
650 0 : bitmap_set_bit (must_follow, move->def);
651 :
652 0 : start = MAX (start, end - (ii - 1));
653 0 : for (c = end; c >= start; c--)
654 : {
655 0 : psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
656 : move->uses, must_follow);
657 0 : if (psi)
658 : {
659 0 : update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
660 0 : if (dump_file)
661 0 : fprintf (dump_file, "\nScheduled register move INSN %d at"
662 0 : " time %d, row %d\n\n", INSN_UID (move->insn), c,
663 0 : SCHED_ROW (i_reg_move));
664 0 : return true;
665 : }
666 : }
667 :
668 0 : if (dump_file)
669 0 : fprintf (dump_file, "\nNo available slot\n\n");
670 :
671 : return false;
672 : }
673 :
674 : /*
675 : Breaking intra-loop register anti-dependences:
676 : Each intra-loop register anti-dependence implies a cross-iteration true
677 : dependence of distance 1. Therefore, we can remove such false dependencies
678 : and figure out if the partial schedule broke them by checking if (for a
679 : true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
680 : if so generate a register move. The number of such moves is equal to:
681 : SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
682 : nreg_moves = ----------------------------------- + 1 - { dependence.
683 : ii { 1 if not.
684 : */
685 : static bool
686 0 : schedule_reg_moves (partial_schedule_ptr ps)
687 : {
688 0 : ddg_ptr g = ps->g;
689 0 : int ii = ps->ii;
690 0 : int i;
691 :
692 0 : for (i = 0; i < g->num_nodes; i++)
693 : {
694 0 : ddg_node_ptr u = &g->nodes[i];
695 0 : ddg_edge_ptr e;
696 0 : int nreg_moves = 0, i_reg_move;
697 0 : rtx prev_reg, old_reg;
698 0 : int first_move;
699 0 : int distances[2];
700 0 : sbitmap distance1_uses;
701 0 : rtx set = single_set (u->insn);
702 :
703 : /* Skip instructions that do not set a register. */
704 0 : if (set && !REG_P (SET_DEST (set)))
705 0 : continue;
706 :
707 : /* Compute the number of reg_moves needed for u, by looking at life
708 : ranges started at u (excluding self-loops). */
709 0 : distances[0] = distances[1] = false;
710 0 : for (e = u->out; e; e = e->next_out)
711 0 : if (e->type == TRUE_DEP && e->dest != e->src)
712 : {
713 0 : int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
714 0 : - SCHED_TIME (e->src->cuid)) / ii;
715 :
716 0 : if (e->distance == 1)
717 0 : nreg_moves4e = (SCHED_TIME (e->dest->cuid)
718 0 : - SCHED_TIME (e->src->cuid) + ii) / ii;
719 :
720 : /* If dest precedes src in the schedule of the kernel, then dest
721 : will read before src writes and we can save one reg_copy. */
722 0 : if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
723 0 : && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
724 0 : nreg_moves4e--;
725 :
726 0 : if (nreg_moves4e >= 1)
727 : {
728 : /* !single_set instructions are not supported yet and
729 : thus we do not except to encounter them in the loop
730 : except from the doloop part. For the latter case
731 : we assume no regmoves are generated as the doloop
732 : instructions are tied to the branch with an edge. */
733 0 : gcc_assert (set);
734 : /* If the instruction contains auto-inc register then
735 : validate that the regmov is being generated for the
736 : target regsiter rather then the inc'ed register. */
737 0 : gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
738 : }
739 :
740 0 : if (nreg_moves4e)
741 : {
742 0 : gcc_assert (e->distance < 2);
743 0 : distances[e->distance] = true;
744 : }
745 0 : nreg_moves = MAX (nreg_moves, nreg_moves4e);
746 : }
747 :
748 0 : if (nreg_moves == 0)
749 0 : continue;
750 :
751 : /* Create NREG_MOVES register moves. */
752 0 : first_move = ps->reg_moves.length ();
753 0 : ps->reg_moves.safe_grow_cleared (first_move + nreg_moves, true);
754 0 : extend_node_sched_params (ps);
755 :
756 : /* Record the moves associated with this node. */
757 0 : first_move += ps->g->num_nodes;
758 :
759 : /* Generate each move. */
760 0 : old_reg = prev_reg = SET_DEST (set);
761 0 : if (HARD_REGISTER_P (old_reg))
762 0 : return false;
763 :
764 0 : for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
765 : {
766 0 : ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
767 :
768 0 : move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
769 0 : move->uses = sbitmap_alloc (first_move + nreg_moves);
770 0 : move->old_reg = old_reg;
771 0 : move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
772 0 : move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
773 0 : move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
774 0 : bitmap_clear (move->uses);
775 :
776 0 : prev_reg = move->new_reg;
777 : }
778 :
779 0 : distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
780 :
781 0 : if (distance1_uses)
782 0 : bitmap_clear (distance1_uses);
783 :
784 : /* Every use of the register defined by node may require a different
785 : copy of this register, depending on the time the use is scheduled.
786 : Record which uses require which move results. */
787 0 : for (e = u->out; e; e = e->next_out)
788 0 : if (e->type == TRUE_DEP && e->dest != e->src)
789 : {
790 0 : int dest_copy = (SCHED_TIME (e->dest->cuid)
791 0 : - SCHED_TIME (e->src->cuid)) / ii;
792 :
793 0 : if (e->distance == 1)
794 0 : dest_copy = (SCHED_TIME (e->dest->cuid)
795 0 : - SCHED_TIME (e->src->cuid) + ii) / ii;
796 :
797 0 : if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
798 0 : && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
799 0 : dest_copy--;
800 :
801 0 : if (dest_copy)
802 : {
803 0 : ps_reg_move_info *move;
804 :
805 0 : move = ps_reg_move (ps, first_move + dest_copy - 1);
806 0 : bitmap_set_bit (move->uses, e->dest->cuid);
807 0 : if (e->distance == 1)
808 0 : bitmap_set_bit (distance1_uses, e->dest->cuid);
809 : }
810 : }
811 :
812 0 : auto_sbitmap must_follow (first_move + nreg_moves);
813 0 : for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
814 0 : if (!schedule_reg_move (ps, first_move + i_reg_move,
815 : distance1_uses, must_follow))
816 : break;
817 0 : if (distance1_uses)
818 0 : sbitmap_free (distance1_uses);
819 0 : if (i_reg_move < nreg_moves)
820 0 : return false;
821 0 : }
822 : return true;
823 : }
824 :
825 : /* Emit the moves associated with PS. Apply the substitutions
826 : associated with them. */
827 : static void
828 0 : apply_reg_moves (partial_schedule_ptr ps)
829 : {
830 0 : ps_reg_move_info *move;
831 0 : int i;
832 :
833 0 : FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
834 : {
835 0 : unsigned int i_use;
836 0 : sbitmap_iterator sbi;
837 :
838 0 : EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
839 : {
840 0 : replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
841 0 : df_insn_rescan (ps->g->nodes[i_use].insn);
842 : }
843 : }
844 0 : }
845 :
846 : /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
847 : SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT
848 : will move to cycle zero. */
849 : static void
850 0 : reset_sched_times (partial_schedule_ptr ps, int amount)
851 : {
852 0 : int row;
853 0 : int ii = ps->ii;
854 0 : ps_insn_ptr crr_insn;
855 :
856 0 : for (row = 0; row < ii; row++)
857 0 : for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
858 : {
859 0 : int u = crr_insn->id;
860 0 : int normalized_time = SCHED_TIME (u) - amount;
861 0 : int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
862 :
863 0 : if (dump_file)
864 : {
865 : /* Print the scheduling times after the rotation. */
866 0 : rtx_insn *insn = ps_rtl_insn (ps, u);
867 :
868 0 : fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
869 : "crr_insn->cycle=%d, min_cycle=%d", u,
870 0 : INSN_UID (insn), normalized_time, new_min_cycle);
871 0 : if (JUMP_P (insn))
872 0 : fprintf (dump_file, " (branch)");
873 0 : fprintf (dump_file, "\n");
874 : }
875 :
876 0 : gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
877 0 : gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
878 :
879 0 : crr_insn->cycle = normalized_time;
880 0 : update_node_sched_params (u, ii, normalized_time, new_min_cycle);
881 : }
882 0 : }
883 :
884 : /* Permute the insns according to their order in PS, from row 0 to
885 : row ii-1, and position them right before LAST. This schedules
886 : the insns of the loop kernel. */
887 : static void
888 0 : permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
889 : {
890 0 : int ii = ps->ii;
891 0 : int row;
892 0 : ps_insn_ptr ps_ij;
893 :
894 0 : for (row = 0; row < ii ; row++)
895 0 : for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
896 : {
897 0 : rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
898 :
899 0 : if (PREV_INSN (last) != insn)
900 : {
901 0 : if (ps_ij->id < ps->g->num_nodes)
902 0 : reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
903 : PREV_INSN (last));
904 : else
905 0 : add_insn_before (insn, last, NULL);
906 : }
907 : }
908 0 : }
909 :
910 : /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
911 : respectively only if cycle C falls on the border of the scheduling
912 : window boundaries marked by START and END cycles. STEP is the
913 : direction of the window. */
914 : static inline void
915 0 : set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
916 : sbitmap *tmp_precede, sbitmap must_precede, int c,
917 : int start, int end, int step)
918 : {
919 0 : *tmp_precede = NULL;
920 0 : *tmp_follow = NULL;
921 :
922 0 : if (c == start)
923 : {
924 0 : if (step == 1)
925 : *tmp_precede = must_precede;
926 : else /* step == -1. */
927 0 : *tmp_follow = must_follow;
928 : }
929 0 : if (c == end - step)
930 : {
931 0 : if (step == 1)
932 : *tmp_follow = must_follow;
933 : else /* step == -1. */
934 0 : *tmp_precede = must_precede;
935 : }
936 :
937 : }
938 :
939 : /* Return True if the branch can be moved to row ii-1 while
940 : normalizing the partial schedule PS to start from cycle zero and thus
941 : optimize the SC. Otherwise return False. */
942 : static bool
943 0 : optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
944 : {
945 0 : int amount = PS_MIN_CYCLE (ps);
946 0 : int start, end, step;
947 0 : int ii = ps->ii;
948 0 : bool ok = false;
949 0 : int stage_count, stage_count_curr;
950 :
951 : /* Compare the SC after normalization and SC after bringing the branch
952 : to row ii-1. If they are equal just bail out. */
953 0 : stage_count = calculate_stage_count (ps, amount);
954 0 : stage_count_curr =
955 0 : calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
956 :
957 0 : if (stage_count == stage_count_curr)
958 : {
959 0 : if (dump_file)
960 0 : fprintf (dump_file, "SMS SC already optimized.\n");
961 :
962 0 : return false;
963 : }
964 :
965 0 : if (dump_file)
966 : {
967 0 : fprintf (dump_file, "SMS Trying to optimize branch location\n");
968 0 : fprintf (dump_file, "SMS partial schedule before trial:\n");
969 0 : print_partial_schedule (ps, dump_file);
970 : }
971 :
972 : /* First, normalize the partial scheduling. */
973 0 : reset_sched_times (ps, amount);
974 0 : rotate_partial_schedule (ps, amount);
975 0 : if (dump_file)
976 : {
977 0 : fprintf (dump_file,
978 : "SMS partial schedule after normalization (ii, %d, SC %d):\n",
979 : ii, stage_count);
980 0 : print_partial_schedule (ps, dump_file);
981 : }
982 :
983 0 : if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
984 : return true;
985 :
986 0 : auto_sbitmap sched_nodes (g->num_nodes);
987 0 : bitmap_ones (sched_nodes);
988 :
989 : /* Calculate the new placement of the branch. It should be in row
990 : ii-1 and fall into it's scheduling window. */
991 0 : if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
992 : &step, &end) == 0)
993 : {
994 0 : bool success;
995 0 : ps_insn_ptr next_ps_i;
996 0 : int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
997 0 : int row = SMODULO (branch_cycle, ps->ii);
998 0 : int num_splits = 0;
999 0 : sbitmap tmp_precede, tmp_follow;
1000 0 : int min_cycle, c;
1001 :
1002 0 : if (dump_file)
1003 0 : fprintf (dump_file, "\nTrying to schedule node %d "
1004 : "INSN = %d in (%d .. %d) step %d\n",
1005 : g->closing_branch->cuid,
1006 0 : (INSN_UID (g->closing_branch->insn)), start, end, step);
1007 :
1008 0 : gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
1009 0 : if (step == 1)
1010 : {
1011 0 : c = start + ii - SMODULO (start, ii) - 1;
1012 0 : gcc_assert (c >= start);
1013 0 : if (c >= end)
1014 : {
1015 0 : if (dump_file)
1016 0 : fprintf (dump_file,
1017 : "SMS failed to schedule branch at cycle: %d\n", c);
1018 0 : return false;
1019 : }
1020 : }
1021 : else
1022 : {
1023 0 : c = start - SMODULO (start, ii) - 1;
1024 0 : gcc_assert (c <= start);
1025 :
1026 0 : if (c <= end)
1027 : {
1028 0 : if (dump_file)
1029 0 : fprintf (dump_file,
1030 : "SMS failed to schedule branch at cycle: %d\n", c);
1031 0 : return false;
1032 : }
1033 : }
1034 :
1035 0 : auto_sbitmap must_precede (g->num_nodes);
1036 0 : auto_sbitmap must_follow (g->num_nodes);
1037 :
1038 : /* Try to schedule the branch is it's new cycle. */
1039 0 : calculate_must_precede_follow (g->closing_branch, start, end,
1040 : step, ii, sched_nodes,
1041 : must_precede, must_follow);
1042 :
1043 0 : set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1044 : must_precede, c, start, end, step);
1045 :
1046 : /* Find the element in the partial schedule related to the closing
1047 : branch so we can remove it from it's current cycle. */
1048 0 : for (next_ps_i = ps->rows[row];
1049 0 : next_ps_i; next_ps_i = next_ps_i->next_in_row)
1050 0 : if (next_ps_i->id == g->closing_branch->cuid)
1051 : break;
1052 :
1053 0 : min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1054 0 : remove_node_from_ps (ps, next_ps_i);
1055 0 : success =
1056 0 : try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1057 : sched_nodes, &num_splits,
1058 : tmp_precede, tmp_follow);
1059 0 : gcc_assert (num_splits == 0);
1060 0 : if (!success)
1061 : {
1062 0 : if (dump_file)
1063 0 : fprintf (dump_file,
1064 : "SMS failed to schedule branch at cycle: %d, "
1065 : "bringing it back to cycle %d\n", c, branch_cycle);
1066 :
1067 : /* The branch was failed to be placed in row ii - 1.
1068 : Put it back in it's original place in the partial
1069 : schedualing. */
1070 0 : set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1071 : must_precede, branch_cycle, start, end,
1072 : step);
1073 0 : success =
1074 0 : try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1075 : branch_cycle, sched_nodes,
1076 : &num_splits, tmp_precede,
1077 : tmp_follow);
1078 0 : gcc_assert (success && (num_splits == 0));
1079 : ok = false;
1080 : }
1081 : else
1082 : {
1083 : /* The branch is placed in row ii - 1. */
1084 0 : if (dump_file)
1085 0 : fprintf (dump_file,
1086 : "SMS success in moving branch to cycle %d\n", c);
1087 :
1088 0 : update_node_sched_params (g->closing_branch->cuid, ii, c,
1089 : PS_MIN_CYCLE (ps));
1090 0 : ok = true;
1091 : }
1092 :
1093 : /* This might have been added to a new first stage. */
1094 0 : if (PS_MIN_CYCLE (ps) < min_cycle)
1095 0 : reset_sched_times (ps, 0);
1096 0 : }
1097 :
1098 : return ok;
1099 0 : }
1100 :
1101 : static void
1102 0 : duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1103 : int to_stage, rtx count_reg, class loop *loop)
1104 : {
1105 0 : int row;
1106 0 : ps_insn_ptr ps_ij;
1107 0 : copy_bb_data id;
1108 :
1109 0 : for (row = 0; row < ps->ii; row++)
1110 0 : for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1111 : {
1112 0 : int u = ps_ij->id;
1113 0 : int first_u, last_u;
1114 0 : rtx_insn *u_insn;
1115 :
1116 : /* Do not duplicate any insn which refers to count_reg as it
1117 : belongs to the control part.
1118 : The closing branch is scheduled as well and thus should
1119 : be ignored.
1120 : TODO: This should be done by analyzing the control part of
1121 : the loop. */
1122 0 : u_insn = ps_rtl_insn (ps, u);
1123 0 : if (reg_mentioned_p (count_reg, u_insn)
1124 0 : || JUMP_P (u_insn))
1125 0 : continue;
1126 :
1127 0 : first_u = SCHED_STAGE (u);
1128 0 : last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1129 0 : if (from_stage <= last_u && to_stage >= first_u)
1130 : {
1131 0 : if (u < ps->g->num_nodes)
1132 0 : duplicate_insn_chain (ps_first_note (ps, u), u_insn,
1133 : loop, &id);
1134 : else
1135 0 : emit_insn (copy_rtx (PATTERN (u_insn)));
1136 : }
1137 : }
1138 0 : }
1139 :
1140 :
1141 : /* Generate the instructions (including reg_moves) for prolog & epilog. */
1142 : static void
1143 0 : generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
1144 : rtx count_reg, bool adjust_init)
1145 : {
1146 0 : int i;
1147 0 : int last_stage = PS_STAGE_COUNT (ps) - 1;
1148 0 : edge e;
1149 :
1150 : /* Generate the prolog, inserting its insns on the loop-entry edge. */
1151 0 : start_sequence ();
1152 :
1153 0 : if (adjust_init)
1154 : {
1155 : /* Generate instructions at the beginning of the prolog to
1156 : adjust the loop count by STAGE_COUNT. If loop count is constant
1157 : and it not used anywhere in prologue, this constant is adjusted by
1158 : STAGE_COUNT outside of generate_prolog_epilog function. */
1159 0 : rtx sub_reg = NULL_RTX;
1160 :
1161 0 : sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
1162 0 : gen_int_mode (last_stage,
1163 0 : GET_MODE (count_reg)),
1164 : count_reg, 1, OPTAB_DIRECT);
1165 0 : gcc_assert (REG_P (sub_reg));
1166 0 : if (REGNO (sub_reg) != REGNO (count_reg))
1167 0 : emit_move_insn (count_reg, sub_reg);
1168 : }
1169 :
1170 0 : for (i = 0; i < last_stage; i++)
1171 0 : duplicate_insns_of_cycles (ps, 0, i, count_reg, loop);
1172 :
1173 : /* Put the prolog on the entry edge. */
1174 0 : e = loop_preheader_edge (loop);
1175 0 : split_edge_and_insert (e, get_insns ());
1176 0 : if (!flag_resched_modulo_sched)
1177 0 : e->dest->flags |= BB_DISABLE_SCHEDULE;
1178 :
1179 0 : end_sequence ();
1180 :
1181 : /* Generate the epilog, inserting its insns on the loop-exit edge. */
1182 0 : start_sequence ();
1183 :
1184 0 : for (i = 0; i < last_stage; i++)
1185 0 : duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg, loop);
1186 :
1187 : /* Put the epilogue on the exit edge. */
1188 0 : gcc_assert (single_exit (loop));
1189 0 : e = single_exit (loop);
1190 0 : split_edge_and_insert (e, get_insns ());
1191 0 : if (!flag_resched_modulo_sched)
1192 0 : e->dest->flags |= BB_DISABLE_SCHEDULE;
1193 :
1194 0 : end_sequence ();
1195 0 : }
1196 :
1197 : /* Mark LOOP as software pipelined so the later
1198 : scheduling passes don't touch it. */
1199 : static void
1200 0 : mark_loop_unsched (class loop *loop)
1201 : {
1202 0 : unsigned i;
1203 0 : basic_block *bbs = get_loop_body (loop);
1204 :
1205 0 : for (i = 0; i < loop->num_nodes; i++)
1206 0 : bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1207 :
1208 0 : free (bbs);
1209 0 : }
1210 :
1211 : /* Return true if all the BBs of the loop are empty except the
1212 : loop header. */
1213 : static bool
1214 440 : loop_single_full_bb_p (class loop *loop)
1215 : {
1216 440 : unsigned i;
1217 440 : basic_block *bbs = get_loop_body (loop);
1218 :
1219 880 : for (i = 0; i < loop->num_nodes ; i++)
1220 : {
1221 568 : rtx_insn *head, *tail;
1222 568 : bool empty_bb = true;
1223 :
1224 568 : if (bbs[i] == loop->header)
1225 440 : continue;
1226 :
1227 : /* Make sure that basic blocks other than the header
1228 : have only notes labels or jumps. */
1229 128 : get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1230 262 : for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1231 : {
1232 134 : if (NOTE_P (head) || LABEL_P (head)
1233 134 : || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1234 6 : continue;
1235 : empty_bb = false;
1236 : break;
1237 : }
1238 :
1239 128 : if (! empty_bb)
1240 : {
1241 128 : free (bbs);
1242 128 : return false;
1243 : }
1244 : }
1245 312 : free (bbs);
1246 312 : return true;
1247 : }
1248 :
1249 : /* Dump file:line from INSN's location info to dump_file. */
1250 :
1251 : static void
1252 22 : dump_insn_location (rtx_insn *insn)
1253 : {
1254 22 : if (dump_file && INSN_HAS_LOCATION (insn))
1255 : {
1256 16 : expanded_location xloc = insn_location (insn);
1257 16 : fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
1258 : }
1259 22 : }
1260 :
1261 : /* A simple loop from SMS point of view; it is a loop that is composed of
1262 : either a single basic block or two BBs - a header and a latch. */
1263 : #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
1264 : && (EDGE_COUNT (loop->latch->preds) == 1) \
1265 : && (EDGE_COUNT (loop->latch->succs) == 1))
1266 :
1267 : /* Return true if the loop is in its canonical form and false if not.
1268 : i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
1269 : static bool
1270 434 : loop_canon_p (class loop *loop)
1271 : {
1272 :
1273 434 : if (loop->inner || !loop_outer (loop))
1274 : {
1275 142 : if (dump_file)
1276 3 : fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1277 142 : return false;
1278 : }
1279 :
1280 292 : if (!single_exit (loop))
1281 : {
1282 8 : if (dump_file)
1283 : {
1284 3 : rtx_insn *insn = BB_END (loop->header);
1285 :
1286 3 : fprintf (dump_file, "SMS loop many exits");
1287 3 : dump_insn_location (insn);
1288 3 : fprintf (dump_file, "\n");
1289 : }
1290 8 : return false;
1291 : }
1292 :
1293 284 : if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1294 : {
1295 5 : if (dump_file)
1296 : {
1297 0 : rtx_insn *insn = BB_END (loop->header);
1298 :
1299 0 : fprintf (dump_file, "SMS loop many BBs.");
1300 0 : dump_insn_location (insn);
1301 0 : fprintf (dump_file, "\n");
1302 : }
1303 5 : return false;
1304 : }
1305 :
1306 : return true;
1307 : }
1308 :
1309 : /* If there are more than one entry for the loop,
1310 : make it one by splitting the first entry edge and
1311 : redirecting the others to the new BB. */
1312 : static void
1313 0 : canon_loop (class loop *loop)
1314 : {
1315 0 : edge e;
1316 0 : edge_iterator i;
1317 :
1318 : /* Avoid annoying special cases of edges going to exit
1319 : block. */
1320 0 : FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1321 0 : if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1322 0 : split_edge (e);
1323 :
1324 0 : if (loop->latch == loop->header
1325 0 : || EDGE_COUNT (loop->latch->succs) > 1)
1326 : {
1327 0 : FOR_EACH_EDGE (e, i, loop->header->preds)
1328 0 : if (e->src == loop->latch)
1329 : break;
1330 0 : split_edge (e);
1331 : }
1332 0 : }
1333 :
1334 : /* Setup infos. */
1335 : static void
1336 267 : setup_sched_infos (void)
1337 : {
1338 267 : memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1339 : sizeof (sms_common_sched_info));
1340 267 : sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1341 267 : common_sched_info = &sms_common_sched_info;
1342 :
1343 267 : sched_deps_info = &sms_sched_deps_info;
1344 267 : current_sched_info = &sms_sched_info;
1345 267 : }
1346 :
1347 : /* Probability in % that the sms-ed loop rolls enough so that optimized
1348 : version may be entered. Just a guess. */
1349 : #define PROB_SMS_ENOUGH_ITERATIONS 80
1350 :
1351 : /* Main entry point, perform SMS scheduling on the loops of the function
1352 : that consist of single basic blocks. */
1353 : static void
1354 313 : sms_schedule (void)
1355 : {
1356 313 : rtx_insn *insn;
1357 313 : ddg_ptr *g_arr, g;
1358 313 : int * node_order;
1359 313 : int maxii, max_asap;
1360 313 : partial_schedule_ptr ps;
1361 313 : basic_block bb = NULL;
1362 313 : basic_block condition_bb = NULL;
1363 313 : edge latch_edge;
1364 313 : HOST_WIDE_INT trip_count, max_trip_count;
1365 313 : HARD_REG_SET prohibited_regs;
1366 :
1367 313 : loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1368 : | LOOPS_HAVE_RECORDED_EXITS);
1369 626 : if (number_of_loops (cfun) <= 1)
1370 : {
1371 46 : loop_optimizer_finalize ();
1372 46 : return; /* There are no loops to schedule. */
1373 : }
1374 :
1375 : /* Initialize issue_rate. */
1376 267 : if (targetm.sched.issue_rate)
1377 : {
1378 267 : int temp = reload_completed;
1379 :
1380 267 : reload_completed = 1;
1381 267 : issue_rate = targetm.sched.issue_rate ();
1382 267 : reload_completed = temp;
1383 : }
1384 : else
1385 0 : issue_rate = 1;
1386 :
1387 : /* Initialize the scheduler. */
1388 267 : setup_sched_infos ();
1389 267 : haifa_sched_init ();
1390 :
1391 : /* Allocate memory to hold the DDG array one entry for each loop.
1392 : We use loop->num as index into this array. */
1393 267 : g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
1394 :
1395 534 : REG_SET_TO_HARD_REG_SET (prohibited_regs, &df->regular_block_artificial_uses);
1396 :
1397 267 : if (dump_file)
1398 : {
1399 12 : fprintf (dump_file, "\n\nSMS analysis phase\n");
1400 12 : fprintf (dump_file, "===================\n\n");
1401 : }
1402 :
1403 : /* Build DDGs for all the relevant loops and hold them in G_ARR
1404 : indexed by the loop index. */
1405 1235 : for (auto loop : loops_list (cfun, 0))
1406 : {
1407 434 : rtx_insn *head, *tail;
1408 434 : rtx count_reg;
1409 :
1410 : /* For debugging. */
1411 434 : if (dbg_cnt (sms_sched_loop) == false)
1412 : {
1413 0 : if (dump_file)
1414 0 : fprintf (dump_file, "SMS reached max limit... \n");
1415 :
1416 0 : break;
1417 : }
1418 :
1419 434 : if (dump_file)
1420 : {
1421 19 : rtx_insn *insn = BB_END (loop->header);
1422 :
1423 19 : fprintf (dump_file, "SMS loop num: %d", loop->num);
1424 19 : dump_insn_location (insn);
1425 19 : fprintf (dump_file, "\n");
1426 : }
1427 :
1428 434 : if (! loop_canon_p (loop))
1429 589 : continue;
1430 :
1431 279 : if (! loop_single_full_bb_p (loop))
1432 : {
1433 123 : if (dump_file)
1434 0 : fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1435 123 : continue;
1436 : }
1437 :
1438 156 : bb = loop->header;
1439 :
1440 156 : get_ebb_head_tail (bb, bb, &head, &tail);
1441 156 : latch_edge = loop_latch_edge (loop);
1442 156 : gcc_assert (single_exit (loop));
1443 156 : trip_count = get_estimated_loop_iterations_int (loop);
1444 156 : max_trip_count = get_max_loop_iterations_int (loop);
1445 :
1446 : /* Perform SMS only on loops that their average count is above threshold. */
1447 :
1448 156 : if (latch_edge->count () > profile_count::zero ()
1449 312 : && (latch_edge->count ()
1450 312 : < (single_exit (loop)->count ()
1451 312 : * param_sms_loop_average_count_threshold)))
1452 : {
1453 0 : if (dump_file)
1454 : {
1455 0 : dump_insn_location (tail);
1456 0 : fprintf (dump_file, "\nSMS single-bb-loop\n");
1457 0 : if (profile_info && flag_branch_probabilities)
1458 : {
1459 0 : fprintf (dump_file, "SMS loop-count ");
1460 0 : fprintf (dump_file, "%" PRId64,
1461 0 : (int64_t) bb->count.to_gcov_type ());
1462 0 : fprintf (dump_file, "\n");
1463 0 : fprintf (dump_file, "SMS trip-count ");
1464 0 : fprintf (dump_file, "%" PRId64 "max %" PRId64,
1465 : (int64_t) trip_count, (int64_t) max_trip_count);
1466 0 : fprintf (dump_file, "\n");
1467 : }
1468 : }
1469 0 : continue;
1470 : }
1471 :
1472 : /* Make sure this is a doloop. */
1473 156 : if (!(count_reg = doloop_register_get (head, tail)))
1474 : {
1475 156 : if (dump_file)
1476 13 : fprintf (dump_file, "SMS doloop_register_get failed\n");
1477 156 : continue;
1478 : }
1479 :
1480 : /* Don't handle BBs with calls or barriers
1481 : or !single_set with the exception of do-loop control part insns.
1482 : ??? Should handle insns defining subregs. */
1483 0 : for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1484 : {
1485 0 : if (INSN_P (insn))
1486 : {
1487 : HARD_REG_SET regs;
1488 0 : CLEAR_HARD_REG_SET (regs);
1489 0 : note_stores (insn, record_hard_reg_sets, ®s);
1490 0 : if (hard_reg_set_intersect_p (regs, prohibited_regs))
1491 : break;
1492 : }
1493 :
1494 0 : if (CALL_P (insn)
1495 0 : || BARRIER_P (insn)
1496 0 : || (INSN_P (insn) && single_set (insn)
1497 0 : && GET_CODE (SET_DEST (single_set (insn))) == SUBREG)
1498 : /* Not a single set. */
1499 0 : || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1500 0 : && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1501 : /* But non-single-set allowed in one special case. */
1502 0 : && (insn != prev_nondebug_insn (tail)
1503 0 : || !reg_mentioned_p (count_reg, insn))))
1504 : break;
1505 : }
1506 :
1507 0 : if (insn != NEXT_INSN (tail))
1508 : {
1509 0 : if (dump_file)
1510 : {
1511 0 : if (CALL_P (insn))
1512 0 : fprintf (dump_file, "SMS loop-with-call\n");
1513 0 : else if (BARRIER_P (insn))
1514 0 : fprintf (dump_file, "SMS loop-with-barrier\n");
1515 0 : else if (INSN_P (insn) && single_set (insn)
1516 0 : && GET_CODE (SET_DEST (single_set (insn))) == SUBREG)
1517 0 : fprintf (dump_file, "SMS loop with subreg in lhs\n");
1518 : else
1519 0 : fprintf (dump_file,
1520 : "SMS loop-with-not-single-set-or-prohibited-reg\n");
1521 :
1522 0 : print_rtl_single (dump_file, insn);
1523 : }
1524 :
1525 0 : continue;
1526 : }
1527 :
1528 : /* Always schedule the closing branch with the rest of the
1529 : instructions. The branch is rotated to be in row ii-1 at the
1530 : end of the scheduling procedure to make sure it's the last
1531 : instruction in the iteration. */
1532 0 : if (! (g = create_ddg (bb, 1)))
1533 : {
1534 0 : if (dump_file)
1535 0 : fprintf (dump_file, "SMS create_ddg failed\n");
1536 0 : continue;
1537 : }
1538 :
1539 0 : g_arr[loop->num] = g;
1540 0 : if (dump_file)
1541 0 : fprintf (dump_file, "...OK\n");
1542 :
1543 267 : }
1544 267 : if (dump_file)
1545 : {
1546 12 : fprintf (dump_file, "\nSMS transformation phase\n");
1547 12 : fprintf (dump_file, "=========================\n\n");
1548 : }
1549 :
1550 : /* We don't want to perform SMS on new loops - created by versioning. */
1551 1235 : for (auto loop : loops_list (cfun, 0))
1552 : {
1553 434 : rtx_insn *head, *tail;
1554 434 : rtx count_reg;
1555 434 : rtx_insn *count_init;
1556 434 : int mii, rec_mii, stage_count, min_cycle;
1557 434 : int64_t loop_count = 0;
1558 434 : bool opt_sc_p, adjust_inplace = false;
1559 434 : basic_block pre_header;
1560 :
1561 434 : if (! (g = g_arr[loop->num]))
1562 434 : continue;
1563 :
1564 0 : if (dump_file)
1565 : {
1566 0 : rtx_insn *insn = BB_END (loop->header);
1567 :
1568 0 : fprintf (dump_file, "SMS loop num: %d", loop->num);
1569 0 : dump_insn_location (insn);
1570 0 : fprintf (dump_file, "\n");
1571 :
1572 0 : print_ddg (dump_file, g);
1573 : }
1574 :
1575 0 : get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1576 :
1577 0 : latch_edge = loop_latch_edge (loop);
1578 0 : gcc_assert (single_exit (loop));
1579 0 : trip_count = get_estimated_loop_iterations_int (loop);
1580 0 : max_trip_count = get_max_loop_iterations_int (loop);
1581 :
1582 0 : if (dump_file)
1583 : {
1584 0 : dump_insn_location (tail);
1585 0 : fprintf (dump_file, "\nSMS single-bb-loop\n");
1586 0 : if (profile_info && flag_branch_probabilities)
1587 : {
1588 0 : fprintf (dump_file, "SMS loop-count ");
1589 0 : fprintf (dump_file, "%" PRId64,
1590 0 : (int64_t) bb->count.to_gcov_type ());
1591 0 : fprintf (dump_file, "\n");
1592 : }
1593 0 : fprintf (dump_file, "SMS doloop\n");
1594 0 : fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1595 0 : fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1596 0 : fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1597 : }
1598 :
1599 :
1600 0 : count_reg = doloop_register_get (head, tail);
1601 0 : gcc_assert (count_reg);
1602 :
1603 0 : pre_header = loop_preheader_edge (loop)->src;
1604 0 : count_init = const_iteration_count (count_reg, pre_header, &loop_count,
1605 : &adjust_inplace);
1606 :
1607 0 : if (dump_file && count_init)
1608 : {
1609 0 : fprintf (dump_file, "SMS const-doloop ");
1610 0 : fprintf (dump_file, "%" PRId64,
1611 : loop_count);
1612 0 : fprintf (dump_file, "\n");
1613 : }
1614 :
1615 0 : node_order = XNEWVEC (int, g->num_nodes);
1616 :
1617 0 : mii = 1; /* Need to pass some estimate of mii. */
1618 0 : rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1619 0 : mii = MAX (res_MII (g), rec_mii);
1620 0 : mii = MAX (mii, 1);
1621 0 : maxii = MAX (max_asap, param_sms_max_ii_factor * mii);
1622 :
1623 0 : if (dump_file)
1624 0 : fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1625 : rec_mii, mii, maxii);
1626 :
1627 0 : for (;;)
1628 : {
1629 0 : set_node_sched_params (g);
1630 :
1631 0 : stage_count = 0;
1632 0 : opt_sc_p = false;
1633 0 : ps = sms_schedule_by_order (g, mii, maxii, node_order);
1634 :
1635 0 : if (ps)
1636 : {
1637 : /* Try to achieve optimized SC by normalizing the partial
1638 : schedule (having the cycles start from cycle zero).
1639 : The branch location must be placed in row ii-1 in the
1640 : final scheduling. If failed, shift all instructions to
1641 : position the branch in row ii-1. */
1642 0 : opt_sc_p = optimize_sc (ps, g);
1643 0 : if (opt_sc_p)
1644 0 : stage_count = calculate_stage_count (ps, 0);
1645 : else
1646 : {
1647 : /* Bring the branch to cycle ii-1. */
1648 0 : int amount = (SCHED_TIME (g->closing_branch->cuid)
1649 0 : - (ps->ii - 1));
1650 :
1651 0 : if (dump_file)
1652 0 : fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1653 :
1654 0 : stage_count = calculate_stage_count (ps, amount);
1655 : }
1656 :
1657 0 : gcc_assert (stage_count >= 1);
1658 : }
1659 :
1660 : /* The default value of param_sms_min_sc is 2 as stage count of
1661 : 1 means that there is no interleaving between iterations thus
1662 : we let the scheduling passes do the job in this case. */
1663 0 : if (stage_count < param_sms_min_sc
1664 0 : || (count_init && (loop_count <= stage_count))
1665 0 : || (max_trip_count >= 0 && max_trip_count <= stage_count)
1666 0 : || (trip_count >= 0 && trip_count <= stage_count))
1667 : {
1668 0 : if (dump_file)
1669 : {
1670 0 : fprintf (dump_file, "SMS failed... \n");
1671 0 : fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1672 : " loop-count=", stage_count);
1673 0 : fprintf (dump_file, "%" PRId64, loop_count);
1674 0 : fprintf (dump_file, ", trip-count=");
1675 0 : fprintf (dump_file, "%" PRId64 "max %" PRId64,
1676 : (int64_t) trip_count, (int64_t) max_trip_count);
1677 0 : fprintf (dump_file, ")\n");
1678 : }
1679 : break;
1680 : }
1681 :
1682 0 : if (!opt_sc_p)
1683 : {
1684 : /* Rotate the partial schedule to have the branch in row ii-1. */
1685 0 : int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1686 :
1687 0 : reset_sched_times (ps, amount);
1688 0 : rotate_partial_schedule (ps, amount);
1689 : }
1690 :
1691 0 : set_columns_for_ps (ps);
1692 :
1693 0 : min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1694 0 : if (!schedule_reg_moves (ps))
1695 : {
1696 0 : mii = ps->ii + 1;
1697 0 : free_partial_schedule (ps);
1698 0 : continue;
1699 : }
1700 :
1701 : /* Moves that handle incoming values might have been added
1702 : to a new first stage. Bump the stage count if so.
1703 :
1704 : ??? Perhaps we could consider rotating the schedule here
1705 : instead? */
1706 0 : if (PS_MIN_CYCLE (ps) < min_cycle)
1707 : {
1708 0 : reset_sched_times (ps, 0);
1709 0 : stage_count++;
1710 : }
1711 :
1712 : /* The stage count should now be correct without rotation. */
1713 0 : gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1714 0 : PS_STAGE_COUNT (ps) = stage_count;
1715 :
1716 0 : canon_loop (loop);
1717 :
1718 0 : if (dump_file)
1719 : {
1720 0 : dump_insn_location (tail);
1721 0 : fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1722 : ps->ii, stage_count);
1723 0 : print_partial_schedule (ps, dump_file);
1724 : }
1725 :
1726 0 : if (count_init)
1727 : {
1728 0 : if (adjust_inplace)
1729 : {
1730 : /* When possible, set new iteration count of loop kernel in
1731 : place. Otherwise, generate_prolog_epilog creates an insn
1732 : to adjust. */
1733 0 : SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1734 : - stage_count + 1);
1735 : }
1736 : }
1737 : else
1738 : {
1739 : /* case the BCT count is not known , Do loop-versioning */
1740 0 : rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
1741 : gen_int_mode (stage_count,
1742 : GET_MODE (count_reg)));
1743 0 : profile_probability prob = profile_probability::guessed_always ()
1744 0 : .apply_scale (PROB_SMS_ENOUGH_ITERATIONS, 100);
1745 :
1746 0 : loop_version (loop, comp_rtx, &condition_bb,
1747 : prob, prob.invert (),
1748 : prob, prob.invert (), true);
1749 : }
1750 :
1751 : /* Now apply the scheduled kernel to the RTL of the loop. */
1752 0 : permute_partial_schedule (ps, g->closing_branch->first_note);
1753 :
1754 : /* Mark this loop as software pipelined so the later
1755 : scheduling passes don't touch it. */
1756 0 : if (! flag_resched_modulo_sched)
1757 0 : mark_loop_unsched (loop);
1758 :
1759 : /* The life-info is not valid any more. */
1760 0 : df_set_bb_dirty (g->bb);
1761 :
1762 0 : apply_reg_moves (ps);
1763 0 : if (dump_file)
1764 0 : print_node_sched_params (dump_file, g->num_nodes, ps);
1765 : /* Generate prolog and epilog. */
1766 0 : generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
1767 0 : break;
1768 0 : }
1769 :
1770 0 : free_partial_schedule (ps);
1771 0 : node_sched_param_vec.release ();
1772 0 : free (node_order);
1773 0 : free_ddg (g);
1774 267 : }
1775 :
1776 267 : free (g_arr);
1777 :
1778 : /* Release scheduler data, needed until now because of DFA. */
1779 267 : haifa_sched_finish ();
1780 267 : loop_optimizer_finalize ();
1781 : }
1782 :
1783 : /* The SMS scheduling algorithm itself
1784 : -----------------------------------
1785 : Input: 'O' an ordered list of insns of a loop.
1786 : Output: A scheduling of the loop - kernel, prolog, and epilogue.
1787 :
1788 : 'Q' is the empty Set
1789 : 'PS' is the partial schedule; it holds the currently scheduled nodes with
1790 : their cycle/slot.
1791 : 'PSP' previously scheduled predecessors.
1792 : 'PSS' previously scheduled successors.
1793 : 't(u)' the cycle where u is scheduled.
1794 : 'l(u)' is the latency of u.
1795 : 'd(v,u)' is the dependence distance from v to u.
1796 : 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1797 : the node ordering phase.
1798 : 'check_hardware_resources_conflicts(u, PS, c)'
1799 : run a trace around cycle/slot through DFA model
1800 : to check resource conflicts involving instruction u
1801 : at cycle c given the partial schedule PS.
1802 : 'add_to_partial_schedule_at_time(u, PS, c)'
1803 : Add the node/instruction u to the partial schedule
1804 : PS at time c.
1805 : 'calculate_register_pressure(PS)'
1806 : Given a schedule of instructions, calculate the register
1807 : pressure it implies. One implementation could be the
1808 : maximum number of overlapping live ranges.
1809 : 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1810 : registers available in the hardware.
1811 :
1812 : 1. II = MII.
1813 : 2. PS = empty list
1814 : 3. for each node u in O in pre-computed order
1815 : 4. if (PSP(u) != Q && PSS(u) == Q) then
1816 : 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1817 : 6. start = Early_start; end = Early_start + II - 1; step = 1
1818 : 11. else if (PSP(u) == Q && PSS(u) != Q) then
1819 : 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1820 : 13. start = Late_start; end = Late_start - II + 1; step = -1
1821 : 14. else if (PSP(u) != Q && PSS(u) != Q) then
1822 : 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1823 : 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1824 : 17. start = Early_start;
1825 : 18. end = min(Early_start + II - 1 , Late_start);
1826 : 19. step = 1
1827 : 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1828 : 21. start = ASAP(u); end = start + II - 1; step = 1
1829 : 22. endif
1830 :
1831 : 23. success = false
1832 : 24. for (c = start ; c != end ; c += step)
1833 : 25. if check_hardware_resources_conflicts(u, PS, c) then
1834 : 26. add_to_partial_schedule_at_time(u, PS, c)
1835 : 27. success = true
1836 : 28. break
1837 : 29. endif
1838 : 30. endfor
1839 : 31. if (success == false) then
1840 : 32. II = II + 1
1841 : 33. if (II > maxII) then
1842 : 34. finish - failed to schedule
1843 : 35. endif
1844 : 36. goto 2.
1845 : 37. endif
1846 : 38. endfor
1847 : 39. if (calculate_register_pressure(PS) > maxRP) then
1848 : 40. goto 32.
1849 : 41. endif
1850 : 42. compute epilogue & prologue
1851 : 43. finish - succeeded to schedule
1852 :
1853 : ??? The algorithm restricts the scheduling window to II cycles.
1854 : In rare cases, it may be better to allow windows of II+1 cycles.
1855 : The window would then start and end on the same row, but with
1856 : different "must precede" and "must follow" requirements. */
1857 :
1858 : /* A threshold for the number of repeated unsuccessful attempts to insert
1859 : an empty row, before we flush the partial schedule and start over. */
1860 : #define MAX_SPLIT_NUM 10
1861 : /* Given the partial schedule PS, this function calculates and returns the
1862 : cycles in which we can schedule the node with the given index I.
1863 : NOTE: Here we do the backtracking in SMS, in some special cases. We have
1864 : noticed that there are several cases in which we fail to SMS the loop
1865 : because the sched window of a node is empty due to tight data-deps. In
1866 : such cases we want to unschedule some of the predecessors/successors
1867 : until we get non-empty scheduling window. It returns -1 if the
1868 : scheduling window is empty and zero otherwise. */
1869 :
1870 : static int
1871 0 : get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1872 : sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1873 : int *end_p)
1874 : {
1875 0 : int start, step, end;
1876 0 : int early_start, late_start;
1877 0 : ddg_edge_ptr e;
1878 0 : auto_sbitmap psp (ps->g->num_nodes);
1879 0 : auto_sbitmap pss (ps->g->num_nodes);
1880 0 : sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1881 0 : sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1882 0 : int psp_not_empty;
1883 0 : int pss_not_empty;
1884 0 : int count_preds;
1885 0 : int count_succs;
1886 :
1887 : /* 1. compute sched window for u (start, end, step). */
1888 0 : bitmap_clear (psp);
1889 0 : bitmap_clear (pss);
1890 0 : psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
1891 0 : pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
1892 :
1893 : /* We first compute a forward range (start <= end), then decide whether
1894 : to reverse it. */
1895 0 : early_start = INT_MIN;
1896 0 : late_start = INT_MAX;
1897 0 : start = INT_MIN;
1898 0 : end = INT_MAX;
1899 0 : step = 1;
1900 :
1901 0 : count_preds = 0;
1902 0 : count_succs = 0;
1903 :
1904 0 : if (dump_file && (psp_not_empty || pss_not_empty))
1905 : {
1906 0 : fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1907 0 : "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1908 0 : fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1909 : "start", "early start", "late start", "end", "time");
1910 0 : fprintf (dump_file, "=========== =========== =========== ==========="
1911 : " =====\n");
1912 : }
1913 : /* Calculate early_start and limit end. Both bounds are inclusive. */
1914 0 : if (psp_not_empty)
1915 0 : for (e = u_node->in; e != 0; e = e->next_in)
1916 : {
1917 0 : int v = e->src->cuid;
1918 :
1919 0 : if (bitmap_bit_p (sched_nodes, v))
1920 : {
1921 0 : int p_st = SCHED_TIME (v);
1922 0 : int earliest = p_st + e->latency - (e->distance * ii);
1923 0 : int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1924 :
1925 0 : if (dump_file)
1926 : {
1927 0 : fprintf (dump_file, "%11s %11d %11s %11d %5d",
1928 : "", earliest, "", latest, p_st);
1929 0 : print_ddg_edge (dump_file, e);
1930 0 : fprintf (dump_file, "\n");
1931 : }
1932 :
1933 0 : early_start = MAX (early_start, earliest);
1934 0 : end = MIN (end, latest);
1935 :
1936 0 : if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1937 0 : count_preds++;
1938 : }
1939 : }
1940 :
1941 : /* Calculate late_start and limit start. Both bounds are inclusive. */
1942 0 : if (pss_not_empty)
1943 0 : for (e = u_node->out; e != 0; e = e->next_out)
1944 : {
1945 0 : int v = e->dest->cuid;
1946 :
1947 0 : if (bitmap_bit_p (sched_nodes, v))
1948 : {
1949 0 : int s_st = SCHED_TIME (v);
1950 0 : int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1951 0 : int latest = s_st - e->latency + (e->distance * ii);
1952 :
1953 0 : if (dump_file)
1954 : {
1955 0 : fprintf (dump_file, "%11d %11s %11d %11s %5d",
1956 : earliest, "", latest, "", s_st);
1957 0 : print_ddg_edge (dump_file, e);
1958 0 : fprintf (dump_file, "\n");
1959 : }
1960 :
1961 0 : start = MAX (start, earliest);
1962 0 : late_start = MIN (late_start, latest);
1963 :
1964 0 : if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1965 0 : count_succs++;
1966 : }
1967 : }
1968 :
1969 0 : if (dump_file && (psp_not_empty || pss_not_empty))
1970 : {
1971 0 : fprintf (dump_file, "----------- ----------- ----------- -----------"
1972 : " -----\n");
1973 0 : fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1974 : start, early_start, late_start, end, "",
1975 : "(max, max, min, min)");
1976 : }
1977 :
1978 : /* Get a target scheduling window no bigger than ii. */
1979 0 : if (early_start == INT_MIN && late_start == INT_MAX)
1980 0 : early_start = NODE_ASAP (u_node);
1981 0 : else if (early_start == INT_MIN)
1982 0 : early_start = late_start - (ii - 1);
1983 0 : late_start = MIN (late_start, early_start + (ii - 1));
1984 :
1985 : /* Apply memory dependence limits. */
1986 0 : start = MAX (start, early_start);
1987 0 : end = MIN (end, late_start);
1988 :
1989 0 : if (dump_file && (psp_not_empty || pss_not_empty))
1990 0 : fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1991 : "", start, end, "", "");
1992 :
1993 : /* If there are at least as many successors as predecessors, schedule the
1994 : node close to its successors. */
1995 0 : if (pss_not_empty && count_succs >= count_preds)
1996 : {
1997 0 : std::swap (start, end);
1998 0 : step = -1;
1999 : }
2000 :
2001 : /* Now that we've finalized the window, make END an exclusive rather
2002 : than an inclusive bound. */
2003 0 : end += step;
2004 :
2005 0 : *start_p = start;
2006 0 : *step_p = step;
2007 0 : *end_p = end;
2008 :
2009 0 : if ((start >= end && step == 1) || (start <= end && step == -1))
2010 : {
2011 0 : if (dump_file)
2012 0 : fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
2013 : start, end, step);
2014 0 : return -1;
2015 : }
2016 :
2017 : return 0;
2018 0 : }
2019 :
2020 : /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
2021 : node currently been scheduled. At the end of the calculation
2022 : MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2023 : U_NODE which are (1) already scheduled in the first/last row of
2024 : U_NODE's scheduling window, (2) whose dependence inequality with U
2025 : becomes an equality when U is scheduled in this same row, and (3)
2026 : whose dependence latency is zero.
2027 :
2028 : The first and last rows are calculated using the following parameters:
2029 : START/END rows - The cycles that begins/ends the traversal on the window;
2030 : searching for an empty cycle to schedule U_NODE.
2031 : STEP - The direction in which we traverse the window.
2032 : II - The initiation interval. */
2033 :
2034 : static void
2035 0 : calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2036 : int step, int ii, sbitmap sched_nodes,
2037 : sbitmap must_precede, sbitmap must_follow)
2038 : {
2039 0 : ddg_edge_ptr e;
2040 0 : int first_cycle_in_window, last_cycle_in_window;
2041 :
2042 0 : gcc_assert (must_precede && must_follow);
2043 :
2044 : /* Consider the following scheduling window:
2045 : {first_cycle_in_window, first_cycle_in_window+1, ...,
2046 : last_cycle_in_window}. If step is 1 then the following will be
2047 : the order we traverse the window: {start=first_cycle_in_window,
2048 : first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2049 : or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2050 : end=first_cycle_in_window-1} if step is -1. */
2051 0 : first_cycle_in_window = (step == 1) ? start : end - step;
2052 0 : last_cycle_in_window = (step == 1) ? end - step : start;
2053 :
2054 0 : bitmap_clear (must_precede);
2055 0 : bitmap_clear (must_follow);
2056 :
2057 0 : if (dump_file)
2058 0 : fprintf (dump_file, "\nmust_precede: ");
2059 :
2060 : /* Instead of checking if:
2061 : (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2062 : && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2063 : first_cycle_in_window)
2064 : && e->latency == 0
2065 : we use the fact that latency is non-negative:
2066 : SCHED_TIME (e->src) - (e->distance * ii) <=
2067 : SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2068 : first_cycle_in_window
2069 : and check only if
2070 : SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
2071 0 : for (e = u_node->in; e != 0; e = e->next_in)
2072 0 : if (bitmap_bit_p (sched_nodes, e->src->cuid)
2073 0 : && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2074 : first_cycle_in_window))
2075 : {
2076 0 : if (dump_file)
2077 0 : fprintf (dump_file, "%d ", e->src->cuid);
2078 :
2079 0 : bitmap_set_bit (must_precede, e->src->cuid);
2080 : }
2081 :
2082 0 : if (dump_file)
2083 0 : fprintf (dump_file, "\nmust_follow: ");
2084 :
2085 : /* Instead of checking if:
2086 : (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2087 : && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2088 : last_cycle_in_window)
2089 : && e->latency == 0
2090 : we use the fact that latency is non-negative:
2091 : SCHED_TIME (e->dest) + (e->distance * ii) >=
2092 : SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
2093 : last_cycle_in_window
2094 : and check only if
2095 : SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
2096 0 : for (e = u_node->out; e != 0; e = e->next_out)
2097 0 : if (bitmap_bit_p (sched_nodes, e->dest->cuid)
2098 0 : && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2099 : last_cycle_in_window))
2100 : {
2101 0 : if (dump_file)
2102 0 : fprintf (dump_file, "%d ", e->dest->cuid);
2103 :
2104 0 : bitmap_set_bit (must_follow, e->dest->cuid);
2105 : }
2106 :
2107 0 : if (dump_file)
2108 0 : fprintf (dump_file, "\n");
2109 0 : }
2110 :
2111 : /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
2112 : parameters to decide if that's possible:
2113 : PS - The partial schedule.
2114 : U - The serial number of U_NODE.
2115 : NUM_SPLITS - The number of row splits made so far.
2116 : MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2117 : the first row of the scheduling window)
2118 : MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2119 : last row of the scheduling window) */
2120 :
2121 : static bool
2122 0 : try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2123 : int u, int cycle, sbitmap sched_nodes,
2124 : int *num_splits, sbitmap must_precede,
2125 : sbitmap must_follow)
2126 : {
2127 0 : ps_insn_ptr psi;
2128 0 : bool success = false;
2129 :
2130 0 : verify_partial_schedule (ps, sched_nodes);
2131 0 : psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2132 0 : if (psi)
2133 : {
2134 0 : SCHED_TIME (u) = cycle;
2135 0 : bitmap_set_bit (sched_nodes, u);
2136 0 : success = true;
2137 0 : *num_splits = 0;
2138 0 : if (dump_file)
2139 0 : fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2140 :
2141 : }
2142 :
2143 0 : return success;
2144 : }
2145 :
2146 : /* This function implements the scheduling algorithm for SMS according to the
2147 : above algorithm. */
2148 : static partial_schedule_ptr
2149 0 : sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2150 : {
2151 0 : int ii = mii;
2152 0 : int i, c, success, num_splits = 0;
2153 0 : int flush_and_start_over = true;
2154 0 : int num_nodes = g->num_nodes;
2155 0 : int start, end, step; /* Place together into one struct? */
2156 0 : auto_sbitmap sched_nodes (num_nodes);
2157 0 : auto_sbitmap must_precede (num_nodes);
2158 0 : auto_sbitmap must_follow (num_nodes);
2159 0 : auto_sbitmap tobe_scheduled (num_nodes);
2160 :
2161 : /* Value of param_sms_dfa_history is a limit on the number of cycles that
2162 : resource conflicts can span. ??? Should be provided by DFA, and be
2163 : dependent on the type of insn scheduled. Set to 0 by default to save
2164 : compile time. */
2165 0 : partial_schedule_ptr ps = create_partial_schedule (ii, g,
2166 : param_sms_dfa_history);
2167 :
2168 0 : bitmap_ones (tobe_scheduled);
2169 0 : bitmap_clear (sched_nodes);
2170 :
2171 0 : while (flush_and_start_over && (ii < maxii))
2172 : {
2173 :
2174 0 : if (dump_file)
2175 0 : fprintf (dump_file, "Starting with ii=%d\n", ii);
2176 0 : flush_and_start_over = false;
2177 0 : bitmap_clear (sched_nodes);
2178 :
2179 0 : for (i = 0; i < num_nodes; i++)
2180 : {
2181 0 : int u = nodes_order[i];
2182 0 : ddg_node_ptr u_node = &ps->g->nodes[u];
2183 0 : rtx_insn *insn = u_node->insn;
2184 :
2185 0 : gcc_checking_assert (NONDEBUG_INSN_P (insn));
2186 :
2187 0 : if (bitmap_bit_p (sched_nodes, u))
2188 0 : continue;
2189 :
2190 : /* Try to get non-empty scheduling window. */
2191 0 : success = 0;
2192 0 : if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2193 : &step, &end) == 0)
2194 : {
2195 0 : if (dump_file)
2196 0 : fprintf (dump_file, "\nTrying to schedule node %d "
2197 : "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
2198 0 : (g->nodes[u].insn)), start, end, step);
2199 :
2200 0 : gcc_assert ((step > 0 && start < end)
2201 : || (step < 0 && start > end));
2202 :
2203 0 : calculate_must_precede_follow (u_node, start, end, step, ii,
2204 : sched_nodes, must_precede,
2205 : must_follow);
2206 :
2207 0 : for (c = start; c != end; c += step)
2208 : {
2209 0 : sbitmap tmp_precede, tmp_follow;
2210 :
2211 0 : set_must_precede_follow (&tmp_follow, must_follow,
2212 : &tmp_precede, must_precede,
2213 : c, start, end, step);
2214 0 : success =
2215 0 : try_scheduling_node_in_cycle (ps, u, c,
2216 : sched_nodes,
2217 : &num_splits, tmp_precede,
2218 : tmp_follow);
2219 0 : if (success)
2220 : break;
2221 : }
2222 :
2223 0 : verify_partial_schedule (ps, sched_nodes);
2224 : }
2225 0 : if (!success)
2226 : {
2227 0 : int split_row;
2228 :
2229 0 : if (ii++ == maxii)
2230 : break;
2231 :
2232 0 : if (num_splits >= MAX_SPLIT_NUM)
2233 : {
2234 0 : num_splits = 0;
2235 0 : flush_and_start_over = true;
2236 0 : verify_partial_schedule (ps, sched_nodes);
2237 0 : reset_partial_schedule (ps, ii);
2238 0 : verify_partial_schedule (ps, sched_nodes);
2239 0 : break;
2240 : }
2241 :
2242 0 : num_splits++;
2243 : /* The scheduling window is exclusive of 'end'
2244 : whereas compute_split_window() expects an inclusive,
2245 : ordered range. */
2246 0 : if (step == 1)
2247 0 : split_row = compute_split_row (sched_nodes, start, end - 1,
2248 : ps->ii, u_node);
2249 : else
2250 0 : split_row = compute_split_row (sched_nodes, end + 1, start,
2251 : ps->ii, u_node);
2252 :
2253 0 : ps_insert_empty_row (ps, split_row, sched_nodes);
2254 0 : i--; /* Go back and retry node i. */
2255 :
2256 0 : if (dump_file)
2257 0 : fprintf (dump_file, "num_splits=%d\n", num_splits);
2258 : }
2259 :
2260 : /* ??? If (success), check register pressure estimates. */
2261 : } /* Continue with next node. */
2262 : } /* While flush_and_start_over. */
2263 0 : if (ii >= maxii)
2264 : {
2265 0 : free_partial_schedule (ps);
2266 0 : ps = NULL;
2267 : }
2268 : else
2269 0 : gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
2270 :
2271 0 : return ps;
2272 0 : }
2273 :
2274 : /* This function inserts a new empty row into PS at the position
2275 : according to SPLITROW, keeping all already scheduled instructions
2276 : intact and updating their SCHED_TIME and cycle accordingly. */
2277 : static void
2278 0 : ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2279 : sbitmap sched_nodes)
2280 : {
2281 0 : ps_insn_ptr crr_insn;
2282 0 : ps_insn_ptr *rows_new;
2283 0 : int ii = ps->ii;
2284 0 : int new_ii = ii + 1;
2285 0 : int row;
2286 0 : int *rows_length_new;
2287 :
2288 0 : verify_partial_schedule (ps, sched_nodes);
2289 :
2290 : /* We normalize sched_time and rotate ps to have only non-negative sched
2291 : times, for simplicity of updating cycles after inserting new row. */
2292 0 : split_row -= ps->min_cycle;
2293 0 : split_row = SMODULO (split_row, ii);
2294 0 : if (dump_file)
2295 0 : fprintf (dump_file, "split_row=%d\n", split_row);
2296 :
2297 0 : reset_sched_times (ps, PS_MIN_CYCLE (ps));
2298 0 : rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2299 :
2300 0 : rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2301 0 : rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
2302 0 : for (row = 0; row < split_row; row++)
2303 : {
2304 0 : rows_new[row] = ps->rows[row];
2305 0 : rows_length_new[row] = ps->rows_length[row];
2306 0 : ps->rows[row] = NULL;
2307 0 : for (crr_insn = rows_new[row];
2308 0 : crr_insn; crr_insn = crr_insn->next_in_row)
2309 : {
2310 0 : int u = crr_insn->id;
2311 0 : int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2312 :
2313 0 : SCHED_TIME (u) = new_time;
2314 0 : crr_insn->cycle = new_time;
2315 0 : SCHED_ROW (u) = new_time % new_ii;
2316 0 : SCHED_STAGE (u) = new_time / new_ii;
2317 : }
2318 :
2319 : }
2320 :
2321 0 : rows_new[split_row] = NULL;
2322 :
2323 0 : for (row = split_row; row < ii; row++)
2324 : {
2325 0 : rows_new[row + 1] = ps->rows[row];
2326 0 : rows_length_new[row + 1] = ps->rows_length[row];
2327 0 : ps->rows[row] = NULL;
2328 0 : for (crr_insn = rows_new[row + 1];
2329 0 : crr_insn; crr_insn = crr_insn->next_in_row)
2330 : {
2331 0 : int u = crr_insn->id;
2332 0 : int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2333 :
2334 0 : SCHED_TIME (u) = new_time;
2335 0 : crr_insn->cycle = new_time;
2336 0 : SCHED_ROW (u) = new_time % new_ii;
2337 0 : SCHED_STAGE (u) = new_time / new_ii;
2338 : }
2339 : }
2340 :
2341 : /* Updating ps. */
2342 0 : ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2343 0 : + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2344 0 : ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2345 0 : + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2346 0 : free (ps->rows);
2347 0 : ps->rows = rows_new;
2348 0 : free (ps->rows_length);
2349 0 : ps->rows_length = rows_length_new;
2350 0 : ps->ii = new_ii;
2351 0 : gcc_assert (ps->min_cycle >= 0);
2352 :
2353 0 : verify_partial_schedule (ps, sched_nodes);
2354 :
2355 0 : if (dump_file)
2356 0 : fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2357 : ps->max_cycle);
2358 0 : }
2359 :
2360 : /* Given U_NODE which is the node that failed to be scheduled; LOW and
2361 : UP which are the boundaries of it's scheduling window; compute using
2362 : SCHED_NODES and II a row in the partial schedule that can be split
2363 : which will separate a critical predecessor from a critical successor
2364 : thereby expanding the window, and return it. */
2365 : static int
2366 0 : compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2367 : ddg_node_ptr u_node)
2368 : {
2369 0 : ddg_edge_ptr e;
2370 0 : int lower = INT_MIN, upper = INT_MAX;
2371 0 : int crit_pred = -1;
2372 0 : int crit_succ = -1;
2373 0 : int crit_cycle;
2374 :
2375 0 : for (e = u_node->in; e != 0; e = e->next_in)
2376 : {
2377 0 : int v = e->src->cuid;
2378 :
2379 0 : if (bitmap_bit_p (sched_nodes, v)
2380 0 : && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2381 0 : if (SCHED_TIME (v) > lower)
2382 : {
2383 0 : crit_pred = v;
2384 0 : lower = SCHED_TIME (v);
2385 : }
2386 : }
2387 :
2388 0 : if (crit_pred >= 0)
2389 : {
2390 0 : crit_cycle = SCHED_TIME (crit_pred) + 1;
2391 0 : return SMODULO (crit_cycle, ii);
2392 : }
2393 :
2394 0 : for (e = u_node->out; e != 0; e = e->next_out)
2395 : {
2396 0 : int v = e->dest->cuid;
2397 :
2398 0 : if (bitmap_bit_p (sched_nodes, v)
2399 0 : && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2400 0 : if (SCHED_TIME (v) < upper)
2401 : {
2402 0 : crit_succ = v;
2403 0 : upper = SCHED_TIME (v);
2404 : }
2405 : }
2406 :
2407 0 : if (crit_succ >= 0)
2408 : {
2409 0 : crit_cycle = SCHED_TIME (crit_succ);
2410 0 : return SMODULO (crit_cycle, ii);
2411 : }
2412 :
2413 0 : if (dump_file)
2414 0 : fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2415 :
2416 0 : return SMODULO ((low + up + 1) / 2, ii);
2417 : }
2418 :
2419 : static void
2420 0 : verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2421 : {
2422 0 : int row;
2423 0 : ps_insn_ptr crr_insn;
2424 :
2425 0 : for (row = 0; row < ps->ii; row++)
2426 : {
2427 0 : int length = 0;
2428 :
2429 0 : for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2430 : {
2431 0 : int u = crr_insn->id;
2432 :
2433 0 : length++;
2434 0 : gcc_assert (bitmap_bit_p (sched_nodes, u));
2435 : /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2436 : popcount (sched_nodes) == number of insns in ps. */
2437 0 : gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2438 0 : gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2439 : }
2440 :
2441 0 : gcc_assert (ps->rows_length[row] == length);
2442 : }
2443 0 : }
2444 :
2445 :
2446 : /* This page implements the algorithm for ordering the nodes of a DDG
2447 : for modulo scheduling, activated through the
2448 : "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2449 :
2450 : #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2451 : #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2452 : #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2453 : #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2454 : #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2455 : #define DEPTH(x) (ASAP ((x)))
2456 :
2457 : typedef struct node_order_params * nopa;
2458 :
2459 : static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2460 : static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2461 : static nopa calculate_order_params (ddg_ptr, int, int *);
2462 : static int find_max_asap (ddg_ptr, sbitmap);
2463 : static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2464 : static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2465 :
2466 : enum sms_direction {BOTTOMUP, TOPDOWN};
2467 :
2468 : struct node_order_params
2469 : {
2470 : int asap;
2471 : int alap;
2472 : int height;
2473 : };
2474 :
2475 : /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2476 : static void
2477 0 : check_nodes_order (int *node_order, int num_nodes)
2478 : {
2479 0 : int i;
2480 0 : auto_sbitmap tmp (num_nodes);
2481 :
2482 0 : bitmap_clear (tmp);
2483 :
2484 0 : if (dump_file)
2485 0 : fprintf (dump_file, "SMS final nodes order: \n");
2486 :
2487 0 : for (i = 0; i < num_nodes; i++)
2488 : {
2489 0 : int u = node_order[i];
2490 :
2491 0 : if (dump_file)
2492 0 : fprintf (dump_file, "%d ", u);
2493 0 : gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
2494 :
2495 0 : bitmap_set_bit (tmp, u);
2496 : }
2497 :
2498 0 : if (dump_file)
2499 0 : fprintf (dump_file, "\n");
2500 0 : }
2501 :
2502 : /* Order the nodes of G for scheduling and pass the result in
2503 : NODE_ORDER. Also set aux.count of each node to ASAP.
2504 : Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2505 : static int
2506 0 : sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2507 : {
2508 0 : int i;
2509 0 : int rec_mii = 0;
2510 0 : ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2511 :
2512 0 : nopa nops = calculate_order_params (g, mii, pmax_asap);
2513 :
2514 0 : if (dump_file)
2515 0 : print_sccs (dump_file, sccs, g);
2516 :
2517 0 : order_nodes_of_sccs (sccs, node_order);
2518 :
2519 0 : if (sccs->num_sccs > 0)
2520 : /* First SCC has the largest recurrence_length. */
2521 0 : rec_mii = sccs->sccs[0]->recurrence_length;
2522 :
2523 : /* Save ASAP before destroying node_order_params. */
2524 0 : for (i = 0; i < g->num_nodes; i++)
2525 : {
2526 0 : ddg_node_ptr v = &g->nodes[i];
2527 0 : v->aux.count = ASAP (v);
2528 : }
2529 :
2530 0 : free (nops);
2531 0 : free_ddg_all_sccs (sccs);
2532 0 : check_nodes_order (node_order, g->num_nodes);
2533 :
2534 0 : return rec_mii;
2535 : }
2536 :
2537 : static void
2538 0 : order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2539 : {
2540 0 : int i, pos = 0;
2541 0 : ddg_ptr g = all_sccs->ddg;
2542 0 : int num_nodes = g->num_nodes;
2543 0 : auto_sbitmap prev_sccs (num_nodes);
2544 0 : auto_sbitmap on_path (num_nodes);
2545 0 : auto_sbitmap tmp (num_nodes);
2546 0 : auto_sbitmap ones (num_nodes);
2547 :
2548 0 : bitmap_clear (prev_sccs);
2549 0 : bitmap_ones (ones);
2550 :
2551 : /* Perform the node ordering starting from the SCC with the highest recMII.
2552 : For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2553 0 : for (i = 0; i < all_sccs->num_sccs; i++)
2554 : {
2555 0 : ddg_scc_ptr scc = all_sccs->sccs[i];
2556 :
2557 : /* Add nodes on paths from previous SCCs to the current SCC. */
2558 0 : find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2559 0 : bitmap_ior (tmp, scc->nodes, on_path);
2560 :
2561 : /* Add nodes on paths from the current SCC to previous SCCs. */
2562 0 : find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2563 0 : bitmap_ior (tmp, tmp, on_path);
2564 :
2565 : /* Remove nodes of previous SCCs from current extended SCC. */
2566 0 : bitmap_and_compl (tmp, tmp, prev_sccs);
2567 :
2568 0 : pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2569 : /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2570 : }
2571 :
2572 : /* Handle the remaining nodes that do not belong to any scc. Each call
2573 : to order_nodes_in_scc handles a single connected component. */
2574 0 : while (pos < g->num_nodes)
2575 : {
2576 0 : bitmap_and_compl (tmp, ones, prev_sccs);
2577 0 : pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2578 : }
2579 0 : }
2580 :
2581 : /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2582 : static struct node_order_params *
2583 0 : calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2584 : {
2585 0 : int u;
2586 0 : int max_asap;
2587 0 : int num_nodes = g->num_nodes;
2588 0 : ddg_edge_ptr e;
2589 : /* Allocate a place to hold ordering params for each node in the DDG. */
2590 0 : nopa node_order_params_arr;
2591 :
2592 : /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2593 0 : node_order_params_arr = (nopa) xcalloc (num_nodes,
2594 : sizeof (struct node_order_params));
2595 :
2596 : /* Set the aux pointer of each node to point to its order_params structure. */
2597 0 : for (u = 0; u < num_nodes; u++)
2598 0 : g->nodes[u].aux.info = &node_order_params_arr[u];
2599 :
2600 : /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2601 : calculate ASAP, ALAP, mobility, distance, and height for each node
2602 : in the dependence (direct acyclic) graph. */
2603 :
2604 : /* We assume that the nodes in the array are in topological order. */
2605 :
2606 : max_asap = 0;
2607 0 : for (u = 0; u < num_nodes; u++)
2608 : {
2609 0 : ddg_node_ptr u_node = &g->nodes[u];
2610 :
2611 0 : ASAP (u_node) = 0;
2612 0 : for (e = u_node->in; e; e = e->next_in)
2613 0 : if (e->distance == 0)
2614 0 : ASAP (u_node) = MAX (ASAP (u_node),
2615 : ASAP (e->src) + e->latency);
2616 0 : max_asap = MAX (max_asap, ASAP (u_node));
2617 : }
2618 :
2619 0 : for (u = num_nodes - 1; u > -1; u--)
2620 : {
2621 0 : ddg_node_ptr u_node = &g->nodes[u];
2622 :
2623 0 : ALAP (u_node) = max_asap;
2624 0 : HEIGHT (u_node) = 0;
2625 0 : for (e = u_node->out; e; e = e->next_out)
2626 0 : if (e->distance == 0)
2627 : {
2628 0 : ALAP (u_node) = MIN (ALAP (u_node),
2629 : ALAP (e->dest) - e->latency);
2630 0 : HEIGHT (u_node) = MAX (HEIGHT (u_node),
2631 : HEIGHT (e->dest) + e->latency);
2632 : }
2633 : }
2634 0 : if (dump_file)
2635 : {
2636 0 : fprintf (dump_file, "\nOrder params\n");
2637 0 : for (u = 0; u < num_nodes; u++)
2638 : {
2639 0 : ddg_node_ptr u_node = &g->nodes[u];
2640 :
2641 0 : fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2642 0 : ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2643 : }
2644 : }
2645 :
2646 0 : *pmax_asap = max_asap;
2647 0 : return node_order_params_arr;
2648 : }
2649 :
2650 : static int
2651 0 : find_max_asap (ddg_ptr g, sbitmap nodes)
2652 : {
2653 0 : unsigned int u = 0;
2654 0 : int max_asap = -1;
2655 0 : int result = -1;
2656 0 : sbitmap_iterator sbi;
2657 :
2658 0 : EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2659 : {
2660 0 : ddg_node_ptr u_node = &g->nodes[u];
2661 :
2662 0 : if (max_asap < ASAP (u_node))
2663 : {
2664 0 : max_asap = ASAP (u_node);
2665 0 : result = u;
2666 : }
2667 : }
2668 0 : return result;
2669 : }
2670 :
2671 : static int
2672 0 : find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2673 : {
2674 0 : unsigned int u = 0;
2675 0 : int max_hv = -1;
2676 0 : int min_mob = INT_MAX;
2677 0 : int result = -1;
2678 0 : sbitmap_iterator sbi;
2679 :
2680 0 : EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2681 : {
2682 0 : ddg_node_ptr u_node = &g->nodes[u];
2683 :
2684 0 : if (max_hv < HEIGHT (u_node))
2685 : {
2686 0 : max_hv = HEIGHT (u_node);
2687 0 : min_mob = MOB (u_node);
2688 0 : result = u;
2689 : }
2690 0 : else if ((max_hv == HEIGHT (u_node))
2691 0 : && (min_mob > MOB (u_node)))
2692 : {
2693 0 : min_mob = MOB (u_node);
2694 0 : result = u;
2695 : }
2696 : }
2697 0 : return result;
2698 : }
2699 :
2700 : static int
2701 0 : find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2702 : {
2703 0 : unsigned int u = 0;
2704 0 : int max_dv = -1;
2705 0 : int min_mob = INT_MAX;
2706 0 : int result = -1;
2707 0 : sbitmap_iterator sbi;
2708 :
2709 0 : EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2710 : {
2711 0 : ddg_node_ptr u_node = &g->nodes[u];
2712 :
2713 0 : if (max_dv < DEPTH (u_node))
2714 : {
2715 0 : max_dv = DEPTH (u_node);
2716 0 : min_mob = MOB (u_node);
2717 0 : result = u;
2718 : }
2719 0 : else if ((max_dv == DEPTH (u_node))
2720 0 : && (min_mob > MOB (u_node)))
2721 : {
2722 0 : min_mob = MOB (u_node);
2723 0 : result = u;
2724 : }
2725 : }
2726 0 : return result;
2727 : }
2728 :
2729 : /* Places the nodes of SCC into the NODE_ORDER array starting
2730 : at position POS, according to the SMS ordering algorithm.
2731 : NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2732 : the NODE_ORDER array, starting from position zero. */
2733 : static int
2734 0 : order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2735 : int * node_order, int pos)
2736 : {
2737 0 : enum sms_direction dir;
2738 0 : int num_nodes = g->num_nodes;
2739 0 : auto_sbitmap workset (num_nodes);
2740 0 : auto_sbitmap tmp (num_nodes);
2741 0 : sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2742 0 : auto_sbitmap predecessors (num_nodes);
2743 0 : auto_sbitmap successors (num_nodes);
2744 :
2745 0 : bitmap_clear (predecessors);
2746 0 : find_predecessors (predecessors, g, nodes_ordered);
2747 :
2748 0 : bitmap_clear (successors);
2749 0 : find_successors (successors, g, nodes_ordered);
2750 :
2751 0 : bitmap_clear (tmp);
2752 0 : if (bitmap_and (tmp, predecessors, scc))
2753 : {
2754 0 : bitmap_copy (workset, tmp);
2755 0 : dir = BOTTOMUP;
2756 : }
2757 0 : else if (bitmap_and (tmp, successors, scc))
2758 : {
2759 0 : bitmap_copy (workset, tmp);
2760 0 : dir = TOPDOWN;
2761 : }
2762 : else
2763 : {
2764 0 : int u;
2765 :
2766 0 : bitmap_clear (workset);
2767 0 : if ((u = find_max_asap (g, scc)) >= 0)
2768 0 : bitmap_set_bit (workset, u);
2769 : dir = BOTTOMUP;
2770 : }
2771 :
2772 0 : bitmap_clear (zero_bitmap);
2773 0 : while (!bitmap_equal_p (workset, zero_bitmap))
2774 : {
2775 0 : int v;
2776 0 : ddg_node_ptr v_node;
2777 0 : sbitmap v_node_preds;
2778 0 : sbitmap v_node_succs;
2779 :
2780 0 : if (dir == TOPDOWN)
2781 : {
2782 0 : while (!bitmap_equal_p (workset, zero_bitmap))
2783 : {
2784 0 : v = find_max_hv_min_mob (g, workset);
2785 0 : v_node = &g->nodes[v];
2786 0 : node_order[pos++] = v;
2787 0 : v_node_succs = NODE_SUCCESSORS (v_node);
2788 0 : bitmap_and (tmp, v_node_succs, scc);
2789 :
2790 : /* Don't consider the already ordered successors again. */
2791 0 : bitmap_and_compl (tmp, tmp, nodes_ordered);
2792 0 : bitmap_ior (workset, workset, tmp);
2793 0 : bitmap_clear_bit (workset, v);
2794 0 : bitmap_set_bit (nodes_ordered, v);
2795 : }
2796 0 : dir = BOTTOMUP;
2797 0 : bitmap_clear (predecessors);
2798 0 : find_predecessors (predecessors, g, nodes_ordered);
2799 0 : bitmap_and (workset, predecessors, scc);
2800 : }
2801 : else
2802 : {
2803 0 : while (!bitmap_equal_p (workset, zero_bitmap))
2804 : {
2805 0 : v = find_max_dv_min_mob (g, workset);
2806 0 : v_node = &g->nodes[v];
2807 0 : node_order[pos++] = v;
2808 0 : v_node_preds = NODE_PREDECESSORS (v_node);
2809 0 : bitmap_and (tmp, v_node_preds, scc);
2810 :
2811 : /* Don't consider the already ordered predecessors again. */
2812 0 : bitmap_and_compl (tmp, tmp, nodes_ordered);
2813 0 : bitmap_ior (workset, workset, tmp);
2814 0 : bitmap_clear_bit (workset, v);
2815 0 : bitmap_set_bit (nodes_ordered, v);
2816 : }
2817 0 : dir = TOPDOWN;
2818 0 : bitmap_clear (successors);
2819 0 : find_successors (successors, g, nodes_ordered);
2820 0 : bitmap_and (workset, successors, scc);
2821 : }
2822 : }
2823 0 : sbitmap_free (zero_bitmap);
2824 0 : return pos;
2825 0 : }
2826 :
2827 :
2828 : /* This page contains functions for manipulating partial-schedules during
2829 : modulo scheduling. */
2830 :
2831 : /* Create a partial schedule and allocate a memory to hold II rows. */
2832 :
2833 : static partial_schedule_ptr
2834 0 : create_partial_schedule (int ii, ddg_ptr g, int history)
2835 : {
2836 0 : partial_schedule_ptr ps = XNEW (struct partial_schedule);
2837 0 : ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2838 0 : ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2839 0 : ps->reg_moves.create (0);
2840 0 : ps->ii = ii;
2841 0 : ps->history = history;
2842 0 : ps->min_cycle = INT_MAX;
2843 0 : ps->max_cycle = INT_MIN;
2844 0 : ps->g = g;
2845 :
2846 0 : return ps;
2847 : }
2848 :
2849 : /* Free the PS_INSNs in rows array of the given partial schedule.
2850 : ??? Consider caching the PS_INSN's. */
2851 : static void
2852 0 : free_ps_insns (partial_schedule_ptr ps)
2853 : {
2854 0 : int i;
2855 :
2856 0 : for (i = 0; i < ps->ii; i++)
2857 : {
2858 0 : while (ps->rows[i])
2859 : {
2860 0 : ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2861 :
2862 0 : free (ps->rows[i]);
2863 0 : ps->rows[i] = ps_insn;
2864 : }
2865 0 : ps->rows[i] = NULL;
2866 : }
2867 0 : }
2868 :
2869 : /* Free all the memory allocated to the partial schedule. */
2870 :
2871 : static void
2872 0 : free_partial_schedule (partial_schedule_ptr ps)
2873 : {
2874 0 : ps_reg_move_info *move;
2875 0 : unsigned int i;
2876 :
2877 0 : if (!ps)
2878 0 : return;
2879 :
2880 0 : FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
2881 0 : sbitmap_free (move->uses);
2882 0 : ps->reg_moves.release ();
2883 :
2884 0 : free_ps_insns (ps);
2885 0 : free (ps->rows);
2886 0 : free (ps->rows_length);
2887 0 : free (ps);
2888 : }
2889 :
2890 : /* Clear the rows array with its PS_INSNs, and create a new one with
2891 : NEW_II rows. */
2892 :
2893 : static void
2894 0 : reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2895 : {
2896 0 : if (!ps)
2897 : return;
2898 0 : free_ps_insns (ps);
2899 0 : if (new_ii == ps->ii)
2900 : return;
2901 0 : ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2902 : * sizeof (ps_insn_ptr));
2903 0 : memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2904 0 : ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2905 0 : memset (ps->rows_length, 0, new_ii * sizeof (int));
2906 0 : ps->ii = new_ii;
2907 0 : ps->min_cycle = INT_MAX;
2908 0 : ps->max_cycle = INT_MIN;
2909 : }
2910 :
2911 : /* Prints the partial schedule as an ii rows array, for each rows
2912 : print the ids of the insns in it. */
2913 : void
2914 0 : print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2915 : {
2916 0 : int i;
2917 :
2918 0 : for (i = 0; i < ps->ii; i++)
2919 : {
2920 0 : ps_insn_ptr ps_i = ps->rows[i];
2921 :
2922 0 : fprintf (dump, "\n[ROW %d ]: ", i);
2923 0 : while (ps_i)
2924 : {
2925 0 : rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
2926 :
2927 0 : if (JUMP_P (insn))
2928 0 : fprintf (dump, "%d (branch), ", INSN_UID (insn));
2929 : else
2930 0 : fprintf (dump, "%d, ", INSN_UID (insn));
2931 :
2932 0 : ps_i = ps_i->next_in_row;
2933 : }
2934 : }
2935 0 : }
2936 :
2937 : /* Creates an object of PS_INSN and initializes it to the given parameters. */
2938 : static ps_insn_ptr
2939 0 : create_ps_insn (int id, int cycle)
2940 : {
2941 0 : ps_insn_ptr ps_i = XNEW (struct ps_insn);
2942 :
2943 0 : ps_i->id = id;
2944 0 : ps_i->next_in_row = NULL;
2945 0 : ps_i->prev_in_row = NULL;
2946 0 : ps_i->cycle = cycle;
2947 :
2948 0 : return ps_i;
2949 : }
2950 :
2951 :
2952 : /* Removes the given PS_INSN from the partial schedule. */
2953 : static void
2954 0 : remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2955 : {
2956 0 : int row;
2957 :
2958 0 : gcc_assert (ps && ps_i);
2959 :
2960 0 : row = SMODULO (ps_i->cycle, ps->ii);
2961 0 : if (! ps_i->prev_in_row)
2962 : {
2963 0 : gcc_assert (ps_i == ps->rows[row]);
2964 0 : ps->rows[row] = ps_i->next_in_row;
2965 0 : if (ps->rows[row])
2966 0 : ps->rows[row]->prev_in_row = NULL;
2967 : }
2968 : else
2969 : {
2970 0 : ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2971 0 : if (ps_i->next_in_row)
2972 0 : ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2973 : }
2974 :
2975 0 : ps->rows_length[row] -= 1;
2976 0 : free (ps_i);
2977 0 : return;
2978 : }
2979 :
2980 : /* Unlike what literature describes for modulo scheduling (which focuses
2981 : on VLIW machines) the order of the instructions inside a cycle is
2982 : important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2983 : where the current instruction should go relative to the already
2984 : scheduled instructions in the given cycle. Go over these
2985 : instructions and find the first possible column to put it in. */
2986 : static bool
2987 0 : ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2988 : sbitmap must_precede, sbitmap must_follow)
2989 : {
2990 0 : ps_insn_ptr next_ps_i;
2991 0 : ps_insn_ptr first_must_follow = NULL;
2992 0 : ps_insn_ptr last_must_precede = NULL;
2993 0 : ps_insn_ptr last_in_row = NULL;
2994 0 : int row;
2995 :
2996 0 : if (! ps_i)
2997 : return false;
2998 :
2999 0 : row = SMODULO (ps_i->cycle, ps->ii);
3000 :
3001 : /* Find the first must follow and the last must precede
3002 : and insert the node immediately after the must precede
3003 : but make sure that it there is no must follow after it. */
3004 0 : for (next_ps_i = ps->rows[row];
3005 0 : next_ps_i;
3006 0 : next_ps_i = next_ps_i->next_in_row)
3007 : {
3008 0 : if (must_follow
3009 0 : && bitmap_bit_p (must_follow, next_ps_i->id)
3010 0 : && ! first_must_follow)
3011 : first_must_follow = next_ps_i;
3012 0 : if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
3013 : {
3014 : /* If we have already met a node that must follow, then
3015 : there is no possible column. */
3016 0 : if (first_must_follow)
3017 : return false;
3018 : else
3019 : last_must_precede = next_ps_i;
3020 : }
3021 : /* The closing branch must be the last in the row. */
3022 0 : if (JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3023 : return false;
3024 :
3025 0 : last_in_row = next_ps_i;
3026 : }
3027 :
3028 : /* The closing branch is scheduled as well. Make sure there is no
3029 : dependent instruction after it as the branch should be the last
3030 : instruction in the row. */
3031 0 : if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3032 : {
3033 0 : if (first_must_follow)
3034 : return false;
3035 0 : if (last_in_row)
3036 : {
3037 : /* Make the branch the last in the row. New instructions
3038 : will be inserted at the beginning of the row or after the
3039 : last must_precede instruction thus the branch is guaranteed
3040 : to remain the last instruction in the row. */
3041 0 : last_in_row->next_in_row = ps_i;
3042 0 : ps_i->prev_in_row = last_in_row;
3043 0 : ps_i->next_in_row = NULL;
3044 : }
3045 : else
3046 0 : ps->rows[row] = ps_i;
3047 0 : return true;
3048 : }
3049 :
3050 : /* Now insert the node after INSERT_AFTER_PSI. */
3051 :
3052 0 : if (! last_must_precede)
3053 : {
3054 0 : ps_i->next_in_row = ps->rows[row];
3055 0 : ps_i->prev_in_row = NULL;
3056 0 : if (ps_i->next_in_row)
3057 0 : ps_i->next_in_row->prev_in_row = ps_i;
3058 0 : ps->rows[row] = ps_i;
3059 : }
3060 : else
3061 : {
3062 0 : ps_i->next_in_row = last_must_precede->next_in_row;
3063 0 : last_must_precede->next_in_row = ps_i;
3064 0 : ps_i->prev_in_row = last_must_precede;
3065 0 : if (ps_i->next_in_row)
3066 0 : ps_i->next_in_row->prev_in_row = ps_i;
3067 : }
3068 :
3069 : return true;
3070 : }
3071 :
3072 : /* Advances the PS_INSN one column in its current row; returns false
3073 : in failure and true in success. Bit N is set in MUST_FOLLOW if
3074 : the node with cuid N must be come after the node pointed to by
3075 : PS_I when scheduled in the same cycle. */
3076 : static bool
3077 0 : ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3078 : sbitmap must_follow)
3079 : {
3080 0 : ps_insn_ptr prev, next;
3081 0 : int row;
3082 :
3083 0 : if (!ps || !ps_i)
3084 : return false;
3085 :
3086 0 : row = SMODULO (ps_i->cycle, ps->ii);
3087 :
3088 0 : if (! ps_i->next_in_row)
3089 : return false;
3090 :
3091 : /* Check if next_in_row is dependent on ps_i, both having same sched
3092 : times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
3093 0 : if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
3094 : return false;
3095 :
3096 : /* Advance PS_I over its next_in_row in the doubly linked list. */
3097 0 : prev = ps_i->prev_in_row;
3098 0 : next = ps_i->next_in_row;
3099 :
3100 0 : if (ps_i == ps->rows[row])
3101 0 : ps->rows[row] = next;
3102 :
3103 0 : ps_i->next_in_row = next->next_in_row;
3104 :
3105 0 : if (next->next_in_row)
3106 0 : next->next_in_row->prev_in_row = ps_i;
3107 :
3108 0 : next->next_in_row = ps_i;
3109 0 : ps_i->prev_in_row = next;
3110 :
3111 0 : next->prev_in_row = prev;
3112 0 : if (prev)
3113 0 : prev->next_in_row = next;
3114 :
3115 : return true;
3116 : }
3117 :
3118 : /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
3119 : Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
3120 : set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
3121 : before/after (respectively) the node pointed to by PS_I when scheduled
3122 : in the same cycle. */
3123 : static ps_insn_ptr
3124 0 : add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3125 : sbitmap must_precede, sbitmap must_follow)
3126 : {
3127 0 : ps_insn_ptr ps_i;
3128 0 : int row = SMODULO (cycle, ps->ii);
3129 :
3130 0 : if (ps->rows_length[row] >= issue_rate)
3131 : return NULL;
3132 :
3133 0 : ps_i = create_ps_insn (id, cycle);
3134 :
3135 : /* Finds and inserts PS_I according to MUST_FOLLOW and
3136 : MUST_PRECEDE. */
3137 0 : if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3138 : {
3139 0 : free (ps_i);
3140 0 : return NULL;
3141 : }
3142 :
3143 0 : ps->rows_length[row] += 1;
3144 0 : return ps_i;
3145 : }
3146 :
3147 : /* Advance time one cycle. Assumes DFA is being used. */
3148 : static void
3149 0 : advance_one_cycle (void)
3150 : {
3151 0 : if (targetm.sched.dfa_pre_cycle_insn)
3152 0 : state_transition (curr_state,
3153 : targetm.sched.dfa_pre_cycle_insn ());
3154 :
3155 0 : state_transition (curr_state, NULL);
3156 :
3157 0 : if (targetm.sched.dfa_post_cycle_insn)
3158 0 : state_transition (curr_state,
3159 0 : targetm.sched.dfa_post_cycle_insn ());
3160 0 : }
3161 :
3162 :
3163 :
3164 : /* Checks if PS has resource conflicts according to DFA, starting from
3165 : FROM cycle to TO cycle; returns true if there are conflicts and false
3166 : if there are no conflicts. Assumes DFA is being used. */
3167 : static bool
3168 0 : ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3169 : {
3170 0 : int cycle;
3171 :
3172 0 : state_reset (curr_state);
3173 :
3174 0 : for (cycle = from; cycle <= to; cycle++)
3175 : {
3176 0 : ps_insn_ptr crr_insn;
3177 : /* Holds the remaining issue slots in the current row. */
3178 0 : int can_issue_more = issue_rate;
3179 :
3180 : /* Walk through the DFA for the current row. */
3181 0 : for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3182 0 : crr_insn;
3183 0 : crr_insn = crr_insn->next_in_row)
3184 : {
3185 0 : rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
3186 :
3187 : /* Check if there is room for the current insn. */
3188 0 : if (!can_issue_more || state_dead_lock_p (curr_state))
3189 0 : return true;
3190 :
3191 : /* Update the DFA state and return with failure if the DFA found
3192 : resource conflicts. */
3193 0 : if (state_transition (curr_state, insn) >= 0)
3194 : return true;
3195 :
3196 0 : if (targetm.sched.variable_issue)
3197 0 : can_issue_more =
3198 0 : targetm.sched.variable_issue (sched_dump, sched_verbose,
3199 : insn, can_issue_more);
3200 : /* A naked CLOBBER or USE generates no instruction, so don't
3201 : let them consume issue slots. */
3202 0 : else if (GET_CODE (PATTERN (insn)) != USE
3203 0 : && GET_CODE (PATTERN (insn)) != CLOBBER)
3204 0 : can_issue_more--;
3205 : }
3206 :
3207 : /* Advance the DFA to the next cycle. */
3208 0 : advance_one_cycle ();
3209 : }
3210 : return false;
3211 : }
3212 :
3213 : /* Checks if the given node causes resource conflicts when added to PS at
3214 : cycle C. If not the node is added to PS and returned; otherwise zero
3215 : is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3216 : cuid N must be come before/after (respectively) the node pointed to by
3217 : PS_I when scheduled in the same cycle. */
3218 : ps_insn_ptr
3219 0 : ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3220 : int c, sbitmap must_precede,
3221 : sbitmap must_follow)
3222 : {
3223 0 : int i, first, amount;
3224 0 : bool has_conflicts = false;
3225 0 : ps_insn_ptr ps_i;
3226 :
3227 : /* First add the node to the PS, if this succeeds check for
3228 : conflicts, trying different issue slots in the same row. */
3229 0 : if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3230 : return NULL; /* Failed to insert the node at the given cycle. */
3231 :
3232 0 : while (1)
3233 : {
3234 0 : has_conflicts = ps_has_conflicts (ps, c, c);
3235 0 : if (ps->history > 0 && !has_conflicts)
3236 : {
3237 : /* Check all 2h+1 intervals, starting from c-2h..c up to c..2h,
3238 : but not more than ii intervals. */
3239 0 : first = c - ps->history;
3240 0 : amount = 2 * ps->history + 1;
3241 0 : if (amount > ps->ii)
3242 : amount = ps->ii;
3243 0 : for (i = first; i < first + amount; i++)
3244 : {
3245 0 : has_conflicts = ps_has_conflicts (ps,
3246 : i - ps->history,
3247 0 : i + ps->history);
3248 0 : if (has_conflicts)
3249 : break;
3250 : }
3251 : }
3252 0 : if (!has_conflicts)
3253 : break;
3254 : /* Try different issue slots to find one that the given node can be
3255 : scheduled in without conflicts. */
3256 0 : if (! ps_insn_advance_column (ps, ps_i, must_follow))
3257 : break;
3258 : }
3259 :
3260 0 : if (has_conflicts)
3261 : {
3262 0 : remove_node_from_ps (ps, ps_i);
3263 0 : return NULL;
3264 : }
3265 :
3266 0 : ps->min_cycle = MIN (ps->min_cycle, c);
3267 0 : ps->max_cycle = MAX (ps->max_cycle, c);
3268 0 : return ps_i;
3269 : }
3270 :
3271 : /* Calculate the stage count of the partial schedule PS. The calculation
3272 : takes into account the rotation amount passed in ROTATION_AMOUNT. */
3273 : int
3274 0 : calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3275 : {
3276 0 : int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3277 0 : int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3278 0 : int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3279 :
3280 : /* The calculation of stage count is done adding the number of stages
3281 : before cycle zero and after cycle zero. */
3282 0 : stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3283 :
3284 0 : return stage_count;
3285 : }
3286 :
3287 : /* Rotate the rows of PS such that insns scheduled at time
3288 : START_CYCLE will appear in row 0. Updates max/min_cycles. */
3289 : void
3290 0 : rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3291 : {
3292 0 : int i, row, backward_rotates;
3293 0 : int last_row = ps->ii - 1;
3294 :
3295 0 : if (start_cycle == 0)
3296 : return;
3297 :
3298 0 : backward_rotates = SMODULO (start_cycle, ps->ii);
3299 :
3300 : /* Revisit later and optimize this into a single loop. */
3301 0 : for (i = 0; i < backward_rotates; i++)
3302 : {
3303 0 : ps_insn_ptr first_row = ps->rows[0];
3304 0 : int first_row_length = ps->rows_length[0];
3305 :
3306 0 : for (row = 0; row < last_row; row++)
3307 : {
3308 0 : ps->rows[row] = ps->rows[row + 1];
3309 0 : ps->rows_length[row] = ps->rows_length[row + 1];
3310 : }
3311 :
3312 0 : ps->rows[last_row] = first_row;
3313 0 : ps->rows_length[last_row] = first_row_length;
3314 : }
3315 :
3316 0 : ps->max_cycle -= start_cycle;
3317 0 : ps->min_cycle -= start_cycle;
3318 : }
3319 :
3320 : #endif /* INSN_SCHEDULING */
3321 :
3322 : /* Run instruction scheduler. */
3323 : /* Perform SMS module scheduling. */
3324 :
3325 : namespace {
3326 :
3327 : const pass_data pass_data_sms =
3328 : {
3329 : RTL_PASS, /* type */
3330 : "sms", /* name */
3331 : OPTGROUP_NONE, /* optinfo_flags */
3332 : TV_SMS, /* tv_id */
3333 : 0, /* properties_required */
3334 : 0, /* properties_provided */
3335 : 0, /* properties_destroyed */
3336 : 0, /* todo_flags_start */
3337 : TODO_df_finish, /* todo_flags_finish */
3338 : };
3339 :
3340 : class pass_sms : public rtl_opt_pass
3341 : {
3342 : public:
3343 285722 : pass_sms (gcc::context *ctxt)
3344 571444 : : rtl_opt_pass (pass_data_sms, ctxt)
3345 : {}
3346 :
3347 : /* opt_pass methods: */
3348 1471370 : bool gate (function *) final override
3349 : {
3350 1471370 : return (optimize > 0 && flag_modulo_sched);
3351 : }
3352 :
3353 : unsigned int execute (function *) final override;
3354 :
3355 : }; // class pass_sms
3356 :
3357 : unsigned int
3358 313 : pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
3359 : {
3360 : #ifdef INSN_SCHEDULING
3361 313 : basic_block bb;
3362 :
3363 : /* Collect loop information to be used in SMS. */
3364 313 : cfg_layout_initialize (0);
3365 313 : sms_schedule ();
3366 :
3367 : /* Update the life information, because we add pseudos. */
3368 313 : max_regno = max_reg_num ();
3369 :
3370 : /* Finalize layout changes. */
3371 2314 : FOR_EACH_BB_FN (bb, fun)
3372 2001 : if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
3373 1688 : bb->aux = bb->next_bb;
3374 313 : free_dominance_info (CDI_DOMINATORS);
3375 313 : cfg_layout_finalize ();
3376 : #endif /* INSN_SCHEDULING */
3377 313 : return 0;
3378 : }
3379 :
3380 : } // anon namespace
3381 :
3382 : rtl_opt_pass *
3383 285722 : make_pass_sms (gcc::context *ctxt)
3384 : {
3385 285722 : return new pass_sms (ctxt);
3386 : }
|