Line data Source code
1 : /* Instruction scheduling pass.
2 : Copyright (C) 1992-2026 Free Software Foundation, Inc.
3 : Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
4 : and currently maintained by, Jim Wilson (wilson@cygnus.com)
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify it under
9 : the terms of the GNU General Public License as published by the Free
10 : Software Foundation; either version 3, or (at your option) any later
11 : version.
12 :
13 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 : for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : /* This pass implements list scheduling within basic blocks. It is
23 : run twice: (1) after flow analysis, but before register allocation,
24 : and (2) after register allocation.
25 :
26 : The first run performs interblock scheduling, moving insns between
27 : different blocks in the same "region", and the second runs only
28 : basic block scheduling.
29 :
30 : Interblock motions performed are useful motions and speculative
31 : motions, including speculative loads. Motions requiring code
32 : duplication are not supported. The identification of motion type
33 : and the check for validity of speculative motions requires
34 : construction and analysis of the function's control flow graph.
35 :
36 : The main entry point for this pass is schedule_insns(), called for
37 : each function. The work of the scheduler is organized in three
38 : levels: (1) function level: insns are subject to splitting,
39 : control-flow-graph is constructed, regions are computed (after
40 : reload, each region is of one block), (2) region level: control
41 : flow graph attributes required for interblock scheduling are
42 : computed (dominators, reachability, etc.), data dependences and
43 : priorities are computed, and (3) block level: insns in the block
44 : are actually scheduled. */
45 :
46 : #include "config.h"
47 : #include "system.h"
48 : #include "coretypes.h"
49 : #include "backend.h"
50 : #include "target.h"
51 : #include "rtl.h"
52 : #include "df.h"
53 : #include "memmodel.h"
54 : #include "tm_p.h"
55 : #include "insn-config.h"
56 : #include "emit-rtl.h"
57 : #include "recog.h"
58 : #include "profile.h"
59 : #include "insn-attr.h"
60 : #include "except.h"
61 : #include "cfganal.h"
62 : #include "sched-int.h"
63 : #include "sel-sched.h"
64 : #include "tree-pass.h"
65 : #include "dbgcnt.h"
66 : #include "pretty-print.h"
67 : #include "print-rtl.h"
68 :
69 : /* Disable warnings about quoting issues in the pp_xxx calls below
70 : that (intentionally) don't follow GCC diagnostic conventions. */
71 : #if __GNUC__ >= 10
72 : # pragma GCC diagnostic push
73 : # pragma GCC diagnostic ignored "-Wformat-diag"
74 : #endif
75 :
76 : #ifdef INSN_SCHEDULING
77 :
/* Some accessor macros for h_i_d members only used within this file.  */
#define FED_BY_SPEC_LOAD(INSN) (HID (INSN)->fed_by_spec_load)
/* Fixed to expand the macro argument INSN; the previous definition
   referenced lowercase `insn', silently binding to whatever variable
   of that name happened to be in scope at the use site.  */
#define IS_LOAD_INSN(INSN) (HID (INSN)->is_load_insn)
81 :
82 : /* nr_inter/spec counts interblock/speculative motion for the function. */
83 : static int nr_inter, nr_spec;
84 :
85 : static bool is_cfg_nonregular (void);
86 :
87 : /* Number of regions in the procedure. */
88 : int nr_regions = 0;
89 :
90 : /* Same as above before adding any new regions. */
91 : static int nr_regions_initial = 0;
92 :
93 : /* Table of region descriptions. */
94 : region *rgn_table = NULL;
95 :
96 : /* Array of lists of regions' blocks. */
97 : int *rgn_bb_table = NULL;
98 :
99 : /* Topological order of blocks in the region (if b2 is reachable from
100 : b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
101 : always referred to by either block or b, while its topological
102 : order name (in the region) is referred to by bb. */
103 : int *block_to_bb = NULL;
104 :
105 : /* The number of the region containing a block. */
106 : int *containing_rgn = NULL;
107 :
108 : /* ebb_head [i] - is index in rgn_bb_table of the head basic block of i'th ebb.
109 : Currently we can get a ebb only through splitting of currently
110 : scheduling block, therefore, we don't need ebb_head array for every region,
111 : hence, its sufficient to hold it for current one only. */
112 : int *ebb_head = NULL;
113 :
114 : /* The minimum probability of reaching a source block so that it will be
115 : considered for speculative scheduling. */
116 : static int min_spec_prob;
117 :
118 : static void find_single_block_region (bool);
119 : static void find_rgns (void);
120 : static bool too_large (int, int *, int *);
121 :
122 : /* Blocks of the current region being scheduled. */
123 : int current_nr_blocks;
124 : int current_blocks;
125 :
126 : /* A speculative motion requires checking live information on the path
127 : from 'source' to 'target'. The split blocks are those to be checked.
128 : After a speculative motion, live information should be modified in
129 : the 'update' blocks.
130 :
131 : Lists of split and update blocks for each candidate of the current
132 : target are in array bblst_table. */
133 : static basic_block *bblst_table;
134 : static int bblst_size, bblst_last;
135 :
136 : /* Arrays that hold the DFA state at the end of a basic block, to re-use
137 : as the initial state at the start of successor blocks. The BB_STATE
138 : array holds the actual DFA state, and BB_STATE_ARRAY[I] is a pointer
139 : into BB_STATE for basic block I. FIXME: This should be a vec. */
140 : static char *bb_state_array = NULL;
141 : static state_t *bb_state = NULL;
142 :
143 : /* Target info declarations.
144 :
145 : The block currently being scheduled is referred to as the "target" block,
146 : while other blocks in the region from which insns can be moved to the
147 : target are called "source" blocks. The candidate structure holds info
148 : about such sources: are they valid? Speculative? Etc. */
149 : struct bblst
150 : {
151 : basic_block *first_member;
152 : int nr_members;
153 : };
154 :
155 : struct candidate
156 : {
157 : char is_valid;
158 : char is_speculative;
159 : int src_prob;
160 : bblst split_bbs;
161 : bblst update_bbs;
162 : };
163 :
164 : static candidate *candidate_table;
165 : #define IS_VALID(src) (candidate_table[src].is_valid)
166 : #define IS_SPECULATIVE(src) (candidate_table[src].is_speculative)
167 : #define IS_SPECULATIVE_INSN(INSN) \
168 : (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
169 : #define SRC_PROB(src) ( candidate_table[src].src_prob )
170 :
171 : /* The bb being currently scheduled. */
172 : int target_bb;
173 :
174 : /* List of edges. */
175 : struct edgelst
176 : {
177 : edge *first_member;
178 : int nr_members;
179 : };
180 :
181 : static edge *edgelst_table;
182 : static int edgelst_last;
183 :
184 : static void extract_edgelst (sbitmap, edgelst *);
185 :
186 : /* Target info functions. */
187 : static void split_edges (int, int, edgelst *);
188 : static void compute_trg_info (int);
189 : void debug_candidate (int);
190 : void debug_candidates (int);
191 :
192 : /* Dominators array: dom[i] contains the sbitmap of dominators of
193 : bb i in the region. */
194 : static sbitmap *dom;
195 :
196 : /* bb 0 is the only region entry. */
197 : #define IS_RGN_ENTRY(bb) (!bb)
198 :
199 : /* Is bb_src dominated by bb_trg. */
200 : #define IS_DOMINATED(bb_src, bb_trg) \
201 : ( bitmap_bit_p (dom[bb_src], bb_trg) )
202 :
203 : /* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is
204 : the probability of bb i relative to the region entry. */
205 : static int *prob;
206 :
207 : /* Bit-set of edges, where bit i stands for edge i. */
208 : typedef sbitmap edgeset;
209 :
210 : /* Number of edges in the region. */
211 : static int rgn_nr_edges;
212 :
213 : /* Array of size rgn_nr_edges. */
214 : static edge *rgn_edges;
215 :
216 : /* Mapping from each edge in the graph to its number in the rgn. */
217 : #define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
218 : #define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
219 :
220 : /* The split edges of a source bb is different for each target
221 : bb. In order to compute this efficiently, the 'potential-split edges'
222 : are computed for each bb prior to scheduling a region. This is actually
223 : the split edges of each bb relative to the region entry.
224 :
225 : pot_split[bb] is the set of potential split edges of bb. */
226 : static edgeset *pot_split;
227 :
228 : /* For every bb, a set of its ancestor edges. */
229 : static edgeset *ancestor_edges;
230 :
231 : #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
232 :
233 : /* Speculative scheduling functions. */
234 : static bool check_live_1 (int, rtx);
235 : static void update_live_1 (int, rtx);
236 : static bool is_pfree (rtx, int, int);
237 : static bool find_conditional_protection (rtx_insn *, int);
238 : static bool is_conditionally_protected (rtx, int, int);
239 : static bool is_prisky (rtx, int, int);
240 : static bool is_exception_free (rtx_insn *, int, int);
241 :
242 : static bool sets_likely_spilled (rtx);
243 : static void sets_likely_spilled_1 (rtx, const_rtx, void *);
244 : static void add_branch_dependences (rtx_insn *, rtx_insn *);
245 : static void compute_block_dependences (int);
246 :
247 : static void schedule_region (int);
248 : static void concat_insn_mem_list (rtx_insn_list *, rtx_expr_list *,
249 : rtx_insn_list **, rtx_expr_list **);
250 : static void propagate_deps (int, class deps_desc *);
251 : static void free_pending_lists (void);
252 :
253 : /* Functions for construction of the control flow graph. */
254 :
/* Return true if control flow graph should not be constructed,
   false otherwise.

   We decide not to build the control flow graph if there is possibly more
   than one entry to the function, if computed branches exist, if we
   have nonlocal gotos, or if we have an unreachable loop.  A true return
   makes the caller fall back to single-block regions.  */

static bool
is_cfg_nonregular (void)
{
  basic_block b;
  rtx_insn *insn;

  /* If we have a label that could be the target of a nonlocal goto, then
     the cfg is not well structured.  */
  if (nonlocal_goto_handler_labels)
    return true;

  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return true;

  /* If we have exception handlers, then we consider the cfg not well
     structured.  ?!?  We should be able to handle this now that we
     compute an accurate cfg for EH.  */
  if (current_function_has_exception_handlers ())
    return true;

  /* If we have insns which refer to labels as non-jumped-to operands,
     then we consider the cfg not well structured.  */
  FOR_EACH_BB_FN (b, cfun)
    FOR_BB_INSNS (b, insn)
      {
	rtx note, set, dest;
	rtx_insn *next;

	/* If this function has a computed jump, then we consider the cfg
	   not well structured.  */
	if (JUMP_P (insn) && computed_jump_p (insn))
	  return true;

	if (!INSN_P (insn))
	  continue;

	/* Only insns carrying a REG_LABEL_OPERAND note need the deeper
	   analysis below.  */
	note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
	if (note == NULL_RTX)
	  continue;

	/* For that label not to be seen as a referred-to label, this
	   must be a single-set which is feeding a jump *only*.  This
	   could be a conditional jump with the label split off for
	   machine-specific reasons or a casesi/tablejump.  */
	next = next_nonnote_insn (insn);
	if (next == NULL_RTX
	    || !JUMP_P (next)
	    || (JUMP_LABEL (next) != XEXP (note, 0)
		&& find_reg_note (next, REG_LABEL_TARGET,
				  XEXP (note, 0)) == NULL_RTX)
	    || BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next))
	  return true;

	/* The insn feeding the jump must be a single set ...  */
	set = single_set (insn);
	if (set == NULL_RTX)
	  return true;

	/* ... into a register that dies (or is clobbered) at the jump,
	   i.e. the label value does not escape past the jump.  */
	dest = SET_DEST (set);
	if (!REG_P (dest) || !dead_or_set_p (next, dest))
	  return true;
      }

  /* Unreachable loops with more than one basic block are detected
     during the DFS traversal in find_rgns.

     Unreachable loops with a single block are detected here.  This
     test is redundant with the one in find_rgns, but it's much
     cheaper to go ahead and catch the trivial case here.  */
  FOR_EACH_BB_FN (b, cfun)
    {
      /* A block with no predecessors, or whose only predecessor is
	 itself (a trivial self-loop), cannot be reached normally.  */
      if (EDGE_COUNT (b->preds) == 0
	  || (single_pred_p (b)
	      && single_pred (b) == b))
	return true;
    }

  /* All the tests passed.  Consider the cfg well structured.  */
  return false;
}
342 :
343 : /* Extract list of edges from a bitmap containing EDGE_TO_BIT bits. */
344 :
345 : static void
346 51 : extract_edgelst (sbitmap set, edgelst *el)
347 : {
348 51 : unsigned int i = 0;
349 51 : sbitmap_iterator sbi;
350 :
351 : /* edgelst table space is reused in each call to extract_edgelst. */
352 51 : edgelst_last = 0;
353 :
354 51 : el->first_member = &edgelst_table[edgelst_last];
355 51 : el->nr_members = 0;
356 :
357 : /* Iterate over each word in the bitset. */
358 174 : EXECUTE_IF_SET_IN_BITMAP (set, 0, i, sbi)
359 : {
360 72 : edgelst_table[edgelst_last++] = rgn_edges[i];
361 72 : el->nr_members++;
362 : }
363 51 : }
364 :
365 : /* Functions for the construction of regions. */
366 :
367 : /* Print the regions, for debugging purposes. Callable from debugger. */
368 :
369 : DEBUG_FUNCTION void
370 0 : debug_regions (void)
371 : {
372 0 : int rgn, bb;
373 :
374 0 : fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
375 0 : for (rgn = 0; rgn < nr_regions; rgn++)
376 : {
377 0 : fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
378 0 : rgn_table[rgn].rgn_nr_blocks);
379 0 : fprintf (sched_dump, ";;\tbb/block: ");
380 :
381 : /* We don't have ebb_head initialized yet, so we can't use
382 : BB_TO_BLOCK (). */
383 0 : current_blocks = RGN_BLOCKS (rgn);
384 :
385 0 : for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
386 0 : fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);
387 :
388 0 : fprintf (sched_dump, "\n\n");
389 : }
390 0 : }
391 :
392 : /* Print the region's basic blocks. */
393 :
394 : DEBUG_FUNCTION void
395 0 : debug_region (int rgn)
396 : {
397 0 : int bb;
398 :
399 0 : fprintf (stderr, "\n;; ------------ REGION %d ----------\n\n", rgn);
400 0 : fprintf (stderr, ";;\trgn %d nr_blocks %d:\n", rgn,
401 0 : rgn_table[rgn].rgn_nr_blocks);
402 0 : fprintf (stderr, ";;\tbb/block: ");
403 :
404 : /* We don't have ebb_head initialized yet, so we can't use
405 : BB_TO_BLOCK (). */
406 0 : current_blocks = RGN_BLOCKS (rgn);
407 :
408 0 : for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
409 0 : fprintf (stderr, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);
410 :
411 0 : fprintf (stderr, "\n\n");
412 :
413 0 : for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
414 : {
415 0 : dump_bb (stderr,
416 0 : BASIC_BLOCK_FOR_FN (cfun, rgn_bb_table[current_blocks + bb]),
417 : 0, TDF_SLIM | TDF_BLOCKS);
418 0 : fprintf (stderr, "\n");
419 : }
420 :
421 0 : fprintf (stderr, "\n");
422 :
423 0 : }
424 :
425 : /* True when a bb with index BB_INDEX contained in region RGN. */
426 : static bool
427 0 : bb_in_region_p (int bb_index, int rgn)
428 : {
429 0 : int i;
430 :
431 0 : for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
432 0 : if (rgn_bb_table[current_blocks + i] == bb_index)
433 : return true;
434 :
435 : return false;
436 : }
437 :
438 : /* Dump region RGN to file F using dot syntax. */
439 : void
440 0 : dump_region_dot (FILE *f, int rgn)
441 : {
442 0 : int i;
443 :
444 0 : fprintf (f, "digraph Region_%d {\n", rgn);
445 :
446 : /* We don't have ebb_head initialized yet, so we can't use
447 : BB_TO_BLOCK (). */
448 0 : current_blocks = RGN_BLOCKS (rgn);
449 :
450 0 : for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
451 : {
452 0 : edge e;
453 0 : edge_iterator ei;
454 0 : int src_bb_num = rgn_bb_table[current_blocks + i];
455 0 : basic_block bb = BASIC_BLOCK_FOR_FN (cfun, src_bb_num);
456 :
457 0 : FOR_EACH_EDGE (e, ei, bb->succs)
458 0 : if (bb_in_region_p (e->dest->index, rgn))
459 0 : fprintf (f, "\t%d -> %d\n", src_bb_num, e->dest->index);
460 : }
461 0 : fprintf (f, "}\n");
462 0 : }
463 :
464 : /* The same, but first open a file specified by FNAME. */
465 : void
466 0 : dump_region_dot_file (const char *fname, int rgn)
467 : {
468 0 : FILE *f = fopen (fname, "wt");
469 0 : dump_region_dot (f, rgn);
470 0 : fclose (f);
471 0 : }
472 :
/* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.

   If EBBS_P, instead build one region per extended basic block: each
   region is grown along likely fall-through edges until a label, a
   missing fall-through edge, or an unlikely edge is reached.  */

static void
find_single_block_region (bool ebbs_p)
{
  basic_block bb, ebb_start;
  int i = 0;

  nr_regions = 0;

  if (ebbs_p) {
    /* Edges at or below this probability (REG_BR_PROB_BASE scale)
       terminate the current extended basic block.  */
    int probability_cutoff;
    if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
      probability_cutoff = param_tracer_min_branch_probability_feedback;
    else
      probability_cutoff = param_tracer_min_branch_probability;
    probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;

    FOR_EACH_BB_FN (ebb_start, cfun)
      {
	RGN_NR_BLOCKS (nr_regions) = 0;
	RGN_BLOCKS (nr_regions) = i;
	RGN_DONT_CALC_DEPS (nr_regions) = 0;
	RGN_HAS_REAL_EBB (nr_regions) = 0;

	/* Extend the region along fall-through edges starting at
	   EBB_START; I indexes the next free RGN_BB_TABLE slot.  */
	for (bb = ebb_start; ; bb = bb->next_bb)
	  {
	    edge e;

	    rgn_bb_table[i] = bb->index;
	    RGN_NR_BLOCKS (nr_regions)++;
	    CONTAINING_RGN (bb->index) = nr_regions;
	    BLOCK_TO_BB (bb->index) = i - RGN_BLOCKS (nr_regions);
	    i++;

	    /* Stop at the end of the function or at a label (a label
	       means the next block has other ways to be entered).  */
	    if (bb->next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
		|| LABEL_P (BB_HEAD (bb->next_bb)))
	      break;

	    /* Stop when there is no fall-through edge or it is too
	       improbable to be worth extending over.  */
	    e = find_fallthru_edge (bb->succs);
	    if (! e)
	      break;
	    if (e->probability.initialized_p ()
		&& e->probability.to_reg_br_prob_base () <= probability_cutoff)
	      break;
	  }

	/* Resume the outer walk after the last block consumed.  */
	ebb_start = bb;
	nr_regions++;
      }
  }
  else
    /* One region per basic block; region number, table index and
       block count all coincide.  */
    FOR_EACH_BB_FN (bb, cfun)
      {
	rgn_bb_table[nr_regions] = bb->index;
	RGN_NR_BLOCKS (nr_regions) = 1;
	RGN_BLOCKS (nr_regions) = nr_regions;
	RGN_DONT_CALC_DEPS (nr_regions) = 0;
	RGN_HAS_REAL_EBB (nr_regions) = 0;

	CONTAINING_RGN (bb->index) = nr_regions;
	BLOCK_TO_BB (bb->index) = 0;
	nr_regions++;
      }
}
540 :
541 : /* Estimate number of the insns in the BB. */
542 : static int
543 84 : rgn_estimate_number_of_insns (basic_block bb)
544 : {
545 84 : int count;
546 :
547 84 : count = INSN_LUID (BB_END (bb)) - INSN_LUID (BB_HEAD (bb));
548 :
549 84 : if (MAY_HAVE_DEBUG_INSNS)
550 : {
551 : rtx_insn *insn;
552 :
553 599 : FOR_BB_INSNS (bb, insn)
554 583 : if (DEBUG_INSN_P (insn))
555 231 : count--;
556 : }
557 :
558 84 : return count;
559 : }
560 :
561 : /* Update number of blocks and the estimate for number of insns
562 : in the region. Return true if the region is "too large" for interblock
563 : scheduling (compile time considerations). */
564 :
565 : static bool
566 109 : too_large (int block, int *num_bbs, int *num_insns)
567 : {
568 109 : (*num_bbs)++;
569 218 : (*num_insns) += (common_sched_info->estimate_number_of_insns
570 109 : (BASIC_BLOCK_FOR_FN (cfun, block)));
571 :
572 109 : return ((*num_bbs > param_max_sched_region_blocks)
573 109 : || (*num_insns > param_max_sched_region_insns));
574 : }
575 :
/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the most inner
   loop containing blk.

   DFS numbers (dfs_nr) order headers by discovery: the header found later
   (larger dfs_nr) is the more deeply nested one, so the other header loses
   its "inner loop" status in INNER.  On a tie nothing changes.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr) \
{ \
  if (max_hdr[blk] == -1) \
    max_hdr[blk] = hdr; \
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
    bitmap_clear_bit (inner, hdr); \
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
    { \
      bitmap_clear_bit (inner,max_hdr[blk]); \
      max_hdr[blk] = hdr; \
    } \
}
591 :
592 : /* Find regions for interblock scheduling.
593 :
594 : A region for scheduling can be:
595 :
596 : * A loop-free procedure, or
597 :
598 : * A reducible inner loop, or
599 :
600 : * A basic block not contained in any other region.
601 :
602 : ?!? In theory we could build other regions based on extended basic
603 : blocks or reverse extended basic blocks. Is it worth the trouble?
604 :
605 : Loop blocks that form a region are put into the region's block list
606 : in topological order.
607 :
608 : This procedure stores its results into the following global (ick) variables
609 :
610 : * rgn_nr
611 : * rgn_table
612 : * rgn_bb_table
613 : * block_to_bb
614 : * containing region
615 :
616 : We use dominator relationships to avoid making regions out of non-reducible
617 : loops.
618 :
619 : This procedure needs to be converted to work on pred/succ lists instead
620 : of edge tables. That would simplify it somewhat. */
621 :
622 : static void
623 125 : haifa_find_rgns (void)
624 : {
625 125 : int *max_hdr, *dfs_nr, *degree;
626 125 : char no_loops = 1;
627 125 : int node, child, loop_head, i, head, tail;
628 125 : int count = 0, sp, idx = 0;
629 125 : edge_iterator current_edge;
630 125 : edge_iterator *stack;
631 125 : int num_bbs, num_insns;
632 125 : bool unreachable;
633 125 : bool too_large_failure;
634 125 : basic_block bb;
635 :
636 : /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
637 : and a mapping from block to its loop header (if the block is contained
638 : in a loop, else -1).
639 :
640 : Store results in HEADER, INNER, and MAX_HDR respectively, these will
641 : be used as inputs to the second traversal.
642 :
643 : STACK, SP and DFS_NR are only used during the first traversal. */
644 :
645 : /* Allocate and initialize variables for the first traversal. */
646 125 : max_hdr = XNEWVEC (int, last_basic_block_for_fn (cfun));
647 125 : dfs_nr = XCNEWVEC (int, last_basic_block_for_fn (cfun));
648 125 : stack = XNEWVEC (edge_iterator, n_edges_for_fn (cfun));
649 :
650 : /* Note if a block is a natural inner loop header. */
651 125 : auto_sbitmap inner (last_basic_block_for_fn (cfun));
652 125 : bitmap_ones (inner);
653 :
654 : /* Note if a block is a natural loop header. */
655 125 : auto_sbitmap header (last_basic_block_for_fn (cfun));
656 125 : bitmap_clear (header);
657 :
658 : /* Note if a block is in the block queue. */
659 125 : auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
660 125 : bitmap_clear (in_queue);
661 :
662 : /* Note if a block is in the block queue. */
663 125 : auto_sbitmap in_stack (last_basic_block_for_fn (cfun));
664 125 : bitmap_clear (in_stack);
665 :
666 1490 : for (i = 0; i < last_basic_block_for_fn (cfun); i++)
667 1240 : max_hdr[i] = -1;
668 :
669 : #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
670 : #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
671 :
672 : /* DFS traversal to find inner loops in the cfg. */
673 :
674 125 : current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))->succs);
675 125 : sp = -1;
676 :
677 1874 : while (1)
678 : {
679 1874 : if (EDGE_PASSED (current_edge))
680 : {
681 : /* We have reached a leaf node or a node that was already
682 : processed. Pop edges off the stack until we find
683 : an edge that has not yet been processed. */
684 1354 : while (sp >= 0 && EDGE_PASSED (current_edge))
685 : {
686 : /* Pop entry off the stack. */
687 883 : current_edge = stack[sp--];
688 883 : node = ei_edge (current_edge)->src->index;
689 883 : gcc_assert (node != ENTRY_BLOCK);
690 883 : child = ei_edge (current_edge)->dest->index;
691 883 : gcc_assert (child != EXIT_BLOCK);
692 883 : bitmap_clear_bit (in_stack, child);
693 883 : if (max_hdr[child] >= 0 && bitmap_bit_p (in_stack, max_hdr[child]))
694 200 : UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
695 883 : ei_next (¤t_edge);
696 : }
697 :
698 : /* See if have finished the DFS tree traversal. */
699 471 : if (sp < 0 && EDGE_PASSED (current_edge))
700 : break;
701 :
702 : /* Nope, continue the traversal with the popped node. */
703 346 : continue;
704 : }
705 :
706 : /* Process a node. */
707 1403 : node = ei_edge (current_edge)->src->index;
708 1403 : gcc_assert (node != ENTRY_BLOCK);
709 1403 : bitmap_set_bit (in_stack, node);
710 1403 : dfs_nr[node] = ++count;
711 :
712 : /* We don't traverse to the exit block. */
713 1403 : child = ei_edge (current_edge)->dest->index;
714 1403 : if (child == EXIT_BLOCK)
715 : {
716 121 : SET_EDGE_PASSED (current_edge);
717 121 : ei_next (¤t_edge);
718 121 : continue;
719 : }
720 :
721 : /* If the successor is in the stack, then we've found a loop.
722 : Mark the loop, if it is not a natural loop, then it will
723 : be rejected during the second traversal. */
724 1282 : if (bitmap_bit_p (in_stack, child))
725 : {
726 132 : no_loops = 0;
727 132 : bitmap_set_bit (header, child);
728 132 : UPDATE_LOOP_RELATIONS (node, child);
729 132 : SET_EDGE_PASSED (current_edge);
730 132 : ei_next (¤t_edge);
731 132 : continue;
732 : }
733 :
734 : /* If the child was already visited, then there is no need to visit
735 : it again. Just update the loop relationships and restart
736 : with a new edge. */
737 1150 : if (dfs_nr[child])
738 : {
739 267 : if (max_hdr[child] >= 0 && bitmap_bit_p (in_stack, max_hdr[child]))
740 44 : UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
741 267 : SET_EDGE_PASSED (current_edge);
742 267 : ei_next (¤t_edge);
743 267 : continue;
744 : }
745 :
746 : /* Push an entry on the stack and continue DFS traversal. */
747 883 : stack[++sp] = current_edge;
748 883 : SET_EDGE_PASSED (current_edge);
749 883 : current_edge = ei_start (ei_edge (current_edge)->dest->succs);
750 : }
751 :
752 : /* Reset ->aux field used by EDGE_PASSED. */
753 1363 : FOR_ALL_BB_FN (bb, cfun)
754 : {
755 1238 : edge_iterator ei;
756 1238 : edge e;
757 2766 : FOR_EACH_EDGE (e, ei, bb->succs)
758 1528 : e->aux = NULL;
759 : }
760 :
761 :
762 : /* Another check for unreachable blocks. The earlier test in
763 : is_cfg_nonregular only finds unreachable blocks that do not
764 : form a loop.
765 :
766 : The DFS traversal will mark every block that is reachable from
767 : the entry node by placing a nonzero value in dfs_nr. Thus if
768 : dfs_nr is zero for any block, then it must be unreachable. */
769 125 : unreachable = false;
770 1039 : FOR_EACH_BB_FN (bb, cfun)
771 948 : if (dfs_nr[bb->index] == 0)
772 : {
773 : unreachable = true;
774 : break;
775 : }
776 :
777 : /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
778 : to hold degree counts. */
779 125 : degree = dfs_nr;
780 :
781 1113 : FOR_EACH_BB_FN (bb, cfun)
782 1976 : degree[bb->index] = EDGE_COUNT (bb->preds);
783 :
784 : /* Do not perform region scheduling if there are any unreachable
785 : blocks. */
786 125 : if (!unreachable)
787 : {
788 91 : int *queue, *degree1 = NULL;
789 : /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put
790 : there basic blocks, which are forced to be region heads.
791 : This is done to try to assemble few smaller regions
792 : from a too_large region. */
793 91 : sbitmap extended_rgn_header = NULL;
794 91 : bool extend_regions_p;
795 :
796 91 : if (no_loops)
797 20 : bitmap_set_bit (header, 0);
798 :
799 : /* Second traversal:find reducible inner loops and topologically sort
800 : block of each region. */
801 :
802 91 : queue = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
803 :
804 91 : extend_regions_p = param_max_sched_extend_regions_iters > 0;
805 91 : if (extend_regions_p)
806 : {
807 2 : degree1 = XNEWVEC (int, last_basic_block_for_fn (cfun));
808 2 : extended_rgn_header =
809 2 : sbitmap_alloc (last_basic_block_for_fn (cfun));
810 2 : bitmap_clear (extended_rgn_header);
811 : }
812 :
813 : /* Find blocks which are inner loop headers. We still have non-reducible
814 : loops to consider at this point. */
815 918 : FOR_EACH_BB_FN (bb, cfun)
816 : {
817 827 : if (bitmap_bit_p (header, bb->index) && bitmap_bit_p (inner, bb->index))
818 : {
819 84 : edge e;
820 84 : edge_iterator ei;
821 84 : basic_block jbb;
822 :
823 : /* Now check that the loop is reducible. We do this separate
824 : from finding inner loops so that we do not find a reducible
825 : loop which contains an inner non-reducible loop.
826 :
827 : A simple way to find reducible/natural loops is to verify
828 : that each block in the loop is dominated by the loop
829 : header.
830 :
831 : If there exists a block that is not dominated by the loop
832 : header, then the block is reachable from outside the loop
833 : and thus the loop is not a natural loop. */
834 882 : FOR_EACH_BB_FN (jbb, cfun)
835 : {
836 : /* First identify blocks in the loop, except for the loop
837 : entry block. */
838 806 : if (bb->index == max_hdr[jbb->index] && bb != jbb)
839 : {
840 : /* Now verify that the block is dominated by the loop
841 : header. */
842 97 : if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
843 : break;
844 : }
845 : }
846 :
847 : /* If we exited the loop early, then I is the header of
848 : a non-reducible loop and we should quit processing it
849 : now. */
850 84 : if (jbb != EXIT_BLOCK_PTR_FOR_FN (cfun))
851 8 : continue;
852 :
853 : /* I is a header of an inner loop, or block 0 in a subroutine
854 : with no loops at all. */
855 76 : head = tail = -1;
856 76 : too_large_failure = false;
857 76 : loop_head = max_hdr[bb->index];
858 :
859 76 : if (extend_regions_p)
860 : /* We save degree in case when we meet a too_large region
861 : and cancel it. We need a correct degree later when
862 : calling extend_rgns. */
863 1 : memcpy (degree1, degree,
864 1 : last_basic_block_for_fn (cfun) * sizeof (int));
865 :
866 : /* Decrease degree of all I's successors for topological
867 : ordering. */
868 207 : FOR_EACH_EDGE (e, ei, bb->succs)
869 131 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
870 130 : --degree[e->dest->index];
871 :
872 : /* Estimate # insns, and count # blocks in the region. */
873 76 : num_bbs = 1;
874 76 : num_insns = common_sched_info->estimate_number_of_insns (bb);
875 :
876 : /* Find all loop latches (blocks with back edges to the loop
877 : header) or all the leaf blocks in the cfg has no loops.
878 :
879 : Place those blocks into the queue. */
880 76 : if (no_loops)
881 : {
882 0 : FOR_EACH_BB_FN (jbb, cfun)
883 : /* Leaf nodes have only a single successor which must
884 : be EXIT_BLOCK. */
885 0 : if (single_succ_p (jbb)
886 0 : && single_succ (jbb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
887 : {
888 0 : queue[++tail] = jbb->index;
889 0 : bitmap_set_bit (in_queue, jbb->index);
890 :
891 0 : if (too_large (jbb->index, &num_bbs, &num_insns))
892 : {
893 : too_large_failure = true;
894 : break;
895 : }
896 : }
897 : }
898 : else
899 : {
900 76 : edge e;
901 :
902 244 : FOR_EACH_EDGE (e, ei, bb->preds)
903 : {
904 169 : if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
905 0 : continue;
906 :
907 169 : node = e->src->index;
908 :
909 169 : if (max_hdr[node] == loop_head && node != bb->index)
910 : {
911 : /* This is a loop latch. */
912 33 : queue[++tail] = node;
913 33 : bitmap_set_bit (in_queue, node);
914 :
915 33 : if (too_large (node, &num_bbs, &num_insns))
916 : {
917 : too_large_failure = true;
918 : break;
919 : }
920 : }
921 : }
922 : }
923 :
924 : /* Now add all the blocks in the loop to the queue.
925 :
926 : We know the loop is a natural loop; however the algorithm
927 : above will not always mark certain blocks as being in the
928 : loop. Consider:
929 : node children
930 : a b,c
931 : b c
932 : c a,d
933 : d b
934 :
935 : The algorithm in the DFS traversal may not mark B & D as part
936 : of the loop (i.e. they will not have max_hdr set to A).
937 :
938 : We know they cannot be loop latches (else they would have
939 : had max_hdr set since they'd have a backedge to a dominator
940 : block). So we don't need them on the initial queue.
941 :
942 : We know they are part of the loop because they are dominated
943 : by the loop header and can be reached by a backwards walk of
944 : the edges starting with nodes on the initial queue.
945 :
946 : It is safe and desirable to include those nodes in the
947 : loop/scheduling region. To do so we would need to decrease
948 : the degree of a node if it is the target of a backedge
949 : within the loop itself as the node is placed in the queue.
950 :
951 : We do not do this because I'm not sure that the actual
952 : scheduling code will properly handle this case. ?!? */
953 :
954 160 : while (head < tail && !too_large_failure)
955 : {
956 84 : edge e;
957 84 : child = queue[++head];
958 :
959 188 : FOR_EACH_EDGE (e, ei,
960 : BASIC_BLOCK_FOR_FN (cfun, child)->preds)
961 : {
962 105 : node = e->src->index;
963 :
964 : /* See discussion above about nodes not marked as in
965 : this loop during the initial DFS traversal. */
966 105 : if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
967 105 : || max_hdr[node] != loop_head)
968 : {
969 : tail = -1;
970 : break;
971 : }
972 105 : else if (!bitmap_bit_p (in_queue, node) && node != bb->index)
973 : {
974 54 : queue[++tail] = node;
975 54 : bitmap_set_bit (in_queue, node);
976 :
977 54 : if (too_large (node, &num_bbs, &num_insns))
978 : {
979 : too_large_failure = true;
980 : break;
981 : }
982 : }
983 : }
984 : }
985 :
986 76 : if (tail >= 0 && !too_large_failure)
987 : {
988 : /* Place the loop header into list of region blocks. */
989 28 : degree[bb->index] = -1;
990 28 : rgn_bb_table[idx] = bb->index;
991 28 : RGN_NR_BLOCKS (nr_regions) = num_bbs;
992 28 : RGN_BLOCKS (nr_regions) = idx++;
993 28 : RGN_DONT_CALC_DEPS (nr_regions) = 0;
994 28 : RGN_HAS_REAL_EBB (nr_regions) = 0;
995 28 : CONTAINING_RGN (bb->index) = nr_regions;
996 28 : BLOCK_TO_BB (bb->index) = count = 0;
997 :
998 : /* Remove blocks from queue[] when their in degree
999 : becomes zero. Repeat until no blocks are left on the
1000 : list. This produces a topological list of blocks in
1001 : the region. */
1002 152 : while (tail >= 0)
1003 : {
1004 124 : if (head < 0)
1005 0 : head = tail;
1006 124 : child = queue[head];
1007 124 : if (degree[child] == 0)
1008 : {
1009 76 : edge e;
1010 :
1011 76 : degree[child] = -1;
1012 76 : rgn_bb_table[idx++] = child;
1013 76 : BLOCK_TO_BB (child) = ++count;
1014 76 : CONTAINING_RGN (child) = nr_regions;
1015 76 : queue[head] = queue[tail--];
1016 :
1017 204 : FOR_EACH_EDGE (e, ei,
1018 : BASIC_BLOCK_FOR_FN (cfun,
1019 : child)->succs)
1020 128 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1021 128 : --degree[e->dest->index];
1022 : }
1023 : else
1024 48 : --head;
1025 : }
1026 28 : ++nr_regions;
1027 : }
1028 48 : else if (extend_regions_p)
1029 : {
1030 : /* Restore DEGREE. */
1031 0 : int *t = degree;
1032 :
1033 0 : degree = degree1;
1034 0 : degree1 = t;
1035 :
1036 : /* And force successors of BB to be region heads.
1037 : This may provide several smaller regions instead
1038 : of one too_large region. */
1039 0 : FOR_EACH_EDGE (e, ei, bb->succs)
1040 0 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1041 0 : bitmap_set_bit (extended_rgn_header, e->dest->index);
1042 : }
1043 : }
1044 : }
1045 91 : free (queue);
1046 :
1047 91 : if (extend_regions_p)
1048 : {
1049 2 : free (degree1);
1050 :
1051 2 : bitmap_ior (header, header, extended_rgn_header);
1052 2 : sbitmap_free (extended_rgn_header);
1053 :
1054 2 : extend_rgns (degree, &idx, header, max_hdr);
1055 : }
1056 : }
1057 :
1058 : /* Any block that did not end up in a region is placed into a region
1059 : by itself. */
1060 1113 : FOR_EACH_BB_FN (bb, cfun)
1061 988 : if (degree[bb->index] >= 0)
1062 : {
1063 862 : rgn_bb_table[idx] = bb->index;
1064 862 : RGN_NR_BLOCKS (nr_regions) = 1;
1065 862 : RGN_BLOCKS (nr_regions) = idx++;
1066 862 : RGN_DONT_CALC_DEPS (nr_regions) = 0;
1067 862 : RGN_HAS_REAL_EBB (nr_regions) = 0;
1068 862 : CONTAINING_RGN (bb->index) = nr_regions++;
1069 862 : BLOCK_TO_BB (bb->index) = 0;
1070 : }
1071 :
1072 125 : free (max_hdr);
1073 125 : free (degree);
1074 125 : free (stack);
1075 125 : }
1076 :
1077 :
1078 : /* Wrapper function.
1079 : If FLAG_SEL_SCHED_PIPELINING is set, then use custom function to form
1080 : regions. Otherwise just call find_rgns_haifa. */
1081 : static void
1082 168 : find_rgns (void)
1083 : {
1084 168 : if (sel_sched_p () && flag_sel_sched_pipelining)
1085 43 : sel_find_rgns ();
1086 : else
1087 125 : haifa_find_rgns ();
1088 168 : }
1089 :
1090 : static int gather_region_statistics (int **);
1091 : static void print_region_statistics (int *, int, int *, int);
1092 :
1093 : /* Calculate the histogram that shows the number of regions having the
1094 : given number of basic blocks, and store it in the RSP array. Return
1095 : the size of this array. */
1096 : static int
1097 0 : gather_region_statistics (int **rsp)
1098 : {
1099 0 : int i, *a = 0, a_sz = 0;
1100 :
1101 : /* a[i] is the number of regions that have (i + 1) basic blocks. */
1102 0 : for (i = 0; i < nr_regions; i++)
1103 : {
1104 0 : int nr_blocks = RGN_NR_BLOCKS (i);
1105 :
1106 0 : gcc_assert (nr_blocks >= 1);
1107 :
1108 0 : if (nr_blocks > a_sz)
1109 : {
1110 0 : a = XRESIZEVEC (int, a, nr_blocks);
1111 0 : do
1112 0 : a[a_sz++] = 0;
1113 0 : while (a_sz != nr_blocks);
1114 : }
1115 :
1116 0 : a[nr_blocks - 1]++;
1117 : }
1118 :
1119 0 : *rsp = a;
1120 0 : return a_sz;
1121 : }
1122 :
1123 : /* Print regions statistics. S1 and S2 denote the data before and after
1124 : calling extend_rgns, respectively. */
1125 : static void
1126 0 : print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz)
1127 : {
1128 0 : int i;
1129 :
1130 : /* We iterate until s2_sz because extend_rgns does not decrease
1131 : the maximal region size. */
1132 0 : for (i = 1; i < s2_sz; i++)
1133 : {
1134 0 : int n1, n2;
1135 :
1136 0 : n2 = s2[i];
1137 :
1138 0 : if (n2 == 0)
1139 0 : continue;
1140 :
1141 0 : if (i >= s1_sz)
1142 : n1 = 0;
1143 : else
1144 0 : n1 = s1[i];
1145 :
1146 0 : fprintf (sched_dump, ";; Region extension statistics: size %d: " \
1147 : "was %d + %d more\n", i + 1, n1, n2 - n1);
1148 : }
1149 0 : }
1150 :
1151 : /* Extend regions.
1152 : DEGREE - Array of incoming edge count, considering only
1153 : the edges, that don't have their sources in formed regions yet.
1154 : IDXP - pointer to the next available index in rgn_bb_table.
1155 : HEADER - set of all region heads.
1156 : LOOP_HDR - mapping from block to the containing loop
1157 : (two blocks can reside within one region if they have
1158 : the same loop header). */
void
extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr)
{
  int *order, i, idx = *idxp, iter = 0, max_iter, *max_hdr;
  int nblocks = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;
  bool rescan = false;

  max_iter = param_max_sched_extend_regions_iters;

  max_hdr = XNEWVEC (int, last_basic_block_for_fn (cfun));

  /* Walking ORDER from nblocks - 1 down to 0 visits the blocks in
     reverse post order, i.e. predecessors before successors on any
     acyclic path.  */
  order = XNEWVEC (int, last_basic_block_for_fn (cfun));
  post_order_compute (order, false, false);

  /* Seed max_hdr: every block not yet claimed by find_rgns (degree
     still >= 0) starts out as its own region head; claimed blocks are
     marked -1 and will never be touched again.  */
  for (i = nblocks - 1; i >= 0; i--)
    {
      int bbn = order[i];
      if (degree[bbn] >= 0)
	{
	  max_hdr[bbn] = bbn;
	  rescan = true;
	}
      else
	/* This block already was processed in find_rgns.  */
	max_hdr[bbn] = -1;
    }

  /* The idea is to topologically walk through CFG in top-down order.
     During the traversal, if all the predecessors of a node are
     marked to be in the same region (they all have the same max_hdr),
     then current node is also marked to be a part of that region.
     Otherwise the node starts its own region.
     CFG should be traversed until no further changes are made.  On each
     iteration the set of the region heads is extended (the set of those
     blocks that have max_hdr[bbi] == bbi).  This set is upper bounded by the
     set of all basic blocks, thus the algorithm is guaranteed to
     terminate.  */

  while (rescan && iter < max_iter)
    {
      rescan = false;

      for (i = nblocks - 1; i >= 0; i--)
	{
	  edge e;
	  edge_iterator ei;
	  int bbn = order[i];

	  /* Region heads (bits set in HEADER) and blocks consumed by
	     find_rgns keep their current max_hdr.  */
	  if (max_hdr[bbn] != -1 && !bitmap_bit_p (header, bbn))
	    {
	      int hdr = -1;

	      FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, bbn)->preds)
		{
		  int predn = e->src->index;

		  if (predn != ENTRY_BLOCK
		      /* If pred wasn't processed in find_rgns.  */
		      && max_hdr[predn] != -1
		      /* And pred and bb reside in the same loop.
			 (Or out of any loop).  */
		      && loop_hdr[bbn] == loop_hdr[predn])
		    {
		      if (hdr == -1)
			/* Then bb extends the containing region of pred.  */
			hdr = max_hdr[predn];
		      else if (hdr != max_hdr[predn])
			/* Too bad, there are at least two predecessors
			   that reside in different regions.  Thus, BB should
			   begin its own region.  */
			{
			  hdr = bbn;
			  break;
			}
		    }
		  else
		    /* BB starts its own region.  */
		    {
		      hdr = bbn;
		      break;
		    }
		}

	      if (hdr == bbn)
		{
		  /* If BB start its own region,
		     update set of headers with BB.  */
		  bitmap_set_bit (header, bbn);
		  rescan = true;
		}
	      else
		gcc_assert (hdr != -1);

	      max_hdr[bbn] = hdr;
	    }
	}

      iter++;
    }

  /* Statistics were gathered on the SPEC2000 package of tests with
     mainline weekly snapshot gcc-4.1-20051015 on ia64.

     Statistics for SPECint:
     1 iteration : 1751 cases (38.7%)
     2 iterations: 2770 cases (61.3%)
     Blocks wrapped in regions by find_rgns without extension: 18295 blocks
     Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks
     (We don't count single block regions here).

     Statistics for SPECfp:
     1 iteration : 621 cases (35.9%)
     2 iterations: 1110 cases (64.1%)
     Blocks wrapped in regions by find_rgns without extension: 6476 blocks
     Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks
     (We don't count single block regions here).

     By default we do at most 2 iterations.
     This can be overridden with max-sched-extend-regions-iters parameter:
     0 - disable region extension,
     N > 0 - do at most N iterations.  */

  if (sched_verbose && iter != 0)
    fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter,
	     rescan ? "... failed" : "");

  /* RESCAN still set here means the fixpoint was not reached within
     MAX_ITER iterations; in that case no regions are assembled and
     only the work arrays are freed below.  */
  if (!rescan && iter != 0)
    {
      int *s1 = NULL, s1_sz = 0;

      /* Save the old statistics for later printout.  */
      if (sched_verbose >= 6)
	s1_sz = gather_region_statistics (&s1);

      /* We have succeeded.  Now assemble the regions.  */
      for (i = nblocks - 1; i >= 0; i--)
	{
	  int bbn = order[i];

	  if (max_hdr[bbn] == bbn)
	    /* BBN is a region head.  */
	    {
	      edge e;
	      edge_iterator ei;
	      int num_bbs = 0, j, num_insns = 0, large;

	      large = too_large (bbn, &num_bbs, &num_insns);

	      degree[bbn] = -1;
	      rgn_bb_table[idx] = bbn;
	      RGN_BLOCKS (nr_regions) = idx++;
	      RGN_DONT_CALC_DEPS (nr_regions) = 0;
	      RGN_HAS_REAL_EBB (nr_regions) = 0;
	      CONTAINING_RGN (bbn) = nr_regions;
	      BLOCK_TO_BB (bbn) = 0;

	      FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, bbn)->succs)
		if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
		  degree[e->dest->index]--;

	      if (!large)
		/* Here we check whether the region is too_large.  */
		for (j = i - 1; j >= 0; j--)
		  {
		    int succn = order[j];
		    if (max_hdr[succn] == bbn)
		      {
			if ((large = too_large (succn, &num_bbs, &num_insns)))
			  break;
		      }
		  }

	      if (large)
		/* If the region is too_large, then wrap every block of
		   the region into single block region.
		   Here we wrap region head only.  Other blocks are
		   processed in the below cycle.  */
		{
		  RGN_NR_BLOCKS (nr_regions) = 1;
		  nr_regions++;
		}

	      num_bbs = 1;

	      for (j = i - 1; j >= 0; j--)
		{
		  int succn = order[j];

		  if (max_hdr[succn] == bbn)
		    /* This cycle iterates over all basic blocks, that
		       are supposed to be in the region with head BBN,
		       and wraps them into that region (or in single
		       block region).  */
		    {
		      gcc_assert (degree[succn] == 0);

		      degree[succn] = -1;
		      rgn_bb_table[idx] = succn;
		      BLOCK_TO_BB (succn) = large ? 0 : num_bbs++;
		      CONTAINING_RGN (succn) = nr_regions;

		      if (large)
			/* Wrap SUCCN into single block region.  */
			{
			  RGN_BLOCKS (nr_regions) = idx;
			  RGN_NR_BLOCKS (nr_regions) = 1;
			  RGN_DONT_CALC_DEPS (nr_regions) = 0;
			  RGN_HAS_REAL_EBB (nr_regions) = 0;
			  nr_regions++;
			}

		      idx++;

		      FOR_EACH_EDGE (e, ei,
				     BASIC_BLOCK_FOR_FN (cfun, succn)->succs)
			if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
			  degree[e->dest->index]--;
		    }
		}

	      if (!large)
		{
		  RGN_NR_BLOCKS (nr_regions) = num_bbs;
		  nr_regions++;
		}
	    }
	}

      if (sched_verbose >= 6)
	{
	  int *s2, s2_sz;

	  /* Get the new statistics and print the comparison with the
	     one before calling this function.  */
	  s2_sz = gather_region_statistics (&s2);
	  print_region_statistics (s1, s1_sz, s2, s2_sz);
	  free (s1);
	  free (s2);
	}
    }

  free (order);
  free (max_hdr);

  *idxp = idx;
}
1405 :
1406 : /* Functions for regions scheduling information. */
1407 :
1408 : /* Compute dominators, probability, and potential-split-edges of bb.
1409 : Assume that these values were already computed for bb's predecessors. */
1410 :
static void
compute_dom_prob_ps (int bb)
{
  edge_iterator in_ei;
  edge in_edge;

  /* We shouldn't have any real ebbs yet.  */
  gcc_assert (ebb_head [bb] == bb + current_blocks);

  /* The region entry dominates everything, is always reached, and has
     no ancestor edges or potential split edges.  */
  if (IS_RGN_ENTRY (bb))
    {
      bitmap_set_bit (dom[bb], 0);
      prob[bb] = REG_BR_PROB_BASE;
      return;
    }

  prob[bb] = 0;

  /* Initialize dom[bb] to '111..1'.  */
  bitmap_ones (dom[bb]);

  /* Accumulate dominators, ancestor edges, potential split edges and
     reaching probability from every predecessor inside the region;
     the caller guarantees those were computed already.  */
  FOR_EACH_EDGE (in_edge, in_ei,
		 BASIC_BLOCK_FOR_FN (cfun, BB_TO_BLOCK (bb))->preds)
    {
      int pred_bb;
      edge out_edge;
      edge_iterator out_ei;

      if (in_edge->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	continue;

      /* BB's dominators are the intersection of its predecessors'
	 dominator sets.  */
      pred_bb = BLOCK_TO_BB (in_edge->src->index);
      bitmap_and (dom[bb], dom[bb], dom[pred_bb]);
      bitmap_ior (ancestor_edges[bb],
		  ancestor_edges[bb], ancestor_edges[pred_bb]);

      bitmap_set_bit (ancestor_edges[bb], EDGE_TO_BIT (in_edge));

      bitmap_ior (pot_split[bb], pot_split[bb], pot_split[pred_bb]);

      /* All out edges of a predecessor are potential split edges.  */
      FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
	bitmap_set_bit (pot_split[bb], EDGE_TO_BIT (out_edge));

      prob[bb] += combine_probabilities
	(prob[pred_bb],
	 in_edge->probability.initialized_p ()
	 ? in_edge->probability.to_reg_br_prob_base ()
	 : 0);
      // The rounding divide in combine_probabilities can result in an extra
      // probability increment propagating along 50-50 edges. Eventually when
      // the edges re-merge, the accumulated probability can go slightly above
      // REG_BR_PROB_BASE.
      if (prob[bb] > REG_BR_PROB_BASE)
	prob[bb] = REG_BR_PROB_BASE;
    }

  /* A block always dominates itself; edges already on every path to
     BB cannot split away from it.  */
  bitmap_set_bit (dom[bb], bb);
  bitmap_and_compl (pot_split[bb], pot_split[bb], ancestor_edges[bb]);

  if (sched_verbose >= 2)
    fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
	     (100 * prob[bb]) / REG_BR_PROB_BASE);
}
1474 :
1475 : /* Functions for target info. */
1476 :
1477 : /* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1478 : Note that bb_trg dominates bb_src. */
1479 :
1480 : static void
1481 51 : split_edges (int bb_src, int bb_trg, edgelst *bl)
1482 : {
1483 51 : auto_sbitmap src (SBITMAP_SIZE (pot_split[bb_src]));
1484 51 : bitmap_copy (src, pot_split[bb_src]);
1485 :
1486 51 : bitmap_and_compl (src, src, pot_split[bb_trg]);
1487 51 : extract_edgelst (src, bl);
1488 51 : }
1489 :
1490 : /* Find the valid candidate-source-blocks for the target block TRG, compute
1491 : their probability, and check if they are speculative or not.
1492 : For speculative sources, compute their update-blocks and split-blocks. */
1493 :
static void
compute_trg_info (int trg)
{
  candidate *sp;
  edgelst el = { NULL, 0 };
  int i, j, k, update_idx;
  basic_block block;
  edge_iterator ei;
  edge e;

  candidate_table = XNEWVEC (candidate, current_nr_blocks);

  bblst_last = 0;
  /* bblst_table holds split blocks and update blocks for each block after
     the current one in the region.  split blocks and update blocks are
     the TO blocks of region edges, so there can be at most rgn_nr_edges
     of them.  */
  bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
  bblst_table = XNEWVEC (basic_block, bblst_size);

  edgelst_last = 0;
  edgelst_table = XNEWVEC (edge, rgn_nr_edges);

  /* Define some of the fields for the target bb as well.  */
  sp = candidate_table + trg;
  sp->is_valid = 1;
  sp->is_speculative = 0;
  sp->src_prob = REG_BR_PROB_BASE;

  auto_sbitmap visited (last_basic_block_for_fn (cfun));

  /* Classify every block after TRG in the region as a motion source
     candidate: valid only if TRG dominates it and the relative
     reaching probability is high enough.  */
  for (i = trg + 1; i < current_nr_blocks; i++)
    {
      sp = candidate_table + i;

      sp->is_valid = IS_DOMINATED (i, trg);
      if (sp->is_valid)
	{
	  int tf = prob[trg], cf = prob[i];

	  /* In CFGs with low probability edges TF can possibly be zero.  */
	  sp->src_prob = (tf ?
			  profile_count::from_gcov_type (cf)
			  .probability_in
			  (profile_count::from_gcov_type (tf))
			  .to_reg_br_prob_base ()
			  : 0);
	  sp->is_valid = (sp->src_prob >= min_spec_prob);
	}

      /* A candidate with split edges is speculative; reject it when
	 speculative scheduling is disabled.  */
      if (sp->is_valid)
	{
	  split_edges (i, trg, &el);
	  sp->is_speculative = (el.nr_members) ? 1 : 0;
	  if (sp->is_speculative && !flag_schedule_speculative)
	    sp->is_valid = 0;
	}

      if (sp->is_valid)
	{
	  /* Compute split blocks and store them in bblst_table.
	     The TO block of every split edge is a split block.  */
	  sp->split_bbs.first_member = &bblst_table[bblst_last];
	  sp->split_bbs.nr_members = el.nr_members;
	  for (j = 0; j < el.nr_members; bblst_last++, j++)
	    bblst_table[bblst_last] = el.first_member[j]->dest;
	  sp->update_bbs.first_member = &bblst_table[bblst_last];

	  /* Compute update blocks and store them in bblst_table.
	     For every split edge, look at the FROM block, and check
	     all out edges.  For each out edge that is not a split edge,
	     add the TO block to the update block list.  This list can end
	     up with a lot of duplicates.  We need to weed them out to avoid
	     overrunning the end of the bblst_table.  */

	  update_idx = 0;
	  bitmap_clear (visited);
	  for (j = 0; j < el.nr_members; j++)
	    {
	      block = el.first_member[j]->src;
	      FOR_EACH_EDGE (e, ei, block->succs)
		{
		  /* VISITED dedups destinations across all split
		     edges' source blocks.  */
		  if (!bitmap_bit_p (visited, e->dest->index))
		    {
		      for (k = 0; k < el.nr_members; k++)
			if (e == el.first_member[k])
			  break;

		      if (k >= el.nr_members)
			{
			  bblst_table[bblst_last++] = e->dest;
			  bitmap_set_bit (visited, e->dest->index);
			  update_idx++;
			}
		    }
		}
	    }
	  sp->update_bbs.nr_members = update_idx;

	  /* Make sure we didn't overrun the end of bblst_table.  */
	  gcc_assert (bblst_last <= bblst_size);
	}
      else
	{
	  sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;

	  sp->is_speculative = 0;
	  sp->src_prob = 0;
	}
    }
}
1605 :
1606 : /* Free the computed target info. */
1607 : static void
1608 48 : free_trg_info (void)
1609 : {
1610 48 : free (candidate_table);
1611 48 : free (bblst_table);
1612 48 : free (edgelst_table);
1613 48 : }
1614 :
1615 : /* Print candidates info, for debugging purposes. Callable from debugger. */
1616 :
1617 : DEBUG_FUNCTION void
1618 0 : debug_candidate (int i)
1619 : {
1620 0 : if (!candidate_table[i].is_valid)
1621 : return;
1622 :
1623 0 : if (candidate_table[i].is_speculative)
1624 : {
1625 0 : int j;
1626 0 : fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1627 :
1628 0 : fprintf (sched_dump, "split path: ");
1629 0 : for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1630 : {
1631 0 : int b = candidate_table[i].split_bbs.first_member[j]->index;
1632 :
1633 0 : fprintf (sched_dump, " %d ", b);
1634 : }
1635 0 : fprintf (sched_dump, "\n");
1636 :
1637 0 : fprintf (sched_dump, "update path: ");
1638 0 : for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1639 : {
1640 0 : int b = candidate_table[i].update_bbs.first_member[j]->index;
1641 :
1642 0 : fprintf (sched_dump, " %d ", b);
1643 : }
1644 0 : fprintf (sched_dump, "\n");
1645 : }
1646 : else
1647 : {
1648 0 : fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1649 : }
1650 : }
1651 :
1652 : /* Print candidates info, for debugging purposes. Callable from debugger. */
1653 :
1654 : DEBUG_FUNCTION void
1655 0 : debug_candidates (int trg)
1656 : {
1657 0 : int i;
1658 :
1659 0 : fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1660 0 : BB_TO_BLOCK (trg), trg);
1661 0 : for (i = trg + 1; i < current_nr_blocks; i++)
1662 0 : debug_candidate (i);
1663 0 : }
1664 :
1665 : /* Functions for speculative scheduling. */
1666 :
1667 : static bitmap_head not_in_df;
1668 :
1669 : /* Return false if x is a set of a register alive in the beginning of one
1670 : of the split-blocks of src, otherwise return true. */
1671 :
static bool
check_live_1 (int src, rtx x)
{
  int i;
  int regno;
  rtx reg = SET_DEST (x);

  if (reg == 0)
    return true;

  /* Strip wrappers to reach the underlying register.  */
  while (GET_CODE (reg) == SUBREG
	 || GET_CODE (reg) == ZERO_EXTRACT
	 || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL)
    {
      int i;

      /* NOTE(review): this returns true as soon as ANY sub-expression
	 checks as safe, rather than requiring all of them to be safe
	 as check_live does for PARALLEL patterns — confirm this
	 asymmetry is intentional.  */
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
	if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
	  if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
	    return true;

      return false;
    }

  if (!REG_P (reg))
    return true;

  regno = REGNO (reg);

  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
    {
      /* Global registers are assumed live.  */
      return false;
    }
  else
    {
      if (regno < FIRST_PSEUDO_REGISTER)
	{
	  /* Check for hard registers.  Every hard reg in the multi-reg
	     group must be dead at the start of each split block.  */
	  int j = REG_NREGS (reg);
	  while (--j >= 0)
	    {
	      for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
		{
		  basic_block b = candidate_table[src].split_bbs.first_member[i];
		  int t = bitmap_bit_p (&not_in_df, b->index);

		  /* We can have split blocks, that were recently generated.
		     Such blocks are always outside current region.  */
		  gcc_assert (!t || (CONTAINING_RGN (b->index)
				     != CONTAINING_RGN (BB_TO_BLOCK (src))));

		  if (t || REGNO_REG_SET_P (df_get_live_in (b), regno + j))
		    return false;
		}
	    }
	}
      else
	{
	  /* Check for pseudo registers.  */
	  for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
	    {
	      basic_block b = candidate_table[src].split_bbs.first_member[i];
	      int t = bitmap_bit_p (&not_in_df, b->index);

	      gcc_assert (!t || (CONTAINING_RGN (b->index)
				 != CONTAINING_RGN (BB_TO_BLOCK (src))));

	      if (t || REGNO_REG_SET_P (df_get_live_in (b), regno))
		return false;
	    }
	}
    }

  return true;
}
1751 :
1752 : /* If x is a set of a register R, mark that R is alive in the beginning
1753 : of every update-block of src. */
1754 :
static void
update_live_1 (int src, rtx x)
{
  int i;
  int regno;
  rtx reg = SET_DEST (x);

  if (reg == 0)
    return;

  /* Strip wrappers to reach the underlying register.  */
  while (GET_CODE (reg) == SUBREG
	 || GET_CODE (reg) == ZERO_EXTRACT
	 || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL)
    {
      int i;

      /* Recurse into every non-null sub-expression of the PARALLEL.  */
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
	if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
	  update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));

      return;
    }

  if (!REG_P (reg))
    return;

  /* Global registers are always live, so the code below does not apply
     to them.  */

  regno = REGNO (reg);

  if (! HARD_REGISTER_NUM_P (regno)
      || !global_regs[regno])
    {
      /* Mark all regs of the (possibly multi-word) group live at the
	 start of every update block of SRC.  */
      for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
	{
	  basic_block b = candidate_table[src].update_bbs.first_member[i];
	  bitmap_set_range (df_get_live_in (b), regno, REG_NREGS (reg));
	}
    }
}
1799 :
1800 : /* Return true if insn can be speculatively moved from block src to trg,
1801 : otherwise return false. Called before first insertion of insn to
1802 : ready-list or before the scheduling. */
1803 :
1804 : static bool
1805 35 : check_live (rtx_insn *insn, int src)
1806 : {
1807 : /* Find the registers set by instruction. */
1808 35 : if (GET_CODE (PATTERN (insn)) == SET
1809 35 : || GET_CODE (PATTERN (insn)) == CLOBBER)
1810 19 : return check_live_1 (src, PATTERN (insn));
1811 16 : else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1812 : {
1813 0 : int j;
1814 0 : for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1815 0 : if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1816 0 : || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1817 0 : && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1818 : return false;
1819 :
1820 : return true;
1821 : }
1822 :
1823 : return true;
1824 : }
1825 :
1826 : /* Update the live registers info after insn was moved speculatively from
1827 : block src to trg. */
1828 :
1829 : static void
1830 8 : update_live (rtx_insn *insn, int src)
1831 : {
1832 : /* Find the registers set by instruction. */
1833 8 : if (GET_CODE (PATTERN (insn)) == SET
1834 8 : || GET_CODE (PATTERN (insn)) == CLOBBER)
1835 0 : update_live_1 (src, PATTERN (insn));
1836 8 : else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1837 : {
1838 0 : int j;
1839 0 : for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1840 0 : if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1841 0 : || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1842 0 : update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1843 : }
1844 8 : }
1845 :
1846 : /* True if block bb_to is equal to, or reachable from block bb_from. */
1847 : #define IS_REACHABLE(bb_from, bb_to) \
1848 : (bb_from == bb_to \
1849 : || IS_RGN_ENTRY (bb_from) \
1850 : || (bitmap_bit_p \
1851 : (ancestor_edges[bb_to], \
1852 : EDGE_TO_BIT (single_pred_edge \
1853 : (BASIC_BLOCK_FOR_FN (cfun, \
1854 : BB_TO_BLOCK (bb_from)))))))
1855 :
1856 : /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1857 :
1858 : static void
1859 0 : set_spec_fed (rtx load_insn)
1860 : {
1861 0 : sd_iterator_def sd_it;
1862 0 : dep_t dep;
1863 :
1864 0 : FOR_EACH_DEP (load_insn, SD_LIST_FORW, sd_it, dep)
1865 0 : if (DEP_TYPE (dep) == REG_DEP_TRUE)
1866 0 : FED_BY_SPEC_LOAD (DEP_CON (dep)) = 1;
1867 0 : }
1868 :
1869 : /* On the path from the insn to load_insn_bb, find a conditional
1870 : branch depending on insn, that guards the speculative load. */
1871 :
1872 : static bool
1873 0 : find_conditional_protection (rtx_insn *insn, int load_insn_bb)
1874 : {
1875 0 : sd_iterator_def sd_it;
1876 0 : dep_t dep;
1877 :
1878 : /* Iterate through DEF-USE forward dependences. */
1879 0 : FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
1880 : {
1881 0 : rtx_insn *next = DEP_CON (dep);
1882 :
1883 0 : if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1884 0 : CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1885 0 : && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1886 0 : && load_insn_bb != INSN_BB (next)
1887 0 : && DEP_TYPE (dep) == REG_DEP_TRUE
1888 0 : && (JUMP_P (next)
1889 0 : || find_conditional_protection (next, load_insn_bb)))
1890 0 : return true;
1891 : }
1892 : return false;
1893 : } /* find_conditional_protection */
1894 :
1895 : /* Returns true if the same insn1 that participates in the computation
1896 : of load_insn's address is feeding a conditional branch that is
1897 : guarding on load_insn. This is true if we find two DEF-USE
1898 : chains:
1899 : insn1 -> ... -> conditional-branch
1900 : insn1 -> ... -> load_insn,
1901 : and if a flow path exists:
1902 : insn1 -> ... -> conditional-branch -> ... -> load_insn,
1903 : and if insn1 is on the path
1904 : region-entry -> ... -> bb_trg -> ... load_insn.
1905 :
1906 : Locate insn1 by climbing on INSN_BACK_DEPS from load_insn.
1907 : Locate the branch by following INSN_FORW_DEPS from insn1. */
1908 :
static bool
is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
{
  sd_iterator_def sd_it;
  dep_t dep;

  /* NB: the loop body ends with an unconditional return, so only the
     first back dependence that passes the filters below is ever
     explored; the remaining back dependences are not examined.  */
  FOR_EACH_DEP (load_insn, SD_LIST_BACK, sd_it, dep)
    {
      rtx_insn *insn1 = DEP_PRO (dep);

      /* Must be a DEF-USE dependence upon non-branch.  */
      if (DEP_TYPE (dep) != REG_DEP_TRUE
	  || JUMP_P (insn1))
	continue;

      /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn.  */
      if (INSN_BB (insn1) == bb_src
	  || (CONTAINING_RGN (BLOCK_NUM (insn1))
	      != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
	  || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
	      && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
	continue;

      /* Now search for the conditional-branch.  */
      if (find_conditional_protection (insn1, bb_src))
	return true;

      /* Recursive step: search another insn1, "above" current insn1.  */
      return is_conditionally_protected (insn1, bb_src, bb_trg);
    }

  /* The chain does not exist.  */
  return false;
} /* is_conditionally_protected */
1943 :
1944 : /* Returns true if a clue for "similar load" 'insn2' is found, and hence
1945 : load_insn can move speculatively from bb_src to bb_trg. All the
1946 : following must hold:
1947 :
1948 : (1) both loads have 1 base register (PFREE_CANDIDATEs).
1949 : (2) load_insn and load1 have a def-use dependence upon
1950 : the same insn 'insn1'.
1951 : (3) either load2 is in bb_trg, or:
1952 : - there's only one split-block, and
1953 : - load1 is on the escape path, and
1954 :
1955 : From all these we can conclude that the two loads access memory
1956 : addresses that differ at most by a constant, and hence if moving
1957 : load_insn would cause an exception, it would have been caused by
1958 : load2 anyhow. */
1959 :
1960 : static bool
1961 0 : is_pfree (rtx load_insn, int bb_src, int bb_trg)
1962 : {
1963 0 : sd_iterator_def back_sd_it;
1964 0 : dep_t back_dep;
          : /* Per-source-block candidate info computed by compute_trg_info. */
1965 0 : candidate *candp = candidate_table + bb_src;
1966 :
1967 0 : if (candp->split_bbs.nr_members != 1)
1968 : /* Must have exactly one escape block. */
1969 : return false;
1970 :
          : /* Walk producers of LOAD_INSN; for each true dependence, walk that
          : producer's consumers looking for a "similar load" insn2 that is
          : either in the target block or in the single split-block. */
1971 0 : FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep)
1972 : {
1973 0 : rtx_insn *insn1 = DEP_PRO (back_dep);
1974 :
1975 0 : if (DEP_TYPE (back_dep) == REG_DEP_TRUE)
1976 : /* Found a DEF-USE dependence (insn1, load_insn). */
1977 : {
1978 0 : sd_iterator_def fore_sd_it;
1979 0 : dep_t fore_dep;
1980 :
1981 0 : FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep)
1982 : {
1983 0 : rtx_insn *insn2 = DEP_CON (fore_dep);
1984 :
1985 0 : if (DEP_TYPE (fore_dep) == REG_DEP_TRUE)
1986 : {
1987 : /* Found a DEF-USE dependence (insn1, insn2). */
1988 0 : if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1989 : /* insn2 not guaranteed to be a 1 base reg load. */
1990 0 : continue;
1991 :
1992 0 : if (INSN_BB (insn2) == bb_trg)
1993 : /* insn2 is the similar load, in the target block. */
1994 0 : return true;
1995 :
1996 0 : if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1997 : /* insn2 is a similar load, in a split-block. */
1998 : return true;
1999 : }
2000 : }
2001 : }
2002 : }
2003 :
2004 : /* Couldn't find a similar load. */
2005 : return false;
2006 : } /* is_pfree */
2007 :
2008 : /* Return true if load_insn is prisky (i.e. if load_insn is fed by
2009 : a load moved speculatively, or if load_insn is protected by
2010 : a compare on load_insn's address). */
2011 :
2012 : static bool
2013 0 : is_prisky (rtx load_insn, int bb_src, int bb_trg)
2014 : {
          : /* Fed by a speculatively-moved load: risky by propagation. */
2015 0 : if (FED_BY_SPEC_LOAD (load_insn))
2016 : return true;
2017 :
2018 0 : if (sd_lists_empty_p (load_insn, SD_LIST_BACK))
2019 : /* Dependence may 'hide' out of the region. */
2020 : return true;
2021 :
          : /* Guarded by a conditional branch on its address computation. */
2022 0 : if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2023 : return true;
2024 :
2025 : return false;
2026 : }
2027 :
2028 : /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2029 : Return true if insn is exception-free (and the motion is valid)
2030 : and false otherwise. */
2031 :
2032 : static bool
2033 26 : is_exception_free (rtx_insn *insn, int bb_src, int bb_trg)
2034 : {
2035 26 : int insn_class = haifa_classify_insn (insn);
2036 :
2037 : /* Handle non-load insns. */
2038 26 : switch (insn_class)
2039 : {
2040 : case TRAP_FREE:
2041 : return true;
2042 : case TRAP_RISKY:
2043 : return false;
2044 3 : default:;
2045 : }
2046 :
2047 : /* Handle loads. */
2048 3 : if (!flag_schedule_speculative_load)
2049 : return false;
          : /* Side effect: mark INSN as a load for later FED_BY_SPEC_LOAD
          : propagation in begin_schedule_ready. */
2050 0 : IS_LOAD_INSN (insn) = 1;
2051 0 : switch (insn_class)
2052 : {
2053 : case IFREE:
2054 : return true;
2055 : case IRISKY:
2056 : return false;
2057 0 : case PFREE_CANDIDATE:
2058 0 : if (is_pfree (insn, bb_src, bb_trg))
2059 : return true;
2060 : /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2061 : /* FALLTHRU */
2062 0 : case PRISKY_CANDIDATE:
2063 0 : if (!flag_schedule_speculative_load_dangerous
2064 0 : || is_prisky (insn, bb_src, bb_trg))
2065 0 : return false;
2066 : break;
2067 0 : default:;
2068 : }
2069 :
          : /* Remaining classes are moved only under the "dangerous" flag. */
2070 0 : return flag_schedule_speculative_load_dangerous;
2071 : }
2072 :
2073 : /* The number of insns from the current block scheduled so far. */
2074 : static int sched_target_n_insns;
2075 : /* The number of insns from the current block to be scheduled in total. */
2076 : static int target_n_insns;
2077 : /* The number of insns from the entire region scheduled so far. */
2078 : static int sched_n_insns;
2079 :
2080 : /* Implementations of the sched_info functions for region scheduling. */
2081 : static void init_ready_list (void);
2082 : static bool can_schedule_ready_p (rtx_insn *);
2083 : static void begin_schedule_ready (rtx_insn *);
2084 : static ds_t new_ready (rtx_insn *, ds_t);
2085 : static bool schedule_more_p (void);
2086 : static const char *rgn_print_insn (const rtx_insn *, int);
2087 : static int rgn_rank (rtx_insn *, rtx_insn *);
2088 : static void compute_jump_reg_dependencies (rtx, regset);
2089 :
2090 : /* Functions for speculative scheduling. */
2091 : static void rgn_add_remove_insn (rtx_insn *, int);
2092 : static void rgn_add_block (basic_block, basic_block);
2093 : static void rgn_fix_recovery_cfg (int, int, int);
2094 : static basic_block advance_target_bb (basic_block, rtx_insn *);
2095 :
2096 : /* Return true if there are more insns that should be scheduled. */
2097 :
2098 : static bool
2099 111817479 : schedule_more_p (void)
2100 : {
          : /* Done once every insn of the target block has been scheduled;
          : insns pulled in from other blocks don't count here. */
2101 111817479 : return sched_target_n_insns < target_n_insns;
2102 : }
2103 :
2104 : /* Add all insns that are initially ready to the ready list READY. Called
2105 : once before scheduling a set of insns. */
2106 :
2107 : static void
2108 10313163 : init_ready_list (void)
2109 : {
2110 10313163 : rtx_insn *prev_head = current_sched_info->prev_head;
2111 10313163 : rtx_insn *next_tail = current_sched_info->next_tail;
2112 10313163 : int bb_src;
2113 10313163 : rtx_insn *insn;
2114 :
          : /* Reset the per-block and per-region progress counters. */
2115 10313163 : target_n_insns = 0;
2116 10313163 : sched_target_n_insns = 0;
2117 10313163 : sched_n_insns = 0;
2118 :
2119 : /* Print debugging information. */
2120 10313163 : if (sched_verbose >= 5)
2121 0 : debug_rgn_dependencies (target_bb);
2122 :
2123 : /* Prepare current target block info. */
2124 10313163 : if (current_nr_blocks > 1)
2125 48 : compute_trg_info (target_bb);
2126 :
2127 : /* Initialize ready list with all 'ready' insns in target block.
2128 : Count number of insns in the target block being scheduled. */
2129 129246193 : for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2130 : {
2131 108619867 : gcc_assert (TODO_SPEC (insn) == HARD_DEP || TODO_SPEC (insn) == DEP_POSTPONED);
2132 108619867 : TODO_SPEC (insn) = HARD_DEP;
2133 108619867 : try_ready (insn);
2134 108619867 : target_n_insns++;
2135 :
2136 108619867 : gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL));
2137 : }
2138 :
2139 : /* Add to ready list all 'ready' insns in valid source blocks.
2140 : For speculative insns, check-live, exception-free, and
2141 : issue-delay. */
2142 10313233 : for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2143 70 : if (IS_VALID (bb_src))
2144 : {
2145 51 : rtx_insn *src_head;
2146 51 : rtx_insn *src_next_tail;
2147 51 : rtx_insn *tail, *head;
2148 :
2149 51 : get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src),
2150 : &head, &tail);
2151 51 : src_next_tail = NEXT_INSN (tail);
2152 51 : src_head = head;
2153 :
2154 712 : for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2155 661 : if (INSN_P (insn))
2156 : {
2157 637 : gcc_assert (TODO_SPEC (insn) == HARD_DEP || TODO_SPEC (insn) == DEP_POSTPONED);
2158 637 : TODO_SPEC (insn) = HARD_DEP;
          : /* The speculation checks themselves are done in new_ready
          : when the insn's hard deps are resolved. */
2159 637 : try_ready (insn);
2160 : }
2161 : }
2162 10313163 : }
2163 :
2164 : /* Called after taking INSN from the ready list. Returns true if this
2165 : insn can be scheduled, false if we should silently discard it. */
2166 :
2167 : static bool
2168 60126190 : can_schedule_ready_p (rtx_insn *insn)
2169 : {
2170 : /* An interblock motion? */
2171 60126190 : if (INSN_BB (insn) != target_bb && IS_SPECULATIVE_INSN (insn))
2172 : {
2173 : /* Cannot schedule this insn unless all operands are live. */
2174 0 : if (!check_live (insn, INSN_BB (insn)))
2175 : return false;
2176 :
2177 : /* Should not move expensive instructions speculatively. */
          : /* CLOBBERs carry no computation, so they are always cheap enough;
          : for anything else defer to the target hook. */
2178 0 : if (GET_CODE (PATTERN (insn)) != CLOBBER
2179 0 : && !targetm.sched.can_speculate_insn (insn))
2180 : return false;
2181 : }
2182 :
2183 : return true;
2184 : }
2185 :
2186 : /* Updates counter and other information. Split from can_schedule_ready_p ()
2187 : because when we schedule insn speculatively then insn passed to
2188 : can_schedule_ready_p () differs from the one passed to
2189 : begin_schedule_ready (). */
2190 : static void
2191 108619875 : begin_schedule_ready (rtx_insn *insn)
2192 : {
2193 : /* An interblock motion? */
2194 108619875 : if (INSN_BB (insn) != target_bb)
2195 : {
2196 8 : if (IS_SPECULATIVE_INSN (insn))
2197 : {
          : /* can_schedule_ready_p / new_ready already verified liveness;
          : re-check here only as a sanity assertion. */
2198 8 : gcc_assert (check_live (insn, INSN_BB (insn)));
2199 :
2200 8 : update_live (insn, INSN_BB (insn));
2201 :
2202 : /* For speculative load, mark insns fed by it. */
2203 8 : if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2204 0 : set_spec_fed (insn);
2205 :
2206 8 : nr_spec++;
2207 : }
2208 8 : nr_inter++;
2209 : }
2210 : else
2211 : {
2212 : /* In block motion. */
2213 108619867 : sched_target_n_insns++;
2214 : }
2215 108619875 : sched_n_insns++;
2216 : }
2217 :
2218 : /* Called after INSN has all its hard dependencies resolved and the speculation
2219 : of type TS is enough to overcome them all.
2220 : Return nonzero if it should be moved to the ready list or the queue, or zero
2221 : if we should silently discard it. */
2222 : static ds_t
2223 108619911 : new_ready (rtx_insn *next, ds_t ts)
2224 : {
2225 108619911 : if (INSN_BB (next) != target_bb)
2226 : {
          : /* Set as a side effect of the is_exception_free call below so we
          : can distinguish "not exception free" from the other rejection
          : reasons when deciding whether control speculation may help. */
2227 44 : int not_ex_free = 0;
2228 :
2229 : /* For speculative insns, before inserting to ready/queue,
2230 : check live, exception-free, and issue-delay. */
2231 44 : if (!IS_VALID (INSN_BB (next))
2232 40 : || CANT_MOVE (next)
2233 98 : || (IS_SPECULATIVE_INSN (next)
2234 27 : && ((recog_memoized (next) >= 0
2235 19 : && min_insn_conflict_delay (curr_state, next, next)
2236 19 : > param_max_sched_insn_conflict_delay)
2237 27 : || IS_SPECULATION_CHECK_P (next)
2238 27 : || !check_live (next, INSN_BB (next))
2239 26 : || (not_ex_free = !is_exception_free (next, INSN_BB (next),
2240 : target_bb)))))
2241 : {
2242 24 : if (not_ex_free
2243 : /* We are here because is_exception_free () == false.
2244 : But we possibly can handle that with control speculation. */
2245 6 : && sched_deps_info->generate_spec_deps
2246 0 : && spec_info->mask & BEGIN_CONTROL)
2247 : {
2248 0 : ds_t new_ds;
2249 :
2250 : /* Add control speculation to NEXT's dependency type. */
2251 0 : new_ds = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK);
2252 :
2253 : /* Check if NEXT can be speculated with new dependency type. */
2254 0 : if (sched_insn_is_legitimate_for_speculation_p (next, new_ds))
2255 : /* Here we got new control-speculative instruction. */
2256 : ts = new_ds;
2257 : else
2258 : /* NEXT isn't ready yet. */
2259 24 : ts = DEP_POSTPONED;
2260 : }
2261 : else
2262 : /* NEXT isn't ready yet. */
2263 : ts = DEP_POSTPONED;
2264 : }
2265 : }
2266 :
2267 108619911 : return ts;
2268 : }
2269 :
2270 : /* Return a string that contains the insn uid and optionally anything else
2271 : necessary to identify this insn in an output. It's valid to use a
2272 : static buffer for this. The ALIGNED parameter should cause the string
2273 : to be formatted so that multiple output lines will line up nicely. */
2274 :
2275 : static const char *
2276 983 : rgn_print_insn (const rtx_insn *insn, int aligned)
2277 : {
          : /* Static buffer: callers must consume the string before the next
          : call; not reentrant (fine for dump output). */
2278 983 : static char tmp[80];
2279 :
2280 983 : if (aligned)
2281 983 : sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2282 : else
2283 : {
          : /* Show the block only when it is informative, i.e. for an
          : interblock candidate in a multi-block region. */
2284 0 : if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2285 0 : sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2286 : else
2287 0 : sprintf (tmp, "%d", INSN_UID (insn));
2288 : }
2289 983 : return tmp;
2290 : }
2291 :
2292 : /* Compare priority of two insns. Return a positive number if the second
2293 : insn is to be preferred for scheduling, and a negative one if the first
2294 : is to be preferred. Zero if they are equally good. */
2295 :
2296 : static int
2297 243934095 : rgn_rank (rtx_insn *insn1, rtx_insn *insn2)
2298 : {
2299 : /* Some comparison make sense in interblock scheduling only. */
2300 243934095 : if (INSN_BB (insn1) != INSN_BB (insn2))
2301 : {
2302 0 : int spec_val, prob_val;
2303 :
2304 : /* Prefer an inblock motion on an interblock motion. */
2305 0 : if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2306 : return 1;
2307 0 : if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2308 : return -1;
2309 :
2310 : /* Prefer a useful motion on a speculative one. */
2311 0 : spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2312 0 : if (spec_val)
2313 : return spec_val;
2314 :
2315 : /* Prefer a more probable (speculative) insn. */
2316 0 : prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2317 0 : if (prob_val)
2318 : return prob_val;
2319 : }
          : /* Insns from the same block are equally good here; haifa-sched's
          : other heuristics break the tie. */
2320 : return 0;
2321 : }
2322 :
2323 : /* NEXT is an instruction that depends on INSN (a backward dependence);
2324 : return true if we should include this dependence in priority
2325 : calculations. */
2326 :
2327 : bool
2328 143201314 : contributes_to_priority (rtx_insn *next, rtx_insn *insn)
2329 : {
2330 : /* NEXT and INSN reside in one ebb. */
          : /* Cross-block dependencies are excluded so an insn's priority
          : reflects only the critical path within its own (e)bb. */
2331 143201314 : return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn));
2332 : }
2333 :
2334 : /* INSN is a JUMP_INSN. Store the set of registers that must be
2335 : considered as used by this jump in USED. */
2336 :
2337 : static void
2338 4841599 : compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
2339 : regset used ATTRIBUTE_UNUSED)
2340 : {
2341 : /* Nothing to do here, since we postprocess jumps in
2342 : add_branch_dependences. */
          : /* Deliberately empty: the hook must still exist because the
          : dependence analyzer calls it unconditionally for every jump. */
2343 : }
2344 :
2345 : /* This variable holds common_sched_info hooks and data relevant to
2346 : the interblock scheduler. */
2347 : static struct common_sched_info_def rgn_common_sched_info;
2348 :
2349 :
2350 : /* This holds data for the dependence analysis relevant to
2351 : the interblock scheduler. */
2352 : static struct sched_deps_info_def rgn_sched_deps_info;
2353 :
2354 : /* This holds constant data used for initializing the above structure
2355 : for the Haifa scheduler. */
2356 : static const struct sched_deps_info_def rgn_const_sched_deps_info =
2357 : {
2358 : compute_jump_reg_dependencies,
2359 : NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2360 : 0, 0, 0
2361 : };
2362 :
2363 : /* Same as above, but for the selective scheduler. */
2364 : static const struct sched_deps_info_def rgn_const_sel_sched_deps_info =
2365 : {
2366 : compute_jump_reg_dependencies,
2367 : NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2368 : 0, 0, 0
2369 : };
2370 :
2371 : /* Return true if scheduling INSN will trigger finish of scheduling
2372 : current block. */
2373 : static bool
2374 111247825 : rgn_insn_finishes_block_p (rtx_insn *insn)
2375 : {
          : /* Only target-block insns advance sched_target_n_insns, so the
          : "+ 1" tests whether scheduling INSN would complete the block. */
2376 111247825 : if (INSN_BB (insn) == target_bb
2377 111247825 : && sched_target_n_insns + 1 == target_n_insns)
2378 : /* INSN is the last not-scheduled instruction in the current block. */
2379 5828555 : return true;
2380 :
2381 : return false;
2382 : }
2383 :
2384 : /* Used in schedule_insns to initialize current_sched_info for scheduling
2385 : regions (or single basic blocks). */
2386 :
2387 : static const struct haifa_sched_info rgn_const_sched_info =
2388 : {
2389 : init_ready_list,
2390 : can_schedule_ready_p,
2391 : schedule_more_p,
2392 : new_ready,
2393 : rgn_rank,
2394 : rgn_print_insn,
2395 : contributes_to_priority,
2396 : rgn_insn_finishes_block_p,
2397 :
2398 : NULL, NULL,
2399 : NULL, NULL,
2400 : 0, 0,
2401 :
2402 : rgn_add_remove_insn,
2403 : begin_schedule_ready,
2404 : NULL,
2405 : advance_target_bb,
2406 : NULL, NULL,
2407 : SCHED_RGN
2408 : };
2409 :
2410 : /* This variable holds the data and hooks needed to the Haifa scheduler backend
2411 : for the interblock scheduler frontend. */
2412 : static struct haifa_sched_info rgn_sched_info;
2413 :
2414 : /* Returns maximum priority that an insn was assigned to. */
2415 :
2416 : int
2417 901 : get_rgn_sched_max_insns_priority (void)
2418 : {
          : /* Simple accessor exposing the region scheduler's recorded
          : maximum priority to code outside this file. */
2419 901 : return rgn_sched_info.sched_max_insns_priority;
2420 : }
2421 :
2422 : /* Determine if PAT sets a TARGET_CLASS_LIKELY_SPILLED_P register. */
2423 :
2424 : static bool
2425 1508 : sets_likely_spilled (rtx pat)
2426 : {
2427 1508 : bool ret = false;
          : /* note_pattern_stores invokes sets_likely_spilled_1 for every
          : store destination in PAT; RET is its accumulator. */
2428 0 : note_pattern_stores (pat, sets_likely_spilled_1, &ret);
2429 1508 : return ret;
2430 : }
2431 :
          : /* Callback for note_pattern_stores: set *DATA if X, the destination
          : of SET pattern PAT, is a hard register in a class the target
          : considers likely spilled. */
2432 : static void
2433 1754 : sets_likely_spilled_1 (rtx x, const_rtx pat, void *data)
2434 : {
2435 1754 : bool *ret = (bool *) data;
2436 :
2437 1754 : if (GET_CODE (pat) == SET
2438 1579 : && REG_P (x)
2439 1426 : && HARD_REGISTER_P (x)
2440 2593 : && targetm.class_likely_spilled_p (REGNO_REG_CLASS (REGNO (x))))
2441 335 : *ret = true;
2442 1754 : }
2443 :
2444 : /* A bitmap to note insns that participate in any dependency. Used in
2445 : add_branch_dependences. */
2446 : static sbitmap insn_referenced;
2447 :
2448 : /* Add dependences so that branches are scheduled to run last in their
2449 : block. */
2450 : static void
2451 10331472 : add_branch_dependences (rtx_insn *head, rtx_insn *tail)
2452 : {
2453 10331472 : rtx_insn *insn, *last;
2454 :
2455 : /* For all branches, calls, uses, clobbers, and instructions
2456 : that can throw exceptions, force them to remain in order at the end of
2457 : the block by adding dependencies and giving the last a high priority.
2458 : There may be notes present, and prev_head may also be a note.
2459 :
2460 : Branches must obviously remain at the end. Calls should remain at the
2461 : end since moving them results in worse register allocation. Uses remain
2462 : at the end to ensure proper register allocation.
2463 :
2464 : Predecessors of SCHED_GROUP_P instructions at the end remain at the end.
2465 :
2466 : COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
2467 :
2468 : Insns setting TARGET_CLASS_LIKELY_SPILLED_P registers (usually return
2469 : values) are not moved before reload because we can wind up with register
2470 : allocation failures. */
2471 :
          : /* Skip trailing debug insns so they don't anchor the end-of-block
          : chain. */
2472 11965046 : while (tail != head && DEBUG_INSN_P (tail))
2473 1633574 : tail = PREV_INSN (tail);
2474 :
2475 : insn = tail;
2476 : last = 0;
          : /* Walk backwards from TAIL, chaining each qualifying insn to the
          : previous one found (LAST) with an anti dependence. */
2477 24036837 : while (CALL_P (insn)
2478 24036837 : || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
2479 14545084 : || (NONJUMP_INSN_P (insn)
2480 12928833 : && (GET_CODE (PATTERN (insn)) == USE
2481 12644988 : || GET_CODE (PATTERN (insn)) == CLOBBER
2482 12642890 : || can_throw_internal (insn)
2483 12535451 : || (!reload_completed
2484 1508 : && sets_likely_spilled (PATTERN (insn)))))
2485 14151367 : || NOTE_P (insn)
2486 37007603 : || (last != 0 && SCHED_GROUP_P (last)))
2487 : {
2488 15245384 : if (!NOTE_P (insn))
2489 : {
2490 14064783 : if (last != 0
2491 14064783 : && sd_find_dep_between (insn, last, false) == NULL)
2492 : {
2493 20044 : if (! sched_insns_conditions_mutex_p (last, insn))
2494 20044 : add_dependence (last, insn, REG_DEP_ANTI);
2495 20044 : bitmap_set_bit (insn_referenced, INSN_LUID (insn));
2496 : }
2497 :
2498 14064783 : CANT_MOVE (insn) = 1;
2499 :
2500 14064783 : last = insn;
2501 : }
2502 :
2503 : /* Don't overrun the bounds of the basic block. */
2504 15245384 : if (insn == head)
2505 : break;
2506 :
2507 21872367 : do
2508 21872367 : insn = PREV_INSN (insn);
2509 21872367 : while (insn != head && DEBUG_INSN_P (insn));
2510 : }
2511 :
2512 : /* Selective scheduling handles control dependencies by itself, and
2513 : CANT_MOVE flags ensure that other insns will be kept in place. */
2514 10331472 : if (sel_sched_p ())
2515 : return;
2516 :
2517 : /* Make sure these insns are scheduled last in their block. */
2518 10330422 : insn = last;
2519 10330422 : if (insn != 0)
2520 91303054 : while (insn != head)
2521 : {
2522 82309255 : insn = prev_nonnote_insn (insn);
2523 :
          : /* Insns already chained above (in insn_referenced) pick up the
          : ordering transitively; debug insns never get dependencies. */
2524 82309255 : if (bitmap_bit_p (insn_referenced, INSN_LUID (insn))
2525 82309255 : || DEBUG_INSN_P (insn))
2526 42313504 : continue;
2527 :
2528 39995751 : if (! sched_insns_conditions_mutex_p (last, insn))
2529 39995751 : add_dependence (last, insn, REG_DEP_ANTI);
2530 : }
2531 :
2532 10330422 : if (!targetm.have_conditional_execution ())
2533 : return;
2534 :
2535 : /* Finally, if the block ends in a jump, and we are doing intra-block
2536 : scheduling, make sure that the branch depends on any COND_EXEC insns
2537 : inside the block to avoid moving the COND_EXECs past the branch insn.
2538 :
2539 : We only have to do this after reload, because (1) before reload there
2540 : are no COND_EXEC insns, and (2) the region scheduler is an intra-block
2541 : scheduler after reload.
2542 :
2543 : FIXME: We could in some cases move COND_EXEC insns past the branch if
2544 : this scheduler would be a little smarter. Consider this code:
2545 :
2546 : T = [addr]
2547 : C ? addr += 4
2548 : !C ? X += 12
2549 : C ? T += 1
2550 : C ? jump foo
2551 :
2552 : On a target with a one cycle stall on a memory access the optimal
2553 : sequence would be:
2554 :
2555 : T = [addr]
2556 : C ? addr += 4
2557 : C ? T += 1
2558 : C ? jump foo
2559 : !C ? X += 12
2560 :
2561 : We don't want to put the 'X += 12' before the branch because it just
2562 : wastes a cycle of execution time when the branch is taken.
2563 :
2564 : Note that in the example "!C" will always be true. That is another
2565 : possible improvement for handling COND_EXECs in this scheduler: it
2566 : could remove always-true predicates. */
2567 :
2568 0 : if (!reload_completed || ! (JUMP_P (tail) || JUMP_TABLE_DATA_P (tail)))
2569 : return;
2570 :
2571 : insn = tail;
2572 0 : while (insn != head)
2573 : {
2574 0 : insn = PREV_INSN (insn);
2575 :
2576 : /* Note that we want to add this dependency even when
2577 : sched_insns_conditions_mutex_p returns true. The whole point
2578 : is that we _want_ this dependency, even if these insns really
2579 : are independent. */
2580 0 : if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
2581 0 : add_dependence (tail, insn, REG_DEP_ANTI);
2582 : }
2583 : }
2584 :
2585 : /* Data structures for the computation of data dependences in a regions. We
2586 : keep one `deps' structure for every basic block. Before analyzing the
2587 : data dependences for a bb, its variables are initialized as a function of
2588 : the variables of its predecessors. When the analysis for a bb completes,
2589 : we save the contents to the corresponding bb_deps[bb] variable. */
2590 :
2591 : static class deps_desc *bb_deps;
2592 :
          : /* Prepend the COPY_INSNS / COPY_MEMS pair of parallel lists onto
          : *OLD_INSNS_P / *OLD_MEMS_P, allocating fresh list cells so the
          : source lists remain intact.  The two lists are walked in
          : lock-step; callers must pass lists of equal length. */
2593 : static void
2594 1100 : concat_insn_mem_list (rtx_insn_list *copy_insns,
2595 : rtx_expr_list *copy_mems,
2596 : rtx_insn_list **old_insns_p,
2597 : rtx_expr_list **old_mems_p)
2598 : {
2599 1100 : rtx_insn_list *new_insns = *old_insns_p;
2600 1100 : rtx_expr_list *new_mems = *old_mems_p;
2601 :
2602 2080 : while (copy_insns)
2603 : {
2604 980 : new_insns = alloc_INSN_LIST (copy_insns->insn (), new_insns);
2605 980 : new_mems = alloc_EXPR_LIST (VOIDmode, copy_mems->element (), new_mems);
2606 980 : copy_insns = copy_insns->next ();
2607 980 : copy_mems = copy_mems->next ();
2608 : }
2609 :
2610 1100 : *old_insns_p = new_insns;
2611 1100 : *old_mems_p = new_mems;
2612 : }
2613 :
2614 : /* Join PRED_DEPS to the SUCC_DEPS. */
2615 : void
2616 550 : deps_join (class deps_desc *succ_deps, class deps_desc *pred_deps)
2617 : {
2618 550 : unsigned reg;
2619 550 : reg_set_iterator rsi;
2620 :
2621 : /* The reg_last lists are inherited by successor. */
2622 21140 : EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2623 : {
2624 20590 : struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2625 20590 : struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2626 :
          : /* concat_INSN_LIST shares list tails, so PRED_DEPS' lists stay
          : usable after the join. */
2627 20590 : succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2628 20590 : succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2629 20590 : succ_rl->implicit_sets
2630 20590 : = concat_INSN_LIST (pred_rl->implicit_sets, succ_rl->implicit_sets);
2631 20590 : succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2632 : succ_rl->clobbers);
2633 20590 : succ_rl->uses_length += pred_rl->uses_length;
2634 20590 : succ_rl->clobbers_length += pred_rl->clobbers_length;
2635 : }
2636 550 : IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2637 :
2638 : /* Mem read/write lists are inherited by successor. */
2639 550 : concat_insn_mem_list (pred_deps->pending_read_insns,
2640 : pred_deps->pending_read_mems,
2641 : &succ_deps->pending_read_insns,
2642 : &succ_deps->pending_read_mems);
2643 550 : concat_insn_mem_list (pred_deps->pending_write_insns,
2644 : pred_deps->pending_write_mems,
2645 : &succ_deps->pending_write_insns,
2646 : &succ_deps->pending_write_mems);
2647 :
2648 550 : succ_deps->pending_jump_insns
2649 550 : = concat_INSN_LIST (pred_deps->pending_jump_insns,
2650 : succ_deps->pending_jump_insns);
2651 550 : succ_deps->last_pending_memory_flush
2652 550 : = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2653 : succ_deps->last_pending_memory_flush);
2654 :
2655 550 : succ_deps->pending_read_list_length += pred_deps->pending_read_list_length;
2656 550 : succ_deps->pending_write_list_length += pred_deps->pending_write_list_length;
2657 550 : succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2658 :
2659 : /* last_function_call is inherited by successor. */
2660 550 : succ_deps->last_function_call
2661 550 : = concat_INSN_LIST (pred_deps->last_function_call,
2662 : succ_deps->last_function_call);
2663 :
2664 : /* last_function_call_may_noreturn is inherited by successor. */
2665 550 : succ_deps->last_function_call_may_noreturn
2666 550 : = concat_INSN_LIST (pred_deps->last_function_call_may_noreturn,
2667 : succ_deps->last_function_call_may_noreturn);
2668 :
2669 : /* sched_before_next_call is inherited by successor. */
2670 550 : succ_deps->sched_before_next_call
2671 550 : = concat_INSN_LIST (pred_deps->sched_before_next_call,
2672 : succ_deps->sched_before_next_call);
2673 : }
2674 :
2675 : /* After computing the dependencies for block BB, propagate the dependencies
2676 : found in TMP_DEPS to the successors of the block. */
2677 : static void
2678 409 : propagate_deps (int bb, class deps_desc *pred_deps)
2679 : {
2680 409 : basic_block block = BASIC_BLOCK_FOR_FN (cfun, BB_TO_BLOCK (bb));
2681 409 : edge_iterator ei;
2682 409 : edge e;
2683 :
2684 : /* bb's structures are inherited by its successors. */
2685 1109 : FOR_EACH_EDGE (e, ei, block->succs)
2686 : {
2687 : /* Only bbs "below" bb, in the same region, are interesting. */
2688 700 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2689 698 : || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
2690 459 : || BLOCK_TO_BB (e->dest->index) <= bb)
2691 328 : continue;
2692 :
2693 372 : deps_join (bb_deps + BLOCK_TO_BB (e->dest->index), pred_deps);
2694 : }
2695 :
2696 : /* These lists should point to the right place, for correct
2697 : freeing later. */
2698 409 : bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2699 409 : bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2700 409 : bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2701 409 : bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2702 409 : bb_deps[bb].pending_jump_insns = pred_deps->pending_jump_insns;
2703 :
2704 : /* Can't allow these to be freed twice. */
          : /* Ownership of the lists moved to bb_deps[bb] above; null out
          : PRED_DEPS' copies so free_deps on it won't double-free. */
2705 409 : pred_deps->pending_read_insns = 0;
2706 409 : pred_deps->pending_read_mems = 0;
2707 409 : pred_deps->pending_write_insns = 0;
2708 409 : pred_deps->pending_write_mems = 0;
2709 409 : pred_deps->pending_jump_insns = 0;
2710 : }
2711 :
2712 : /* Compute dependences inside bb. In a multiple blocks region:
2713 : (1) a bb is analyzed after its predecessors, and (2) the lists in
2714 : effect at the end of bb (after analyzing for bb) are inherited by
2715 : bb's successors.
2716 :
2717 : Specifically for reg-reg data dependences, the block insns are
2718 : scanned by sched_analyze () top-to-bottom. Three lists are
2719 : maintained by sched_analyze (): reg_last[].sets for register DEFs,
2720 : reg_last[].implicit_sets for implicit hard register DEFs, and
2721 : reg_last[].uses for register USEs.
2722 :
2723 : When analysis is completed for bb, we update for its successors:
2724 : ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2725 : ; - IMPLICIT_DEFS[succ] = Union (IMPLICIT_DEFS [succ], IMPLICIT_DEFS [bb])
2726 : ; - USES[succ] = Union (USES [succ], DEFS [bb])
2727 :
2728 : The mechanism for computing mem-mem data dependence is very
2729 : similar, and the result is interblock dependences in the region. */
2730 :
2731 : static void
2732 10331472 : compute_block_dependences (int bb)
2733 : {
2734 10331472 : rtx_insn *head, *tail;
2735 10331472 : class deps_desc tmp_deps;
2736 :
          : /* Start from the state inherited from BB's predecessors (a shallow
          : copy; propagate_deps below transfers list ownership back). */
2737 10331472 : tmp_deps = bb_deps[bb];
2738 :
2739 : /* Do the analysis for this block. */
2740 10331472 : gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2741 10331472 : get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2742 :
2743 10331472 : sched_analyze (&tmp_deps, head, tail);
2744 :
2745 10331472 : add_branch_dependences (head, tail);
2746 :
2747 10331472 : if (current_nr_blocks > 1)
2748 409 : propagate_deps (bb, &tmp_deps);
2749 :
2750 : /* Free up the INSN_LISTs. */
2751 10331472 : free_deps (&tmp_deps);
2752 :
2753 10331472 : if (targetm.sched.dependencies_evaluation_hook)
2754 10331472 : targetm.sched.dependencies_evaluation_hook (head, tail);
2755 : }
2756 :
2757 : /* Free dependencies of instructions inside BB. */
2758 : static void
2759 10330422 : free_block_dependencies (int bb)
2760 : {
2761 10330422 : rtx_insn *head;
2762 10330422 : rtx_insn *tail;
2763 :
2764 10330422 : get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2765 :
          : /* Blocks with no real insns were never scheduled, so they have no
          : dependency data to release. */
2766 10330422 : if (no_real_insns_p (head, tail))
2767 17259 : return;
2768 :
2769 10313163 : sched_free_deps (head, tail, true);
2770 : }
2771 :
2772 : /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2773 : them to the unused_*_list variables, so that they can be reused. */
2774 :
2775 : static void
2776 10331158 : free_pending_lists (void)
2777 : {
2778 10331158 : int bb;
2779 :
          : /* Release the five pending lists of every block in the current
          : region; propagate_deps arranged unique ownership per block. */
2780 20662630 : for (bb = 0; bb < current_nr_blocks; bb++)
2781 : {
2782 10331472 : free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2783 10331472 : free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2784 10331472 : free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2785 10331472 : free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2786 10331472 : free_INSN_LIST_list (&bb_deps[bb].pending_jump_insns);
2787 : }
2788 : }
2789 :
2790 : /* Print dependences for debugging starting from FROM_BB.
2791 : Callable from debugger. */
2794 : DEBUG_FUNCTION void
2795 0 : debug_rgn_dependencies (int from_bb)
2796 : {
2797 0 : int bb;
2798 :
2799 0 : fprintf (sched_dump,
2800 : ";; --------------- forward dependences: ------------ \n");
2801 :
          : /* Dump every block of the current region from FROM_BB onward. */
2802 0 : for (bb = from_bb; bb < current_nr_blocks; bb++)
2803 : {
2804 0 : rtx_insn *head, *tail;
2805 :
2806 0 : get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2807 0 : fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2808 0 : BB_TO_BLOCK (bb), bb);
2809 :
2810 0 : debug_dependencies (head, tail);
2811 : }
2812 0 : }
2813 :
2814 : /* Print dependencies information for instructions between HEAD and TAIL.
2815 : ??? This function would probably fit best in haifa-sched.cc. */
2816 0 : void debug_dependencies (rtx_insn *head, rtx_insn *tail)
2817 : {
2818 0 : rtx_insn *insn;
2819 0 : rtx_insn *next_tail = NEXT_INSN (tail);
2820 :
2821 0 : fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2822 : "insn", "code", "bb", "dep", "prio", "cost",
2823 : "reservation");
2824 0 : fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2825 : "----", "----", "--", "---", "----", "----",
2826 : "-----------");
2827 :
2828 0 : for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2829 : {
2830 0 : if (! INSN_P (insn))
2831 : {
2832 0 : int n;
2833 0 : fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2834 0 : if (NOTE_P (insn))
2835 : {
2836 0 : n = NOTE_KIND (insn);
2837 0 : fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2838 : }
2839 : else
2840 0 : fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2841 0 : continue;
2842 0 : }
2843 :
2844 0 : fprintf (sched_dump,
2845 : ";; %s%5d%6d%6d%6d%6d%6d ",
2846 0 : (SCHED_GROUP_P (insn) ? "+" : " "),
2847 0 : INSN_UID (insn),
2848 : INSN_CODE (insn),
2849 0 : BLOCK_NUM (insn),
2850 0 : sched_emulate_haifa_p ? -1 : sd_lists_size (insn, SD_LIST_BACK),
2851 0 : (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2852 0 : : INSN_PRIORITY (insn))
2853 0 : : INSN_PRIORITY (insn)),
2854 0 : (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2855 0 : : insn_sched_cost (insn))
2856 0 : : insn_sched_cost (insn)));
2857 :
2858 0 : if (recog_memoized (insn) < 0)
2859 0 : fprintf (sched_dump, "nothing");
2860 : else
2861 0 : print_reservation (sched_dump, insn);
2862 :
2863 0 : fprintf (sched_dump, "\t: FW:");
2864 0 : {
2865 0 : sd_iterator_def sd_it;
2866 0 : dep_t dep;
2867 :
2868 0 : FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
2869 0 : fprintf (sched_dump, " %d%s%s%s", INSN_UID (DEP_CON (dep)),
2870 0 : DEP_TYPE (dep) == REG_DEP_TRUE ? "t" : "",
2871 0 : DEP_NONREG (dep) ? "n" : "",
2872 0 : DEP_MULTIPLE (dep) ? "m" : "");
2873 0 : if (sched_verbose >= 5)
2874 : {
2875 0 : fprintf (sched_dump, "\n;;\t\t\t\t\t\t: BK:");
2876 0 : FOR_EACH_DEP (insn, SD_LIST_HARD_BACK, sd_it, dep)
2877 0 : fprintf (sched_dump, " %d%s%s%s", INSN_UID (DEP_PRO (dep)),
2878 0 : DEP_TYPE (dep) == REG_DEP_TRUE ? "t" : "",
2879 0 : DEP_NONREG (dep) ? "n" : "",
2880 0 : DEP_MULTIPLE (dep) ? "m" : "");
2881 : }
2882 : }
2883 0 : fprintf (sched_dump, "\n");
2884 : }
2885 :
2886 0 : fprintf (sched_dump, "\n");
2887 0 : }
2888 :
2889 : /* Dump dependency graph for the current region to a file using dot syntax. */
2890 :
void
dump_rgn_dependencies_dot (FILE *file)
{
  /* Emit a graphviz "digraph" to FILE: one cluster subgraph per basic
     block of the current region, one record-shaped node per insn, and
     one edge per backward dependence.  */
  rtx_insn *head, *tail, *con, *pro;
  sd_iterator_def sd_it;
  dep_t dep;
  int bb;
  pretty_printer pp;

  pp.set_output_stream (file);
  pp_printf (&pp, "digraph SchedDG {\n");

  for (bb = 0; bb < current_nr_blocks; ++bb)
    {
      /* Begin subgraph (basic block).  */
      pp_printf (&pp, "subgraph cluster_block_%d {\n", bb);
      pp_printf (&pp, "\t" "color=blue;" "\n");
      pp_printf (&pp, "\t" "style=bold;" "\n");
      pp_printf (&pp, "\t" "label=\"BB #%d\";\n", BB_TO_BLOCK (bb));

      /* Setup head and tail (no support for EBBs).  */
      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
      /* TAIL becomes the exclusive end of the insn walk below.  */
      tail = NEXT_INSN (tail);

      /* Dump all insns.  */
      for (con = head; con != tail; con = NEXT_INSN (con))
	{
	  if (!INSN_P (con))
	    continue;

	  /* Pretty print the insn.  */
	  pp_printf (&pp, "\t%d [label=\"{", INSN_UID (con));
	  pp_write_text_to_stream (&pp);
	  print_insn (&pp, con, /*verbose=*/false);
	  /* Escape the insn text so it is valid inside a dot record label.  */
	  pp_write_text_as_dot_label_to_stream (&pp, /*for_record=*/true);
	  pp_write_text_to_stream (&pp);

	  /* Dump instruction attributes.  */
	  pp_printf (&pp, "|{ uid:%d | luid:%d | prio:%d }}\",shape=record]\n",
		     INSN_UID (con), INSN_LUID (con), INSN_PRIORITY (con));

	  /* Dump all deps.  Edge color encodes the dependence type:
	     black = true, orange = output/anti, blue = control.  */
	  FOR_EACH_DEP (con, SD_LIST_BACK, sd_it, dep)
	    {
	      int weight = 0;
	      const char *color;
	      pro = DEP_PRO (dep);

	      switch (DEP_TYPE (dep))
		{
		case REG_DEP_TRUE:
		  color = "black";
		  weight = 1;
		  break;
		case REG_DEP_OUTPUT:
		case REG_DEP_ANTI:
		  color = "orange";
		  break;
		case REG_DEP_CONTROL:
		  color = "blue";
		  break;
		default:
		  gcc_unreachable ();
		}

	      pp_printf (&pp, "\t%d -> %d [color=%s",
			 INSN_UID (pro), INSN_UID (con), color);
	      /* Only annotate edges whose latency is non-zero.  */
	      if (int cost = dep_cost (dep))
		pp_printf (&pp, ",label=%d", cost);
	      pp_printf (&pp, ",weight=%d", weight);
	      pp_printf (&pp, "];\n");
	    }
	}
      pp_printf (&pp, "}\n");
    }

  pp_printf (&pp, "}\n");
  pp_flush (&pp);
}
2971 :
2972 : /* Dump dependency graph for the current region to a file using dot syntax. */
2973 :
2974 : DEBUG_FUNCTION void
2975 0 : dump_rgn_dependencies_dot (const char *fname)
2976 : {
2977 0 : FILE *fp;
2978 :
2979 0 : fp = fopen (fname, "w");
2980 0 : if (!fp)
2981 : {
2982 0 : perror ("fopen");
2983 0 : return;
2984 : }
2985 :
2986 0 : dump_rgn_dependencies_dot (fp);
2987 0 : fclose (fp);
2988 : }
2989 :
2990 :
2991 : /* Returns true if all the basic blocks of the current region have
2992 : NOTE_DISABLE_SCHED_OF_BLOCK which means not to schedule that region. */
2993 : bool
2994 10331158 : sched_is_disabled_for_current_region_p (void)
2995 : {
2996 10331158 : int bb;
2997 :
2998 10331158 : for (bb = 0; bb < current_nr_blocks; bb++)
2999 10331158 : if (!(BASIC_BLOCK_FOR_FN (cfun,
3000 10331158 : BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
3001 : return false;
3002 :
3003 : return true;
3004 : }
3005 :
3006 : /* Free all region dependencies saved in INSN_BACK_DEPS and
3007 : INSN_RESOLVED_BACK_DEPS. The Haifa scheduler does this on the fly
3008 : when scheduling, so this function is supposed to be called from
3009 : the selective scheduling only. */
3010 : void
3011 770 : free_rgn_deps (void)
3012 : {
3013 770 : int bb;
3014 :
3015 1820 : for (bb = 0; bb < current_nr_blocks; bb++)
3016 : {
3017 1050 : rtx_insn *head, *tail;
3018 :
3019 1050 : gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
3020 1050 : get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
3021 :
3022 1050 : sched_free_deps (head, tail, false);
3023 : }
3024 770 : }
3025 :
/* Number of real insns in the region currently being scheduled:
   reset in schedule_region, accumulated by compute_priorities, and
   adjusted by rgn_add_remove_insn when insns are added or removed.  */
static int rgn_n_insns;
3027 :
3028 : /* Compute insn priority for a current region. */
3029 : void
3030 10331158 : compute_priorities (void)
3031 : {
3032 10331158 : int bb;
3033 :
3034 10331158 : current_sched_info->sched_max_insns_priority = 0;
3035 20662630 : for (bb = 0; bb < current_nr_blocks; bb++)
3036 : {
3037 10331472 : rtx_insn *head, *tail;
3038 :
3039 10331472 : gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
3040 10331472 : get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
3041 :
3042 10331472 : if (no_real_insns_p (head, tail))
3043 17290 : continue;
3044 :
3045 10314182 : rgn_n_insns += set_priorities (head, tail);
3046 : }
3047 10331158 : current_sched_info->sched_max_insns_priority++;
3048 10331158 : }
3049 :
3050 : /* (Re-)initialize the arrays of DFA states at the end of each basic block.
3051 :
3052 : SAVED_LAST_BASIC_BLOCK is the previous length of the arrays. It must be
3053 : zero for the first call to this function, to allocate the arrays for the
3054 : first time.
3055 :
3056 : This function is called once during initialization of the scheduler, and
3057 : called again to resize the arrays if new basic blocks have been created,
3058 : for example for speculation recovery code. */
3059 :
3060 : static void
3061 11277318 : realloc_bb_state_array (int saved_last_basic_block)
3062 : {
3063 11277318 : char *old_bb_state_array = bb_state_array;
3064 11277318 : size_t lbb = (size_t) last_basic_block_for_fn (cfun);
3065 11277318 : size_t slbb = (size_t) saved_last_basic_block;
3066 :
3067 : /* Nothing to do if nothing changed since the last time this was called. */
3068 11277318 : if (saved_last_basic_block == last_basic_block_for_fn (cfun))
3069 : return;
3070 :
3071 : /* The selective scheduler doesn't use the state arrays. */
3072 964155 : if (sel_sched_p ())
3073 : {
3074 131 : gcc_assert (bb_state_array == NULL && bb_state == NULL);
3075 : return;
3076 : }
3077 :
3078 964024 : gcc_checking_assert (saved_last_basic_block == 0
3079 : || (bb_state_array != NULL && bb_state != NULL));
3080 :
3081 964024 : bb_state_array = XRESIZEVEC (char, bb_state_array, lbb * dfa_state_size);
3082 964024 : bb_state = XRESIZEVEC (state_t, bb_state, lbb);
3083 :
3084 : /* If BB_STATE_ARRAY has moved, fixup all the state pointers array.
3085 : Otherwise only fixup the newly allocated ones. For the state
3086 : array itself, only initialize the new entries. */
3087 964024 : bool bb_state_array_moved = (bb_state_array != old_bb_state_array);
3088 14186586 : for (size_t i = bb_state_array_moved ? 0 : slbb; i < lbb; i++)
3089 12258538 : bb_state[i] = (state_t) (bb_state_array + i * dfa_state_size);
3090 13222562 : for (size_t i = slbb; i < lbb; i++)
3091 12258538 : state_reset (bb_state[i]);
3092 : }
3093 :
3094 : /* Free the arrays of DFA states at the end of each basic block. */
3095 :
3096 : static void
3097 964155 : free_bb_state_array (void)
3098 : {
3099 964155 : free (bb_state_array);
3100 964155 : free (bb_state);
3101 964155 : bb_state_array = NULL;
3102 964155 : bb_state = NULL;
3103 964155 : }
3104 :
3105 : /* If LAST_BB falls through to another block B, record that B should
3106 : start with DFA start STATE. */
3107 :
3108 : static void
3109 10330422 : save_state_for_fallthru_edge (basic_block last_bb, state_t state)
3110 : {
3111 10330422 : edge f = find_fallthru_edge (last_bb->succs);
3112 10330422 : if (f
3113 10330422 : && (!f->probability.initialized_p ()
3114 6873494 : || (f->probability.to_reg_br_prob_base () * 100
3115 : / REG_BR_PROB_BASE
3116 6873494 : >= param_sched_state_edge_prob_cutoff)))
3117 : {
3118 6510731 : memcpy (bb_state[f->dest->index], state,
3119 : dfa_state_size);
3120 6510731 : if (sched_verbose >= 5)
3121 0 : fprintf (sched_dump, "saving state for edge %d->%d\n",
3122 0 : f->src->index, f->dest->index);
3123 : }
3124 10330422 : }
3125 :
3126 : /* Schedule a region. A region is either an inner loop, a loop-free
3127 : subroutine, or a single basic block. Each bb in the region is
3128 : scheduled after its flow predecessors. */
3129 :
static void
schedule_region (int rgn)
{
  int bb;
  /* Insns actually scheduled; checked against RGN_N_INSNS at the end.  */
  int sched_rgn_n_insns = 0;

  rgn_n_insns = 0;

  /* Do not support register pressure sensitive scheduling for the new regions
     as we don't update the liveness info for them.  */
  if (sched_pressure != SCHED_PRESSURE_NONE
      && rgn >= nr_regions_initial)
    {
      free_global_sched_pressure_data ();
      sched_pressure = SCHED_PRESSURE_NONE;
    }

  rgn_setup_region (rgn);

  /* Don't schedule region that is marked by
     NOTE_DISABLE_SCHED_OF_BLOCK.  */
  if (sched_is_disabled_for_current_region_p ())
    return;

  sched_rgn_compute_dependencies (rgn);

  sched_rgn_local_init (rgn);

  /* Set priorities.  */
  compute_priorities ();

  sched_extend_ready_list (rgn_n_insns);

  /* Collect per-block register pressure info before scheduling.  */
  if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
    {
      sched_init_region_reg_pressure_info ();
      for (bb = 0; bb < current_nr_blocks; bb++)
	{
	  basic_block first_bb, last_bb;
	  rtx_insn *head, *tail;

	  first_bb = EBB_FIRST_BB (bb);
	  last_bb = EBB_LAST_BB (bb);

	  get_ebb_head_tail (first_bb, last_bb, &head, &tail);

	  if (no_real_insns_p (head, tail))
	    {
	      gcc_assert (first_bb == last_bb);
	      continue;
	    }
	  sched_setup_bb_reg_pressure_info (first_bb, PREV_INSN (head));
	}
    }

  /* Now we can schedule all blocks.  */
  for (bb = 0; bb < current_nr_blocks; bb++)
    {
      basic_block first_bb, last_bb, curr_bb;
      rtx_insn *head, *tail;

      first_bb = EBB_FIRST_BB (bb);
      last_bb = EBB_LAST_BB (bb);

      get_ebb_head_tail (first_bb, last_bb, &head, &tail);

      if (no_real_insns_p (head, tail))
	{
	  gcc_assert (first_bb == last_bb);
	  /* An empty block still propagates its starting DFA state to a
	     fall-through successor.  */
	  save_state_for_fallthru_edge (last_bb, bb_state[first_bb->index]);
	  continue;
	}

      current_sched_info->prev_head = PREV_INSN (head);
      current_sched_info->next_tail = NEXT_INSN (tail);

      remove_notes (head, tail);

      unlink_bb_notes (first_bb, last_bb);

      target_bb = bb;

      gcc_assert (flag_schedule_interblock || current_nr_blocks == 1);
      current_sched_info->queue_must_finish_empty = current_nr_blocks == 1;

      curr_bb = first_bb;
      /* schedule_block may create recovery blocks; remember the old
	 count so the state arrays can be resized afterwards.  */
      int saved_last_basic_block = last_basic_block_for_fn (cfun);

      schedule_block (&curr_bb, bb_state[first_bb->index]);
      gcc_assert (EBB_FIRST_BB (bb) == first_bb);
      sched_rgn_n_insns += sched_n_insns;
      realloc_bb_state_array (saved_last_basic_block);
      save_state_for_fallthru_edge (last_bb, curr_state);

      /* Clean up.  */
      if (current_nr_blocks > 1)
	free_trg_info ();
    }

  /* Sanity check: verify that all region insns were scheduled.  */
  gcc_assert (sched_rgn_n_insns == rgn_n_insns);

  sched_finish_ready_list ();

  /* Done with this region.  */
  sched_rgn_local_finish ();

  /* Free dependencies.  */
  for (bb = 0; bb < current_nr_blocks; ++bb)
    free_block_dependencies (bb);

  gcc_assert (haifa_recovery_bb_ever_added_p
	      || deps_pools_are_empty_p ());
}
3244 :
3245 : /* Initialize data structures for region scheduling. */
3246 :
void
sched_rgn_init (bool single_blocks_p)
{
  /* Build the region table for the current function.  When
     SINGLE_BLOCKS_P is set, every basic block becomes its own region
     (schedule_insns passes reload_completed here).  */

  /* Convert param_min_spec_prob from a percentage to the
     REG_BR_PROB_BASE scale.  */
  min_spec_prob = ((param_min_spec_prob * REG_BR_PROB_BASE)
		   / 100);

  nr_inter = 0;
  nr_spec = 0;

  extend_regions ();

  /* ENTRY and EXIT never belong to a schedulable region.  */
  CONTAINING_RGN (ENTRY_BLOCK) = -1;
  CONTAINING_RGN (EXIT_BLOCK) = -1;

  /* First-time allocation of the per-block DFA state arrays.  */
  realloc_bb_state_array (0);

  /* Compute regions for scheduling.  */
  if (single_blocks_p
      || n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS + 1
      || !flag_schedule_interblock
      || is_cfg_nonregular ())
    {
      find_single_block_region (sel_sched_p ());
    }
  else
    {
      /* Compute the dominators and post dominators.  */
      if (!sel_sched_p ())
	calculate_dominance_info (CDI_DOMINATORS);

      /* Find regions.  */
      find_rgns ();

      if (sched_verbose >= 3)
	debug_regions ();

      /* For now.  This will move as more and more of haifa is converted
	 to using the cfg code.  */
      if (!sel_sched_p ())
	free_dominance_info (CDI_DOMINATORS);
    }

  gcc_assert (nr_regions > 0 && nr_regions <= n_basic_blocks_for_fn (cfun));

  /* Record the end of the last region so RGN_BLOCKS of the
     one-past-last region is valid.  */
  RGN_BLOCKS (nr_regions) = (RGN_BLOCKS (nr_regions - 1)
			     + RGN_NR_BLOCKS (nr_regions - 1));
  nr_regions_initial = nr_regions;
}
3295 :
3296 : /* Free data structures for region scheduling. */
3297 : void
3298 964155 : sched_rgn_finish (void)
3299 : {
3300 964155 : free_bb_state_array ();
3301 :
3302 : /* Reposition the prologue and epilogue notes in case we moved the
3303 : prologue/epilogue insns. */
3304 964155 : if (reload_completed)
3305 963926 : reposition_prologue_and_epilogue_notes ();
3306 :
3307 964155 : if (sched_verbose)
3308 : {
3309 24 : if (reload_completed == 0
3310 0 : && flag_schedule_interblock)
3311 : {
3312 0 : fprintf (sched_dump,
3313 : "\n;; Procedure interblock/speculative motions == %d/%d \n",
3314 : nr_inter, nr_spec);
3315 : }
3316 : else
3317 24 : gcc_assert (nr_inter <= 0);
3318 24 : fprintf (sched_dump, "\n\n");
3319 : }
3320 :
3321 964155 : nr_regions = 0;
3322 :
3323 964155 : free (rgn_table);
3324 964155 : rgn_table = NULL;
3325 :
3326 964155 : free (rgn_bb_table);
3327 964155 : rgn_bb_table = NULL;
3328 :
3329 964155 : free (block_to_bb);
3330 964155 : block_to_bb = NULL;
3331 :
3332 964155 : free (containing_rgn);
3333 964155 : containing_rgn = NULL;
3334 :
3335 964155 : free (ebb_head);
3336 964155 : ebb_head = NULL;
3337 964155 : }
3338 :
3339 : /* Setup global variables like CURRENT_BLOCKS and CURRENT_NR_BLOCK to
3340 : point to the region RGN. */
3341 : void
3342 10331363 : rgn_setup_region (int rgn)
3343 : {
3344 10331363 : int bb;
3345 :
3346 : /* Set variables for the current region. */
3347 10331363 : current_nr_blocks = RGN_NR_BLOCKS (rgn);
3348 10331363 : current_blocks = RGN_BLOCKS (rgn);
3349 :
3350 : /* EBB_HEAD is a region-scope structure. But we realloc it for
3351 : each region to save time/memory/something else.
3352 : See comments in add_block1, for what reasons we allocate +1 element. */
3353 10331363 : ebb_head = XRESIZEVEC (int, ebb_head, current_nr_blocks + 1);
3354 30996154 : for (bb = 0; bb <= current_nr_blocks; bb++)
3355 20664791 : ebb_head[bb] = current_blocks + bb;
3356 10331363 : }
3357 :
3358 : /* Compute instruction dependencies in region RGN. */
void
sched_rgn_compute_dependencies (int rgn)
{
  if (!RGN_DONT_CALC_DEPS (rgn))
    {
      int bb;

      /* Flag Haifa emulation while the selective scheduler computes its
	 dependencies through this code.  */
      if (sel_sched_p ())
	sched_emulate_haifa_p = 1;

      init_deps_global ();

      /* Initializations for region data dependence analysis.  */
      bb_deps = XNEWVEC (class deps_desc, current_nr_blocks);
      for (bb = 0; bb < current_nr_blocks; bb++)
	init_deps (bb_deps + bb, false);

      /* Initialize bitmap used in add_branch_dependences.  */
      insn_referenced = sbitmap_alloc (sched_max_luid);
      bitmap_clear (insn_referenced);

      /* Compute backward dependencies.  */
      for (bb = 0; bb < current_nr_blocks; bb++)
	compute_block_dependences (bb);

      /* Release the per-region analysis scratch data.  */
      sbitmap_free (insn_referenced);
      free_pending_lists ();
      finish_deps_global ();
      free (bb_deps);

      /* We don't want to recalculate this twice.  */
      RGN_DONT_CALC_DEPS (rgn) = 1;

      if (sel_sched_p ())
	sched_emulate_haifa_p = 0;
    }
  else
    /* (This is a recovery block.  It is always a single block region.)
       OR (We use selective scheduling.)  */
    gcc_assert (current_nr_blocks == 1 || sel_sched_p ());
}
3400 :
/* Init data structures for the current region: interblock
   probabilities, split edges, dominators, etc.  */
void
sched_rgn_local_init (int rgn)
{
  int bb;

  /* Compute interblock info: probabilities, split-edges, dominators, etc.  */
  if (current_nr_blocks > 1)
    {
      basic_block block;
      edge e;
      edge_iterator ei;

      prob = XNEWVEC (int, current_nr_blocks);

      dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
      bitmap_vector_clear (dom, current_nr_blocks);

      /* Use ->aux to implement EDGE_TO_BIT mapping.  */
      /* First pass: count the region's edges and assign each a bit
	 number via its ->aux field.  */
      rgn_nr_edges = 0;
      FOR_EACH_BB_FN (block, cfun)
	{
	  if (CONTAINING_RGN (block->index) != rgn)
	    continue;
	  FOR_EACH_EDGE (e, ei, block->succs)
	    SET_EDGE_TO_BIT (e, rgn_nr_edges++);
	}

      /* Second pass: record the edges themselves, in the same order as
	 the bit numbers just assigned.  */
      rgn_edges = XNEWVEC (edge, rgn_nr_edges);
      rgn_nr_edges = 0;
      FOR_EACH_BB_FN (block, cfun)
	{
	  if (CONTAINING_RGN (block->index) != rgn)
	    continue;
	  FOR_EACH_EDGE (e, ei, block->succs)
	    rgn_edges[rgn_nr_edges++] = e;
	}

      /* Split edges.  */
      pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
      bitmap_vector_clear (pot_split, current_nr_blocks);
      ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
      bitmap_vector_clear (ancestor_edges, current_nr_blocks);

      /* Compute probabilities, dominators, split_edges.  */
      for (bb = 0; bb < current_nr_blocks; bb++)
	compute_dom_prob_ps (bb);

      /* Cleanup ->aux used for EDGE_TO_BIT mapping.  */
      /* We don't need them anymore.  But we want to avoid duplication of
	 aux fields in the newly created edges.  */
      FOR_EACH_BB_FN (block, cfun)
	{
	  if (CONTAINING_RGN (block->index) != rgn)
	    continue;
	  FOR_EACH_EDGE (e, ei, block->succs)
	    e->aux = NULL;
	}
    }
}
3462 :
3463 : /* Free data computed for the finished region. */
3464 : void
3465 14 : sched_rgn_local_free (void)
3466 : {
3467 14 : free (prob);
3468 14 : sbitmap_vector_free (dom);
3469 14 : sbitmap_vector_free (pot_split);
3470 14 : sbitmap_vector_free (ancestor_edges);
3471 14 : free (rgn_edges);
3472 14 : }
3473 :
3474 : /* Free data computed for the finished region. */
3475 : void
3476 10330388 : sched_rgn_local_finish (void)
3477 : {
3478 10330388 : if (current_nr_blocks > 1 && !sel_sched_p ())
3479 : {
3480 14 : sched_rgn_local_free ();
3481 : }
3482 10330388 : }
3483 :
3484 : /* Setup scheduler infos. */
3485 : void
3486 964925 : rgn_setup_common_sched_info (void)
3487 : {
3488 964925 : memcpy (&rgn_common_sched_info, &haifa_common_sched_info,
3489 : sizeof (rgn_common_sched_info));
3490 :
3491 964925 : rgn_common_sched_info.fix_recovery_cfg = rgn_fix_recovery_cfg;
3492 964925 : rgn_common_sched_info.add_block = rgn_add_block;
3493 964925 : rgn_common_sched_info.estimate_number_of_insns
3494 964925 : = rgn_estimate_number_of_insns;
3495 964925 : rgn_common_sched_info.sched_pass_id = SCHED_RGN_PASS;
3496 :
3497 964925 : common_sched_info = &rgn_common_sched_info;
3498 964925 : }
3499 :
3500 : /* Setup all *_sched_info structures (for the Haifa frontend
3501 : and for the dependence analysis) in the interblock scheduler. */
3502 : void
3503 964794 : rgn_setup_sched_infos (void)
3504 : {
3505 964794 : if (!sel_sched_p ())
3506 964024 : memcpy (&rgn_sched_deps_info, &rgn_const_sched_deps_info,
3507 : sizeof (rgn_sched_deps_info));
3508 : else
3509 770 : memcpy (&rgn_sched_deps_info, &rgn_const_sel_sched_deps_info,
3510 : sizeof (rgn_sched_deps_info));
3511 :
3512 964794 : sched_deps_info = &rgn_sched_deps_info;
3513 :
3514 964794 : memcpy (&rgn_sched_info, &rgn_const_sched_info, sizeof (rgn_sched_info));
3515 964794 : current_sched_info = &rgn_sched_info;
3516 964794 : }
3517 :
3518 : /* The one entry point in this file. */
3519 : void
3520 964024 : schedule_insns (void)
3521 : {
3522 964024 : int rgn;
3523 :
3524 : /* Taking care of this degenerate case makes the rest of
3525 : this code simpler. */
3526 964024 : if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3527 : return;
3528 :
3529 964024 : rgn_setup_common_sched_info ();
3530 964024 : rgn_setup_sched_infos ();
3531 :
3532 964024 : haifa_sched_init ();
3533 964024 : sched_rgn_init (reload_completed);
3534 :
3535 964024 : bitmap_initialize (¬_in_df, &bitmap_default_obstack);
3536 :
3537 : /* Schedule every region in the subroutine. */
3538 11294412 : for (rgn = 0; rgn < nr_regions; rgn++)
3539 10330388 : if (dbg_cnt (sched_region))
3540 10330388 : schedule_region (rgn);
3541 :
3542 : /* Clean up. */
3543 964024 : sched_rgn_finish ();
3544 964024 : bitmap_release (¬_in_df);
3545 :
3546 964024 : haifa_sched_finish ();
3547 : }
3548 :
3549 : /* INSN has been added to/removed from current region. */
3550 : static void
3551 0 : rgn_add_remove_insn (rtx_insn *insn, int remove_p)
3552 : {
3553 0 : if (!remove_p)
3554 0 : rgn_n_insns++;
3555 : else
3556 0 : rgn_n_insns--;
3557 :
3558 0 : if (INSN_BB (insn) == target_bb)
3559 : {
3560 0 : if (!remove_p)
3561 0 : target_n_insns++;
3562 : else
3563 0 : target_n_insns--;
3564 : }
3565 0 : }
3566 :
3567 : /* Extend internal data structures. */
3568 : void
3569 964273 : extend_regions (void)
3570 : {
3571 964273 : rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks_for_fn (cfun));
3572 964273 : rgn_bb_table = XRESIZEVEC (int, rgn_bb_table,
3573 : n_basic_blocks_for_fn (cfun));
3574 964273 : block_to_bb = XRESIZEVEC (int, block_to_bb,
3575 : last_basic_block_for_fn (cfun));
3576 964273 : containing_rgn = XRESIZEVEC (int, containing_rgn,
3577 : last_basic_block_for_fn (cfun));
3578 964273 : }
3579 :
3580 : void
3581 0 : rgn_make_new_region_out_of_new_block (basic_block bb)
3582 : {
3583 0 : int i;
3584 :
3585 0 : i = RGN_BLOCKS (nr_regions);
3586 : /* I - first free position in rgn_bb_table. */
3587 :
3588 0 : rgn_bb_table[i] = bb->index;
3589 0 : RGN_NR_BLOCKS (nr_regions) = 1;
3590 0 : RGN_HAS_REAL_EBB (nr_regions) = 0;
3591 0 : RGN_DONT_CALC_DEPS (nr_regions) = 0;
3592 0 : CONTAINING_RGN (bb->index) = nr_regions;
3593 0 : BLOCK_TO_BB (bb->index) = 0;
3594 :
3595 0 : nr_regions++;
3596 :
3597 0 : RGN_BLOCKS (nr_regions) = i + 1;
3598 0 : }
3599 :
3600 : /* BB was added to ebb after AFTER. */
/* BB was added to ebb after AFTER.  */
static void
rgn_add_block (basic_block bb, basic_block after)
{
  extend_regions ();
  /* The new block has no dataflow info yet.  */
  bitmap_set_bit (&not_in_df, bb->index);

  if (after == 0 || after == EXIT_BLOCK_PTR_FOR_FN (cfun))
    {
      /* No meaningful predecessor: put BB into a region of its own.  */
      rgn_make_new_region_out_of_new_block (bb);
      RGN_DONT_CALC_DEPS (nr_regions - 1) = (after
					     == EXIT_BLOCK_PTR_FOR_FN (cfun));
    }
  else
    {
      int i, pos;

      /* We need to fix rgn_table, block_to_bb, containing_rgn
	 and ebb_head.  */

      BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index);

      /* We extend ebb_head to one more position to
	 easily find the last position of the last ebb in
	 the current region.  Thus, ebb_head[BLOCK_TO_BB (after) + 1]
	 is _always_ valid for access.  */

      i = BLOCK_TO_BB (after->index) + 1;
      pos = ebb_head[i] - 1;
      /* Now POS is the index of the last block in the region.  */

      /* Find index of basic block AFTER.  */
      for (; rgn_bb_table[pos] != after->index; pos--)
	;

      pos++;
      gcc_assert (pos > ebb_head[i - 1]);

      /* i - ebb right after "AFTER".  */
      /* ebb_head[i] - VALID.  */

      /* Source position: ebb_head[i]
	 Destination position: ebb_head[i] + 1
	 Last position:
	   RGN_BLOCKS (nr_regions) - 1
	 Number of elements to copy: (last_position) - (source_position) + 1
       */

      /* Shift the tail of rgn_bb_table right by one to open a slot for
	 BB directly after AFTER.  */
      memmove (rgn_bb_table + pos + 1,
	       rgn_bb_table + pos,
	       ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1)
	       * sizeof (*rgn_bb_table));

      rgn_bb_table[pos] = bb->index;

      /* All later ebbs in this region start one slot further right.  */
      for (; i <= current_nr_blocks; i++)
	ebb_head [i]++;

      i = CONTAINING_RGN (after->index);
      CONTAINING_RGN (bb->index) = i;

      RGN_HAS_REAL_EBB (i) = 1;

      /* All regions after AFTER's region also start one slot later.  */
      for (++i; i <= nr_regions; i++)
	RGN_BLOCKS (i)++;
    }
}
3667 :
3668 : /* Fix internal data after interblock movement of jump instruction.
3669 : For parameter meaning please refer to
3670 : sched-int.h: struct sched_info: fix_recovery_cfg. */
static void
rgn_fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti)
{
  int old_pos, new_pos, i;

  /* The block after the check now belongs to the same ebb as BBI.  */
  BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi);

  /* Locate CHECK_BB_NEXTI within its (old) ebb, scanning backwards
     from the ebb's last slot.  */
  for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1;
       rgn_bb_table[old_pos] != check_bb_nexti;
       old_pos--)
    ;
  gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]);

  /* Locate BBI; its new position is the slot right after it.  */
  for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1;
       rgn_bb_table[new_pos] != bbi;
       new_pos--)
    ;
  new_pos++;
  gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]);

  gcc_assert (new_pos < old_pos);

  /* Shift the blocks between the two positions right by one, then drop
     CHECK_BB_NEXTI into the freed slot after BBI.  */
  memmove (rgn_bb_table + new_pos + 1,
	   rgn_bb_table + new_pos,
	   (old_pos - new_pos) * sizeof (*rgn_bb_table));

  rgn_bb_table[new_pos] = check_bb_nexti;

  /* Every ebb between BBI's and CHECK_BBI's starts one slot later.  */
  for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++)
    ebb_head[i]++;
}
3702 :
3703 : /* Return next block in ebb chain. For parameter meaning please refer to
3704 : sched-int.h: struct sched_info: advance_target_bb. */
3705 : static basic_block
3706 108619875 : advance_target_bb (basic_block bb, rtx_insn *insn)
3707 : {
3708 108619875 : if (insn)
3709 : return 0;
3710 :
3711 0 : gcc_assert (BLOCK_TO_BB (bb->index) == target_bb
3712 : && BLOCK_TO_BB (bb->next_bb->index) == target_bb);
3713 : return bb->next_bb;
3714 : }
3715 :
3716 : #endif
3717 :
3718 : /* Run instruction scheduler. */
static unsigned int
rest_of_handle_live_range_shrinkage (void)
{
#ifdef INSN_SCHEDULING
  /* Run the scheduler in live-range-shrinkage mode with interblock
     scheduling temporarily switched off, restoring the flag after.  */
  initialize_live_range_shrinkage ();
  const int saved_interblock = flag_schedule_interblock;
  flag_schedule_interblock = false;
  schedule_insns ();
  flag_schedule_interblock = saved_interblock;
  finish_live_range_shrinkage ();
#endif
  return 0;
}
3734 :
3735 : /* Run instruction scheduler. */
static unsigned int
rest_of_handle_sched (void)
{
#ifdef INSN_SCHEDULING
  /* Prefer the selective scheduler when it is enabled and not skipped.  */
  if (flag_selective_scheduling
      && ! maybe_skip_selective_scheduling ())
    run_selective_scheduling ();
  else
    schedule_insns ();
#endif
  return 0;
}
3748 :
3749 : /* Run second scheduling pass after reload. */
static unsigned int
rest_of_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  if (flag_selective_scheduling2
      && ! maybe_skip_selective_scheduling ())
    run_selective_scheduling ();
  /* Do control and data sched analysis again,
     and write some more of the results to dump file.  */
  else if (flag_sched2_use_superblocks)
    schedule_ebbs ();
  else
    schedule_insns ();
#endif
  return 0;
}
3769 :
static unsigned int
rest_of_handle_sched_fusion (void)
{
#ifdef INSN_SCHEDULING
  /* Run the scheduler with the fusion mode flag raised for the
     duration of the pass.  */
  sched_fusion = true;
  schedule_insns ();
  sched_fusion = false;
#endif
  return 0;
}
3780 :
namespace {

/* Pass descriptor for the live-range shrinkage pass, consumed by the
   pass manager.  */
const pass_data pass_data_live_range_shrinkage =
{
  RTL_PASS, /* type */
  "lr_shrinkage", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LIVE_RANGE_SHRINKAGE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_live_range_shrinkage : public rtl_opt_pass
{
public:
  pass_live_range_shrinkage(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_live_range_shrinkage, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -flive-range-shrinkage is enabled and the target
     supports insn scheduling.  */
  bool gate (function *) final override
  {
#ifdef INSN_SCHEDULING
    return flag_live_range_shrinkage;
#else
    return 0;
#endif
  }

  unsigned int execute (function *) final override
  {
    return rest_of_handle_live_range_shrinkage ();
  }

}; // class pass_live_range_shrinkage

} // anon namespace
3821 :
/* Factory used by the pass manager to instantiate the
   live-range-shrinkage pass.  Caller owns the returned object.  */
rtl_opt_pass *
make_pass_live_range_shrinkage (gcc::context *ctxt)
{
  return new pass_live_range_shrinkage (ctxt);
}
3827 :
namespace {

/* Pass descriptor for the first scheduling pass (dump name "sched1",
   run before register allocation).  */
const pass_data pass_data_sched =
{
  RTL_PASS, /* type */
  "sched1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_SCHED, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

/* RTL pass wrapping rest_of_handle_sched.  */
class pass_sched : public rtl_opt_pass
{
public:
  pass_sched (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_sched, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override
    {
      return rest_of_handle_sched ();
    }

}; // class pass_sched

/* Enable sched1 only when optimizing, -fschedule-insns is on, and the
   sched_func debug counter has not cut this function off.  */
bool
pass_sched::gate (function *)
{
#ifdef INSN_SCHEDULING
  return optimize > 0 && flag_schedule_insns && dbg_cnt (sched_func);
#else
  return 0;
#endif
}

} // anon namespace
3870 :
/* Factory used by the pass manager to instantiate the sched1 pass.
   Caller owns the returned object.  */
rtl_opt_pass *
make_pass_sched (gcc::context *ctxt)
{
  return new pass_sched (ctxt);
}
3876 :
namespace {

/* Pass descriptor for the second scheduling pass (dump name "sched2",
   run after reload).  */
const pass_data pass_data_sched2 =
{
  RTL_PASS, /* type */
  "sched2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_SCHED2, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

/* RTL pass wrapping rest_of_handle_sched2.  */
class pass_sched2 : public rtl_opt_pass
{
public:
  pass_sched2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_sched2, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override
    {
      return rest_of_handle_sched2 ();
    }

}; // class pass_sched2

/* Enable sched2 only when optimizing with -fschedule-insns2, unless
   the target delays it (targetm.delay_sched2) or the sched2_func debug
   counter cuts this function off.  */
bool
pass_sched2::gate (function *)
{
#ifdef INSN_SCHEDULING
  return optimize > 0 && flag_schedule_insns_after_reload
    && !targetm.delay_sched2 && dbg_cnt (sched2_func);
#else
  return 0;
#endif
}

} // anon namespace
3920 :
/* Factory used by the pass manager to instantiate the sched2 pass.
   Caller owns the returned object.  */
rtl_opt_pass *
make_pass_sched2 (gcc::context *ctxt)
{
  return new pass_sched2 (ctxt);
}
3926 :
namespace {

/* Pass descriptor for the scheduling-fusion pass (dump name
   "sched_fusion").  */
const pass_data pass_data_sched_fusion =
{
  RTL_PASS, /* type */
  "sched_fusion", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_SCHED_FUSION, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

/* RTL pass wrapping rest_of_handle_sched_fusion.  */
class pass_sched_fusion : public rtl_opt_pass
{
public:
  pass_sched_fusion (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_sched_fusion, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override
    {
      return rest_of_handle_sched_fusion ();
    }

}; // class pass_sched_fusion

/* Enable only when optimizing, -fschedule-fusion is on, and the target
   provides a fusion_priority hook.  */
bool
pass_sched_fusion::gate (function *)
{
#ifdef INSN_SCHEDULING
  /* Scheduling fusion relies on peephole2 to do real fusion work,
     so only enable it if peephole2 is in effect.  */
  return (optimize > 0 && flag_peephole2
	  && flag_schedule_fusion && targetm.sched.fusion_priority != NULL);
#else
  return 0;
#endif
}

} // anon namespace
3972 :
/* Factory used by the pass manager to instantiate the scheduling-fusion
   pass.  Caller owns the returned object.  */
rtl_opt_pass *
make_pass_sched_fusion (gcc::context *ctxt)
{
  return new pass_sched_fusion (ctxt);
}
3978 :
3979 : #if __GNUC__ >= 10
3980 : # pragma GCC diagnostic pop
3981 : #endif
|