Line data Source code
1 : /* Basic block reordering routines for the GNU compiler.
2 : Copyright (C) 2000-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3, or (at your option)
9 : any later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 : or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 : License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* This file contains the "reorder blocks" pass, which changes the control
21 : flow of a function to encounter fewer branches; the "partition blocks"
22 : pass, which divides the basic blocks into "hot" and "cold" partitions,
23 : which are kept separate; and the "duplicate computed gotos" pass, which
24 : duplicates blocks ending in an indirect jump.
25 :
26 : There are two algorithms for "reorder blocks": the "simple" algorithm,
27 : which just rearranges blocks, trying to minimize the number of executed
28 : unconditional branches; and the "software trace cache" algorithm, which
29 : also copies code, and in general tries a lot harder to have long linear
30 : pieces of machine code executed. This algorithm is described next. */
31 :
32 : /* This (greedy) algorithm constructs traces in several rounds.
33 : The construction starts from "seeds". The seed for the first round
34 : is the entry point of the function. When there are more than one seed,
35 : the one with the lowest key in the heap is selected first (see bb_to_key).
36 : Then the algorithm repeatedly adds the most probable successor to the end
37 : of a trace. Finally it connects the traces.
38 :
39 : There are two parameters: Branch Threshold and Exec Threshold.
40 : If the probability of an edge to a successor of the current basic block is
41 : lower than Branch Threshold or its count is lower than Exec Threshold,
42 : then the successor will be the seed in one of the next rounds.
43 : Each round has these parameters lower than the previous one.
44 : The last round has to have these parameters set to zero so that the
45 : remaining blocks are picked up.
46 :
47 : The algorithm selects the most probable successor from all unvisited
48 : successors and successors that have been added to this trace.
49 : The other successors (that have not been "sent" to the next round) will be
50 : other seeds for this round and the secondary traces will start from them.
51 : If the successor has not been visited in this trace, it is added to the
52 : trace (however, there is some heuristic for simple branches).
53 : If the successor has been visited in this trace, a loop has been found.
54 : If the loop has many iterations, the loop is rotated so that the source
55 : block of the most probable edge going out of the loop is the last block
56 : of the trace.
57 : If the loop has few iterations and there is no edge from the last block of
58 : the loop going out of the loop, the loop header is duplicated.
59 :
60 : When connecting traces, the algorithm first checks whether there is an edge
61 : from the last block of a trace to the first block of another trace.
62 : When there are still some unconnected traces it checks whether there exists
63 : a basic block BB such that BB is a successor of the last block of a trace
64 : and BB is a predecessor of the first block of another trace. In this case,
65 : BB is duplicated, added at the end of the first trace and the traces are
66 : connected through it.
67 : The rest of traces are simply connected so there will be a jump to the
68 : beginning of the rest of traces.
69 :
70 : The above description is for the full algorithm, which is used when the
71 : function is optimized for speed. When the function is optimized for size,
72 : in order to reduce long jumps and connect more fallthru edges, the
73 : algorithm is modified as follows:
74 : (1) Break long traces to short ones. A trace is broken at a block that has
75 : multiple predecessors/successors during trace discovery. When connecting
76 : traces, only connect Trace n with Trace n + 1. This change reduces most
77 : long jumps compared with the above algorithm.
78 : (2) Ignore the edge probability and count for fallthru edges.
79 : (3) Keep the original order of blocks when there is no chance to fall
80 : through. We rely on the results of cfg_cleanup.
81 :
82 : To implement the change for code size optimization, block's index is
83 : selected as the key and all traces are found in one round.
84 :
85 : References:
86 :
87 : "Software Trace Cache"
88 : A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
89 : http://citeseer.nj.nec.com/15361.html
90 :
91 : */
92 :
93 : #include "config.h"
94 : #include "system.h"
95 : #include "coretypes.h"
96 : #include "backend.h"
97 : #include "target.h"
98 : #include "rtl.h"
99 : #include "tree.h"
100 : #include "cfghooks.h"
101 : #include "df.h"
102 : #include "memmodel.h"
103 : #include "optabs.h"
104 : #include "regs.h"
105 : #include "emit-rtl.h"
106 : #include "output.h"
107 : #include "expr.h"
108 : #include "tree-pass.h"
109 : #include "cfgrtl.h"
110 : #include "cfganal.h"
111 : #include "cfgbuild.h"
112 : #include "cfgcleanup.h"
113 : #include "bb-reorder.h"
114 : #include "except.h"
115 : #include "alloc-pool.h"
116 : #include "fibonacci_heap.h"
117 : #include "stringpool.h"
118 : #include "attribs.h"
119 : #include "common/common-target.h"
120 :
121 : /* The number of rounds. In most cases there will only be 4 rounds, but
122 : when partitioning hot and cold basic blocks into separate sections of
123 : the object file there will be an extra round. */
124 : #define N_ROUNDS 5
125 :
126 : struct target_bb_reorder default_target_bb_reorder;
127 : #if SWITCHABLE_TARGET
128 : struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
129 : #endif
130 :
131 : #define uncond_jump_length \
132 : (this_target_bb_reorder->x_uncond_jump_length)
133 :
134 : /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
135 : static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
136 :
137 : /* Exec thresholds in thousandths (per mille) of the count of bb 0. */
138 : static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
139 :
140 : /* If edge count is lower than DUPLICATION_THRESHOLD per mille of entry
141 : block the edge destination is not duplicated while connecting traces. */
142 : #define DUPLICATION_THRESHOLD 100
143 :
144 : typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
145 : typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
146 :
147 : /* Structure to hold needed information for each basic block. */
148 : struct bbro_basic_block_data
149 : {
150 : /* Which trace is the bb start of (-1 means it is not a start of any). */
151 : int start_of_trace;
152 :
153 : /* Which trace is the bb end of (-1 means it is not an end of any). */
154 : int end_of_trace;
155 :
156 : /* Which trace is the bb in? */
157 : int in_trace;
158 :
159 : /* Which trace was this bb visited in? */
160 : int visited;
161 :
162 : /* Cached maximum frequency of interesting incoming edges.
163 : Minus one means not yet computed. */
164 : int priority;
165 :
166 : /* Which heap is BB in (if any)? */
167 : bb_heap_t *heap;
168 :
169 : /* Which heap node is BB in (if any)? */
170 : bb_heap_node_t *node;
171 : };
172 :
173 : /* The current size of the following dynamic array. */
174 : static int array_size;
175 :
176 : /* The array which holds needed information for basic blocks. */
177 : static bbro_basic_block_data *bbd;
178 :
179 : /* To avoid frequent reallocation the size of arrays is greater than needed,
180 : the number of elements is (not less than) 1.25 * size_wanted. */
181 : #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
182 :
183 : /* Free the memory and set the pointer to NULL. */
184 : #define FREE(P) (gcc_assert (P), free (P), P = 0)
185 :
186 : /* Structure for holding information about a trace. */
187 : struct trace
188 : {
189 : /* First and last basic block of the trace. */
190 : basic_block first, last;
191 :
192 : /* The round of the STC creation which this trace was found in. */
193 : int round;
194 :
195 : /* The length (i.e. the number of basic blocks) of the trace. */
196 : int length;
197 : };
198 :
199 : /* Maximum count of one of the entry blocks. */
200 : static profile_count max_entry_count;
201 :
202 : /* Local function prototypes. */
203 : static void find_traces_1_round (int, profile_count, struct trace *, int *,
204 : int, bb_heap_t **, int);
205 : static basic_block copy_bb (basic_block, edge, basic_block, int);
206 : static long bb_to_key (basic_block);
207 : static bool better_edge_p (const_basic_block, const_edge, profile_probability,
208 : profile_count, profile_probability, profile_count,
209 : const_edge);
210 : static bool copy_bb_p (const_basic_block, int);
211 :
212 : /* Return the trace number in which BB was visited. */
213 :
214 : static int
215 37830467 : bb_visited_trace (const_basic_block bb)
216 : {
217 37830467 : gcc_assert (bb->index < array_size);
218 37830467 : return bbd[bb->index].visited;
219 : }
220 :
221 : /* This function marks BB that it was visited in trace number TRACE. */
222 :
223 : static void
224 10068217 : mark_bb_visited (basic_block bb, int trace)
225 : {
226 10068217 : bbd[bb->index].visited = trace;
227 10068217 : if (bbd[bb->index].heap)
228 : {
229 390583 : bbd[bb->index].heap->delete_node (bbd[bb->index].node);
230 390583 : bbd[bb->index].heap = NULL;
231 390583 : bbd[bb->index].node = NULL;
232 : }
233 10068217 : }
234 :
235 : /* Check to see if bb should be pushed into the next round of trace
236 : collections or not. Reasons for pushing the block forward are 1).
237 : If the block is cold, we are doing partitioning, and there will be
238 : another round (cold partition blocks are not supposed to be
239 : collected into traces until the very last round); or 2). There will
240 : be another round, and the basic block is not "hot enough" for the
241 : current round of trace collection. */
242 :
243 : static bool
244 8725473 : push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
245 : profile_count count_th)
246 : {
247 8725473 : bool there_exists_another_round;
248 8725473 : bool block_not_hot_enough;
249 :
250 8725473 : there_exists_another_round = round < number_of_rounds - 1;
251 :
252 8725473 : block_not_hot_enough = (bb->count < count_th
253 8725473 : || probably_never_executed_bb_p (cfun, bb));
254 :
255 4188672 : if (there_exists_another_round
256 : && block_not_hot_enough)
257 : return true;
258 : else
259 5291854 : return false;
260 : }
261 :
262 : /* Find the traces for Software Trace Cache. Chain each trace through
263 : RBI()->next. Store the number of traces to N_TRACES and description of
264 : traces to TRACES. */
265 :
266 : static void
267 598665 : find_traces (int *n_traces, struct trace *traces)
268 : {
269 598665 : int i;
270 598665 : int number_of_rounds;
271 598665 : edge e;
272 598665 : edge_iterator ei;
273 598665 : bb_heap_t *heap = new bb_heap_t (LONG_MIN);
274 :
275 : /* Add one extra round of trace collection when partitioning hot/cold
276 : basic blocks into separate sections. The last round is for all the
277 : cold blocks (and ONLY the cold blocks). */
278 :
279 598665 : number_of_rounds = N_ROUNDS - 1;
280 :
281 : /* Insert entry points of function into heap. */
282 598665 : max_entry_count = profile_count::zero ();
283 1197330 : FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
284 : {
285 598665 : bbd[e->dest->index].heap = heap;
286 598665 : bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
287 598665 : if (e->dest->count > max_entry_count)
288 597626 : max_entry_count = e->dest->count;
289 : }
290 :
291 : /* Find the traces. */
292 2993325 : for (i = 0; i < number_of_rounds; i++)
293 : {
294 2394660 : profile_count count_threshold;
295 :
296 2394660 : if (dump_file)
297 92 : fprintf (dump_file, "STC - round %d\n", i + 1);
298 :
299 2394660 : count_threshold = max_entry_count.apply_scale (exec_threshold[i], 1000);
300 :
301 2394660 : find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
302 : count_threshold, traces, n_traces, i, &heap,
303 : number_of_rounds);
304 : }
305 598665 : delete heap;
306 :
307 598665 : if (dump_file)
308 : {
309 115 : for (i = 0; i < *n_traces; i++)
310 : {
311 92 : basic_block bb;
312 92 : fprintf (dump_file, "Trace %d (round %d): ", i + 1,
313 92 : traces[i].round + 1);
314 92 : for (bb = traces[i].first;
315 188 : bb != traces[i].last;
316 96 : bb = (basic_block) bb->aux)
317 : {
318 96 : fprintf (dump_file, "%d [", bb->index);
319 96 : bb->count.dump (dump_file);
320 96 : fprintf (dump_file, "] ");
321 : }
322 92 : fprintf (dump_file, "%d [", bb->index);
323 92 : bb->count.dump (dump_file);
324 92 : fprintf (dump_file, "]\n");
325 : }
326 23 : fflush (dump_file);
327 : }
328 598665 : }
329 :
330 : /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
331 : (with sequential number TRACE_N). */
332 :
333 : static basic_block
334 162991 : rotate_loop (edge back_edge, struct trace *trace, int trace_n)
335 : {
336 162991 : basic_block bb;
337 :
338 : /* Information about the best end (end after rotation) of the loop. */
339 162991 : basic_block best_bb = NULL;
340 162991 : edge best_edge = NULL;
341 162991 : profile_count best_count = profile_count::uninitialized ();
342 : /* The best edge is preferred when its destination is not visited yet
343 : or is a start block of some trace. */
344 162991 : bool is_preferred = false;
345 :
346 : /* Find the most frequent edge that goes out from current trace. */
347 162991 : bb = back_edge->dest;
348 631681 : do
349 : {
350 631681 : edge e;
351 631681 : edge_iterator ei;
352 :
353 1773365 : FOR_EACH_EDGE (e, ei, bb->succs)
354 1141684 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
355 1141684 : && bb_visited_trace (e->dest) != trace_n
356 458049 : && (e->flags & EDGE_CAN_FALLTHRU)
357 1562773 : && !(e->flags & EDGE_COMPLEX))
358 : {
359 421089 : if (is_preferred)
360 : {
361 : /* The best edge is preferred. */
362 255474 : if (!bb_visited_trace (e->dest)
363 255474 : || bbd[e->dest->index].start_of_trace >= 0)
364 : {
365 : /* The current edge E is also preferred. */
366 251384 : if (e->count () > best_count)
367 : {
368 63043 : best_count = e->count ();
369 63043 : best_edge = e;
370 63043 : best_bb = bb;
371 : }
372 : }
373 : }
374 : else
375 : {
376 165615 : if (!bb_visited_trace (e->dest)
377 165615 : || bbd[e->dest->index].start_of_trace >= 0)
378 : {
379 : /* The current edge E is preferred. */
380 161723 : is_preferred = true;
381 161723 : best_count = e->count ();
382 161723 : best_edge = e;
383 161723 : best_bb = bb;
384 : }
385 : else
386 : {
387 3892 : if (!best_edge || e->count () > best_count)
388 : {
389 3225 : best_count = e->count ();
390 3225 : best_edge = e;
391 3225 : best_bb = bb;
392 : }
393 : }
394 : }
395 : }
396 631681 : bb = (basic_block) bb->aux;
397 : }
398 631681 : while (bb != back_edge->dest);
399 :
400 162991 : if (best_bb)
401 : {
402 : /* Rotate the loop so that the BEST_EDGE goes out from the last block of
403 : the trace. */
404 162910 : if (back_edge->dest == trace->first)
405 : {
406 5273 : trace->first = (basic_block) best_bb->aux;
407 : }
408 : else
409 : {
410 : basic_block prev_bb;
411 :
412 : for (prev_bb = trace->first;
413 498905 : prev_bb->aux != back_edge->dest;
414 : prev_bb = (basic_block) prev_bb->aux)
415 : ;
416 157637 : prev_bb->aux = best_bb->aux;
417 :
418 : /* Try to get rid of uncond jump to cond jump. */
419 157637 : if (single_succ_p (prev_bb))
420 : {
421 135367 : basic_block header = single_succ (prev_bb);
422 :
423 : /* Duplicate HEADER if it is a small block containing cond jump
424 : in the end. */
425 261938 : if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
426 135367 : && !CROSSING_JUMP_P (BB_END (header)))
427 0 : copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
428 : }
429 : }
430 : }
431 : else
432 : {
433 : /* We have not found suitable loop tail so do no rotation. */
434 81 : best_bb = back_edge->src;
435 : }
436 162991 : best_bb->aux = NULL;
437 162991 : return best_bb;
438 : }
439 :
440 : /* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do
441 : not include basic blocks whose probability is lower than BRANCH_TH or whose
442 : count is lower than EXEC_TH into traces (or whose count is lower than
443 : COUNT_TH). Store the new traces into TRACES and modify the number of
444 : traces *N_TRACES. Set the round (which the trace belongs to) to ROUND.
445 : The function expects starting basic blocks to be in *HEAP and will delete
446 : *HEAP and store starting points for the next round into new *HEAP. */
447 :
448 : static void
449 2394660 : find_traces_1_round (int branch_th, profile_count count_th,
450 : struct trace *traces, int *n_traces, int round,
451 : bb_heap_t **heap, int number_of_rounds)
452 : {
453 : /* Heap for discarded basic blocks which are possible starting points for
454 : the next round. */
455 2394660 : bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
456 2394660 : bool for_size = optimize_function_for_size_p (cfun);
457 :
458 8317312 : while (!(*heap)->empty ())
459 : {
460 5922652 : basic_block bb;
461 5922652 : struct trace *trace;
462 5922652 : edge best_edge, e;
463 5922652 : long key;
464 5922652 : edge_iterator ei;
465 :
466 5922652 : bb = (*heap)->extract_min ();
467 5922652 : bbd[bb->index].heap = NULL;
468 5922652 : bbd[bb->index].node = NULL;
469 :
470 5922652 : if (dump_file)
471 108 : fprintf (dump_file, "Getting bb %d\n", bb->index);
472 :
473 : /* If the BB's count is too low, send BB to the next round. When
474 : partitioning hot/cold blocks into separate sections, make sure all
475 : the cold blocks (and ONLY the cold blocks) go into the (extra) final
476 : round. When optimizing for size, do not push to next round. */
477 :
478 5922652 : if (!for_size
479 5922652 : && push_to_next_round_p (bb, round, number_of_rounds,
480 : count_th))
481 : {
482 1479056 : int key = bb_to_key (bb);
483 1479056 : bbd[bb->index].heap = new_heap;
484 1479056 : bbd[bb->index].node = new_heap->insert (key, bb);
485 :
486 1479056 : if (dump_file)
487 16 : fprintf (dump_file,
488 : " Possible start point of next round: %d (key: %d)\n",
489 : bb->index, key);
490 1479056 : continue;
491 1479056 : }
492 :
493 4443596 : trace = traces + *n_traces;
494 4443596 : trace->first = bb;
495 4443596 : trace->round = round;
496 4443596 : trace->length = 0;
497 4443596 : bbd[bb->index].in_trace = *n_traces;
498 4443596 : (*n_traces)++;
499 :
500 9808656 : do
501 : {
502 9808656 : bool ends_in_call;
503 :
504 : /* The probability and count of the best edge. */
505 9808656 : profile_probability best_prob = profile_probability::uninitialized ();
506 9808656 : profile_count best_count = profile_count::uninitialized ();
507 :
508 9808656 : best_edge = NULL;
509 9808656 : mark_bb_visited (bb, *n_traces);
510 9808656 : trace->length++;
511 :
512 9808656 : if (dump_file)
513 188 : fprintf (dump_file, "Basic block %d was visited in trace %d\n",
514 : bb->index, *n_traces);
515 :
516 9808656 : ends_in_call = block_ends_with_call_p (bb);
517 :
518 : /* Select the successor that will be placed after BB. */
519 24586271 : FOR_EACH_EDGE (e, ei, bb->succs)
520 : {
521 14777615 : gcc_assert (!(e->flags & EDGE_FAKE));
522 :
523 14777615 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
524 8807322 : continue;
525 :
526 14076432 : if (bb_visited_trace (e->dest)
527 14076432 : && bb_visited_trace (e->dest) != *n_traces)
528 2625021 : continue;
529 :
530 : /* If partitioning hot/cold basic blocks, don't consider edges
531 : that cross section boundaries. */
532 11451411 : if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
533 405239 : continue;
534 :
535 11046172 : profile_probability prob = e->probability;
536 11046172 : profile_count count = e->dest->count;
537 :
538 : /* The only sensible preference for a call instruction is the
539 : fallthru edge. Don't bother selecting anything else. */
540 11046172 : if (ends_in_call)
541 : {
542 992241 : if (e->flags & EDGE_CAN_FALLTHRU)
543 : {
544 517827 : best_edge = e;
545 517827 : best_prob = prob;
546 517827 : best_count = count;
547 : }
548 992241 : continue;
549 : }
550 :
551 : /* Edge that cannot be fallthru or improbable or infrequent
552 : successor (i.e. it is unsuitable successor). When optimizing
553 : for size, ignore the probability and count. */
554 9880604 : if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
555 9828388 : || !prob.initialized_p ()
556 19880864 : || ((prob.to_reg_br_prob_base () < branch_th
557 9826933 : || e->count () < count_th) && (!for_size)))
558 3382455 : continue;
559 :
560 6671476 : if (better_edge_p (bb, e, prob, count, best_prob, best_count,
561 : best_edge))
562 : {
563 6094609 : best_edge = e;
564 6094609 : best_prob = prob;
565 6094609 : best_count = count;
566 : }
567 : }
568 :
569 : /* If the best destination has multiple predecessors and can be
570 : duplicated cheaper than a jump, don't allow it to be added to
571 : a trace; we'll duplicate it when connecting the traces later.
572 : However, we need to check that this duplication wouldn't leave
573 : the best destination with only crossing predecessors, because
574 : this would change its effective partition from hot to cold. */
575 9808656 : if (best_edge
576 6075024 : && EDGE_COUNT (best_edge->dest->preds) >= 2
577 12424308 : && copy_bb_p (best_edge->dest, 0))
578 : {
579 72488 : bool only_crossing_preds = true;
580 72488 : edge e;
581 72488 : edge_iterator ei;
582 99473 : FOR_EACH_EDGE (e, ei, best_edge->dest->preds)
583 99426 : if (e != best_edge && !(e->flags & EDGE_CROSSING))
584 : {
585 : only_crossing_preds = false;
586 : break;
587 : }
588 72488 : if (!only_crossing_preds)
589 72441 : best_edge = NULL;
590 : }
591 :
592 : /* If the best destination has multiple successors or predecessors,
593 : don't allow it to be added when optimizing for size. This makes
594 : sure predecessors with smaller index are handled before the best
595 : destination. It breaks long trace and reduces long jumps.
596 :
597 : Take if-then-else as an example.
598 : A
599 : / \
600 : B C
601 : \ /
602 : D
603 : If we do not remove the best edge B->D/C->D, the final order might
604 : be A B D ... C. C is at the end of the program. If D's successors
605 : and D are complicated, might need long jumps for A->C and C->D.
606 : Similar issue for order: A C D ... B.
607 :
608 : After removing the best edge, the final result will be ABCD/ ACBD.
609 : It does not add jump compared with the previous order. But it
610 : reduces the possibility of long jumps. */
611 9808656 : if (best_edge && for_size
612 9808656 : && (EDGE_COUNT (best_edge->dest->succs) > 1
613 135287 : || EDGE_COUNT (best_edge->dest->preds) > 1))
614 : best_edge = NULL;
615 :
616 : /* Add all non-selected successors to the heaps. */
617 24586271 : FOR_EACH_EDGE (e, ei, bb->succs)
618 : {
619 23947613 : if (e == best_edge
620 8976055 : || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
621 23052487 : || bb_visited_trace (e->dest))
622 9169998 : continue;
623 :
624 5607617 : key = bb_to_key (e->dest);
625 :
626 5607617 : if (bbd[e->dest->index].heap)
627 : {
628 : /* E->DEST is already in some heap. */
629 1372103 : if (key != bbd[e->dest->index].node->get_key ())
630 : {
631 0 : if (dump_file)
632 : {
633 0 : fprintf (dump_file,
634 : "Changing key for bb %d from %ld to %ld.\n",
635 : e->dest->index,
636 0 : (long) bbd[e->dest->index].node->get_key (),
637 : key);
638 : }
639 0 : bbd[e->dest->index].heap->replace_key
640 0 : (bbd[e->dest->index].node, key);
641 : }
642 : }
643 : else
644 : {
645 4235514 : bb_heap_t *which_heap = *heap;
646 :
647 4235514 : profile_probability prob = e->probability;
648 :
649 4235514 : if (!(e->flags & EDGE_CAN_FALLTHRU)
650 3938834 : || (e->flags & EDGE_COMPLEX)
651 3887509 : || !prob.initialized_p ()
652 3886183 : || prob.to_reg_br_prob_base () < branch_th
653 6313833 : || e->count () < count_th)
654 : {
655 : /* When partitioning hot/cold basic blocks, make sure
656 : the cold blocks (and only the cold blocks) all get
657 : pushed to the last round of trace collection. When
658 : optimizing for size, do not push to next round. */
659 :
660 3162084 : if (!for_size && push_to_next_round_p (e->dest, round,
661 : number_of_rounds,
662 : count_th))
663 : which_heap = new_heap;
664 : }
665 :
666 4235514 : bbd[e->dest->index].heap = which_heap;
667 4235514 : bbd[e->dest->index].node = which_heap->insert (key, e->dest);
668 :
669 4235514 : if (dump_file)
670 : {
671 87 : fprintf (dump_file,
672 : " Possible start of %s round: %d (key: %ld)\n",
673 : (which_heap == new_heap) ? "next" : "this",
674 87 : e->dest->index, (long) key);
675 : }
676 :
677 : }
678 : }
679 :
680 9808656 : if (best_edge) /* Suitable successor was found. */
681 : {
682 5801560 : if (bb_visited_trace (best_edge->dest) == *n_traces)
683 : {
684 : /* We do nothing with one basic block loops. */
685 436500 : if (best_edge->dest != bb)
686 : {
687 611472 : if (best_edge->count ()
688 203824 : > best_edge->dest->count.apply_scale (4, 5))
689 : {
690 : /* The loop has at least 4 iterations. If the loop
691 : header is not the first block of the function
692 : we can rotate the loop. */
693 :
694 163052 : if (best_edge->dest
695 163052 : != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
696 : {
697 162991 : if (dump_file)
698 : {
699 0 : fprintf (dump_file,
700 : "Rotating loop %d - %d\n",
701 : best_edge->dest->index, bb->index);
702 : }
703 162991 : bb->aux = best_edge->dest;
704 162991 : bbd[best_edge->dest->index].in_trace =
705 162991 : (*n_traces) - 1;
706 162991 : bb = rotate_loop (best_edge, trace, *n_traces);
707 : }
708 : }
709 : else
710 : {
711 : /* The loop has less than 4 iterations. */
712 :
713 40772 : if (single_succ_p (bb)
714 54806 : && copy_bb_p (best_edge->dest,
715 : optimize_edge_for_speed_p
716 14034 : (best_edge)))
717 : {
718 6654 : bb = copy_bb (best_edge->dest, best_edge, bb,
719 : *n_traces);
720 6654 : trace->length++;
721 : }
722 : }
723 : }
724 :
725 : /* Terminate the trace. */
726 436500 : break;
727 : }
728 : else
729 : {
730 : /* Check for a situation
731 :
732 : A
733 : /|
734 : B |
735 : \|
736 : C
737 :
738 : where
739 : AB->count () + BC->count () >= AC->count ().
740 : (i.e. 2 * B->count >= AC->count )
741 : Best ordering is then A B C.
742 :
743 : When optimizing for size, A B C is always the best order.
744 :
745 : This situation is created for example by:
746 :
747 : if (A) B;
748 : C;
749 :
750 : */
751 :
752 14748002 : FOR_EACH_EDGE (e, ei, bb->succs)
753 9397625 : if (e != best_edge
754 4044929 : && (e->flags & EDGE_CAN_FALLTHRU)
755 3568055 : && !(e->flags & EDGE_COMPLEX)
756 3568055 : && !bb_visited_trace (e->dest)
757 3173996 : && single_pred_p (e->dest)
758 1671925 : && !(e->flags & EDGE_CROSSING)
759 10384076 : && single_succ_p (e->dest)
760 1001134 : && (single_succ_edge (e->dest)->flags
761 1001134 : & EDGE_CAN_FALLTHRU)
762 884032 : && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
763 884032 : && single_succ (e->dest) == best_edge->dest
764 9694253 : && (e->dest->count * 2
765 9694253 : >= best_edge->count () || for_size))
766 : {
767 14683 : best_edge = e;
768 14683 : if (dump_file)
769 0 : fprintf (dump_file, "Selecting BB %d\n",
770 0 : best_edge->dest->index);
771 : break;
772 : }
773 :
774 5365060 : bb->aux = best_edge->dest;
775 5365060 : bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
776 5365060 : bb = best_edge->dest;
777 : }
778 : }
779 : }
780 9372156 : while (best_edge);
781 4443596 : trace->last = bb;
782 4443596 : bbd[trace->first->index].start_of_trace = *n_traces - 1;
783 4443596 : if (bbd[trace->last->index].end_of_trace != *n_traces - 1)
784 : {
785 4443596 : bbd[trace->last->index].end_of_trace = *n_traces - 1;
786 : /* Update the cached maximum frequency for interesting predecessor
787 : edges for successors of the new trace end. */
788 9853082 : FOR_EACH_EDGE (e, ei, trace->last->succs)
789 5409486 : if (EDGE_FREQUENCY (e) > bbd[e->dest->index].priority)
790 4137196 : bbd[e->dest->index].priority = EDGE_FREQUENCY (e);
791 : }
792 :
793 : /* The trace is terminated so we have to recount the keys in heap
794 : (some block can have a lower key because now one of its predecessors
795 : is an end of the trace). */
796 9853082 : FOR_EACH_EDGE (e, ei, bb->succs)
797 : {
798 8820031 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
799 5409486 : || bb_visited_trace (e->dest))
800 3410545 : continue;
801 :
802 1998941 : if (bbd[e->dest->index].heap)
803 : {
804 1998941 : key = bb_to_key (e->dest);
805 1998941 : if (key != bbd[e->dest->index].node->get_key ())
806 : {
807 1305547 : if (dump_file)
808 : {
809 45 : fprintf (dump_file,
810 : "Changing key for bb %d from %ld to %ld.\n",
811 : e->dest->index,
812 45 : (long) bbd[e->dest->index].node->get_key (), key);
813 : }
814 1305547 : bbd[e->dest->index].heap->replace_key
815 1305547 : (bbd[e->dest->index].node, key);
816 : }
817 : }
818 : }
819 : }
820 :
821 2394660 : delete (*heap);
822 :
823 : /* "Return" the new heap. */
824 2394660 : *heap = new_heap;
825 2394660 : }
826 :
827 : /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
828 : it to trace after BB, mark OLD_BB visited and update pass' data structures
829 : (TRACE is a number of trace which OLD_BB is duplicated to). */
830 :
831 : static basic_block
832 259561 : copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
833 : {
834 259561 : basic_block new_bb;
835 :
836 259561 : new_bb = duplicate_block (old_bb, e, bb);
837 259561 : BB_COPY_PARTITION (new_bb, old_bb);
838 :
839 259561 : gcc_assert (e->dest == new_bb);
840 :
841 259561 : if (dump_file)
842 0 : fprintf (dump_file,
843 : "Duplicated bb %d (created bb %d)\n",
844 : old_bb->index, new_bb->index);
845 :
846 259561 : if (new_bb->index >= array_size
847 259247 : || last_basic_block_for_fn (cfun) > array_size)
848 : {
849 314 : int i;
850 314 : int new_size;
851 :
852 314 : new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
853 314 : new_size = GET_ARRAY_SIZE (new_size);
854 314 : bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
855 23489 : for (i = array_size; i < new_size; i++)
856 : {
857 23175 : bbd[i].start_of_trace = -1;
858 23175 : bbd[i].end_of_trace = -1;
859 23175 : bbd[i].in_trace = -1;
860 23175 : bbd[i].visited = 0;
861 23175 : bbd[i].priority = -1;
862 23175 : bbd[i].heap = NULL;
863 23175 : bbd[i].node = NULL;
864 : }
865 314 : array_size = new_size;
866 :
867 314 : if (dump_file)
868 : {
869 0 : fprintf (dump_file,
870 : "Growing the dynamic array to %d elements.\n",
871 : array_size);
872 : }
873 : }
874 :
875 259561 : gcc_assert (!bb_visited_trace (e->dest));
876 259561 : mark_bb_visited (new_bb, trace);
877 259561 : new_bb->aux = bb->aux;
878 259561 : bb->aux = new_bb;
879 :
880 259561 : bbd[new_bb->index].in_trace = trace;
881 :
882 259561 : return new_bb;
883 : }
884 :
885 : /* Compute and return the key (for the heap) of the basic block BB. */
886 :
887 : static long
888 9684279 : bb_to_key (basic_block bb)
889 : {
890 9684279 : edge e;
891 9684279 : edge_iterator ei;
892 :
 : /* The heap pops the lowest key first, so "hotter" seeds must get more
 : negative keys; BB_FREQ_MAX marks blocks we never want to seed from. */
893 : /* Use index as key to align with its original order. */
894 9684279 : if (optimize_function_for_size_p (cfun))
895 615209 : return bb->index;
896 :
897 : /* Do not start in probably never executed blocks. */
898 :
899 9069070 : if (BB_PARTITION (bb) == BB_COLD_PARTITION
900 9069070 : || probably_never_executed_bb_p (cfun, bb))
901 1965920 : return BB_FREQ_MAX;
902 :
903 : /* Prefer blocks whose predecessor is an end of some trace
904 : or whose predecessor edge is EDGE_DFS_BACK. */
 : /* The priority is cached in bbd[]; -1 means it has not been computed
 : yet. It is the highest frequency among incoming edges that either
 : come from an already finished trace or are DFS back edges. */
905 7103150 : int priority = bbd[bb->index].priority;
906 7103150 : if (priority == -1)
907 : {
908 4049942 : priority = 0;
909 9860168 : FOR_EACH_EDGE (e, ei, bb->preds)
910 : {
911 5810226 : if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
912 5246883 : && bbd[e->src->index].end_of_trace >= 0)
913 5810226 : || (e->flags & EDGE_DFS_BACK))
914 : {
915 42085 : int edge_freq = EDGE_FREQUENCY (e);
916 :
917 42085 : if (edge_freq > priority)
918 5810226 : priority = edge_freq;
919 : }
920 : }
921 4049942 : bbd[bb->index].priority = priority;
922 : }
923 :
924 7103150 : if (priority)
925 : /* The block with priority should have significantly lower key. */
926 1551450 : return -(100 * BB_FREQ_MAX + 100 * priority + bb->count.to_frequency (cfun));
927 :
 : /* No priority: key is simply the negated frequency, so more frequent
 : blocks come out of the heap earlier. */
928 5551700 : return -bb->count.to_frequency (cfun);
929 : }
930 :
931 : /* Return true when the edge E from basic block BB is better than the temporary
932 : best edge (details are in function). The probability of edge E is PROB. The
933 : count of the successor is COUNT. The current best probability is
934 : BEST_PROB, the best count is BEST_COUNT.
935 : The edge is considered to be equivalent when PROB does not differ much from
936 : BEST_PROB; similarly for count. */
937 :
938 : static bool
939 6671476 : better_edge_p (const_basic_block bb, const_edge e, profile_probability prob,
940 : profile_count count, profile_probability best_prob,
941 : profile_count best_count, const_edge cur_best_edge)
942 : {
943 6671476 : bool is_better_edge;
944 :
945 : /* The BEST_* values do not have to be best, but can be a bit smaller than
946 : maximum values. */
 : /* A 10% band around BEST_PROB (and below, BEST_COUNT) within which
 : the probabilities/counts are treated as equivalent. */
947 6671476 : profile_probability diff_prob = best_prob / 10;
948 :
949 : /* The smaller one is better to keep the original order. */
950 6671476 : if (optimize_function_for_size_p (cfun))
951 402237 : return !cur_best_edge
952 753508 : || cur_best_edge->dest->index > e->dest->index;
953 :
954 : /* Those edges are so expensive that continuing a trace is not useful
955 : performance wise. */
956 6269239 : if (e->flags & (EDGE_ABNORMAL | EDGE_EH))
957 : return false;
958 :
959 12203596 : if (prob > best_prob + diff_prob
960 5934357 : || (!best_prob.initialized_p ()
961 10896325 : && prob > profile_probability::guessed_never ()))
962 : /* The edge has higher probability than the temporary best edge. */
963 : is_better_edge = true;
964 1008959 : else if (prob < best_prob - diff_prob)
965 : /* The edge has lower probability than the temporary best edge. */
966 : is_better_edge = false;
967 : else
968 : {
969 304965 : profile_count diff_count = best_count / 10;
970 304965 : if (count < best_count - diff_count
971 304965 : || (!best_count.initialized_p ()
972 65645 : && count.nonzero_p ()))
973 : /* The edge and the temporary best edge have almost equivalent
974 : probabilities. A higher count of the successor means
975 : that there is another edge going into that successor.
976 : This successor has lower count so it is better. */
977 : is_better_edge = true;
978 239320 : else if (count > best_count + diff_count)
979 : /* This successor has higher count so it is worse. */
980 : is_better_edge = false;
981 140054 : else if (e->dest->prev_bb == bb)
982 : /* The edges have equivalent probabilities and the successors
983 : have equivalent frequencies. Select the previous successor. */
984 : is_better_edge = true;
985 : else
986 304965 : is_better_edge = false;
987 : }
988 :
989 : return is_better_edge;
990 : }
991 :
992 : /* Return true when the edge E is better than the temporary best edge
993 : CUR_BEST_EDGE. If SRC_INDEX_P is true, the function compares the src bb of
994 : E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
995 : BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
996 : TRACES record the information about traces.
997 : When optimizing for size, the edge with smaller index is better.
998 : When optimizing for speed, the edge with bigger probability or longer trace
999 : is better. */
1000 :
1001 : static bool
1002 1877111 : connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
1003 : const_edge cur_best_edge, struct trace *traces)
1004 : {
1005 1877111 : int e_index;
1006 1877111 : int b_index;
1007 1877111 : bool is_better_edge;
1008 :
 : /* Any edge beats no edge. */
1009 1877111 : if (!cur_best_edge)
1010 : return true;
1011 :
1012 468178 : if (optimize_function_for_size_p (cfun))
1013 : {
1014 49357 : e_index = src_index_p ? e->src->index : e->dest->index;
1015 49357 : b_index = src_index_p ? cur_best_edge->src->index
1016 46466 : : cur_best_edge->dest->index;
1017 : /* The smaller one is better to keep the original order. */
1018 49357 : return b_index > e_index;
1019 : }
1020 :
1021 418821 : if (src_index_p)
1022 : {
1023 36203 : e_index = e->src->index;
1024 :
1025 : /* We are looking for predecessor, so probabilities are not that
1026 : informative. We do not want to connect A to B because A has
1027 : only one successor (probability is 100%) while there is edge
1028 : A' to B where probability is 90% but which is much more frequent. */
1029 36203 : if (e->count () > cur_best_edge->count ())
1030 : /* The edge has higher execution count than the temporary best edge. */
1031 : is_better_edge = true;
1032 27123 : else if (e->count () < cur_best_edge->count ())
1033 : /* The edge has lower execution count than the temporary best edge. */
1034 : is_better_edge = false;
1035 9765 : else if (e->probability > cur_best_edge->probability)
1036 : /* The edge has higher probability than the temporary best edge. */
1037 : is_better_edge = true;
1038 9148 : else if (e->probability < cur_best_edge->probability)
1039 : /* The edge has lower probability than the temporary best edge. */
1040 : is_better_edge = false;
1041 7563 : else if (traces[bbd[e_index].end_of_trace].length > best_len)
1042 : /* The edge and the temporary best edge have equivalent probabilities.
1043 : The edge with longer trace is better. */
1044 : is_better_edge = true;
1045 : else
1046 : is_better_edge = false;
1047 : }
1048 : else
1049 : {
1050 382618 : e_index = e->dest->index;
1051 :
1052 382618 : if (e->probability > cur_best_edge->probability)
1053 : /* The edge has higher probability than the temporary best edge. */
1054 : is_better_edge = true;
1055 241398 : else if (e->probability < cur_best_edge->probability)
1056 : /* The edge has lower probability than the temporary best edge. */
1057 : is_better_edge = false;
1058 113354 : else if (traces[bbd[e_index].start_of_trace].length > best_len)
1059 : /* The edge and the temporary best edge have equivalent probabilities.
1060 : The edge with longer trace is better. */
1061 : is_better_edge = true;
1062 : else
1063 : is_better_edge = false;
1064 : }
1065 :
1066 : return is_better_edge;
1067 : }
1068 :
1069 : /* Connect traces in array TRACES, N_TRACES is the count of traces. */
1070 :
1071 : static void
1072 598665 : connect_traces (int n_traces, struct trace *traces)
1073 : {
1074 598665 : int i;
1075 598665 : bool *connected;
1076 598665 : bool two_passes;
1077 598665 : int last_trace;
1078 598665 : int current_pass;
1079 598665 : int current_partition;
1080 598665 : profile_count count_threshold;
1081 598665 : bool for_size = optimize_function_for_size_p (cfun);
1082 :
 : /* Minimum edge count for block duplication, expressed as a fraction
 : (DUPLICATION_THRESHOLD per mille) of the maximum entry count. */
1083 598665 : count_threshold = max_entry_count.apply_scale (DUPLICATION_THRESHOLD, 1000);
1084 :
1085 598665 : connected = XCNEWVEC (bool, n_traces);
1086 598665 : last_trace = -1;
1087 598665 : current_pass = 1;
1088 598665 : current_partition = BB_PARTITION (traces[0].first);
1089 598665 : two_passes = false;
1090 :
 : /* If both hot and cold traces are present, run two passes: the first
 : connects only traces in traces[0]'s partition, the second connects
 : the traces of the other partition. */
1091 598665 : if (crtl->has_bb_partition)
1092 585962 : for (i = 0; i < n_traces && !two_passes; i++)
1093 521622 : if (BB_PARTITION (traces[0].first)
1094 521622 : != BB_PARTITION (traces[i].first))
1095 64336 : two_passes = true;
1096 :
 : /* The final block order is built by chaining basic blocks through
 : their aux fields; traces[x].last->aux points at the first block of
 : the next connected trace. */
1097 5735850 : for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1098 : {
1099 5137185 : int t = i;
1100 5137185 : int t2;
1101 5137185 : edge e, best;
1102 5137185 : int best_len;
1103 :
 : /* End of pass 1: restart from trace 0 for the other partition. */
1104 5137185 : if (i >= n_traces)
1105 : {
1106 64336 : gcc_assert (two_passes && current_pass == 1);
1107 64336 : i = 0;
1108 64336 : t = i;
1109 64336 : current_pass = 2;
1110 64336 : if (current_partition == BB_HOT_PARTITION)
1111 : current_partition = BB_COLD_PARTITION;
1112 : else
1113 0 : current_partition = BB_HOT_PARTITION;
1114 : }
1115 :
1116 5137185 : if (connected[t])
1117 2137723 : continue;
1118 :
1119 3180248 : if (two_passes
1120 669721 : && BB_PARTITION (traces[t].first) != current_partition)
1121 180786 : continue;
1122 :
1123 2999462 : connected[t] = true;
1124 :
1125 : /* Find the predecessor traces. */
 : /* Walk backwards from trace T, repeatedly prepending the best
 : not-yet-connected predecessor trace; T2 ends up as the head. */
1126 3104170 : for (t2 = t; t2 > 0;)
1127 : {
1128 2505505 : edge_iterator ei;
1129 2505505 : best = NULL;
1130 2505505 : best_len = 0;
1131 6263016 : FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1132 : {
1133 3757511 : int si = e->src->index;
1134 :
1135 3757511 : if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1136 3757511 : && (e->flags & EDGE_CAN_FALLTHRU)
1137 3036022 : && !(e->flags & EDGE_COMPLEX)
1138 2988457 : && bbd[si].end_of_trace >= 0
1139 519509 : && !connected[bbd[si].end_of_trace]
1140 143811 : && (BB_PARTITION (e->src) == current_partition)
1141 3901313 : && connect_better_edge_p (e, true, best_len, best, traces))
1142 : {
1143 115157 : best = e;
1144 115157 : best_len = traces[bbd[si].end_of_trace].length;
1145 : }
1146 : }
1147 2505505 : if (best)
1148 : {
1149 104708 : best->src->aux = best->dest;
1150 104708 : t2 = bbd[best->src->index].end_of_trace;
1151 104708 : connected[t2] = true;
1152 :
1153 104708 : if (dump_file)
1154 : {
1155 2 : fprintf (dump_file, "Connection: %d %d\n",
1156 : best->src->index, best->dest->index);
1157 : }
1158 : }
1159 : else
1160 : break;
1161 : }
1162 :
 : /* Append the chain starting at T2 after the previously emitted trace. */
1163 2999462 : if (last_trace >= 0)
1164 2400797 : traces[last_trace].last->aux = traces[t2].first;
1165 : last_trace = t;
1166 :
1167 : /* Find the successor traces. */
1168 5678314 : while (1)
1169 : {
1170 : /* Find the continuation of the chain. */
1171 4338888 : edge_iterator ei;
1172 4338888 : best = NULL;
1173 4338888 : best_len = 0;
1174 9602220 : FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1175 : {
1176 5263332 : int di = e->dest->index;
1177 :
1178 5263332 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1179 4562149 : && (e->flags & EDGE_CAN_FALLTHRU)
1180 4289416 : && !(e->flags & EDGE_COMPLEX)
1181 4233618 : && bbd[di].start_of_trace >= 0
1182 2277175 : && !connected[bbd[di].start_of_trace]
1183 1752358 : && (BB_PARTITION (e->dest) == current_partition)
1184 6996641 : && connect_better_edge_p (e, false, best_len, best, traces))
1185 : {
1186 1514758 : best = e;
1187 1514758 : best_len = traces[bbd[di].start_of_trace].length;
1188 : }
1189 : }
1190 :
1191 4338888 : if (for_size)
1192 : {
1193 274884 : if (!best)
1194 : /* Stop finding the successor traces. */
1195 : break;
1196 :
1197 : /* It is OK to connect block n with block n + 1 or a block
1198 : before n. For others, only connect to the loop header. */
1199 197281 : if (best->dest->index > (traces[t].last->index + 1))
1200 : {
1201 23152 : int count = EDGE_COUNT (best->dest->preds);
1202 :
1203 147472 : FOR_EACH_EDGE (e, ei, best->dest->preds)
1204 124320 : if (e->flags & EDGE_DFS_BACK)
1205 4874 : count--;
1206 :
1207 : /* If dest has multiple predecessors, skip it. We expect
1208 : that one predecessor with smaller index connects with it
1209 : later. */
1210 23152 : if (count != 1)
1211 : break;
1212 : }
1213 :
1214 : /* Only connect Trace n with Trace n + 1. It is conservative
1215 : to keep the order as close as possible to the original order.
1216 : It also helps to reduce long jumps. */
1217 180591 : if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1218 : break;
1219 :
1220 178305 : if (dump_file)
1221 5 : fprintf (dump_file, "Connection: %d %d\n",
1222 5 : best->src->index, best->dest->index);
1223 :
1224 178305 : t = bbd[best->dest->index].start_of_trace;
1225 178305 : traces[last_trace].last->aux = traces[t].first;
1226 178305 : connected[t] = true;
1227 178305 : last_trace = t;
1228 : }
1229 4064004 : else if (best)
1230 : {
1231 1106944 : if (dump_file)
1232 : {
1233 45 : fprintf (dump_file, "Connection: %d %d\n",
1234 45 : best->src->index, best->dest->index);
1235 : }
1236 1106944 : t = bbd[best->dest->index].start_of_trace;
1237 1106944 : traces[last_trace].last->aux = traces[t].first;
1238 1106944 : connected[t] = true;
1239 1106944 : last_trace = t;
1240 : }
1241 : else
1242 : {
1243 : /* Try to connect the traces by duplication of 1 block. */
1244 2957060 : edge e2;
1245 2957060 : basic_block next_bb = NULL;
1246 2957060 : bool try_copy = false;
1247 :
1248 5783405 : FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1249 2826345 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1250 2162436 : && (e->flags & EDGE_CAN_FALLTHRU)
1251 1894840 : && !(e->flags & EDGE_COMPLEX)
1252 6829282 : && (!best || e->probability > best->probability))
1253 : {
1254 1827812 : edge_iterator ei;
1255 1827812 : edge best2 = NULL;
1256 1827812 : int best2_len = 0;
1257 :
1258 : /* If the destination is a start of a trace which is only
1259 : one block long, then no need to search the successor
1260 : blocks of the trace. Accept it. */
1261 1827812 : if (bbd[e->dest->index].start_of_trace >= 0
1262 381073 : && traces[bbd[e->dest->index].start_of_trace].length
1263 : == 1)
1264 : {
1265 264278 : best = e;
1266 264278 : try_copy = true;
1267 264278 : continue;
1268 : }
1269 :
 : /* Otherwise look one block ahead: duplicate e->dest only if
 : one of its successors starts an unconnected trace (or is
 : the exit block) that we can then continue with. */
1270 3992719 : FOR_EACH_EDGE (e2, ei, e->dest->succs)
1271 : {
1272 2429185 : int di = e2->dest->index;
1273 :
1274 2429185 : if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1275 2429185 : || ((e2->flags & EDGE_CAN_FALLTHRU)
1276 2017532 : && !(e2->flags & EDGE_COMPLEX)
1277 2017530 : && bbd[di].start_of_trace >= 0
1278 714793 : && !connected[bbd[di].start_of_trace]
1279 254388 : && BB_PARTITION (e2->dest) == current_partition
1280 2151360 : && e2->count () >= count_threshold
1281 118576 : && (!best2
1282 926 : || e2->probability > best2->probability
1283 734 : || (e2->probability == best2->probability
1284 225 : && traces[bbd[di].start_of_trace].length
1285 : > best2_len))))
1286 : {
1287 396401 : best = e;
1288 396401 : best2 = e2;
1289 396401 : if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1290 117873 : best2_len = traces[bbd[di].start_of_trace].length;
1291 : else
1292 : best2_len = INT_MAX;
1293 : next_bb = e2->dest;
1294 : try_copy = true;
1295 : }
1296 : }
1297 : }
1298 :
1299 : /* Copy tiny blocks always; copy larger blocks only when the
1300 : edge is traversed frequently enough. */
1301 2957060 : if (try_copy
1302 654413 : && BB_PARTITION (best->src) == BB_PARTITION (best->dest)
1303 3609233 : && copy_bb_p (best->dest,
1304 652173 : optimize_edge_for_speed_p (best)
1305 1078109 : && (!best->count ().initialized_p ()
1306 3129999 : || best->count () >= count_threshold)))
1307 : {
1308 252907 : basic_block new_bb;
1309 :
1310 252907 : if (dump_file)
1311 : {
1312 0 : fprintf (dump_file, "Connection: %d %d ",
1313 0 : traces[t].last->index, best->dest->index);
1314 0 : if (!next_bb)
1315 0 : fputc ('\n', dump_file);
1316 0 : else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1317 0 : fprintf (dump_file, "exit\n");
1318 : else
1319 0 : fprintf (dump_file, "%d\n", next_bb->index);
1320 : }
1321 :
1322 252907 : new_bb = copy_bb (best->dest, best, traces[t].last, t);
1323 252907 : traces[t].last = new_bb;
1324 252907 : if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1325 : {
1326 54177 : t = bbd[next_bb->index].start_of_trace;
1327 54177 : traces[last_trace].last->aux = traces[t].first;
1328 54177 : connected[t] = true;
1329 54177 : last_trace = t;
1330 : }
1331 : else
1332 : break; /* Stop finding the successor traces. */
1333 : }
1334 : else
1335 : break; /* Stop finding the successor traces. */
1336 : }
1337 1339426 : }
1338 : }
1339 :
1340 598665 : if (dump_file)
1341 : {
1342 23 : basic_block bb;
1343 :
1344 23 : fprintf (dump_file, "Final order:\n");
1345 211 : for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1346 188 : fprintf (dump_file, "%d ", bb->index);
1347 23 : fprintf (dump_file, "\n");
1348 23 : fflush (dump_file);
1349 : }
1350 :
1351 598665 : FREE (connected);
1352 598665 : }
1353 :
1354 : /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1355 : when code size is allowed to grow by duplication. */
1356 :
1357 : static bool
1358 3408430 : copy_bb_p (const_basic_block bb, int code_may_grow)
1359 : {
1360 3408430 : unsigned int size = 0;
 : /* Baseline size budget: duplicating a block smaller than an
 : unconditional jump is always a win. */
1361 3408430 : unsigned int max_size = uncond_jump_length;
1362 3408430 : rtx_insn *insn;
1363 :
 : /* A block with a single predecessor would simply be moved, not copied. */
1364 6481830 : if (EDGE_COUNT (bb->preds) < 2)
1365 : return false;
1366 3405449 : if (!can_duplicate_block_p (bb))
1367 : return false;
1368 :
1369 : /* Avoid duplicating blocks which have many successors (PR/13430). */
1370 3404522 : if (EDGE_COUNT (bb->succs) > 8)
1371 : return false;
1372 :
 : /* When growth is allowed and the block is hot, scale the budget by
 : the --param max-grow-copy-bb-insns factor. */
1373 3404522 : if (code_may_grow && optimize_bb_for_speed_p (bb))
1374 313578 : max_size *= param_max_grow_copy_bb_insns;
1375 :
1376 24491599 : FOR_BB_INSNS (bb, insn)
1377 : {
1378 24159550 : if (INSN_P (insn))
1379 : {
1380 16410735 : size += get_attr_min_length (insn);
1381 16410735 : if (size > max_size)
1382 : break;
1383 : }
1384 : }
1385 :
1386 3404522 : if (size <= max_size)
1387 : return true;
1388 :
1389 3072473 : if (dump_file)
1390 : {
1391 59 : fprintf (dump_file,
1392 : "Block %d can't be copied because its size = %u.\n",
1393 59 : bb->index, size);
1394 : }
1395 :
1396 : return false;
1397 : }
1398 :
1399 : /* Return the length of unconditional jump instruction. */
1400 :
1401 : int
1402 771115 : get_uncond_jump_length (void)
1403 : {
1404 771115 : unsigned int length;
1405 :
 : /* Emit a dummy label and a jump to it inside a scratch insn sequence
 : and measure the jump's minimum length; the sequence is discarded
 : by end_sequence, so nothing is added to the current function. */
1406 771115 : start_sequence ();
1407 771115 : rtx_code_label *label = emit_label (gen_label_rtx ());
1408 771115 : rtx_insn *jump = emit_jump_insn (targetm.gen_jump (label));
1409 771115 : length = get_attr_min_length (jump);
1410 771115 : end_sequence ();
1411 :
1412 771115 : gcc_assert (length < INT_MAX);
1413 771115 : return length;
1414 : }
1415 :
1416 : /* Create a forwarder block to OLD_BB starting with NEW_LABEL and in the
1417 : other partition wrt OLD_BB. */
1418 :
1419 : static basic_block
1420 6788 : create_eh_forwarder_block (rtx_code_label *new_label, basic_block old_bb)
1421 : {
1422 : /* Split OLD_BB, so that EH pads have always only incoming EH edges,
1423 : bb_has_eh_pred bbs are treated specially by DF infrastructure. */
1424 6788 : old_bb = split_block_after_labels (old_bb)->dest;
1425 :
1426 : /* Put the new label and a jump in the new basic block. */
1427 6788 : rtx_insn *label = emit_label (new_label);
1428 6788 : rtx_code_label *old_label = block_label (old_bb);
1429 6788 : rtx_insn *jump = emit_jump_insn (targetm.gen_jump (old_label));
1430 6788 : JUMP_LABEL (jump) = old_label;
1431 :
1432 : /* Create the new basic block and put it in last position. */
1433 6788 : basic_block last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1434 6788 : basic_block new_bb = create_basic_block (label, jump, last_bb);
1435 6788 : new_bb->aux = last_bb->aux;
1436 6788 : new_bb->count = old_bb->count;
1437 6788 : last_bb->aux = new_bb;
1438 :
1439 6788 : emit_barrier_after_bb (new_bb);
1440 :
1441 6788 : make_single_succ_edge (new_bb, old_bb, 0);
1442 :
1443 : /* Make sure the new basic block is in the other partition. */
 : /* XOR-ing with both partition bits toggles HOT <-> COLD. */
1444 6788 : unsigned new_partition = BB_PARTITION (old_bb);
1445 6788 : new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1446 6788 : BB_SET_PARTITION (new_bb, new_partition);
1447 :
1448 6788 : return new_bb;
1449 : }
1450 :
1451 : /* The common landing pad in block OLD_BB has edges from both partitions.
1452 : Add a new landing pad that will just jump to the old one and split the
1453 : edges so that no EH edge crosses partitions. */
1454 :
1455 : static void
1456 0 : sjlj_fix_up_crossing_landing_pad (basic_block old_bb)
1457 : {
1458 0 : const unsigned lp_len = cfun->eh->lp_array->length ();
1459 0 : edge_iterator ei;
1460 0 : edge e;
1461 :
1462 : /* Generate the new common landing-pad label. */
1463 0 : rtx_code_label *new_label = gen_label_rtx ();
1464 0 : LABEL_PRESERVE_P (new_label) = 1;
1465 :
1466 : /* Create the forwarder block. */
1467 0 : basic_block new_bb = create_eh_forwarder_block (new_label, old_bb);
1468 :
1469 : /* Create the map from old to new lp index and initialize it. */
 : /* NOTE(review): 0 in INDEX_MAP below serves as the "no new landing pad
 : created yet" sentinel, which presumes landing-pad index 0 is never a
 : valid new index — confirm against gen_eh_landing_pad. */
1470 0 : unsigned *index_map = (unsigned *) alloca (lp_len * sizeof (unsigned));
1471 0 : memset (index_map, 0, lp_len * sizeof (unsigned));
1472 :
1473 : /* Fix up the edges. */
 : /* Redirect every throwing predecessor that lives in NEW_BB's partition
 : to the forwarder, creating at most one new lp per old lp index. */
1474 0 : for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1475 0 : if (e->src != new_bb && BB_PARTITION (e->src) == BB_PARTITION (new_bb))
1476 : {
1477 0 : rtx_insn *insn = BB_END (e->src);
1478 0 : rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1479 :
1480 0 : gcc_assert (note != NULL);
1481 0 : const unsigned old_index = INTVAL (XEXP (note, 0));
1482 :
1483 : /* Generate the new landing-pad structure. */
1484 0 : if (index_map[old_index] == 0)
1485 : {
1486 0 : eh_landing_pad old_lp = (*cfun->eh->lp_array)[old_index];
1487 0 : eh_landing_pad new_lp = gen_eh_landing_pad (old_lp->region);
1488 0 : new_lp->post_landing_pad = old_lp->post_landing_pad;
1489 0 : new_lp->landing_pad = new_label;
1490 0 : index_map[old_index] = new_lp->index;
1491 : }
1492 0 : XEXP (note, 0) = GEN_INT (index_map[old_index]);
1493 :
1494 : /* Adjust the edge to the new destination. */
1495 0 : redirect_edge_succ (e, new_bb);
1496 0 : }
1497 : else
1498 0 : ei_next (&ei);
1499 0 : }
1500 :
1501 : /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1502 : Add a new landing pad that will just jump to the old one and split the
1503 : edges so that no EH edge crosses partitions. */
1504 :
1505 : static void
1506 6788 : dw2_fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1507 : {
1508 6788 : eh_landing_pad new_lp;
1509 6788 : edge_iterator ei;
1510 6788 : edge e;
1511 :
1512 : /* Generate the new landing-pad structure. */
1513 6788 : new_lp = gen_eh_landing_pad (old_lp->region);
1514 6788 : new_lp->post_landing_pad = old_lp->post_landing_pad;
1515 6788 : new_lp->landing_pad = gen_label_rtx ();
1516 6788 : LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1517 :
1518 : /* Create the forwarder block. */
1519 6788 : basic_block new_bb = create_eh_forwarder_block (new_lp->landing_pad, old_bb);
1520 :
1521 : /* Fix up the edges. */
 : /* Predecessors in the forwarder's partition are retargeted to NEW_BB
 : (and their REG_EH_REGION notes switched to the new lp); the rest
 : keep throwing to OLD_BB directly. */
1522 55507 : for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1523 48719 : if (e->src != new_bb && BB_PARTITION (e->src) == BB_PARTITION (new_bb))
1524 : {
1525 37213 : rtx_insn *insn = BB_END (e->src);
1526 37213 : rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1527 :
1528 37213 : gcc_assert (note != NULL);
1529 37213 : gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1530 37213 : XEXP (note, 0) = GEN_INT (new_lp->index);
1531 :
1532 : /* Adjust the edge to the new destination. */
1533 37213 : redirect_edge_succ (e, new_bb);
1534 37213 : }
1535 : else
1536 11506 : ei_next (&ei);
1537 6788 : }
1538 :
1539 :
1540 : /* Ensure that all hot bbs are included in a hot path through the
1541 : procedure. This is done by calling this function twice, once
1542 : with WALK_UP true (to look for paths from the entry to hot bbs) and
1543 : once with WALK_UP false (to look for paths from hot bbs to the exit).
1544 : Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1545 : to BBS_IN_HOT_PARTITION. */
1546 :
1547 : static unsigned int
1548 129422 : sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1549 : vec<basic_block> *bbs_in_hot_partition)
1550 : {
1551 : /* Callers check this. */
1552 129422 : gcc_checking_assert (cold_bb_count);
1553 :
1554 : /* Keep examining hot bbs while we still have some left to check
1555 : and there are remaining cold bbs. */
1556 129422 : vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1557 129422 : while (! hot_bbs_to_check.is_empty ()
1558 6145688 : && cold_bb_count)
1559 : {
1560 3008025 : basic_block bb = hot_bbs_to_check.pop ();
 : /* WALK_UP selects the direction: preds towards the entry,
 : succs towards the exit. */
1561 3008025 : vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1562 3008025 : edge e;
1563 3008025 : edge_iterator ei;
1564 3008025 : profile_probability highest_probability
1565 3008025 : = profile_probability::uninitialized ();
1566 3008025 : profile_count highest_count = profile_count::uninitialized ();
1567 3008025 : bool found = false;
1568 :
1569 : /* Walk the preds/succs and check if there is at least one already
1570 : marked hot. Keep track of the most frequent pred/succ so that we
1571 : can mark it hot if we don't find one. */
1572 3539919 : FOR_EACH_EDGE (e, ei, edges)
1573 : {
1574 3494470 : basic_block reach_bb = walk_up ? e->src : e->dest;
1575 :
1576 3494470 : if (e->flags & EDGE_DFS_BACK)
1577 130226 : continue;
1578 :
1579 : /* Do not expect profile insanities when profile was not adjusted. */
1580 3364244 : if (e->probability == profile_probability::never ()
1581 6348120 : || e->count () == profile_count::zero ())
1582 382670 : continue;
1583 :
1584 2981574 : if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1585 : {
1586 : found = true;
1587 : break;
1588 : }
1589 : /* The following loop will look for the hottest edge via
1590 : the edge count, if it is non-zero, then fallback to
1591 : the edge probability. */
 : /* NOTE(review): the negated comparison looks like it relies on
 : profile_count ordering being partial (uninitialized values
 : compare as incomparable), so the running maximum still gets
 : updated in that case — confirm against profile-count.h. */
1592 18998 : if (!(e->count () > highest_count))
1593 18987 : highest_count = e->count ();
1594 18998 : if (!highest_probability.initialized_p ()
1595 550892 : || e->probability > highest_probability)
1596 18987 : highest_probability = e->probability;
1597 : }
1598 :
1599 : /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1600 : block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1601 : then the most frequent pred (or succ) needs to be adjusted. In the
1602 : case where multiple preds/succs have the same frequency (e.g. a
1603 : 50-50 branch), then both will be adjusted. */
1604 3008025 : if (found)
1605 2962576 : continue;
1606 :
1607 88469 : FOR_EACH_EDGE (e, ei, edges)
1608 : {
1609 43020 : if (e->flags & EDGE_DFS_BACK)
1610 37974 : continue;
1611 : /* Do not expect profile insanities when profile was not adjusted. */
1612 23344 : if (e->probability == profile_probability::never ()
1613 28696 : || e->count () == profile_count::zero ())
1614 18298 : continue;
1615 : /* Select the hottest edge using the edge count, if it is non-zero,
1616 : then fallback to the edge probability. */
1617 5046 : if (highest_count.initialized_p ())
1618 : {
1619 5046 : if (!(e->count () >= highest_count))
1620 0 : continue;
1621 : }
1622 0 : else if (!(e->probability >= highest_probability))
1623 0 : continue;
1624 :
1625 5046 : basic_block reach_bb = walk_up ? e->src : e->dest;
1626 :
1627 : /* We have a hot bb with an immediate dominator that is cold.
1628 : The dominator needs to be re-marked hot. */
1629 5046 : BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1630 5046 : if (dump_file)
1631 0 : fprintf (dump_file, "Promoting bb %i to hot partition to sanitize "
1632 : "profile of bb %i in %s walk\n", reach_bb->index,
1633 : bb->index, walk_up ? "backward" : "forward");
1634 5046 : cold_bb_count--;
1635 :
1636 : /* Now we need to examine newly-hot reach_bb to see if it is also
1637 : dominated by a cold bb. */
1638 5046 : bbs_in_hot_partition->safe_push (reach_bb);
1639 5046 : hot_bbs_to_check.safe_push (reach_bb);
1640 : }
1641 : }
1642 129422 : hot_bbs_to_check.release ();
1643 :
1644 129422 : return cold_bb_count;
1645 : }
1646 :
1647 :
1648 : /* Find the basic blocks that are rarely executed and need to be moved to
1649 : a separate section of the .o file (to cut down on paging and improve
1650 : cache locality). Return a vector of all edges that cross. */
1651 :
1652 : static vec<edge>
1653 264504 : find_rarely_executed_basic_blocks_and_crossing_edges (void)
1654 : {
1655 264504 : vec<edge> crossing_edges = vNULL;
1656 264504 : basic_block bb;
1657 264504 : edge e;
1658 264504 : edge_iterator ei;
1659 264504 : unsigned int cold_bb_count = 0;
1660 264504 : auto_vec<basic_block> bbs_in_hot_partition;
1661 :
1662 264504 : propagate_unlikely_bbs_forward ();
1663 :
1664 : /* Mark which partition (hot/cold) each basic block belongs in. */
 : /* The decision is profile-driven: blocks that are probably never
 : executed go to the cold section, everything else starts out hot. */
1665 4864995 : FOR_EACH_BB_FN (bb, cfun)
1666 : {
1667 4600491 : bool cold_bb = false;
1668 :
1669 4600491 : if (probably_never_executed_bb_p (cfun, bb))
1670 : {
1671 279976 : cold_bb = true;
1672 :
1673 : /* Handle profile insanities created by upstream optimizations
1674 : by also checking the incoming edge weights. If there is a non-cold
1675 : incoming edge, conservatively prevent this block from being split
1676 : into the cold section. */
1677 279976 : if (!bb->count.precise_p ())
1678 236 : FOR_EACH_EDGE (e, ei, bb->preds)
1679 135 : if (!probably_never_executed_edge_p (cfun, e))
1680 : {
1681 : cold_bb = false;
1682 : break;
1683 : }
1684 : }
1685 101 : if (cold_bb)
1686 : {
1687 279976 : BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1688 279976 : cold_bb_count++;
1689 : }
1690 : else
1691 : {
1692 4320515 : BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1693 4320515 : bbs_in_hot_partition.safe_push (bb);
1694 : }
1695 : }
1696 :
1697 : /* Ensure that hot bbs are included along a hot path from the entry to exit.
1698 : Several different possibilities may include cold bbs along all paths
1699 : to/from a hot bb. One is that there are edge weight insanities
1700 : due to optimization phases that do not properly update basic block profile
1701 : counts. The second is that the entry of the function may not be hot, because
1702 : it is entered fewer times than the number of profile training runs, but there
1703 : is a loop inside the function that causes blocks within the function to be
1704 : above the threshold for hotness. This is fixed by walking up from hot bbs
1705 : to the entry block, and then down from hot bbs to the exit, performing
1706 : partitioning fixups as necessary. */
1707 264504 : if (cold_bb_count)
1708 : {
1709 64711 : mark_dfs_back_edges ();
1710 64711 : cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1711 : &bbs_in_hot_partition);
1712 64711 : if (cold_bb_count)
1713 64711 : sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1714 :
 : /* Finally, demote any block not reachable along a hot path. */
1715 64711 : hash_set <basic_block> set;
1716 64711 : find_bbs_reachable_by_hot_paths (&set);
1717 1849192 : FOR_EACH_BB_FN (bb, cfun)
1718 1784481 : if (!set.contains (bb))
1719 274930 : BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1720 64711 : }
1721 :
1722 : /* The format of .gcc_except_table does not allow landing pads to
1723 : be in a different partition as the throw. Fix this by either
1724 : moving the landing pads or inserting forwarder landing pads. */
1725 264504 : if (cfun->eh->lp_array)
1726 : {
1727 264504 : const bool sjlj
1728 264504 : = (targetm_common.except_unwind_info (&global_options) == UI_SJLJ);
1729 264504 : unsigned i;
1730 264504 : eh_landing_pad lp;
1731 :
1732 862044 : FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1733 : {
1734 597540 : bool all_same, all_diff;
1735 :
1736 597540 : if (lp == NULL
1737 81061 : || lp->landing_pad == NULL_RTX
1738 81061 : || !LABEL_P (lp->landing_pad))
1739 516522 : continue;
1740 :
 : /* Classify the throwing predecessors: ALL_SAME means every throw
 : is in the pad's partition (nothing to do); ALL_DIFF means every
 : throw is in the other partition (move the pad instead). */
1741 81018 : all_same = all_diff = true;
1742 81018 : bb = BLOCK_FOR_INSN (lp->landing_pad);
1743 279184 : FOR_EACH_EDGE (e, ei, bb->preds)
1744 : {
1745 198166 : gcc_assert (e->flags & EDGE_EH);
1746 198166 : if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1747 : all_diff = false;
1748 : else
1749 135674 : all_same = false;
1750 : }
1751 :
1752 81018 : if (all_same)
1753 : ;
1754 62561 : else if (all_diff)
1755 : {
1756 55773 : int which = BB_PARTITION (bb);
1757 55773 : which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1758 55773 : BB_SET_PARTITION (bb, which);
1759 : }
1760 6788 : else if (sjlj)
1761 0 : sjlj_fix_up_crossing_landing_pad (bb);
1762 : else
1763 6788 : dw2_fix_up_crossing_landing_pad (lp, bb);
1764 :
1765 : /* There is a single, common landing pad in SJLJ mode. */
1766 81018 : if (sjlj)
1767 : break;
1768 : }
1769 : }
1770 :
1771 : /* Mark every edge that crosses between sections. */
1772 4878571 : FOR_EACH_BB_FN (bb, cfun)
1773 11648268 : FOR_EACH_EDGE (e, ei, bb->succs)
1774 : {
1775 7034201 : unsigned int flags = e->flags;
1776 :
1777 : /* We should never have EDGE_CROSSING set yet. */
1778 7034201 : gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1779 :
1780 7034201 : if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1781 7034201 : && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1782 6734952 : && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1783 : {
1784 433536 : crossing_edges.safe_push (e);
1785 433536 : flags |= EDGE_CROSSING;
1786 : }
1787 :
1788 : /* Now that we've split eh edges as appropriate, allow landing pads
1789 : to be merged with the post-landing pads. */
1790 7034201 : flags &= ~EDGE_PRESERVE;
1791 :
1792 7034201 : e->flags = flags;
1793 : }
1794 :
1795 264504 : return crossing_edges;
1796 264504 : }
1797 :
/* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.
   An edge "can" fall through either if it already does, or if the block
   ends in a condjump that could be inverted so that the other successor
   becomes the fallthru edge.  */

static void
set_edge_can_fallthru_flag (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Start from a clean slate for this edge.  */
	  e->flags &= ~EDGE_CAN_FALLTHRU;

	  /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
	  if (e->flags & EDGE_FALLTHRU)
	    e->flags |= EDGE_CAN_FALLTHRU;
	}

      /* If the BB ends with an invertible condjump all (2) edges are
	 CAN_FALLTHRU edges.  */
      if (EDGE_COUNT (bb->succs) != 2)
	continue;
      if (!any_condjump_p (BB_END (bb)))
	continue;

      /* Probe invertibility by actually inverting the jump; if that
	 worked, invert it a second time to restore the original
	 condition.  Only when both calls succeed are the two edges
	 interchangeable.  */
      rtx_jump_insn *bb_end_jump = as_a <rtx_jump_insn *> (BB_END (bb));
      if (!invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0))
	continue;
      invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0);
      EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
      EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
    }
}
1834 :
1835 : /* If any destination of a crossing edge does not have a label, add label;
1836 : Convert any easy fall-through crossing edges to unconditional jumps. */
1837 :
1838 : static void
1839 64348 : add_labels_and_missing_jumps (vec<edge> crossing_edges)
1840 : {
1841 64348 : size_t i;
1842 64348 : edge e;
1843 :
1844 497884 : FOR_EACH_VEC_ELT (crossing_edges, i, e)
1845 : {
1846 433536 : basic_block src = e->src;
1847 433536 : basic_block dest = e->dest;
1848 433536 : rtx_jump_insn *new_jump;
1849 :
1850 433536 : if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1851 0 : continue;
1852 :
1853 : /* Make sure dest has a label. */
1854 433536 : rtx_code_label *label = block_label (dest);
1855 :
1856 : /* Nothing to do for non-fallthru edges. */
1857 433536 : if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1858 0 : continue;
1859 433536 : if ((e->flags & EDGE_FALLTHRU) == 0)
1860 254736 : continue;
1861 :
1862 : /* If the block does not end with a control flow insn, then we
1863 : can trivially add a jump to the end to fixup the crossing.
1864 : Otherwise the jump will have to go in a new bb, which will
1865 : be handled by fix_up_fall_thru_edges function. */
1866 178800 : if (control_flow_insn_p (BB_END (src)))
1867 117544 : continue;
1868 :
1869 : /* Make sure there's only one successor. */
1870 61256 : gcc_assert (single_succ_p (src));
1871 :
1872 61256 : new_jump = emit_jump_insn_after (targetm.gen_jump (label), BB_END (src));
1873 61256 : BB_END (src) = new_jump;
1874 61256 : JUMP_LABEL (new_jump) = label;
1875 61256 : LABEL_NUSES (label) += 1;
1876 :
1877 61256 : emit_barrier_after_bb (src);
1878 :
1879 : /* Mark edge as non-fallthru. */
1880 61256 : e->flags &= ~EDGE_FALLTHRU;
1881 : }
1882 64348 : }
1883 :
/* Find any bb's where the fall-through edge is a crossing edge (note that
   these bb's must also contain a conditional jump or end with a call
   instruction; we've already dealt with fall-through edges for blocks
   that didn't have a conditional jump or didn't end with call instruction
   in the call to add_labels_and_missing_jumps).  Convert the fall-through
   edge to non-crossing edge by inserting a new bb to fall-through into.
   The new bb will contain an unconditional jump (crossing edge) to the
   original fall through destination.  */

static void
fix_up_fall_thru_edges (void)
{
  basic_block cur_bb;

  FOR_EACH_BB_FN (cur_bb, cfun)
    {
      edge succ1;
      edge succ2;
      edge fall_thru = NULL;
      edge cond_jump = NULL;

      fall_thru = NULL;
      if (EDGE_COUNT (cur_bb->succs) > 0)
	succ1 = EDGE_SUCC (cur_bb, 0);
      else
	succ1 = NULL;

      if (EDGE_COUNT (cur_bb->succs) > 1)
	succ2 = EDGE_SUCC (cur_bb, 1);
      else
	succ2 = NULL;

      /* Find the fall-through edge.  */

      if (succ1
	  && (succ1->flags & EDGE_FALLTHRU))
	{
	  fall_thru = succ1;
	  cond_jump = succ2;
	}
      else if (succ2
	       && (succ2->flags & EDGE_FALLTHRU))
	{
	  fall_thru = succ2;
	  cond_jump = succ1;
	}
      /* With more than two successors (e.g. asm goto) neither of the
	 first two edges may be the fallthru one; search all of them.  */
      else if (succ2 && EDGE_COUNT (cur_bb->succs) > 2)
	fall_thru = find_fallthru_edge (cur_bb->succs);

      if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
	{
	  /* Check to see if the fall-thru edge is a crossing edge.  */

	  if (fall_thru->flags & EDGE_CROSSING)
	    {
	      /* The fall_thru edge crosses; now check the cond jump edge, if
		 it exists.  */

	      /* cond_jump_crosses starts out true so that when there is
		 no cond jump at all we take the force_nonfallthru path
		 below.  */
	      bool cond_jump_crosses = true;
	      int invert_worked = 0;
	      rtx_insn *old_jump = BB_END (cur_bb);

	      /* Find the jump instruction, if there is one.  */

	      if (cond_jump)
		{
		  if (!(cond_jump->flags & EDGE_CROSSING))
		    cond_jump_crosses = false;

		  /* We know the fall-thru edge crosses; if the cond
		     jump edge does NOT cross, and its destination is the
		     next block in the bb order, invert the jump
		     (i.e. fix it so the fall through does not cross and
		     the cond jump does).  */

		  if (!cond_jump_crosses)
		    {
		      /* Find label in fall_thru block. We've already added
			 any missing labels, so there must be one.  */

		      rtx_code_label *fall_thru_label
			= block_label (fall_thru->dest);

		      if (old_jump && fall_thru_label)
			{
			  rtx_jump_insn *old_jump_insn
			    = dyn_cast <rtx_jump_insn *> (old_jump);
			  if (old_jump_insn)
			    invert_worked = invert_jump (old_jump_insn,
							 fall_thru_label, 0);
			}

		      /* The jump was inverted, so swap the roles (and
			 the flags) of the two edges.  */
		      if (invert_worked)
			{
			  fall_thru->flags &= ~EDGE_FALLTHRU;
			  cond_jump->flags |= EDGE_FALLTHRU;
			  update_br_prob_note (cur_bb);
			  std::swap (fall_thru, cond_jump);
			  cond_jump->flags |= EDGE_CROSSING;
			  fall_thru->flags &= ~EDGE_CROSSING;
			}
		    }
		}

	      if (cond_jump_crosses || !invert_worked)
		{
		  /* This is the case where both edges out of the basic
		     block are crossing edges.  Here we will fix up the
		     fall through edge.  The jump edge will be taken care
		     of later.  The EDGE_CROSSING flag of fall_thru edge
		     is unset before the call to force_nonfallthru
		     function because if a new basic-block is created
		     this edge remains in the current section boundary
		     while the edge between new_bb and the fall_thru->dest
		     becomes EDGE_CROSSING.  */

		  fall_thru->flags &= ~EDGE_CROSSING;
		  unsigned old_count = EDGE_COUNT (cur_bb->succs);
		  basic_block new_bb = force_nonfallthru (fall_thru);

		  if (new_bb)
		    {
		      /* Thread the new block into the layout chain right
			 after cur_bb.  */
		      new_bb->aux = cur_bb->aux;
		      cur_bb->aux = new_bb;

		      /* This is done by force_nonfallthru_and_redirect.  */
		      gcc_assert (BB_PARTITION (new_bb)
				  == BB_PARTITION (cur_bb));

		      edge e = single_succ_edge (new_bb);
		      e->flags |= EDGE_CROSSING;
		      if (EDGE_COUNT (cur_bb->succs) > old_count)
			{
			  /* If asm goto has a crossing fallthrough edge
			     and at least one of the labels to the same bb,
			     force_nonfallthru can result in the fallthrough
			     edge being redirected and a new edge added for the
			     label or more labels to e->dest.  As we've
			     temporarily cleared EDGE_CROSSING flag on the
			     fallthrough edge, we need to restore it again.
			     See PR108596.  */
			  rtx_insn *j = BB_END (cur_bb);
			  gcc_checking_assert (JUMP_P (j)
					       && (asm_noperands (PATTERN (j))
						   > 0));
			  edge e2 = find_edge (cur_bb, e->dest);
			  if (e2)
			    e2->flags |= EDGE_CROSSING;
			}
		    }
		  else
		    {
		      /* If a new basic-block was not created; restore
			 the EDGE_CROSSING flag.  */
		      fall_thru->flags |= EDGE_CROSSING;
		    }

		  /* Add barrier after new jump */
		  emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
		}
	    }
	}
    }
}
2048 :
2049 : /* This function checks the destination block of a "crossing jump" to
2050 : see if it has any crossing predecessors that begin with a code label
2051 : and end with an unconditional jump. If so, it returns that predecessor
2052 : block. (This is to avoid creating lots of new basic blocks that all
2053 : contain unconditional jumps to the same destination). */
2054 :
2055 : static basic_block
2056 0 : find_jump_block (basic_block jump_dest)
2057 : {
2058 0 : basic_block source_bb = NULL;
2059 0 : edge e;
2060 0 : rtx_insn *insn;
2061 0 : edge_iterator ei;
2062 :
2063 0 : FOR_EACH_EDGE (e, ei, jump_dest->preds)
2064 0 : if (e->flags & EDGE_CROSSING)
2065 : {
2066 0 : basic_block src = e->src;
2067 :
2068 : /* Check each predecessor to see if it has a label, and contains
2069 : only one executable instruction, which is an unconditional jump.
2070 : If so, we can use it. */
2071 :
2072 0 : if (LABEL_P (BB_HEAD (src)))
2073 0 : for (insn = BB_HEAD (src);
2074 0 : !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
2075 0 : insn = NEXT_INSN (insn))
2076 : {
2077 0 : if (INSN_P (insn)
2078 : && insn == BB_END (src)
2079 : && JUMP_P (insn)
2080 : && !any_condjump_p (insn))
2081 : {
2082 : source_bb = src;
2083 : break;
2084 : }
2085 : }
2086 :
2087 : if (source_bb)
2088 : break;
2089 : }
2090 :
2091 0 : return source_bb;
2092 : }
2093 :
/* Find all BB's with conditional jumps that are crossing edges;
   insert a new bb and make the conditional jump branch to the new
   bb instead (make the new bb same color so conditional branch won't
   be a 'crossing' edge).  Insert an unconditional jump from the
   new bb to the original destination of the conditional jump.  */

static void
fix_crossing_conditional_branches (void)
{
  basic_block cur_bb;
  basic_block new_bb;
  basic_block dest;
  edge succ1;
  edge succ2;
  edge crossing_edge;
  edge new_edge;
  rtx set_src;
  rtx old_label = NULL_RTX;
  rtx_code_label *new_label;

  FOR_EACH_BB_FN (cur_bb, cfun)
    {
      crossing_edge = NULL;
      if (EDGE_COUNT (cur_bb->succs) > 0)
	succ1 = EDGE_SUCC (cur_bb, 0);
      else
	succ1 = NULL;

      if (EDGE_COUNT (cur_bb->succs) > 1)
	succ2 = EDGE_SUCC (cur_bb, 1);
      else
	succ2 = NULL;

      /* We already took care of fall-through edges, so only one successor
	 can be a crossing edge.  */

      if (succ1 && (succ1->flags & EDGE_CROSSING))
	crossing_edge = succ1;
      else if (succ2 && (succ2->flags & EDGE_CROSSING))
	crossing_edge = succ2;

      if (crossing_edge)
	{
	  rtx_insn *old_jump = BB_END (cur_bb);

	  /* Check to make sure the jump instruction is a
	     conditional jump.  */

	  set_src = NULL_RTX;

	  /* Dig the branch condition out of the jump pattern: either a
	     plain SET or the first element of a PARALLEL.  */
	  if (any_condjump_p (old_jump))
	    {
	      if (GET_CODE (PATTERN (old_jump)) == SET)
		set_src = SET_SRC (PATTERN (old_jump));
	      else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
		{
		  set_src = XVECEXP (PATTERN (old_jump), 0,0);
		  if (GET_CODE (set_src) == SET)
		    set_src = SET_SRC (set_src);
		  else
		    set_src = NULL_RTX;
		}
	    }

	  if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
	    {
	      rtx_jump_insn *old_jump_insn =
		as_a <rtx_jump_insn *> (old_jump);

	      /* The branch target is whichever arm of the IF_THEN_ELSE
		 is not the fallthru (PC) arm.  */
	      if (GET_CODE (XEXP (set_src, 1)) == PC)
		old_label = XEXP (set_src, 2);
	      else if (GET_CODE (XEXP (set_src, 2)) == PC)
		old_label = XEXP (set_src, 1);

	      /* Check to see if new bb for jumping to that dest has
		 already been created; if so, use it; if not, create
		 a new one.  */

	      new_bb = find_jump_block (crossing_edge->dest);

	      if (new_bb)
		new_label = block_label (new_bb);
	      else
		{
		  basic_block last_bb;
		  rtx_code_label *old_jump_target;
		  rtx_jump_insn *new_jump;

		  /* Create new basic block to be dest for
		     conditional jump.  */

		  /* Put appropriate instructions in new bb.  */

		  new_label = gen_label_rtx ();
		  emit_label (new_label);

		  gcc_assert (GET_CODE (old_label) == LABEL_REF);
		  old_jump_target = old_jump_insn->jump_target ();
		  new_jump = as_a <rtx_jump_insn *>
		    (emit_jump_insn (targetm.gen_jump (old_jump_target)));
		  new_jump->set_jump_target (old_jump_target);

		  /* Place the new block at the end of the function and
		     thread it into the layout chain there.  */
		  last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
		  new_bb = create_basic_block (new_label, new_jump, last_bb);
		  new_bb->aux = last_bb->aux;
		  last_bb->aux = new_bb;

		  emit_barrier_after_bb (new_bb);

		  /* Make sure new bb is in same partition as source
		     of conditional branch.  */
		  BB_COPY_PARTITION (new_bb, cur_bb);
		}

	      /* Make old jump branch to new bb.  */

	      redirect_jump (old_jump_insn, new_label, 0);

	      /* Remove crossing_edge as predecessor of 'dest'.  */

	      dest = crossing_edge->dest;

	      redirect_edge_succ (crossing_edge, new_bb);

	      /* Make a new edge from new_bb to old dest; new edge
		 will be a successor for new_bb and a predecessor
		 for 'dest'.  */

	      if (EDGE_COUNT (new_bb->succs) == 0)
		new_edge = make_single_succ_edge (new_bb, dest, 0);
	      else
		new_edge = EDGE_SUCC (new_bb, 0);

	      /* The conditional branch now stays in its section; the
		 unconditional jump in new_bb does the crossing.  */
	      crossing_edge->flags &= ~EDGE_CROSSING;
	      new_edge->flags |= EDGE_CROSSING;
	    }
	}
    }
}
2233 :
/* Find any unconditional branches that cross between hot and cold
   sections.  Convert them into indirect jumps instead.  */

static void
fix_crossing_unconditional_branches (void)
{
  basic_block cur_bb;
  rtx_insn *last_insn;
  rtx label;
  rtx label_addr;
  rtx_insn *indirect_jump_sequence;
  rtx_insn *jump_insn = NULL;
  rtx new_reg;
  rtx_insn *cur_insn;
  edge succ;

  FOR_EACH_BB_FN (cur_bb, cfun)
    {
      last_insn = BB_END (cur_bb);

      if (EDGE_COUNT (cur_bb->succs) < 1)
	continue;

      succ = EDGE_SUCC (cur_bb, 0);

      /* Check to see if bb ends in a crossing (unconditional) jump.  At
	 this point, no crossing jumps should be conditional.  */

      if (JUMP_P (last_insn)
	  && (succ->flags & EDGE_CROSSING))
	{
	  gcc_assert (!any_condjump_p (last_insn));

	  /* Make sure the jump is not already an indirect or table jump.  */

	  if (!computed_jump_p (last_insn)
	      && !tablejump_p (last_insn, NULL, NULL)
	      && asm_noperands (PATTERN (last_insn)) < 0)
	    {
	      /* We have found a "crossing" unconditional branch.  Now
		 we must convert it to an indirect jump.  First create
		 reference of label, as target for jump.  */

	      label = JUMP_LABEL (last_insn);
	      label_addr = gen_rtx_LABEL_REF (Pmode, label);
	      LABEL_NUSES (label) += 1;

	      /* Get a register to use for the indirect jump.  */

	      new_reg = gen_reg_rtx (Pmode);

	      /* Generate indirect the jump sequence.  */

	      start_sequence ();
	      emit_move_insn (new_reg, label_addr);
	      emit_indirect_jump (new_reg);
	      indirect_jump_sequence = end_sequence ();

	      /* Make sure every instruction in the new jump sequence has
		 its basic block set to be cur_bb.  */

	      /* Also remember the jump insn of the sequence; it is needed
		 below to set JUMP_LABEL and BB_END.  */
	      for (cur_insn = indirect_jump_sequence; cur_insn;
		   cur_insn = NEXT_INSN (cur_insn))
		{
		  if (!BARRIER_P (cur_insn))
		    BLOCK_FOR_INSN (cur_insn) = cur_bb;
		  if (JUMP_P (cur_insn))
		    jump_insn = cur_insn;
		}

	      /* Insert the new (indirect) jump sequence immediately before
		 the unconditional jump, then delete the unconditional jump.  */

	      emit_insn_before (indirect_jump_sequence, last_insn);
	      delete_insn (last_insn);

	      JUMP_LABEL (jump_insn) = label;
	      LABEL_NUSES (label)++;

	      /* Make BB_END for cur_bb be the jump instruction (NOT the
		 barrier instruction at the end of the sequence...).  */

	      BB_END (cur_bb) = jump_insn;
	    }
	}
    }
}
2321 :
2322 : /* Update CROSSING_JUMP_P flags on all jump insns. */
2323 :
2324 : static void
2325 64348 : update_crossing_jump_flags (void)
2326 : {
2327 64348 : basic_block bb;
2328 64348 : edge e;
2329 64348 : edge_iterator ei;
2330 :
2331 1856180 : FOR_EACH_BB_FN (bb, cfun)
2332 3828689 : FOR_EACH_EDGE (e, ei, bb->succs)
2333 2470011 : if (e->flags & EDGE_CROSSING)
2334 : {
2335 433154 : if (JUMP_P (BB_END (bb)))
2336 433057 : CROSSING_JUMP_P (BB_END (bb)) = 1;
2337 : break;
2338 : }
2339 64348 : }
2340 :
2341 : /* Reorder basic blocks using the software trace cache (STC) algorithm. */
2342 :
2343 : static void
2344 598665 : reorder_basic_blocks_software_trace_cache (void)
2345 : {
2346 598665 : if (dump_file)
2347 23 : fprintf (dump_file, "\nReordering with the STC algorithm.\n\n");
2348 :
2349 598665 : int n_traces;
2350 598665 : int i;
2351 598665 : struct trace *traces;
2352 :
2353 : /* We are estimating the length of uncond jump insn only once since the code
2354 : for getting the insn length always returns the minimal length now. */
2355 598665 : if (uncond_jump_length == 0)
2356 9083 : uncond_jump_length = get_uncond_jump_length ();
2357 :
2358 : /* We need to know some information for each basic block. */
2359 598665 : array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2360 598665 : bbd = XNEWVEC (bbro_basic_block_data, array_size);
2361 17088300 : for (i = 0; i < array_size; i++)
2362 : {
2363 16489635 : bbd[i].start_of_trace = -1;
2364 16489635 : bbd[i].end_of_trace = -1;
2365 16489635 : bbd[i].in_trace = -1;
2366 16489635 : bbd[i].visited = 0;
2367 16489635 : bbd[i].priority = -1;
2368 16489635 : bbd[i].heap = NULL;
2369 16489635 : bbd[i].node = NULL;
2370 : }
2371 :
2372 598665 : traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2373 598665 : n_traces = 0;
2374 598665 : find_traces (&n_traces, traces);
2375 598665 : connect_traces (n_traces, traces);
2376 598665 : FREE (traces);
2377 598665 : FREE (bbd);
2378 598665 : }
2379 :
2380 : /* Order edges by execution frequency, higher first.
2381 : Return:
2382 : 1 iff frequency (VE1) < frequency (VE2)
2383 : 0 iff frequency (VE1) == frequency (VE2)
2384 : -1 iff frequency (VE1) > frequency (VE2) */
2385 :
2386 : static int
2387 21006966 : edge_order (const void *ve1, const void *ve2)
2388 : {
2389 21006966 : edge e1 = *(const edge *) ve1;
2390 21006966 : edge e2 = *(const edge *) ve2;
2391 21006966 : profile_count c1 = e1->count ();
2392 21006966 : profile_count c2 = e2->count ();
2393 : /* Since profile_count::operator< does not establish a strict weak order
2394 : in presence of uninitialized counts, use 'max': this makes them appear
2395 : as if having execution frequency less than any initialized count. */
2396 21006966 : gcov_type gc1 = c1.initialized_p () ? c1.to_gcov_type () : 0;
2397 21006966 : gcov_type gc2 = c2.initialized_p () ? c2.to_gcov_type () : 0;
2398 21006966 : gcov_type m = MAX (gc1, gc2);
2399 21006966 : int low_to_high_cmp = (m == gc1) - (m == gc2);
2400 : /* gcc_stablesort sorts values in low-to-high order. But edges should
2401 : be sorted in the opposite order - with highest execution frequency first.
2402 : So return an inverted comparison to trick gcc_stablesort into
2403 : performing a reversed sorting order. */
2404 21006966 : return -1 * low_to_high_cmp;
2405 : }
2406 :
/* Reorder basic blocks using the "simple" algorithm.  This tries to
   maximize the dynamic number of branches that are fallthrough, without
   copying instructions.  The algorithm is greedy, looking at the most
   frequently executed branch first.  */

static void
reorder_basic_blocks_simple (void)
{
  if (dump_file)
    fprintf (dump_file, "\nReordering with the \"simple\" algorithm.\n\n");

  /* At most two candidate edges per block (condjump), hence 2 * nblocks.  */
  edge *edges = new edge[2 * n_basic_blocks_for_fn (cfun)];

  /* First, collect all edges that can be optimized by reordering blocks:
     simple jumps and conditional jumps, as well as the function entry edge.  */

  int n = 0;
  edges[n++] = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *end = BB_END (bb);

      /* Computed jumps and tablejumps cannot be redirected freely.  */
      if (computed_jump_p (end) || tablejump_p (end, NULL, NULL))
	continue;

      /* We cannot optimize asm goto.  */
      if (JUMP_P (end) && extract_asm_operands (end))
	continue;

      if (single_succ_p (bb))
	edges[n++] = EDGE_SUCC (bb, 0);
      else if (any_condjump_p (end))
	{
	  edge e0 = EDGE_SUCC (bb, 0);
	  edge e1 = EDGE_SUCC (bb, 1);
	  /* When optimizing for size it is best to keep the original
	     fallthrough edges.  */
	  if (e1->flags & EDGE_FALLTHRU)
	    std::swap (e0, e1);
	  edges[n++] = e0;
	  edges[n++] = e1;
	}
    }

  /* Sort the edges, the most desirable first.  When optimizing for size
     all edges are equally desirable.  */

  if (optimize_function_for_speed_p (cfun))
    gcc_stablesort (edges, n, sizeof *edges, edge_order);

  /* Now decide which of those edges to make fallthrough edges.  We set
     BB_VISITED if a block already has a fallthrough successor assigned
     to it.  We make ->AUX of an endpoint point to the opposite endpoint
     of a sequence of blocks that fall through, and ->AUX will be NULL
     for a block that is in such a sequence but not an endpoint anymore.

     To start with, everything points to itself, nothing is assigned yet.  */

  FOR_ALL_BB_FN (bb, cfun)
    {
      bb->aux = bb;
      bb->flags &= ~BB_VISITED;
    }

  /* The exit block can never start a fallthrough sequence.  */
  EXIT_BLOCK_PTR_FOR_FN (cfun)->aux = 0;

  /* Now for all edges, the most desirable first, see if that edge can
     connect two sequences.  If it can, update AUX and BB_VISITED; if it
     cannot, zero out the edge in the table.  */

  for (int j = 0; j < n; j++)
    {
      edge e = edges[j];

      /* The edge would join the sequence ending at TAIL_A (whose start
	 is HEAD_A) to the sequence starting at HEAD_B (ending at
	 TAIL_B).  */
      basic_block tail_a = e->src;
      basic_block head_b = e->dest;
      basic_block head_a = (basic_block) tail_a->aux;
      basic_block tail_b = (basic_block) head_b->aux;

      /* An edge cannot connect two sequences if:
	 - it crosses partitions;
	 - its src is not a current endpoint;
	 - its dest is not a current endpoint;
	 - or, it would create a loop.  */

      if (e->flags & EDGE_CROSSING
	  || tail_a->flags & BB_VISITED
	  || !tail_b
	  || (!(head_b->flags & BB_VISITED) && head_b != tail_b)
	  || tail_a == tail_b)
	{
	  edges[j] = 0;
	  continue;
	}

      /* Splice the two sequences: the joined endpoints become interior
	 (AUX = 0), and the outer endpoints now point at each other.  */
      tail_a->aux = 0;
      head_b->aux = 0;
      head_a->aux = tail_b;
      tail_b->aux = head_a;
      tail_a->flags |= BB_VISITED;
    }

  /* Put the pieces together, in the same order that the start blocks of
     the sequences already had.  The hot/cold partitioning gives a little
     complication: as a first pass only do this for blocks in the same
     partition as the start block, and (if there is anything left to do)
     in a second pass handle the other partition.  */

  basic_block last_tail = (basic_block) ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;

  int current_partition
    = BB_PARTITION (last_tail == ENTRY_BLOCK_PTR_FOR_FN (cfun)
		    ? EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest
		    : last_tail);
  bool need_another_pass = true;

  for (int pass = 0; pass < 2 && need_another_pass; pass++)
    {
      need_another_pass = false;

      /* A block heads a sequence if it is an assigned endpoint
	 (BB_VISITED with non-null AUX) or still a singleton
	 (AUX == itself).  */
      FOR_EACH_BB_FN (bb, cfun)
	if ((bb->flags & BB_VISITED && bb->aux) || bb->aux == bb)
	  {
	    if (BB_PARTITION (bb) != current_partition)
	      {
		need_another_pass = true;
		continue;
	      }

	    /* Chain this sequence after the previous one.  */
	    last_tail->aux = bb;
	    last_tail = (basic_block) bb->aux;
	  }

      current_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
    }

  /* Terminate the chain of sequence starts.  */
  last_tail->aux = 0;

  /* Finally, link all the chosen fallthrough edges.  */

  for (int j = 0; j < n; j++)
    if (edges[j])
      edges[j]->src->aux = edges[j]->dest;

  delete[] edges;

  /* If the entry edge no longer falls through we have to make a new
     block so it can do so again.  */

  edge e = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
  if (e->dest != ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux)
    {
      force_nonfallthru (e);
      e->src->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
    }
}
2565 :
2566 : /* Reorder basic blocks. The main entry point to this file. */
2567 :
2568 : static void
2569 1043684 : reorder_basic_blocks (void)
2570 : {
2571 1043684 : gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2572 :
2573 1043684 : if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2574 : return;
2575 :
2576 633539 : set_edge_can_fallthru_flag ();
2577 633539 : mark_dfs_back_edges ();
2578 :
2579 633539 : switch (flag_reorder_blocks_algorithm)
2580 : {
2581 34874 : case REORDER_BLOCKS_ALGORITHM_SIMPLE:
2582 34874 : reorder_basic_blocks_simple ();
2583 34874 : break;
2584 :
2585 598665 : case REORDER_BLOCKS_ALGORITHM_STC:
2586 598665 : reorder_basic_blocks_software_trace_cache ();
2587 598665 : break;
2588 :
2589 0 : default:
2590 0 : gcc_unreachable ();
2591 : }
2592 :
2593 633539 : relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2594 :
2595 633539 : if (dump_file)
2596 : {
2597 31 : if (dump_flags & TDF_DETAILS)
2598 0 : dump_reg_info (dump_file);
2599 31 : dump_flow_info (dump_file, dump_flags);
2600 : }
2601 :
2602 : /* Signal that rtl_verify_flow_info_1 can now verify that there
2603 : is at most one switch between hot/cold sections. */
2604 633539 : crtl->bb_reorder_complete = true;
2605 : }
2606 :
2607 : /* Determine which partition the first basic block in the function
2608 : belongs to, then find the first basic block in the current function
2609 : that belongs to a different section, and insert a
2610 : NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2611 : instruction stream. When writing out the assembly code,
2612 : encountering this note will make the compiler switch between the
2613 : hot and cold text sections. */
2614 :
2615 : void
2616 64348 : insert_section_boundary_note (void)
2617 : {
2618 64348 : basic_block bb;
2619 64348 : bool switched_sections = false;
2620 64348 : int current_partition = 0;
2621 :
2622 64348 : if (!crtl->has_bb_partition)
2623 : return;
2624 :
2625 1910246 : FOR_EACH_BB_FN (bb, cfun)
2626 : {
2627 1845898 : if (!current_partition)
2628 64348 : current_partition = BB_PARTITION (bb);
2629 1845898 : if (BB_PARTITION (bb) != current_partition)
2630 : {
2631 64342 : gcc_assert (!switched_sections);
2632 64342 : switched_sections = true;
2633 64342 : emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2634 64342 : current_partition = BB_PARTITION (bb);
2635 : }
2636 : }
2637 :
2638 : /* Make sure crtl->has_bb_partition matches reality even if bbpart finds
2639 : some hot and some cold basic blocks, but later one of those kinds is
2640 : optimized away. */
2641 64348 : crtl->has_bb_partition = switched_sections;
2642 : }
2643 :
2644 : namespace {
2645 :
2646 : const pass_data pass_data_reorder_blocks =
2647 : {
2648 : RTL_PASS, /* type */
2649 : "bbro", /* name */
2650 : OPTGROUP_NONE, /* optinfo_flags */
2651 : TV_REORDER_BLOCKS, /* tv_id */
2652 : 0, /* properties_required */
2653 : 0, /* properties_provided */
2654 : 0, /* properties_destroyed */
2655 : 0, /* todo_flags_start */
2656 : 0, /* todo_flags_finish */
2657 : };
2658 :
/* Pass wrapper running reorder_basic_blocks on each function; gated on
   the target allowing jump modification and on the user requesting
   block reordering.  */
class pass_reorder_blocks : public rtl_opt_pass
{
public:
  pass_reorder_blocks (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override
  {
    if (targetm.cannot_modify_jumps_p ())
      return false;
    /* Only run when optimizing and some form of reordering (plain or
       hot/cold partitioning) was enabled.  */
    return (optimize > 0
	    && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
  }

  unsigned int execute (function *) final override;

}; // class pass_reorder_blocks
2678 :
/* Main entry of the pass: reorder the blocks of FUN and leave the CFG
   in normal (non-layout) mode again.  Always returns 0 (no extra
   TODOs).  */
unsigned int
pass_reorder_blocks::execute (function *fun)
{
  basic_block bb;

  /* Last attempt to optimize CFG, as scheduling, peepholing and insn
     splitting possibly introduced more crossjumping opportunities.  */
  cfg_layout_initialize (CLEANUP_EXPENSIVE);

  reorder_basic_blocks ();
  cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_NO_PARTITIONING);

  /* Record the chosen block order in the AUX chain so that
     cfg_layout_finalize lays the insns out accordingly.  */
  FOR_EACH_BB_FN (bb, fun)
    if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
      bb->aux = bb->next_bb;
  cfg_layout_finalize ();

  /* Insns moved around; refresh the per-block logical insn uids.  */
  FOR_EACH_BB_FN (bb, fun)
    df_recompute_luids (bb);
  return 0;
}
2700 :
2701 : } // anon namespace
2702 :
/* Factory for the "bbro" pass; called by the pass manager, which takes
   ownership of the returned object.  */
rtl_opt_pass *
make_pass_reorder_blocks (gcc::context *ctxt)
{
  return new pass_reorder_blocks (ctxt);
}
2708 :
2709 : /* Duplicate a block (that we already know ends in a computed jump) into its
2710 : predecessors, where possible. Return whether anything is changed. */
/* Duplicate block BB (already known to end in a computed jump) into each
   suitable predecessor, merging the copy with the predecessor; MAX_SIZE
   bounds the estimated insn length of a block we are willing to copy.
   Return true if any duplication was performed.  */
static bool
maybe_duplicate_computed_goto (basic_block bb, int max_size)
{
  /* Make sure that the block is small enough.  */
  rtx_insn *insn;
  FOR_BB_INSNS (bb, insn)
    if (INSN_P (insn))
      {
        max_size -= get_attr_min_length (insn);
        if (max_size < 0)
          return false;
      }

  bool changed = false;
  edge e;
  edge_iterator ei;
  /* duplicate_block redirects edge E away from BB, so BB->preds shrinks
     under us; ei_safe_edge plus explicit ei_next on the skip path keeps
     the iteration valid.  */
  for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
    {
      basic_block pred = e->src;

      /* Do not duplicate BB into PRED if we cannot merge a copy of BB
         with PRED.  */
      if (!single_succ_p (pred)
          || e->flags & EDGE_COMPLEX
          || pred->index < NUM_FIXED_BLOCKS
          || (JUMP_P (BB_END (pred)) && !simplejump_p (BB_END (pred)))
          || (JUMP_P (BB_END (pred)) && CROSSING_JUMP_P (BB_END (pred))))
        {
          ei_next (&ei);
          continue;
        }

      if (dump_file)
        fprintf (dump_file, "Duplicating computed goto bb %d into bb %d\n",
                 bb->index, e->src->index);

      /* Remember if PRED can be duplicated; if so, the copy of BB merged
         with PRED can be duplicated as well.  */
      bool can_dup_more = can_duplicate_block_p (pred);

      /* Make a copy of BB, merge it into PRED.  */
      basic_block copy = duplicate_block (bb, e, NULL);
      emit_barrier_after_bb (copy);
      reorder_insns_nobb (BB_HEAD (copy), BB_END (copy), BB_END (pred));
      merge_blocks (pred, copy);

      changed = true;

      /* Try to merge the resulting merged PRED into further predecessors.
         The recursive result is deliberately ignored: CHANGED is already
         true at this point.  */
      if (can_dup_more)
        maybe_duplicate_computed_goto (pred, max_size);
    }

  return changed;
}
2766 :
2767 : /* Duplicate the blocks containing computed gotos. This basically unfactors
2768 : computed gotos that were factored early on in the compilation process to
2769 : speed up edge based data flow. We used to not unfactor them again, which
2770 : can seriously pessimize code with many computed jumps in the source code,
2771 : such as interpreters. See e.g. PR15242. */
2772 : static void
2773 899821 : duplicate_computed_gotos (function *fun)
2774 : {
2775 : /* We are estimating the length of uncond jump insn only once
2776 : since the code for getting the insn length always returns
2777 : the minimal length now. */
2778 899821 : if (uncond_jump_length == 0)
2779 108823 : uncond_jump_length = get_uncond_jump_length ();
2780 :
2781 : /* Never copy a block larger than this. */
2782 899821 : int max_size
2783 899821 : = uncond_jump_length * param_max_goto_duplication_insns;
2784 :
2785 899821 : bool changed = false;
2786 :
2787 : /* Try to duplicate all blocks that end in a computed jump and that
2788 : can be duplicated at all. */
2789 899821 : basic_block bb;
2790 11348264 : FOR_EACH_BB_FN (bb, fun)
2791 10448443 : if (computed_jump_p (BB_END (bb)) && can_duplicate_block_p (bb))
2792 697 : changed |= maybe_duplicate_computed_goto (bb, max_size);
2793 :
2794 : /* Some blocks may have become unreachable. */
2795 899821 : if (changed)
2796 77 : cleanup_cfg (0);
2797 :
2798 : /* Duplicating blocks will redirect edges and may cause hot blocks
2799 : previously reached by both hot and cold blocks to become dominated
2800 : only by cold blocks. */
2801 77 : if (changed)
2802 77 : fixup_partitions ();
2803 899821 : }
2804 :
2805 : namespace {
2806 :
/* Metadata for the "compgotos" computed-goto duplication RTL pass;
   consumed by the pass manager when the pass is instantiated.  */
const pass_data pass_data_duplicate_computed_gotos =
{
  RTL_PASS, /* type */
  "compgotos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_DUP_COMPGOTO, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
2819 :
/* The "duplicate computed gotos" pass object.  Unfactors blocks ending
   in a computed jump into their predecessors; gate and execute are
   defined out of line below.  */
class pass_duplicate_computed_gotos : public rtl_opt_pass
{
public:
  pass_duplicate_computed_gotos (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override;

}; // class pass_duplicate_computed_gotos
2832 :
2833 : bool
2834 1471370 : pass_duplicate_computed_gotos::gate (function *fun)
2835 : {
2836 1471370 : if (targetm.cannot_modify_jumps_p ())
2837 : return false;
2838 1471370 : return (optimize > 0
2839 1043686 : && flag_expensive_optimizations
2840 2435314 : && ! optimize_function_for_size_p (fun));
2841 : }
2842 :
/* Entry point of the "compgotos" pass: unfactor computed gotos in FUN.
   Always returns 0.  */
unsigned int
pass_duplicate_computed_gotos::execute (function *fun)
{
  duplicate_computed_gotos (fun);

  return 0;
}
2850 :
2851 : } // anon namespace
2852 :
2853 : rtl_opt_pass *
2854 285722 : make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2855 : {
2856 285722 : return new pass_duplicate_computed_gotos (ctxt);
2857 : }
2858 :
2859 : /* This function is the main 'entrance' for the optimization that
2860 : partitions hot and cold basic blocks into separate sections of the
2861 : .o file (to improve performance and cache locality). Ideally it
2862 : would be called after all optimizations that rearrange the CFG have
2863 : been called. However part of this optimization may introduce new
2864 : register usage, so it must be called before register allocation has
2865 : occurred. This means that this optimization is actually called
2866 : well before the optimization that reorders basic blocks (see
2867 : function above).
2868 :
2869 : This optimization checks the feedback information to determine
2870 : which basic blocks are hot/cold, updates flags on the basic blocks
2871 : to indicate which section they belong in. This information is
2872 : later used for writing out sections in the .o file. Because hot
2873 : and cold sections can be arbitrarily large (within the bounds of
2874 : memory), far beyond the size of a single function, it is necessary
2875 : to fix up all edges that cross section boundaries, to make sure the
2876 : instructions used can actually span the required distance. The
2877 : fixes are described below.
2878 :
2879 : Fall-through edges must be changed into jumps; it is not safe or
2880 : legal to fall through across a section boundary. Whenever a
2881 : fall-through edge crossing a section boundary is encountered, a new
2882 : basic block is inserted (in the same section as the fall-through
2883 : source), and the fall through edge is redirected to the new basic
2884 : block. The new basic block contains an unconditional jump to the
2885 : original fall-through target. (If the unconditional jump is
2886 : insufficient to cross section boundaries, that is dealt with a
2887 : little later, see below).
2888 :
2889 : In order to deal with architectures that have short conditional
2890 : branches (which cannot span all of memory) we take any conditional
2891 : jump that attempts to cross a section boundary and add a level of
2892 : indirection: it becomes a conditional jump to a new basic block, in
2893 : the same section. The new basic block contains an unconditional
2894 : jump to the original target, in the other section.
2895 :
2896 : For those architectures whose unconditional branch is also
2897 : incapable of reaching all of memory, those unconditional jumps are
2898 : converted into indirect jumps, through a register.
2899 :
2900 : IMPORTANT NOTE: This optimization causes some messy interactions
2901 : with the cfg cleanup optimizations; those optimizations want to
2902 : merge blocks wherever possible, and to collapse indirect jump
2903 : sequences (change "A jumps to B jumps to C" directly into "A jumps
2904 : to C"). Those optimizations can undo the jump fixes that
2905 : partitioning is required to make (see above), in order to ensure
2906 : that jumps attempting to cross section boundaries are really able
2907 : to cover whatever distance the jump requires (on many architectures
2908 : conditional or unconditional jumps are not able to reach all of
2909 : memory). Therefore tests have to be inserted into each such
2910 : optimization to make sure that it does not undo stuff necessary to
2911 : cross partition boundaries. This would be much less of a problem
2912 : if we could perform this optimization later in the compilation, but
2913 : unfortunately the fact that we may need to create indirect jumps
2914 : (through registers) requires that this optimization be performed
2915 : before register allocation.
2916 :
2917 : Hot and cold basic blocks are partitioned and put in separate
2918 : sections of the .o file, to reduce paging and improve cache
2919 : performance (hopefully). This can result in bits of code from the
2920 : same function being widely separated in the .o file. However this
2921 : is not obvious to the current bb structure. Therefore we must take
2922 : care to ensure that: 1). There are no fall_thru edges that cross
2923 : between sections; 2). For those architectures which have "short"
2924 : conditional branches, all conditional branches that attempt to
2925 : cross between sections are converted to unconditional branches;
2926 : and, 3). For those architectures which have "short" unconditional
2927 : branches, all unconditional branches that attempt to cross between
2928 : sections are converted to indirect jumps.
2929 :
2930 : The code for fixing up fall_thru edges that cross between hot and
2931 : cold basic blocks does so by creating new basic blocks containing
2932 : unconditional branches to the appropriate label in the "other"
2933 : section. The new basic block is then put in the same (hot or cold)
2934 : section as the original conditional branch, and the fall_thru edge
2935 : is modified to fall into the new basic block instead. By adding
2936 : this level of indirection we end up with only unconditional branches
2937 : crossing between hot and cold sections.
2938 :
2939 : Conditional branches are dealt with by adding a level of indirection.
2940 : A new basic block is added in the same (hot/cold) section as the
2941 : conditional branch, and the conditional branch is retargeted to the
2942 : new basic block. The new basic block contains an unconditional branch
2943 : to the original target of the conditional branch (in the other section).
2944 :
2945 : Unconditional branches are dealt with by converting them into
2946 : indirect jumps. */
2947 :
2948 : namespace {
2949 :
/* Metadata for the "bbpart" hot/cold partitioning RTL pass; it requires
   cfglayout mode (PROP_cfglayout) and shares TV_REORDER_BLOCKS timing
   with the reordering pass.  */
const pass_data pass_data_partition_blocks =
{
  RTL_PASS, /* type */
  "bbpart", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_REORDER_BLOCKS, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
2962 :
/* The "partition blocks" pass object.  Splits a function's basic blocks
   into hot and cold sections; gate and execute are defined out of line
   below.  */
class pass_partition_blocks : public rtl_opt_pass
{
public:
  pass_partition_blocks (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_partition_blocks, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override;

}; // class pass_partition_blocks
2975 :
2976 : bool
2977 1471370 : pass_partition_blocks::gate (function *fun)
2978 : {
2979 : /* The optimization to partition hot/cold basic blocks into separate
2980 : sections of the .o file does not work well with linkonce or with
2981 : user defined section attributes or with naked attribute. Don't call
2982 : it if either case arises. */
2983 1471370 : return (flag_reorder_blocks_and_partition
2984 723464 : && optimize
2985 : /* See pass_reorder_blocks::gate. We should not partition if
2986 : we are going to omit the reordering. */
2987 723424 : && optimize_function_for_speed_p (fun)
2988 674849 : && !DECL_COMDAT_GROUP (current_function_decl)
2989 566601 : && !lookup_attribute ("section", DECL_ATTRIBUTES (fun->decl))
2990 566555 : && !lookup_attribute ("naked", DECL_ATTRIBUTES (fun->decl))
2991 : /* Workaround a bug in GDB where read_partial_die doesn't cope
2992 : with DIEs with DW_AT_ranges, see PR81115. */
2993 2037876 : && !(in_lto_p && MAIN_NAME_P (DECL_NAME (fun->decl))));
2994 : }
2995 :
/* Entry point of the "bbpart" pass: mark each basic block of FUN as hot
   or cold and fix up every edge that crosses between the two partitions.
   See the large comment above for the full rationale.  */
unsigned
pass_partition_blocks::execute (function *fun)
{
  vec<edge> crossing_edges;

  /* Nothing to partition if the function has at most one real block.  */
  if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
    return 0;

  df_set_flags (DF_DEFER_INSN_RESCAN);

  crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
  if (!crossing_edges.exists ())
    /* Make sure to process deferred rescans and clear changeable df flags.  */
    return TODO_df_finish;

  crtl->has_bb_partition = true;

  /* Make sure the source of any crossing edge ends in a jump and the
     destination of any crossing edge has a label.  */
  add_labels_and_missing_jumps (crossing_edges);

  /* Convert all crossing fall_thru edges to non-crossing fall
     thrus to unconditional jumps (that jump to the original fall
     through dest).  */
  fix_up_fall_thru_edges ();

  /* If the architecture does not have conditional branches that can
     span all of memory, convert crossing conditional branches into
     crossing unconditional branches.  */
  if (!HAS_LONG_COND_BRANCH)
    fix_crossing_conditional_branches ();

  /* If the architecture does not have unconditional branches that
     can span all of memory, convert crossing unconditional branches
     into indirect jumps.  Since adding an indirect jump also adds
     a new register usage, update the register usage information as
     well.  */
  if (!HAS_LONG_UNCOND_BRANCH)
    fix_crossing_unconditional_branches ();

  update_crossing_jump_flags ();

  /* Clear bb->aux fields that the above routines were using.  */
  clear_aux_for_blocks ();

  crossing_edges.release ();

  /* ??? FIXME: DF generates the bb info for a block immediately.
     And by immediately, I mean *during* creation of the block.

	#0  df_bb_refs_collect
	#1  in df_bb_refs_record
	#2  in create_basic_block_structure

     Which means that the bb_has_eh_pred test in df_bb_refs_collect
     will *always* fail, because no edges can have been added to the
     block yet.  Which of course means we don't add the right
     artificial refs, which means we fail df_verify (much) later.

     Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
     that we also shouldn't grab data from the new blocks those new
     insns are in either.  In this way one can create the block, link
     it up properly, and have everything Just Work later, when deferred
     insns are processed.

     In the meantime, we have no other option but to throw away all
     of the DF data and recompute it all.  */
  if (fun->eh->lp_array)
    {
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
	 data.  We blindly generated all of them when creating the new
	 landing pad.  Delete those assignments we don't use.  */
      df_set_flags (DF_LR_RUN_DCE);
      df_analyze ();
    }

  /* Make sure to process deferred rescans and clear changeable df flags.  */
  return TODO_df_finish;
}
3078 :
3079 : } // anon namespace
3080 :
3081 : rtl_opt_pass *
3082 285722 : make_pass_partition_blocks (gcc::context *ctxt)
3083 : {
3084 285722 : return new pass_partition_blocks (ctxt);
3085 : }
|