Line data Source code
1 : /* The tracer pass for the GNU compiler.
2 : Contributed by Jan Hubicka, SuSE Labs.
3 : Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC.
4 : Copyright (C) 2001-2026 Free Software Foundation, Inc.
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify it
9 : under the terms of the GNU General Public License as published by
10 : the Free Software Foundation; either version 3, or (at your option)
11 : any later version.
12 :
13 : GCC is distributed in the hope that it will be useful, but WITHOUT
14 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 : or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 : License for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : /* This pass performs the tail duplication needed for superblock formation.
23 : For more information see:
24 :
25 : Design and Analysis of Profile-Based Optimization in Compaq's
26 : Compilation Tools for Alpha; Journal of Instruction-Level
27 : Parallelism 3 (2000) 1-25
28 :
29 : Unlike Compaq's implementation we don't do the loop peeling as most
30 : probably a better job can be done by a special pass and we don't
31 : need to worry too much about the code size implications as the tail
32 : duplicates are crossjumped again if optimizations are not
33 : performed. */
34 :
35 :
36 : #include "config.h"
37 : #include "system.h"
38 : #include "coretypes.h"
39 : #include "backend.h"
40 : #include "rtl.h"
41 : #include "tree.h"
42 : #include "gimple.h"
43 : #include "cfghooks.h"
44 : #include "tree-pass.h"
45 : #include "profile.h"
46 : #include "cfganal.h"
47 : #include "gimple-iterator.h"
48 : #include "tree-cfg.h"
49 : #include "tree-ssa.h"
50 : #include "tree-inline.h"
51 : #include "cfgloop.h"
52 : #include "alloc-pool.h"
53 : #include "fibonacci_heap.h"
54 : #include "tracer.h"
55 :
56 : static void analyze_bb (basic_block, int *);
57 : static bool better_p (const_edge, const_edge);
58 : static edge find_best_successor (basic_block);
59 : static edge find_best_predecessor (basic_block);
60 : static int find_trace (basic_block, basic_block *);
61 :
62 : /* Minimal outgoing edge probability considered for superblock formation. */
63 : static int probability_cutoff;
64 : static int branch_ratio_cutoff;
65 :
66 : /* A bit BB->index is set if BB has already been seen, i.e. it is
67 : connected to some trace already. */
68 : static sbitmap bb_seen;
69 :
70 : static inline void
71 82167 : mark_bb_seen (basic_block bb)
72 : {
73 82167 : unsigned int size = SBITMAP_SIZE (bb_seen);
74 :
75 82167 : if ((unsigned int)bb->index >= size)
76 0 : bb_seen = sbitmap_resize (bb_seen, size * 2, 0);
77 :
78 82167 : bitmap_set_bit (bb_seen, bb->index);
79 82167 : }
80 :
81 : static inline bool
82 443840 : bb_seen_p (basic_block bb)
83 : {
84 443840 : return bitmap_bit_p (bb_seen, bb->index);
85 : }
86 :
87 : static sbitmap can_duplicate_bb;
88 :
89 : /* Cache VAL as value of can_duplicate_bb_p for BB. */
90 : static inline void
91 267778 : cache_can_duplicate_bb_p (const_basic_block bb, bool val)
92 : {
93 267778 : if (val)
94 267748 : bitmap_set_bit (can_duplicate_bb, bb->index);
95 267778 : }
96 :
97 : /* Return cached value of can_duplicate_bb_p for BB. */
98 : static bool
99 1184676 : cached_can_duplicate_bb_p (const_basic_block bb)
100 : {
101 1184676 : if (can_duplicate_bb)
102 : {
103 1127751 : unsigned int size = SBITMAP_SIZE (can_duplicate_bb);
104 1127751 : if ((unsigned int)bb->index < size)
105 1106217 : return bitmap_bit_p (can_duplicate_bb, bb->index);
106 :
107 : /* Assume added bb's should not be duplicated. */
108 : return false;
109 : }
110 :
111 56925 : return can_duplicate_block_p (bb);
112 : }
113 :
114 : /* Return true if we should ignore the basic block for purposes of tracing. */
115 : bool
116 1263418 : ignore_bb_p (const_basic_block bb)
117 : {
118 1263418 : if (bb->index < NUM_FIXED_BLOCKS)
119 : return true;
120 1247181 : if (optimize_bb_for_size_p (bb))
121 : return true;
122 :
123 1184676 : return !cached_can_duplicate_bb_p (bb);
124 : }
125 :
126 : /* Return number of instructions in the block. */
127 :
128 : static void
129 267778 : analyze_bb (basic_block bb, int *count)
130 : {
131 267778 : gimple_stmt_iterator gsi;
132 267778 : gimple *stmt;
133 267778 : int n = 0;
134 :
135 1762213 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
136 : {
137 1226657 : stmt = gsi_stmt (gsi);
138 1226657 : n += estimate_num_insns (stmt, &eni_size_weights);
139 : }
140 267778 : *count = n;
141 :
142 267778 : cache_can_duplicate_bb_p (bb, can_duplicate_block_p (const_cast<basic_block>
143 : (bb)));
144 267778 : }
145 :
146 : /* Return true if E1 is more frequent than E2. */
147 : static bool
148 603236 : better_p (const_edge e1, const_edge e2)
149 : {
150 882487 : if ((e1->count () > e2->count ()) || (e1->count () < e2->count ()))
151 581302 : return e1->count () > e2->count ();
152 : /* This is needed to avoid changes in the decision after
153 : CFG is modified. */
154 21934 : if (e1->src != e2->src)
155 8296 : return e1->src->index > e2->src->index;
156 13638 : return e1->dest->index > e2->dest->index;
157 : }
158 :
159 : /* Return most frequent successor of basic block BB. */
160 :
161 : static edge
162 410282 : find_best_successor (basic_block bb)
163 : {
164 410282 : edge e;
165 410282 : edge best = NULL;
166 410282 : edge_iterator ei;
167 :
168 1160803 : FOR_EACH_EDGE (e, ei, bb->succs)
169 : {
170 750612 : if (!e->count ().initialized_p ())
171 : return NULL;
172 750521 : if (!best || better_p (e, best))
173 : best = e;
174 : }
175 410191 : if (!best || ignore_bb_p (best->dest))
176 13130 : return NULL;
177 397061 : if (!best->probability.initialized_p ()
178 397061 : || best->probability.to_reg_br_prob_base () <= probability_cutoff)
179 : return NULL;
180 : return best;
181 : }
182 :
183 : /* Return most frequent predecessor of basic block BB. */
184 :
185 : static edge
186 406395 : find_best_predecessor (basic_block bb)
187 : {
188 406395 : edge e;
189 406395 : edge best = NULL;
190 406395 : edge_iterator ei;
191 :
192 1075442 : FOR_EACH_EDGE (e, ei, bb->preds)
193 : {
194 669143 : if (!e->count ().initialized_p ())
195 : return NULL;
196 669047 : if (!best || better_p (e, best))
197 : best = e;
198 : }
199 406299 : if (!best || ignore_bb_p (best->src))
200 17257 : return NULL;
201 389042 : if (bb->count.initialized_p ()
202 1166982 : && (best->count ().to_frequency (cfun) * REG_BR_PROB_BASE
203 389042 : < bb->count.to_frequency (cfun) * branch_ratio_cutoff))
204 144 : return NULL;
205 : return best;
206 : }
207 :
208 : /* Find the trace using bb and record it in the TRACE array.
209 : Return number of basic blocks recorded. */
210 :
211 : static int
212 39537 : find_trace (basic_block bb, basic_block *trace)
213 : {
214 39537 : int i = 0;
215 39537 : edge e;
216 :
217 39537 : if (dump_file)
218 17 : fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun));
219 :
220 103190 : while ((e = find_best_predecessor (bb)) != NULL)
221 : {
222 86748 : basic_block bb2 = e->src;
223 172025 : if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
224 160066 : || find_best_successor (bb2) != e)
225 : break;
226 63653 : if (dump_file)
227 13 : fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
228 : bb = bb2;
229 : }
230 39537 : if (dump_file)
231 17 : fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun));
232 39537 : trace[i++] = bb;
233 :
234 : /* Follow the trace in forward direction. */
235 336964 : while ((e = find_best_successor (bb)) != NULL)
236 : {
237 317555 : bb = e->dest;
238 635042 : if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
239 620760 : || find_best_predecessor (bb) != e)
240 : break;
241 297427 : if (dump_file)
242 30 : fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
243 297427 : trace[i++] = bb;
244 : }
245 39537 : if (dump_file)
246 17 : fprintf (dump_file, "\n");
247 39537 : return i;
248 : }
249 :
250 : /* Duplicate block BB2, placing it after BB in the CFG. Return the
251 : newly created block. */
252 : basic_block
253 15595 : transform_duplicate (basic_block bb, basic_block bb2)
254 : {
255 15595 : edge e;
256 15595 : basic_block copy;
257 :
258 15595 : e = find_edge (bb, bb2);
259 :
260 15595 : copy = duplicate_block (bb2, e, bb);
261 15595 : flush_pending_stmts (e);
262 :
263 15595 : add_phi_args_after_copy (©, 1, NULL);
264 :
265 15595 : return (copy);
266 : }
267 :
/* Look for basic blocks in frequency order, construct traces and tail
   duplicate if profitable.  Return true when the CFG was changed.  */

static bool
tail_duplicate (void)
{
  /* Per-block handles into HEAP, indexed by block index; NULL once a
     block has been removed from the heap.  */
  auto_vec<fibonacci_node<long, basic_block_def>*> blocks;
  blocks.safe_grow_cleared (last_basic_block_for_fn (cfun), true);

  basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
  int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun));
  int ninsns = 0, nduplicated = 0;
  gcov_type weighted_insns = 0, traced_insns = 0;
  fibonacci_heap<long, basic_block_def> heap (LONG_MIN);
  gcov_type cover_insns;
  int max_dup_insns;
  basic_block bb;
  bool changed = false;

  /* Create an oversized sbitmap to reduce the chance that we need to
     resize it.  */
  bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2);
  bitmap_clear (bb_seen);
  can_duplicate_bb = sbitmap_alloc (last_basic_block_for_fn (cfun));
  bitmap_clear (can_duplicate_bb);
  initialize_original_copy_tables ();

  /* Pick the edge-probability cutoff, scaled to REG_BR_PROB_BASE; use
     the stricter feedback parameter when real profile data is
     available.  */
  if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
    probability_cutoff = param_tracer_min_branch_probability_feedback;
  else
    probability_cutoff = param_tracer_min_branch_probability;
  probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;

  branch_ratio_cutoff =
    (REG_BR_PROB_BASE / 100 * param_tracer_min_branch_ratio);

  /* Size every block and seed the heap with all traceable blocks,
     keyed by negated frequency so the hottest block is extracted
     first.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      int n;
      analyze_bb (bb, &n);
      if (!ignore_bb_p (bb))
	blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb);

      counts [bb->index] = n;
      ninsns += n;
      weighted_insns += n * bb->count.to_frequency (cfun);
    }

  /* COVER_INSNS: weighted instructions we aim to cover by traces.
     MAX_DUP_INSNS: cap on static code growth from duplication.  */
  if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
    cover_insns = param_tracer_dynamic_coverage_feedback;
  else
    cover_insns = param_tracer_dynamic_coverage;
  cover_insns = (weighted_insns * cover_insns + 50) / 100;
  max_dup_insns = (ninsns * param_tracer_max_code_growth + 50) / 100;

  /* Keep forming traces from the hottest unseen block until enough
     dynamic instructions are covered or the growth budget is spent.  */
  while (traced_insns < cover_insns && nduplicated < max_dup_insns
         && !heap.empty ())
    {
      basic_block bb = heap.extract_min ();
      int n, pos;

      if (!bb)
	break;

      blocks[bb->index] = NULL;

      /* The block may have become untraceable (e.g. infrequent)
	 since it was queued.  */
      if (ignore_bb_p (bb))
	continue;
      gcc_assert (!bb_seen_p (bb));

      n = find_trace (bb, trace);

      bb = trace[0];
      traced_insns += bb->count.to_frequency (cfun) * counts [bb->index];
      /* The trace head may still sit in the heap; drop it.  */
      if (blocks[bb->index])
	{
	  heap.delete_node (blocks[bb->index]);
	  blocks[bb->index] = NULL;
	}

      /* Walk the trace, tail-duplicating every block with multiple
	 predecessors so the trace becomes a single-entry superblock.  */
      for (pos = 1; pos < n; pos++)
	{
	  basic_block bb2 = trace[pos];

	  if (blocks[bb2->index])
	    {
	      heap.delete_node (blocks[bb2->index]);
	      blocks[bb2->index] = NULL;
	    }
	  traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index];
	  if (EDGE_COUNT (bb2->preds) > 1
	      && can_duplicate_block_p (bb2)
	      /* We have the tendency to duplicate the loop header
	         of all do { } while loops.  Do not do that - it is
		 not profitable and it might create a loop with multiple
		 entries or at least rotate the loop.  */
	      && bb2->loop_father->header != bb2)
	    {
	      nduplicated += counts [bb2->index];
	      basic_block copy = transform_duplicate (bb, bb2);

	      /* Reconsider the original copy of block we've duplicated.
	         Removing the most common predecessor may make it to be
	         head.  */
	      blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2);

	      if (dump_file)
		fprintf (dump_file, "Duplicated %i as %i [%i]\n",
			 bb2->index, copy->index, copy->count.to_frequency (cfun));

	      bb2 = copy;
	      changed = true;
	    }
	  mark_bb_seen (bb2);
	  bb = bb2;
	  /* In case the trace became infrequent, stop duplicating.  */
	  if (ignore_bb_p (bb))
	    break;
	}
      if (dump_file)
	fprintf (dump_file, " covered now %.1f\n\n",
		 traced_insns * 100.0 / weighted_insns);
    }
  if (dump_file)
    fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
	     nduplicated * 100 / ninsns);

  /* Release all pass-local state.  */
  free_original_copy_tables ();
  sbitmap_free (bb_seen);
  sbitmap_free (can_duplicate_bb);
  can_duplicate_bb = NULL;
  free (trace);
  free (counts);

  return changed;
}
404 :
405 : namespace {
406 :
/* Pass metadata for the GIMPLE tracer pass.  */
const pass_data pass_data_tracer =
{
  GIMPLE_PASS, /* type */
  "tracer", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TRACER, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
419 :
/* The tracer pass object; trace formation and tail duplication are
   performed by execute, gated on the relevant flags.  */
class pass_tracer : public gimple_opt_pass
{
public:
  pass_tracer (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tracer, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when optimizing with -ftracer and block reordering
     enabled.  */
  bool gate (function *) final override
  {
    return (optimize > 0 && flag_tracer && flag_reorder_blocks);
  }

  unsigned int execute (function *) final override;

}; // class pass_tracer
436 :
437 : unsigned int
438 20219 : pass_tracer::execute (function *fun)
439 : {
440 20219 : bool changed;
441 :
442 20219 : if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
443 : return 0;
444 :
445 11380 : mark_dfs_back_edges ();
446 11380 : if (dump_file)
447 10 : brief_dump_cfg (dump_file, dump_flags);
448 :
449 : /* Trace formation is done on the fly inside tail_duplicate */
450 11380 : changed = tail_duplicate ();
451 11380 : if (changed)
452 : {
453 5146 : free_dominance_info (CDI_DOMINATORS);
454 : /* If we changed the CFG schedule loops for fixup by cleanup_cfg. */
455 5146 : loops_state_set (LOOPS_NEED_FIXUP);
456 : }
457 :
458 11380 : if (dump_file)
459 10 : brief_dump_cfg (dump_file, dump_flags);
460 :
461 11380 : return changed ? TODO_cleanup_cfg : 0;
462 : }
463 : } // anon namespace
464 :
465 : gimple_opt_pass *
466 287872 : make_pass_tracer (gcc::context *ctxt)
467 : {
468 287872 : return new pass_tracer (ctxt);
469 : }
|