Line data Source code
1 : /* The tracer pass for the GNU compiler.
2 : Contributed by Jan Hubicka, SuSE Labs.
3 : Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC.
4 : Copyright (C) 2001-2026 Free Software Foundation, Inc.
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify it
9 : under the terms of the GNU General Public License as published by
10 : the Free Software Foundation; either version 3, or (at your option)
11 : any later version.
12 :
13 : GCC is distributed in the hope that it will be useful, but WITHOUT
14 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 : or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 : License for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : /* This pass performs the tail duplication needed for superblock formation.
23 : For more information see:
24 :
25 : Design and Analysis of Profile-Based Optimization in Compaq's
26 : Compilation Tools for Alpha; Journal of Instruction-Level
27 : Parallelism 3 (2000) 1-25
28 :
29 : Unlike Compaq's implementation we don't do the loop peeling as most
30 : probably a better job can be done by a special pass and we don't
31 : need to worry too much about the code size implications as the tail
32 : duplicates are crossjumped again if optimizations are not
33 : performed. */
34 :
35 :
36 : #include "config.h"
37 : #include "system.h"
38 : #include "coretypes.h"
39 : #include "backend.h"
40 : #include "rtl.h"
41 : #include "tree.h"
42 : #include "gimple.h"
43 : #include "cfghooks.h"
44 : #include "tree-pass.h"
45 : #include "profile.h"
46 : #include "cfganal.h"
47 : #include "gimple-iterator.h"
48 : #include "tree-cfg.h"
49 : #include "tree-ssa.h"
50 : #include "tree-inline.h"
51 : #include "cfgloop.h"
52 : #include "alloc-pool.h"
53 : #include "fibonacci_heap.h"
54 : #include "tracer.h"
55 :
/* Local prototypes.  */
static void analyze_bb (basic_block, int *);
static bool better_p (const_edge, const_edge);
static edge find_best_successor (basic_block);
static edge find_best_predecessor (basic_block);
static int find_trace (basic_block, basic_block *);

/* Minimal outgoing edge probability considered for superblock formation. */
static int probability_cutoff;
/* Minimal ratio (scaled by REG_BR_PROB_BASE / 100) of an edge's count to
   its destination block's count for the edge to be followed when tracing
   backwards; see find_best_predecessor.  */
static int branch_ratio_cutoff;

/* A bit BB->index is set if BB has already been seen, i.e. it is
   connected to some trace already.  */
static sbitmap bb_seen;
69 :
70 : static inline void
71 82488 : mark_bb_seen (basic_block bb)
72 : {
73 82488 : unsigned int size = SBITMAP_SIZE (bb_seen);
74 :
75 82488 : if ((unsigned int)bb->index >= size)
76 0 : bb_seen = sbitmap_resize (bb_seen, size * 2, 0);
77 :
78 82488 : bitmap_set_bit (bb_seen, bb->index);
79 82488 : }
80 :
81 : static inline bool
82 446101 : bb_seen_p (basic_block bb)
83 : {
84 446101 : return bitmap_bit_p (bb_seen, bb->index);
85 : }
86 :
87 : static sbitmap can_duplicate_bb;
88 :
89 : /* Cache VAL as value of can_duplicate_bb_p for BB. */
90 : static inline void
91 268882 : cache_can_duplicate_bb_p (const_basic_block bb, bool val)
92 : {
93 268882 : if (val)
94 268852 : bitmap_set_bit (can_duplicate_bb, bb->index);
95 268882 : }
96 :
97 : /* Return cached value of can_duplicate_bb_p for BB. */
98 : static bool
99 1190470 : cached_can_duplicate_bb_p (const_basic_block bb)
100 : {
101 1190470 : if (can_duplicate_bb)
102 : {
103 1133327 : unsigned int size = SBITMAP_SIZE (can_duplicate_bb);
104 1133327 : if ((unsigned int)bb->index < size)
105 1111677 : return bitmap_bit_p (can_duplicate_bb, bb->index);
106 :
107 : /* Assume added bb's should not be duplicated. */
108 : return false;
109 : }
110 :
111 57143 : return can_duplicate_block_p (bb);
112 : }
113 :
114 : /* Return true if we should ignore the basic block for purposes of tracing. */
115 : bool
116 1269550 : ignore_bb_p (const_basic_block bb)
117 : {
118 1269550 : if (bb->index < NUM_FIXED_BLOCKS)
119 : return true;
120 1253197 : if (optimize_bb_for_size_p (bb))
121 : return true;
122 :
123 1190470 : return !cached_can_duplicate_bb_p (bb);
124 : }
125 :
126 : /* Return number of instructions in the block. */
127 :
128 : static void
129 268882 : analyze_bb (basic_block bb, int *count)
130 : {
131 268882 : gimple_stmt_iterator gsi;
132 268882 : gimple *stmt;
133 268882 : int n = 0;
134 :
135 1767730 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
136 : {
137 1229966 : stmt = gsi_stmt (gsi);
138 1229966 : n += estimate_num_insns (stmt, &eni_size_weights);
139 : }
140 268882 : *count = n;
141 :
142 268882 : cache_can_duplicate_bb_p (bb, can_duplicate_block_p (const_cast<basic_block>
143 : (bb)));
144 268882 : }
145 :
146 : /* Return true if E1 is more frequent than E2. */
147 : static bool
148 606269 : better_p (const_edge e1, const_edge e2)
149 : {
150 886735 : if ((e1->count () > e2->count ()) || (e1->count () < e2->count ()))
151 584263 : return e1->count () > e2->count ();
152 : /* This is needed to avoid changes in the decision after
153 : CFG is modified. */
154 22006 : if (e1->src != e2->src)
155 8324 : return e1->src->index > e2->src->index;
156 13682 : return e1->dest->index > e2->dest->index;
157 : }
158 :
159 : /* Return most frequent successor of basic block BB. */
160 :
161 : static edge
162 412468 : find_best_successor (basic_block bb)
163 : {
164 412468 : edge e;
165 412468 : edge best = NULL;
166 412468 : edge_iterator ei;
167 :
168 1166945 : FOR_EACH_EDGE (e, ei, bb->succs)
169 : {
170 754568 : if (!e->count ().initialized_p ())
171 : return NULL;
172 754477 : if (!best || better_p (e, best))
173 : best = e;
174 : }
175 412377 : if (!best || ignore_bb_p (best->dest))
176 13228 : return NULL;
177 399149 : if (!best->probability.initialized_p ()
178 399149 : || best->probability.to_reg_br_prob_base () <= probability_cutoff)
179 : return NULL;
180 : return best;
181 : }
182 :
183 : /* Return most frequent predecessor of basic block BB. */
184 :
185 : static edge
186 408528 : find_best_predecessor (basic_block bb)
187 : {
188 408528 : edge e;
189 408528 : edge best = NULL;
190 408528 : edge_iterator ei;
191 :
192 1080971 : FOR_EACH_EDGE (e, ei, bb->preds)
193 : {
194 672539 : if (!e->count ().initialized_p ())
195 : return NULL;
196 672443 : if (!best || better_p (e, best))
197 : best = e;
198 : }
199 408432 : if (!best || ignore_bb_p (best->src))
200 17333 : return NULL;
201 391099 : if (bb->count.initialized_p ()
202 1173153 : && (best->count ().to_frequency (cfun) * REG_BR_PROB_BASE
203 391099 : < bb->count.to_frequency (cfun) * branch_ratio_cutoff))
204 144 : return NULL;
205 : return best;
206 : }
207 :
208 : /* Find the trace using bb and record it in the TRACE array.
209 : Return number of basic blocks recorded. */
210 :
211 : static int
212 39703 : find_trace (basic_block bb, basic_block *trace)
213 : {
214 39703 : int i = 0;
215 39703 : edge e;
216 :
217 39703 : if (dump_file)
218 17 : fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun));
219 :
220 103827 : while ((e = find_best_predecessor (bb)) != NULL)
221 : {
222 87307 : basic_block bb2 = e->src;
223 173161 : if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
224 161159 : || find_best_successor (bb2) != e)
225 : break;
226 64124 : if (dump_file)
227 13 : fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
228 : bb = bb2;
229 : }
230 39703 : if (dump_file)
231 17 : fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun));
232 39703 : trace[i++] = bb;
233 :
234 : /* Follow the trace in forward direction. */
235 338616 : while ((e = find_best_successor (bb)) != NULL)
236 : {
237 319091 : bb = e->dest;
238 638114 : if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
239 623792 : || find_best_predecessor (bb) != e)
240 : break;
241 298913 : if (dump_file)
242 30 : fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
243 298913 : trace[i++] = bb;
244 : }
245 39703 : if (dump_file)
246 17 : fprintf (dump_file, "\n");
247 39703 : return i;
248 : }
249 :
250 : /* Duplicate block BB2, placing it after BB in the CFG. Return the
251 : newly created block. */
252 : basic_block
253 15692 : transform_duplicate (basic_block bb, basic_block bb2)
254 : {
255 15692 : edge e;
256 15692 : basic_block copy;
257 :
258 15692 : e = find_edge (bb, bb2);
259 :
260 15692 : copy = duplicate_block (bb2, e, bb);
261 15692 : flush_pending_stmts (e);
262 :
263 15692 : add_phi_args_after_copy (©, 1, NULL);
264 :
265 15692 : return (copy);
266 : }
267 :
268 : /* Look for basic blocks in frequency order, construct traces and tail duplicate
269 : if profitable. */
270 :
271 : static bool
272 11436 : tail_duplicate (void)
273 : {
274 11436 : auto_vec<fibonacci_node<long, basic_block_def>*> blocks;
275 11436 : blocks.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
276 :
277 11436 : basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
278 11436 : int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun));
279 11436 : int ninsns = 0, nduplicated = 0;
280 11436 : gcov_type weighted_insns = 0, traced_insns = 0;
281 11436 : fibonacci_heap<long, basic_block_def> heap (LONG_MIN);
282 11436 : gcov_type cover_insns;
283 11436 : int max_dup_insns;
284 11436 : basic_block bb;
285 11436 : bool changed = false;
286 :
287 : /* Create an oversized sbitmap to reduce the chance that we need to
288 : resize it. */
289 11436 : bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2);
290 11436 : bitmap_clear (bb_seen);
291 11436 : can_duplicate_bb = sbitmap_alloc (last_basic_block_for_fn (cfun));
292 11436 : bitmap_clear (can_duplicate_bb);
293 11436 : initialize_original_copy_tables ();
294 :
295 11436 : if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
296 198 : probability_cutoff = param_tracer_min_branch_probability_feedback;
297 : else
298 11238 : probability_cutoff = param_tracer_min_branch_probability;
299 11436 : probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
300 :
301 11436 : branch_ratio_cutoff =
302 11436 : (REG_BR_PROB_BASE / 100 * param_tracer_min_branch_ratio);
303 :
304 280318 : FOR_EACH_BB_FN (bb, cfun)
305 : {
306 268882 : int n;
307 268882 : analyze_bb (bb, &n);
308 268882 : if (!ignore_bb_p (bb))
309 213115 : blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb);
310 :
311 268882 : counts [bb->index] = n;
312 268882 : ninsns += n;
313 268882 : weighted_insns += n * bb->count.to_frequency (cfun);
314 : }
315 :
316 11436 : if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
317 198 : cover_insns = param_tracer_dynamic_coverage_feedback;
318 : else
319 11238 : cover_insns = param_tracer_dynamic_coverage;
320 11436 : cover_insns = (weighted_insns * cover_insns + 50) / 100;
321 11436 : max_dup_insns = (ninsns * param_tracer_max_code_growth + 50) / 100;
322 :
323 11436 : while (traced_insns < cover_insns && nduplicated < max_dup_insns
324 51565 : && !heap.empty ())
325 : {
326 40129 : basic_block bb = heap.extract_min ();
327 40129 : int n, pos;
328 :
329 40129 : if (!bb)
330 : break;
331 :
332 40129 : blocks[bb->index] = NULL;
333 :
334 40129 : if (ignore_bb_p (bb))
335 426 : continue;
336 39703 : gcc_assert (!bb_seen_p (bb));
337 :
338 39703 : n = find_trace (bb, trace);
339 :
340 39703 : bb = trace[0];
341 39703 : traced_insns += bb->count.to_frequency (cfun) * counts [bb->index];
342 39703 : if (blocks[bb->index])
343 : {
344 10843 : heap.delete_node (blocks[bb->index]);
345 10843 : blocks[bb->index] = NULL;
346 : }
347 :
348 108254 : for (pos = 1; pos < n; pos++)
349 : {
350 82488 : basic_block bb2 = trace[pos];
351 :
352 82488 : if (blocks[bb2->index])
353 : {
354 74043 : heap.delete_node (blocks[bb2->index]);
355 74043 : blocks[bb2->index] = NULL;
356 : }
357 82488 : traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index];
358 82488 : if (EDGE_COUNT (bb2->preds) > 1
359 15217 : && can_duplicate_block_p (bb2)
360 : /* We have the tendency to duplicate the loop header
361 : of all do { } while loops. Do not do that - it is
362 : not profitable and it might create a loop with multiple
363 : entries or at least rotate the loop. */
364 97705 : && bb2->loop_father->header != bb2)
365 : {
366 13937 : nduplicated += counts [bb2->index];
367 13937 : basic_block copy = transform_duplicate (bb, bb2);
368 :
369 : /* Reconsider the original copy of block we've duplicated.
370 : Removing the most common predecessor may make it to be
371 : head. */
372 13937 : blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2);
373 :
374 13937 : if (dump_file)
375 7 : fprintf (dump_file, "Duplicated %i as %i [%i]\n",
376 : bb2->index, copy->index, copy->count.to_frequency (cfun));
377 :
378 : bb2 = copy;
379 : changed = true;
380 : }
381 82488 : mark_bb_seen (bb2);
382 82488 : bb = bb2;
383 : /* In case the trace became infrequent, stop duplicating. */
384 82488 : if (ignore_bb_p (bb))
385 : break;
386 : }
387 39703 : if (dump_file)
388 17 : fprintf (dump_file, " covered now %.1f\n\n",
389 17 : traced_insns * 100.0 / weighted_insns);
390 : }
391 11436 : if (dump_file)
392 10 : fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
393 10 : nduplicated * 100 / ninsns);
394 :
395 11436 : free_original_copy_tables ();
396 11436 : sbitmap_free (bb_seen);
397 11436 : sbitmap_free (can_duplicate_bb);
398 11436 : can_duplicate_bb = NULL;
399 11436 : free (trace);
400 11436 : free (counts);
401 :
402 22872 : return changed;
403 11436 : }
404 :
namespace {

/* Pass descriptor for the GIMPLE tracer pass.  */
const pass_data pass_data_tracer =
{
  GIMPLE_PASS, /* type */
  "tracer", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TRACER, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
419 :
/* The tracer pass: performs tail duplication to form superblocks.  */
class pass_tracer : public gimple_opt_pass
{
public:
  pass_tracer (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tracer, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when optimizing with -ftracer and block reordering on.  */
  bool gate (function *) final override
  {
    return (optimize > 0 && flag_tracer && flag_reorder_blocks);
  }

  unsigned int execute (function *) final override;

}; // class pass_tracer
436 :
437 : unsigned int
438 20328 : pass_tracer::execute (function *fun)
439 : {
440 20328 : bool changed;
441 :
442 20328 : if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
443 : return 0;
444 :
445 11436 : mark_dfs_back_edges ();
446 11436 : if (dump_file)
447 10 : brief_dump_cfg (dump_file, dump_flags);
448 :
449 : /* Trace formation is done on the fly inside tail_duplicate */
450 11436 : changed = tail_duplicate ();
451 11436 : if (changed)
452 : {
453 5173 : free_dominance_info (CDI_DOMINATORS);
454 : /* If we changed the CFG schedule loops for fixup by cleanup_cfg. */
455 5173 : loops_state_set (LOOPS_NEED_FIXUP);
456 : }
457 :
458 11436 : if (dump_file)
459 10 : brief_dump_cfg (dump_file, dump_flags);
460 :
461 11436 : return changed ? TODO_cleanup_cfg : 0;
462 : }
463 : } // anon namespace
464 :
465 : gimple_opt_pass *
466 288047 : make_pass_tracer (gcc::context *ctxt)
467 : {
468 288047 : return new pass_tracer (ctxt);
469 : }
|