Line data Source code
1 : /* The tracer pass for the GNU compiler.
2 : Contributed by Jan Hubicka, SuSE Labs.
3 : Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC.
4 : Copyright (C) 2001-2026 Free Software Foundation, Inc.
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify it
9 : under the terms of the GNU General Public License as published by
10 : the Free Software Foundation; either version 3, or (at your option)
11 : any later version.
12 :
13 : GCC is distributed in the hope that it will be useful, but WITHOUT
14 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 : or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 : License for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : /* This pass performs the tail duplication needed for superblock formation.
23 : For more information see:
24 :
25 : Design and Analysis of Profile-Based Optimization in Compaq's
26 : Compilation Tools for Alpha; Journal of Instruction-Level
27 : Parallelism 3 (2000) 1-25
28 :
29 : Unlike Compaq's implementation we don't do the loop peeling as most
30 : probably a better job can be done by a special pass and we don't
31 : need to worry too much about the code size implications as the tail
32 : duplicates are crossjumped again if optimizations are not
33 : performed. */
34 :
35 :
36 : #include "config.h"
37 : #include "system.h"
38 : #include "coretypes.h"
39 : #include "backend.h"
40 : #include "rtl.h"
41 : #include "tree.h"
42 : #include "gimple.h"
43 : #include "cfghooks.h"
44 : #include "tree-pass.h"
45 : #include "profile.h"
46 : #include "cfganal.h"
47 : #include "gimple-iterator.h"
48 : #include "tree-cfg.h"
49 : #include "tree-ssa.h"
50 : #include "tree-inline.h"
51 : #include "cfgloop.h"
52 : #include "alloc-pool.h"
53 : #include "fibonacci_heap.h"
54 : #include "tracer.h"
55 :
/* Local helpers, defined below.  */
static void analyze_bb (basic_block, int *);
static bool better_p (const_edge, const_edge);
static edge find_best_successor (basic_block);
static edge find_best_predecessor (basic_block);
static int find_trace (basic_block, basic_block *);

/* Minimal outgoing edge probability considered for superblock formation. */
static int probability_cutoff;
/* Minimal edge-count to block-count ratio (scaled by REG_BR_PROB_BASE / 100)
   required for an edge to be followed backwards; see
   find_best_predecessor.  */
static int branch_ratio_cutoff;

/* A bit BB->index is set if BB has already been seen, i.e. it is
   connected to some trace already.  */
static sbitmap bb_seen;
69 :
70 : static inline void
71 82423 : mark_bb_seen (basic_block bb)
72 : {
73 82423 : unsigned int size = SBITMAP_SIZE (bb_seen);
74 :
75 82423 : if ((unsigned int)bb->index >= size)
76 0 : bb_seen = sbitmap_resize (bb_seen, size * 2, 0);
77 :
78 82423 : bitmap_set_bit (bb_seen, bb->index);
79 82423 : }
80 :
81 : static inline bool
82 444631 : bb_seen_p (basic_block bb)
83 : {
84 444631 : return bitmap_bit_p (bb_seen, bb->index);
85 : }
86 :
87 : static sbitmap can_duplicate_bb;
88 :
89 : /* Cache VAL as value of can_duplicate_bb_p for BB. */
90 : static inline void
91 268212 : cache_can_duplicate_bb_p (const_basic_block bb, bool val)
92 : {
93 268212 : if (val)
94 268182 : bitmap_set_bit (can_duplicate_bb, bb->index);
95 268212 : }
96 :
97 : /* Return cached value of can_duplicate_bb_p for BB. */
98 : static bool
99 1187005 : cached_can_duplicate_bb_p (const_basic_block bb)
100 : {
101 1187005 : if (can_duplicate_bb)
102 : {
103 1130141 : unsigned int size = SBITMAP_SIZE (can_duplicate_bb);
104 1130141 : if ((unsigned int)bb->index < size)
105 1108511 : return bitmap_bit_p (can_duplicate_bb, bb->index);
106 :
107 : /* Assume added bb's should not be duplicated. */
108 : return false;
109 : }
110 :
111 56864 : return can_duplicate_block_p (bb);
112 : }
113 :
114 : /* Return true if we should ignore the basic block for purposes of tracing. */
115 : bool
116 1265737 : ignore_bb_p (const_basic_block bb)
117 : {
118 1265737 : if (bb->index < NUM_FIXED_BLOCKS)
119 : return true;
120 1249505 : if (optimize_bb_for_size_p (bb))
121 : return true;
122 :
123 1187005 : return !cached_can_duplicate_bb_p (bb);
124 : }
125 :
126 : /* Return number of instructions in the block. */
127 :
128 : static void
129 268212 : analyze_bb (basic_block bb, int *count)
130 : {
131 268212 : gimple_stmt_iterator gsi;
132 268212 : gimple *stmt;
133 268212 : int n = 0;
134 :
135 1754059 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
136 : {
137 1217635 : stmt = gsi_stmt (gsi);
138 1217635 : n += estimate_num_insns (stmt, &eni_size_weights);
139 : }
140 268212 : *count = n;
141 :
142 268212 : cache_can_duplicate_bb_p (bb, can_duplicate_block_p (const_cast<basic_block>
143 : (bb)));
144 268212 : }
145 :
146 : /* Return true if E1 is more frequent than E2. */
147 : static bool
148 604699 : better_p (const_edge e1, const_edge e2)
149 : {
150 884640 : if ((e1->count () > e2->count ()) || (e1->count () < e2->count ()))
151 582727 : return e1->count () > e2->count ();
152 : /* This is needed to avoid changes in the decision after
153 : CFG is modified. */
154 21972 : if (e1->src != e2->src)
155 8237 : return e1->src->index > e2->src->index;
156 13735 : return e1->dest->index > e2->dest->index;
157 : }
158 :
159 : /* Return most frequent successor of basic block BB. */
160 :
161 : static edge
162 411119 : find_best_successor (basic_block bb)
163 : {
164 411119 : edge e;
165 411119 : edge best = NULL;
166 411119 : edge_iterator ei;
167 :
168 1163080 : FOR_EACH_EDGE (e, ei, bb->succs)
169 : {
170 752052 : if (!e->count ().initialized_p ())
171 : return NULL;
172 751961 : if (!best || better_p (e, best))
173 : best = e;
174 : }
175 411028 : if (!best || ignore_bb_p (best->dest))
176 13172 : return NULL;
177 397856 : if (!best->probability.initialized_p ()
178 397856 : || best->probability.to_reg_br_prob_base () <= probability_cutoff)
179 : return NULL;
180 : return best;
181 : }
182 :
183 : /* Return most frequent predecessor of basic block BB. */
184 :
185 : static edge
186 407182 : find_best_predecessor (basic_block bb)
187 : {
188 407182 : edge e;
189 407182 : edge best = NULL;
190 407182 : edge_iterator ei;
191 :
192 1077876 : FOR_EACH_EDGE (e, ei, bb->preds)
193 : {
194 670790 : if (!e->count ().initialized_p ())
195 : return NULL;
196 670694 : if (!best || better_p (e, best))
197 : best = e;
198 : }
199 407086 : if (!best || ignore_bb_p (best->src))
200 17294 : return NULL;
201 389792 : if (bb->count.initialized_p ()
202 1169236 : && (best->count ().to_frequency (cfun) * REG_BR_PROB_BASE
203 389792 : < bb->count.to_frequency (cfun) * branch_ratio_cutoff))
204 140 : return NULL;
205 : return best;
206 : }
207 :
208 : /* Find the trace using bb and record it in the TRACE array.
209 : Return number of basic blocks recorded. */
210 :
211 : static int
212 39602 : find_trace (basic_block bb, basic_block *trace)
213 : {
214 39602 : int i = 0;
215 39602 : edge e;
216 :
217 39602 : if (dump_file)
218 17 : fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun));
219 :
220 103430 : while ((e = find_best_predecessor (bb)) != NULL)
221 : {
222 86939 : basic_block bb2 = e->src;
223 172386 : if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
224 160452 : || find_best_successor (bb2) != e)
225 : break;
226 63828 : if (dump_file)
227 13 : fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
228 : bb = bb2;
229 : }
230 39602 : if (dump_file)
231 17 : fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun));
232 39602 : trace[i++] = bb;
233 :
234 : /* Follow the trace in forward direction. */
235 337606 : while ((e = find_best_successor (bb)) != NULL)
236 : {
237 318090 : bb = e->dest;
238 636110 : if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
239 621842 : || find_best_predecessor (bb) != e)
240 : break;
241 298004 : if (dump_file)
242 30 : fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
243 298004 : trace[i++] = bb;
244 : }
245 39602 : if (dump_file)
246 17 : fprintf (dump_file, "\n");
247 39602 : return i;
248 : }
249 :
250 : /* Duplicate block BB2, placing it after BB in the CFG. Return the
251 : newly created block. */
252 : basic_block
253 15615 : transform_duplicate (basic_block bb, basic_block bb2)
254 : {
255 15615 : edge e;
256 15615 : basic_block copy;
257 :
258 15615 : e = find_edge (bb, bb2);
259 :
260 15615 : copy = duplicate_block (bb2, e, bb);
261 15615 : flush_pending_stmts (e);
262 :
263 15615 : add_phi_args_after_copy (©, 1, NULL);
264 :
265 15615 : return (copy);
266 : }
267 :
/* Look for basic blocks in frequency order, construct traces and tail duplicate
   if profitable.  Return true if the CFG was changed.  Duplication stops
   once the traced blocks cover enough of the weighted instruction count
   or the code-growth budget is exhausted.  */

static bool
tail_duplicate (void)
{
  /* Per-block handles into the priority heap, indexed by block index;
     NULL once a block has been removed from the heap.  */
  auto_vec<fibonacci_node<long, basic_block_def>*> blocks;
  blocks.safe_grow_cleared (last_basic_block_for_fn (cfun), true);

  basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
  int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun));
  int ninsns = 0, nduplicated = 0;
  gcov_type weighted_insns = 0, traced_insns = 0;
  /* Min-heap keyed on negated block frequency, so extract_min yields the
     most frequent remaining block.  */
  fibonacci_heap<long, basic_block_def> heap (LONG_MIN);
  gcov_type cover_insns;
  int max_dup_insns;
  basic_block bb;
  bool changed = false;

  /* Create an oversized sbitmap to reduce the chance that we need to
     resize it.  */
  bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2);
  bitmap_clear (bb_seen);
  can_duplicate_bb = sbitmap_alloc (last_basic_block_for_fn (cfun));
  bitmap_clear (can_duplicate_bb);
  initialize_original_copy_tables ();

  /* Pick the stricter feedback-based cutoff when real profile data is
     available.  */
  if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
    probability_cutoff = param_tracer_min_branch_probability_feedback;
  else
    probability_cutoff = param_tracer_min_branch_probability;
  probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;

  branch_ratio_cutoff =
    (REG_BR_PROB_BASE / 100 * param_tracer_min_branch_ratio);

  /* Size every block and seed the heap with the traceable ones.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      int n;
      analyze_bb (bb, &n);
      if (!ignore_bb_p (bb))
	blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb);

      counts [bb->index] = n;
      ninsns += n;
      weighted_insns += n * bb->count.to_frequency (cfun);
    }

  /* COVER_INSNS: how much of the weighted instruction count traces must
     cover before we stop.  MAX_DUP_INSNS: total code-growth budget.  */
  if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
    cover_insns = param_tracer_dynamic_coverage_feedback;
  else
    cover_insns = param_tracer_dynamic_coverage;
  cover_insns = (weighted_insns * cover_insns + 50) / 100;
  max_dup_insns = (ninsns * param_tracer_max_code_growth + 50) / 100;

  while (traced_insns < cover_insns && nduplicated < max_dup_insns
	 && !heap.empty ())
    {
      basic_block bb = heap.extract_min ();
      int n, pos;

      if (!bb)
	break;

      blocks[bb->index] = NULL;

      if (ignore_bb_p (bb))
	continue;
      gcc_assert (!bb_seen_p (bb));

      n = find_trace (bb, trace);

      bb = trace[0];
      traced_insns += bb->count.to_frequency (cfun) * counts [bb->index];
      /* The trace head is consumed; drop it from the heap.  */
      if (blocks[bb->index])
	{
	  heap.delete_node (blocks[bb->index]);
	  blocks[bb->index] = NULL;
	}

      /* Walk the trace, duplicating each block that has other
	 predecessors so the trace becomes a superblock.  */
      for (pos = 1; pos < n; pos++)
	{
	  basic_block bb2 = trace[pos];

	  if (blocks[bb2->index])
	    {
	      heap.delete_node (blocks[bb2->index]);
	      blocks[bb2->index] = NULL;
	    }
	  traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index];
	  if (EDGE_COUNT (bb2->preds) > 1
	      && can_duplicate_block_p (bb2)
	      /* We have the tendency to duplicate the loop header
	         of all do { } while loops.  Do not do that - it is
		 not profitable and it might create a loop with multiple
		 entries or at least rotate the loop.  */
	      && bb2->loop_father->header != bb2)
	    {
	      nduplicated += counts [bb2->index];
	      basic_block copy = transform_duplicate (bb, bb2);

	      /* Reconsider the original copy of block we've duplicated.
	         Removing the most common predecessor may make it to be
	         head.  */
	      blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2);

	      if (dump_file)
		fprintf (dump_file, "Duplicated %i as %i [%i]\n",
			 bb2->index, copy->index, copy->count.to_frequency (cfun));

	      bb2 = copy;
	      changed = true;
	    }
	  mark_bb_seen (bb2);
	  bb = bb2;
	  /* In case the trace became infrequent, stop duplicating.  */
	  if (ignore_bb_p (bb))
	    break;
	}
      if (dump_file)
	fprintf (dump_file, " covered now %.1f\n\n",
		 traced_insns * 100.0 / weighted_insns);
    }
  if (dump_file)
    fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
	     nduplicated * 100 / ninsns);

  free_original_copy_tables ();
  sbitmap_free (bb_seen);
  sbitmap_free (can_duplicate_bb);
  can_duplicate_bb = NULL;
  free (trace);
  free (counts);

  return changed;
}
404 :
namespace {

/* Pass descriptor for the GIMPLE tracer pass.  SSA form is updated
   after the pass runs (TODO_update_ssa).  */
const pass_data pass_data_tracer =
{
  GIMPLE_PASS, /* type */
  "tracer", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TRACER, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
419 :
/* The tracer pass object; tail-duplicates blocks to form superblocks.  */
class pass_tracer : public gimple_opt_pass
{
public:
  pass_tracer (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tracer, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when optimizing with -ftracer and block reordering enabled.  */
  bool gate (function *) final override
  {
    return (optimize > 0 && flag_tracer && flag_reorder_blocks);
  }

  unsigned int execute (function *) final override;

}; // class pass_tracer
436 :
437 : unsigned int
438 20166 : pass_tracer::execute (function *fun)
439 : {
440 20166 : bool changed;
441 :
442 20166 : if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
443 : return 0;
444 :
445 11356 : mark_dfs_back_edges ();
446 11356 : if (dump_file)
447 10 : brief_dump_cfg (dump_file, dump_flags);
448 :
449 : /* Trace formation is done on the fly inside tail_duplicate */
450 11356 : changed = tail_duplicate ();
451 11356 : if (changed)
452 : {
453 5131 : free_dominance_info (CDI_DOMINATORS);
454 : /* If we changed the CFG schedule loops for fixup by cleanup_cfg. */
455 5131 : loops_state_set (LOOPS_NEED_FIXUP);
456 : }
457 :
458 11356 : if (dump_file)
459 10 : brief_dump_cfg (dump_file, dump_flags);
460 :
461 11356 : return changed ? TODO_cleanup_cfg : 0;
462 : }
463 : } // anon namespace
464 :
/* Create a new instance of the tracer pass for pass-manager CTXT.  */
gimple_opt_pass *
make_pass_tracer (gcc::context *ctxt)
{
  return new pass_tracer (ctxt);
}
|