Line data Source code
1 : /* Basic IPA optimizations based on profile.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* ipa-profile pass implements the following analysis propagating profille
21 : inter-procedurally.
22 :
23 : - Count histogram construction. This is a histogram analyzing how much
24 : time is spent executing statements with a given execution count read
25 : from profile feedback. This histogram is complete only with LTO,
26 : otherwise it contains information only about the current unit.
27 :
28 : The information is used to set hot/cold thresholds.
29 : - Next speculative indirect call resolution is performed: the local
30 : profile pass assigns profile-id to each function and provide us with a
31 : histogram specifying the most common target. We look up the callgraph
32 : node corresponding to the target and produce a speculative call.
33 :
34 : This call may or may not survive through IPA optimization based on decision
35 : of inliner.
36 : - Finally we propagate the following flags: unlikely executed, executed
37 : once, executed at startup and executed at exit. These flags are used to
38 : control code size/performance threshold and code placement (by producing
39 : .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
40 : #include "config.h"
41 : #include "system.h"
42 : #include "coretypes.h"
43 : #include "backend.h"
44 : #include "tree.h"
45 : #include "gimple.h"
46 : #include "predict.h"
47 : #include "alloc-pool.h"
48 : #include "tree-pass.h"
49 : #include "cgraph.h"
50 : #include "data-streamer.h"
51 : #include "gimple-iterator.h"
52 : #include "ipa-utils.h"
53 : #include "profile.h"
54 : #include "value-prof.h"
55 : #include "tree-inline.h"
56 : #include "symbol-summary.h"
57 : #include "tree-vrp.h"
58 : #include "sreal.h"
59 : #include "ipa-cp.h"
60 : #include "ipa-prop.h"
61 : #include "ipa-fnsummary.h"
62 :
63 : /* Entry in the histogram. */
64 :
65 : struct histogram_entry
66 : {
67 : gcov_type count;
68 : int time;
69 : int size;
70 : };
71 :
72 : /* Histogram of profile values.
73 : The histogram is represented as an ordered vector of entries allocated via
74 : histogram_pool. During construction a separate hashtable is kept to lookup
75 : duplicate entries. */
76 :
77 : vec<histogram_entry *> histogram;
78 : static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
79 :
80 : /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
81 :
82 : struct histogram_hash : nofree_ptr_hash <histogram_entry>
83 : {
84 : static inline hashval_t hash (const histogram_entry *);
85 : static inline int equal (const histogram_entry *, const histogram_entry *);
86 : };
87 :
88 : inline hashval_t
89 35456 : histogram_hash::hash (const histogram_entry *val)
90 : {
91 35456 : return val->count;
92 : }
93 :
94 : inline int
95 18337 : histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
96 : {
97 18337 : return val->count == val2->count;
98 : }
99 :
100 : /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
101 : HASHTABLE is the on-side hash kept to avoid duplicates. */
102 :
103 : static void
104 17259 : account_time_size (hash_table<histogram_hash> *hashtable,
105 : vec<histogram_entry *> &histogram,
106 : gcov_type count, int time, int size)
107 : {
108 17259 : histogram_entry key = {count, 0, 0};
109 17259 : histogram_entry **val = hashtable->find_slot (&key, INSERT);
110 :
111 17259 : if (!*val)
112 : {
113 3517 : *val = histogram_pool.allocate ();
114 3517 : **val = key;
115 3517 : histogram.safe_push (*val);
116 : }
117 17259 : (*val)->time += time;
118 17259 : (*val)->size += size;
119 17259 : }
120 :
121 : int
122 4660 : cmp_counts (const void *v1, const void *v2)
123 : {
124 4660 : const histogram_entry *h1 = *(const histogram_entry * const *)v1;
125 4660 : const histogram_entry *h2 = *(const histogram_entry * const *)v2;
126 4660 : if (h1->count < h2->count)
127 : return 1;
128 2699 : if (h1->count > h2->count)
129 2699 : return -1;
130 : return 0;
131 : }
132 :
133 : /* Dump HISTOGRAM to FILE. */
134 :
135 : static void
136 27 : dump_histogram (FILE *file, vec<histogram_entry *> histogram)
137 : {
138 27 : unsigned int i;
139 27 : gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
140 27 : overall_size = 0;
141 :
142 27 : fprintf (dump_file, "Histogram:\n");
143 73 : for (i = 0; i < histogram.length (); i++)
144 : {
145 19 : overall_time += histogram[i]->count * histogram[i]->time;
146 19 : overall_size += histogram[i]->size;
147 : }
148 27 : if (!overall_time)
149 23 : overall_time = 1;
150 27 : if (!overall_size)
151 23 : overall_size = 1;
152 46 : for (i = 0; i < histogram.length (); i++)
153 : {
154 19 : cumulated_time += histogram[i]->count * histogram[i]->time;
155 19 : cumulated_size += histogram[i]->size;
156 19 : fprintf (file, " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
157 19 : (int64_t) histogram[i]->count,
158 19 : histogram[i]->time,
159 19 : cumulated_time * 100.0 / overall_time,
160 19 : histogram[i]->size,
161 19 : cumulated_size * 100.0 / overall_size);
162 : }
163 27 : }
164 :
165 : /* Structure containing speculative target information from profile. */
166 :
167 : struct speculative_call_target
168 : {
169 116 : speculative_call_target (unsigned int id = 0, int prob = 0)
170 55 : : target_id (id), target_probability (prob)
171 : {
172 : }
173 :
174 : /* Profile_id of target obtained from profile. */
175 : unsigned int target_id;
176 : /* Probability that call will land in function with target_id. */
177 : unsigned int target_probability;
178 : };
179 :
180 141730 : class speculative_call_summary
181 : {
182 : public:
183 141730 : speculative_call_summary () : speculative_call_targets ()
184 : {}
185 :
186 : auto_vec<speculative_call_target> speculative_call_targets;
187 :
188 : void dump (FILE *f);
189 :
190 : };
191 :
192 : /* Class to manage call summaries. */
193 :
194 : class ipa_profile_call_summaries
195 : : public call_summary<speculative_call_summary *>
196 : {
197 : public:
198 160846 : ipa_profile_call_summaries (symbol_table *table)
199 321692 : : call_summary<speculative_call_summary *> (table)
200 : {}
201 :
202 : /* Duplicate info when an edge is cloned. */
203 : void duplicate (cgraph_edge *, cgraph_edge *,
204 : speculative_call_summary *old_sum,
205 : speculative_call_summary *new_sum) final override;
206 : };
207 :
208 : static ipa_profile_call_summaries *call_sums = NULL;
209 :
210 : /* Dump all information in speculative call summary to F. */
211 :
212 : void
213 34 : speculative_call_summary::dump (FILE *f)
214 : {
215 34 : cgraph_node *n2;
216 :
217 34 : unsigned spec_count = speculative_call_targets.length ();
218 42 : for (unsigned i = 0; i < spec_count; i++)
219 : {
220 8 : speculative_call_target item = speculative_call_targets[i];
221 8 : n2 = find_func_by_profile_id (item.target_id);
222 8 : if (n2)
223 8 : fprintf (f, " The %i speculative target is %s with prob %3.2f\n", i,
224 : n2->dump_name (),
225 8 : item.target_probability / (float) REG_BR_PROB_BASE);
226 : else
227 0 : fprintf (f, " The %i speculative target is %u with prob %3.2f\n", i,
228 : item.target_id,
229 0 : item.target_probability / (float) REG_BR_PROB_BASE);
230 : }
231 34 : }
232 :
233 : /* Duplicate info when an edge is cloned. */
234 :
235 : void
236 42 : ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
237 : speculative_call_summary *old_sum,
238 : speculative_call_summary *new_sum)
239 : {
240 42 : if (!old_sum)
241 : return;
242 :
243 42 : unsigned old_count = old_sum->speculative_call_targets.length ();
244 42 : if (!old_count)
245 : return;
246 :
247 42 : new_sum->speculative_call_targets.reserve_exact (old_count);
248 42 : new_sum->speculative_call_targets.quick_grow_cleared (old_count);
249 :
250 97 : for (unsigned i = 0; i < old_count; i++)
251 : {
252 165 : new_sum->speculative_call_targets[i]
253 55 : = old_sum->speculative_call_targets[i];
254 : }
255 : }
256 :
257 : /* Collect histogram and speculative target summaries from CFG profiles. */
258 :
259 : static void
260 148644 : ipa_profile_generate_summary (void)
261 : {
262 148644 : struct cgraph_node *node;
263 148644 : gimple_stmt_iterator gsi;
264 148644 : basic_block bb;
265 :
266 148644 : hash_table<histogram_hash> hashtable (10);
267 :
268 148644 : gcc_checking_assert (!call_sums);
269 148644 : call_sums = new ipa_profile_call_summaries (symtab);
270 :
271 1483579 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
272 1334935 : if (ENTRY_BLOCK_PTR_FOR_FN
273 2669870 : (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
274 27001 : FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
275 : {
276 20755 : int time = 0;
277 20755 : int size = 0;
278 124961 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
279 : {
280 83451 : gimple *stmt = gsi_stmt (gsi);
281 83451 : if (gimple_code (stmt) == GIMPLE_CALL
282 83451 : && !gimple_call_fndecl (stmt))
283 : {
284 214 : histogram_value h;
285 214 : h = gimple_histogram_value_of_type
286 214 : (DECL_STRUCT_FUNCTION (node->decl),
287 : stmt, HIST_TYPE_INDIR_CALL);
288 : /* No need to do sanity check: gimple_ic_transform already
289 : takes away bad histograms. */
290 214 : if (h)
291 : {
292 40 : gcov_type val, count, all;
293 40 : struct cgraph_edge *e = node->get_edge (stmt);
294 40 : if (e && !e->indirect_unknown_callee)
295 0 : continue;
296 :
297 40 : speculative_call_summary *csum
298 40 : = call_sums->get_create (e);
299 :
300 1320 : for (unsigned j = 0; j < GCOV_TOPN_MAXIMUM_TRACKED_VALUES;
301 : j++)
302 : {
303 1280 : if (!get_nth_most_common_value (NULL, "indirect call",
304 : h, &val, &count, &all,
305 : j))
306 1229 : continue;
307 :
308 51 : if (val == 0 || count == 0)
309 0 : continue;
310 :
311 51 : if (count > all)
312 : {
313 0 : if (dump_file)
314 0 : fprintf (dump_file,
315 : "Probability capped to 1\n");
316 0 : count = all;
317 : }
318 51 : speculative_call_target item (
319 : val,
320 51 : profile_count::from_gcov_type (count)
321 : .probability_in
322 51 : (profile_count::from_gcov_type (all))
323 51 : .to_reg_br_prob_base ());
324 51 : csum->speculative_call_targets.safe_push (item);
325 : }
326 :
327 40 : gimple_remove_histogram_value
328 40 : (DECL_STRUCT_FUNCTION (node->decl), stmt, h);
329 : }
330 : }
331 83451 : time += estimate_num_insns (stmt, &eni_time_weights);
332 83451 : size += estimate_num_insns (stmt, &eni_size_weights);
333 : }
334 54777 : if (bb->count.ipa_p () && bb->count.initialized_p ())
335 17011 : account_time_size (&hashtable, histogram,
336 34022 : bb->count.ipa ().to_gcov_type (),
337 : time, size);
338 : }
339 148644 : histogram.qsort (cmp_counts);
340 148644 : }
341 :
342 : /* Serialize the speculative summary info for LTO. */
343 :
344 : static void
345 2601 : ipa_profile_write_edge_summary (lto_simple_output_block *ob,
346 : speculative_call_summary *csum)
347 : {
348 2601 : unsigned len = 0;
349 :
350 2601 : len = csum->speculative_call_targets.length ();
351 :
352 4 : gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES);
353 :
354 2601 : streamer_write_hwi_stream (ob->main_stream, len);
355 :
356 2601 : if (len)
357 : {
358 4 : unsigned spec_count = csum->speculative_call_targets.length ();
359 14 : for (unsigned i = 0; i < spec_count; i++)
360 : {
361 10 : speculative_call_target item = csum->speculative_call_targets[i];
362 10 : gcc_assert (item.target_id);
363 10 : streamer_write_hwi_stream (ob->main_stream, item.target_id);
364 10 : streamer_write_hwi_stream (ob->main_stream, item.target_probability);
365 : }
366 : }
367 2601 : }
368 :
369 : /* Serialize the ipa info for lto. */
370 :
371 : static void
372 20000 : ipa_profile_write_summary (void)
373 : {
374 20000 : struct lto_simple_output_block *ob
375 20000 : = lto_create_simple_output_block (LTO_section_ipa_profile);
376 20000 : unsigned int i;
377 :
378 20000 : streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
379 40439 : for (i = 0; i < histogram.length (); i++)
380 : {
381 439 : streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
382 439 : streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
383 439 : streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
384 : }
385 :
386 20000 : if (!call_sums)
387 0 : return;
388 :
389 : /* Serialize speculative targets information. */
390 20000 : unsigned int count = 0;
391 20000 : lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
392 20000 : lto_symtab_encoder_iterator lsei;
393 20000 : cgraph_node *node;
394 :
395 113628 : for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
396 93628 : lsei_next_function_in_partition (&lsei))
397 : {
398 93628 : node = lsei_cgraph_node (lsei);
399 93628 : if (node->definition && node->has_gimple_body_p ()
400 186281 : && node->indirect_calls)
401 1023 : count++;
402 : }
403 :
404 20000 : streamer_write_uhwi_stream (ob->main_stream, count);
405 :
406 : /* Process all of the functions. */
407 20000 : for (lsei = lsei_start_function_in_partition (encoder);
408 28068 : !lsei_end_p (lsei) && count; lsei_next_function_in_partition (&lsei))
409 : {
410 8068 : cgraph_node *node = lsei_cgraph_node (lsei);
411 8068 : if (node->definition && node->has_gimple_body_p ()
412 15787 : && node->indirect_calls)
413 : {
414 1023 : int node_ref = lto_symtab_encoder_encode (encoder, node);
415 1023 : streamer_write_uhwi_stream (ob->main_stream, node_ref);
416 :
417 3624 : for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
418 : {
419 2601 : speculative_call_summary *csum = call_sums->get_create (e);
420 2601 : ipa_profile_write_edge_summary (ob, csum);
421 : }
422 : }
423 : }
424 :
425 20000 : lto_destroy_simple_output_block (ob);
426 : }
427 :
428 : /* Dump all profile summary data for all cgraph nodes and edges to file F. */
429 :
430 : static void
431 27 : ipa_profile_dump_all_summaries (FILE *f)
432 : {
433 27 : fprintf (dump_file,
434 : "\n========== IPA-profile speculative targets: ==========\n");
435 27 : cgraph_node *node;
436 98 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
437 : {
438 71 : fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
439 105 : for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
440 : {
441 34 : fprintf (f, " Summary for %s of indirect edge %d:\n",
442 34 : e->caller->dump_name (), e->lto_stmt_uid);
443 34 : speculative_call_summary *csum = call_sums->get_create (e);
444 34 : csum->dump (f);
445 : }
446 : }
447 27 : fprintf (f, "\n\n");
448 27 : }
449 :
450 : /* Read speculative targets information about edge for LTO WPA. */
451 :
452 : static void
453 1405 : ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge)
454 : {
455 1405 : unsigned i, len;
456 :
457 1405 : len = streamer_read_hwi (ib);
458 1405 : gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES);
459 1405 : speculative_call_summary *csum = call_sums->get_create (edge);
460 :
461 1415 : for (i = 0; i < len; i++)
462 : {
463 10 : unsigned int target_id = streamer_read_hwi (ib);
464 10 : int target_probability = streamer_read_hwi (ib);
465 10 : speculative_call_target item (target_id, target_probability);
466 10 : csum->speculative_call_targets.safe_push (item);
467 : }
468 1405 : }
469 :
470 : /* Read profile speculative targets section information for LTO WPA. */
471 :
472 : static void
473 10925 : ipa_profile_read_summary_section (struct lto_file_decl_data *file_data,
474 : class lto_input_block *ib)
475 : {
476 10925 : if (!ib)
477 : return;
478 :
479 10925 : lto_symtab_encoder_t encoder = file_data->symtab_node_encoder;
480 :
481 10925 : unsigned int count = streamer_read_uhwi (ib);
482 :
483 10925 : unsigned int i;
484 10925 : unsigned int index;
485 10925 : cgraph_node * node;
486 :
487 11603 : for (i = 0; i < count; i++)
488 : {
489 678 : index = streamer_read_uhwi (ib);
490 678 : node
491 678 : = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, index));
492 :
493 2083 : for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
494 1405 : ipa_profile_read_edge_summary (ib, e);
495 : }
496 : }
497 :
498 : /* Deserialize the IPA histogram and speculative targets summary info for LTO.
499 : */
500 :
501 : static void
502 12202 : ipa_profile_read_summary (void)
503 : {
504 12202 : struct lto_file_decl_data ** file_data_vec
505 12202 : = lto_get_file_decl_data ();
506 12202 : struct lto_file_decl_data * file_data;
507 12202 : int j = 0;
508 :
509 12202 : hash_table<histogram_hash> hashtable (10);
510 :
511 12202 : gcc_checking_assert (!call_sums);
512 12202 : call_sums = new ipa_profile_call_summaries (symtab);
513 :
514 25459 : while ((file_data = file_data_vec[j++]))
515 : {
516 13257 : const char *data;
517 13257 : size_t len;
518 13257 : class lto_input_block *ib
519 13257 : = lto_create_simple_input_block (file_data,
520 : LTO_section_ipa_profile,
521 : &data, &len);
522 13257 : if (ib)
523 : {
524 10925 : unsigned int num = streamer_read_uhwi (ib);
525 10925 : unsigned int n;
526 11173 : for (n = 0; n < num; n++)
527 : {
528 248 : gcov_type count = streamer_read_gcov_count (ib);
529 248 : int time = streamer_read_uhwi (ib);
530 248 : int size = streamer_read_uhwi (ib);
531 248 : account_time_size (&hashtable, histogram,
532 : count, time, size);
533 : }
534 :
535 10925 : ipa_profile_read_summary_section (file_data, ib);
536 :
537 10925 : lto_destroy_simple_input_block (file_data,
538 : LTO_section_ipa_profile,
539 : ib, data, len);
540 : }
541 : }
542 12202 : histogram.qsort (cmp_counts);
543 12202 : }
544 :
545 : /* Data used by ipa_propagate_frequency. */
546 :
547 : struct ipa_propagate_frequency_data
548 : {
549 : cgraph_node *function_symbol;
550 : bool maybe_unlikely_executed;
551 : bool maybe_executed_once;
552 : bool only_called_at_startup;
553 : bool only_called_at_exit;
554 : };
555 :
556 : /* Worker for ipa_propagate_frequency_1. */
557 :
558 : static bool
559 5100257 : ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
560 : {
561 5100257 : struct ipa_propagate_frequency_data *d;
562 5100257 : struct cgraph_edge *edge;
563 :
564 5100257 : d = (struct ipa_propagate_frequency_data *)data;
565 5100257 : for (edge = node->callers;
566 10306031 : edge && (d->maybe_unlikely_executed || d->maybe_executed_once
567 149273 : || d->only_called_at_startup || d->only_called_at_exit);
568 5205774 : edge = edge->next_caller)
569 : {
570 5205774 : if (edge->caller != d->function_symbol)
571 : {
572 5203290 : d->only_called_at_startup &= edge->caller->only_called_at_startup;
573 : /* It makes sense to put main() together with the static constructors.
574 : It will be executed for sure, but rest of functions called from
575 : main are definitely not at startup only. */
576 5203290 : if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
577 156121 : d->only_called_at_startup = 0;
578 5203290 : d->only_called_at_exit &= edge->caller->only_called_at_exit;
579 : }
580 :
581 : /* When profile feedback is available, do not try to propagate too hard;
582 : counts are already good guide on function frequencies and roundoff
583 : errors can make us to push function into unlikely section even when
584 : it is executed by the train run. Transfer the function only if all
585 : callers are unlikely executed. */
586 5205774 : if (profile_info
587 5206307 : && !(edge->callee->count.ipa () == profile_count::zero ())
588 5206304 : && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
589 0 : || (edge->caller->inlined_to
590 0 : && edge->caller->inlined_to->frequency
591 0 : != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
592 530 : d->maybe_unlikely_executed = false;
593 5205774 : if (edge->count.ipa ().initialized_p ()
594 5205774 : && !edge->count.ipa ().nonzero_p ())
595 30361 : continue;
596 5175413 : switch (edge->caller->frequency)
597 : {
598 : case NODE_FREQUENCY_UNLIKELY_EXECUTED:
599 : break;
600 373146 : case NODE_FREQUENCY_EXECUTED_ONCE:
601 373146 : {
602 373146 : if (dump_file && (dump_flags & TDF_DETAILS))
603 90 : fprintf (dump_file, " Called by %s that is executed once\n",
604 : edge->caller->dump_name ());
605 373146 : d->maybe_unlikely_executed = false;
606 373146 : ipa_call_summary *s = ipa_call_summaries->get (edge);
607 373146 : if (s != NULL && s->loop_depth)
608 : {
609 9326 : d->maybe_executed_once = false;
610 9326 : if (dump_file && (dump_flags & TDF_DETAILS))
611 6 : fprintf (dump_file, " Called in loop\n");
612 : }
613 : break;
614 : }
615 4799832 : case NODE_FREQUENCY_HOT:
616 4799832 : case NODE_FREQUENCY_NORMAL:
617 4799832 : if (dump_file && (dump_flags & TDF_DETAILS))
618 136 : fprintf (dump_file, " Called by %s that is normal or hot\n",
619 : edge->caller->dump_name ());
620 4799832 : d->maybe_unlikely_executed = false;
621 4799832 : d->maybe_executed_once = false;
622 4799832 : break;
623 : }
624 : }
625 5100257 : return edge != NULL;
626 : }
627 :
628 : /* Return true if NODE contains hot calls. */
629 :
630 : bool
631 28235 : contains_hot_call_p (struct cgraph_node *node)
632 : {
633 28235 : struct cgraph_edge *e;
634 76931 : for (e = node->callees; e; e = e->next_callee)
635 48698 : if (e->maybe_hot_p ())
636 : return true;
637 48696 : else if (!e->inline_failed
638 48696 : && contains_hot_call_p (e->callee))
639 : return true;
640 28608 : for (e = node->indirect_calls; e; e = e->next_callee)
641 375 : if (e->maybe_hot_p ())
642 : return true;
643 : return false;
644 : }
645 :
646 : /* See if the frequency of NODE can be updated based on frequencies of its
647 : callers. */
648 : bool
649 8757434 : ipa_propagate_frequency (struct cgraph_node *node)
650 : {
651 8757434 : struct ipa_propagate_frequency_data d = {node, true, true, true, true};
652 8757434 : bool changed = false;
653 :
654 : /* We cannot propagate anything useful about externally visible functions
655 : nor about virtuals. */
656 8757434 : if (!node->local
657 5184934 : || node->alias
658 13930667 : || (opt_for_fn (node->decl, flag_devirtualize)
659 5089498 : && DECL_VIRTUAL_P (node->decl)))
660 : return false;
661 5091603 : gcc_assert (node->analyzed);
662 5091603 : if (dump_file && (dump_flags & TDF_DETAILS))
663 226 : fprintf (dump_file, "Processing frequency %s\n", node->dump_name ());
664 :
665 5091603 : node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
666 : true);
667 :
668 5091603 : if ((d.only_called_at_startup && !d.only_called_at_exit)
669 45210 : && !node->only_called_at_startup)
670 : {
671 16509 : node->only_called_at_startup = true;
672 16509 : if (dump_file)
673 0 : fprintf (dump_file, "Node %s promoted to only called at startup.\n",
674 : node->dump_name ());
675 : changed = true;
676 : }
677 5091603 : if ((d.only_called_at_exit && !d.only_called_at_startup)
678 49 : && !node->only_called_at_exit)
679 : {
680 19 : node->only_called_at_exit = true;
681 19 : if (dump_file)
682 0 : fprintf (dump_file, "Node %s promoted to only called at exit.\n",
683 : node->dump_name ());
684 : changed = true;
685 : }
686 :
687 : /* With profile we can decide on hot/normal based on count. */
688 5091603 : if (node->count. ipa().initialized_p ())
689 : {
690 22078 : bool hot = false;
691 22078 : if (!(node->count. ipa() == profile_count::zero ())
692 207 : && node->count. ipa() >= get_hot_bb_threshold ())
693 202 : hot = true;
694 22078 : if (!hot)
695 21876 : hot |= contains_hot_call_p (node);
696 22078 : if (hot)
697 : {
698 204 : if (node->frequency != NODE_FREQUENCY_HOT)
699 : {
700 14 : if (dump_file)
701 0 : fprintf (dump_file, "Node %s promoted to hot.\n",
702 : node->dump_name ());
703 14 : node->frequency = NODE_FREQUENCY_HOT;
704 14 : return true;
705 : }
706 : return false;
707 : }
708 21874 : else if (node->frequency == NODE_FREQUENCY_HOT)
709 : {
710 4 : if (dump_file)
711 0 : fprintf (dump_file, "Node %s reduced to normal.\n",
712 : node->dump_name ());
713 4 : node->frequency = NODE_FREQUENCY_NORMAL;
714 4 : changed = true;
715 : }
716 : }
717 : /* These come either from profile or user hints; never update them. */
718 5091399 : if (node->frequency == NODE_FREQUENCY_HOT
719 5091393 : || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
720 : return changed;
721 5073403 : if (d.maybe_unlikely_executed)
722 : {
723 9114 : node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
724 9114 : if (dump_file)
725 0 : fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
726 : node->dump_name ());
727 : changed = true;
728 : }
729 5064289 : else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
730 : {
731 118937 : node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
732 118937 : if (dump_file)
733 45 : fprintf (dump_file, "Node %s promoted to executed once.\n",
734 : node->dump_name ());
735 : changed = true;
736 : }
737 : return changed;
738 : }
739 :
740 : /* Check that number of arguments of N agrees with E.
741 : Be conservative when summaries are not present. */
742 :
743 : static bool
744 42 : check_argument_count (struct cgraph_node *n, struct cgraph_edge *e)
745 : {
746 42 : if (!ipa_node_params_sum || !ipa_edge_args_sum)
747 : return true;
748 42 : ipa_node_params *info = ipa_node_params_sum->get (n->function_symbol ());
749 42 : if (!info)
750 : return true;
751 42 : ipa_edge_args *e_info = ipa_edge_args_sum->get (e);
752 42 : if (!e_info)
753 : return true;
754 66 : if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info)
755 42 : && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info)
756 0 : || !stdarg_p (TREE_TYPE (n->decl))))
757 0 : return false;
758 : return true;
759 : }
760 :
761 : /* Simple ipa profile pass propagating frequencies across the callgraph. */
762 :
763 : static unsigned int
764 151656 : ipa_profile (void)
765 : {
766 151656 : struct cgraph_node **order;
767 151656 : struct cgraph_edge *e;
768 151656 : int order_pos;
769 151656 : bool something_changed = false;
770 151656 : int i;
771 151656 : widest_int overall_time = 0, cumulated = 0;
772 151656 : gcov_type overall_size = 0;
773 151656 : struct cgraph_node *n,*n2;
774 151656 : int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
775 151656 : int nmismatch = 0, nimpossible = 0;
776 151656 : bool node_map_initialized = false;
777 151656 : gcov_type threshold;
778 :
779 151656 : if (dump_file)
780 : {
781 27 : if (profile_info)
782 : {
783 4 : fprintf (dump_file,
784 : "runs: %i sum_max: %" PRId64 " cutoff: %" PRId64"\n",
785 : profile_info->runs, profile_info->sum_max, profile_info->cutoff);
786 4 : fprintf (dump_file, "hot bb threshold: %" PRId64 "\n",
787 : get_hot_bb_threshold ());
788 : }
789 27 : dump_histogram (dump_file, histogram);
790 : }
791 161148 : for (i = 0; i < (int)histogram.length (); i++)
792 : {
793 3302 : overall_time += ((widest_int)histogram[i]->count) * histogram[i]->time;
794 : /* Watch for overflow. */
795 3302 : gcc_assert (overall_time >= 0);
796 3302 : overall_size += histogram[i]->size;
797 : }
798 151656 : threshold = 0;
799 151656 : if (overall_time != 0)
800 : {
801 101 : gcc_assert (overall_size);
802 :
803 101 : widest_int cutoff = ((overall_time * param_hot_bb_count_ws_permille
804 202 : + 500) / 1000);
805 : /* Watch for overflow. */
806 101 : gcc_assert (cutoff >= 0);
807 450 : for (i = 0; cumulated < cutoff; i++)
808 : {
809 349 : cumulated += ((widest_int)histogram[i]->count) * histogram[i]->time;
810 349 : threshold = histogram[i]->count;
811 : }
812 101 : if (!threshold)
813 0 : threshold = 1;
814 101 : if (dump_file)
815 : {
816 4 : widest_int cumulated_time = 0;
817 4 : gcov_type cumulated_size = 0;
818 :
819 4 : for (i = 0;
820 38 : i < (int)histogram.length () && histogram[i]->count >= threshold;
821 : i++)
822 : {
823 15 : cumulated_time += ((widest_int)histogram[i]->count)
824 30 : * histogram[i]->time;
825 15 : cumulated_size += histogram[i]->size;
826 : }
827 4 : fprintf (dump_file, "Determined min count: %" PRId64
828 : " Time:%3.2f%% Size:%3.2f%%\n",
829 : (int64_t)threshold,
830 8 : (cumulated_time * 10000 / overall_time).to_shwi () / 100.0,
831 4 : cumulated_size * 100.0 / overall_size);
832 4 : }
833 :
834 101 : if (in_lto_p)
835 : {
836 4 : if (dump_file)
837 2 : fprintf (dump_file, "Setting hotness threshold in LTO mode.\n");
838 4 : set_hot_bb_threshold (threshold);
839 : }
840 101 : }
841 151656 : histogram.release ();
842 151656 : histogram_pool.release ();
843 :
844 : /* Produce speculative calls: we saved common target from profiling into
845 : e->target_id. Now, at link time, we can look up corresponding
846 : function node and produce speculative call. */
847 :
848 151656 : gcc_checking_assert (call_sums);
849 :
850 151656 : if (dump_file)
851 : {
852 27 : if (!node_map_initialized)
853 27 : init_node_map (false);
854 27 : node_map_initialized = true;
855 :
856 27 : ipa_profile_dump_all_summaries (dump_file);
857 : }
858 :
859 1588017 : FOR_EACH_DEFINED_FUNCTION (n)
860 : {
861 1436361 : bool update = false;
862 :
863 1436361 : if (!opt_for_fn (n->decl, flag_ipa_profile))
864 9617 : continue;
865 :
866 1566750 : for (e = n->indirect_calls; e; e = e->next_callee)
867 : {
868 140006 : if (n->count.initialized_p ())
869 140002 : nindirect++;
870 :
871 140006 : speculative_call_summary *csum = call_sums->get_create (e);
872 140045 : unsigned spec_count = csum->speculative_call_targets.length ();
873 39 : if (spec_count)
874 : {
875 39 : if (!node_map_initialized)
876 19 : init_node_map (false);
877 39 : node_map_initialized = true;
878 39 : ncommon++;
879 :
880 39 : unsigned speculative_id = 0;
881 39 : profile_count orig = e->count;
882 90 : for (unsigned i = 0; i < spec_count; i++)
883 : {
884 51 : speculative_call_target item
885 51 : = csum->speculative_call_targets[i];
886 51 : n2 = find_func_by_profile_id (item.target_id);
887 51 : if (n2)
888 : {
889 50 : if (dump_file)
890 : {
891 8 : fprintf (dump_file,
892 : "Indirect call -> direct call from"
893 : " other module %s => %s, prob %3.2f\n",
894 : n->dump_name (),
895 : n2->dump_name (),
896 : item.target_probability
897 8 : / (float) REG_BR_PROB_BASE);
898 : }
899 50 : if (item.target_probability < REG_BR_PROB_BASE / 2)
900 : {
901 8 : nuseless++;
902 8 : if (dump_file)
903 1 : fprintf (dump_file,
904 : "Not speculating: "
905 : "probability is too low.\n");
906 : }
907 42 : else if (n2->get_availability () <= AVAIL_INTERPOSABLE
908 42 : && n2->can_be_discarded_p ())
909 : {
910 0 : nuseless++;
911 0 : if (dump_file)
912 0 : fprintf (dump_file,
913 : "Not speculating: target is overwritable "
914 : "and can be discarded.\n");
915 : }
916 42 : else if (!check_argument_count (n2, e))
917 : {
918 0 : nmismatch++;
919 0 : if (dump_file)
920 0 : fprintf (dump_file,
921 : "Not speculating: "
922 : "parameter count mismatch\n");
923 : }
924 42 : else if (usable_polymorphic_info_p (e->indirect_info)
925 10 : && !opt_for_fn (n->decl, flag_devirtualize)
926 4 : && !possible_polymorphic_call_target_p (e, n2))
927 : {
928 0 : nimpossible++;
929 0 : if (dump_file)
930 0 : fprintf (dump_file,
931 : "Not speculating: "
932 : "function is not in the polymorphic "
933 : "call target list\n");
934 : }
935 : else
936 : {
937 : /* Target may be overwritable, but profile says that
938 : control flow goes to this particular implementation
939 : of N2. Speculate on the local alias to allow
940 : inlining. */
941 42 : if (!n2->can_be_discarded_p ())
942 : {
943 36 : cgraph_node *alias;
944 36 : alias = dyn_cast<cgraph_node *>
945 36 : (n2->noninterposable_alias ());
946 : if (alias)
947 42 : n2 = alias;
948 : }
949 42 : nconverted++;
950 42 : profile_probability prob
951 : = profile_probability::from_reg_br_prob_base
952 42 : (item.target_probability).adjusted ();
953 42 : e->make_speculative (n2,
954 : orig.apply_probability (prob),
955 : speculative_id);
956 42 : update = true;
957 42 : speculative_id++;
958 : }
959 : }
960 : else
961 : {
962 1 : if (dump_file)
963 0 : fprintf (dump_file,
964 : "Function with profile-id %i not found.\n",
965 : item.target_id);
966 1 : nunknown++;
967 : }
968 : }
969 : }
970 : }
971 1426744 : if (update)
972 36 : ipa_update_overall_fn_summary (n);
973 : }
974 151656 : if (node_map_initialized)
975 46 : del_node_map ();
976 151656 : if (dump_file && nindirect)
977 25 : fprintf (dump_file,
978 : "%i indirect calls trained.\n"
979 : "%i (%3.2f%%) have common target.\n"
980 : "%i (%3.2f%%) targets was not found.\n"
981 : "%i (%3.2f%%) targets had parameter count mismatch.\n"
982 : "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
983 : "%i (%3.2f%%) speculations seems useless.\n"
984 : "%i (%3.2f%%) speculations produced.\n",
985 : nindirect,
986 25 : ncommon, ncommon * 100.0 / nindirect,
987 25 : nunknown, nunknown * 100.0 / nindirect,
988 25 : nmismatch, nmismatch * 100.0 / nindirect,
989 25 : nimpossible, nimpossible * 100.0 / nindirect,
990 25 : nuseless, nuseless * 100.0 / nindirect,
991 25 : nconverted, nconverted * 100.0 / nindirect);
992 :
993 151656 : order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
994 151656 : order_pos = ipa_reverse_postorder (order);
995 2874044 : for (i = order_pos - 1; i >= 0; i--)
996 : {
997 2722388 : if (order[i]->local
998 168716 : && opt_for_fn (order[i]->decl, flag_ipa_profile)
999 2885644 : && ipa_propagate_frequency (order[i]))
1000 : {
1001 327477 : for (e = order[i]->callees; e; e = e->next_callee)
1002 298286 : if (e->callee->local && !e->callee->aux)
1003 : {
1004 10390 : something_changed = true;
1005 10390 : e->callee->aux = (void *)1;
1006 : }
1007 : }
1008 2722388 : order[i]->aux = NULL;
1009 : }
1010 :
1011 160494 : while (something_changed)
1012 : {
1013 : something_changed = false;
1014 120015 : for (i = order_pos - 1; i >= 0; i--)
1015 : {
1016 111177 : if (order[i]->aux
1017 16772 : && opt_for_fn (order[i]->decl, flag_ipa_profile)
1018 127949 : && ipa_propagate_frequency (order[i]))
1019 : {
1020 100320 : for (e = order[i]->callees; e; e = e->next_callee)
1021 85119 : if (e->callee->local && !e->callee->aux)
1022 : {
1023 6386 : something_changed = true;
1024 6386 : e->callee->aux = (void *)1;
1025 : }
1026 : }
1027 111177 : order[i]->aux = NULL;
1028 : }
1029 : }
1030 151656 : free (order);
1031 :
1032 151656 : if (dump_file && (dump_flags & TDF_DETAILS))
1033 0 : symtab->dump (dump_file);
1034 :
1035 151656 : delete call_sums;
1036 151656 : call_sums = NULL;
1037 :
1038 151656 : return 0;
1039 151656 : }
1040 :
1041 : namespace {
1042 :
1043 : const pass_data pass_data_ipa_profile =
1044 : {
1045 : IPA_PASS, /* type */
1046 : "profile_estimate", /* name */
1047 : OPTGROUP_NONE, /* optinfo_flags */
1048 : TV_IPA_PROFILE, /* tv_id */
1049 : 0, /* properties_required */
1050 : 0, /* properties_provided */
1051 : 0, /* properties_destroyed */
1052 : 0, /* todo_flags_start */
1053 : 0, /* todo_flags_finish */
1054 : };
1055 :
1056 : class pass_ipa_profile : public ipa_opt_pass_d
1057 : {
1058 : public:
1059 285722 : pass_ipa_profile (gcc::context *ctxt)
1060 : : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
1061 : ipa_profile_generate_summary, /* generate_summary */
1062 : ipa_profile_write_summary, /* write_summary */
1063 : ipa_profile_read_summary, /* read_summary */
1064 : NULL, /* write_optimization_summary */
1065 : NULL, /* read_optimization_summary */
1066 : NULL, /* stmt_fixup */
1067 : 0, /* function_transform_todo_flags_start */
1068 : NULL, /* function_transform */
1069 285722 : NULL) /* variable_transform */
1070 285722 : {}
1071 :
1072 : /* opt_pass methods: */
1073 586755 : bool gate (function *) final override { return flag_ipa_profile || in_lto_p; }
1074 151656 : unsigned int execute (function *) final override { return ipa_profile (); }
1075 :
1076 : }; // class pass_ipa_profile
1077 :
1078 : } // anon namespace
1079 :
1080 : ipa_opt_pass_d *
1081 285722 : make_pass_ipa_profile (gcc::context *ctxt)
1082 : {
1083 285722 : return new pass_ipa_profile (ctxt);
1084 : }
1085 :
1086 : /* Reset all state within ipa-profile.cc so that we can rerun the compiler
1087 : within the same process. For use by toplev::finalize. */
1088 :
1089 : void
1090 256621 : ipa_profile_cc_finalize (void)
1091 : {
1092 256621 : delete call_sums;
1093 256621 : call_sums = NULL;
1094 256621 : }
|