Line data Source code
1 : /* Read and annotate call graph profile from the auto profile data file.
2 : Copyright (C) 2014-2026 Free Software Foundation, Inc.
3 : Contributed by Dehao Chen (dehao@google.com)
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "config.h"
22 : #define INCLUDE_MAP
23 : #define INCLUDE_SET
24 : #include "system.h"
25 : #include "coretypes.h"
26 : #include "backend.h"
27 : #include "tree.h"
28 : #include "gimple.h"
29 : #include "predict.h"
30 : #include "alloc-pool.h"
31 : #include "tree-pass.h"
32 : #include "ssa.h"
33 : #include "cgraph.h"
34 : #include "gcov-io.h"
35 : #include "diagnostic-core.h"
36 : #include "profile.h"
37 : #include "langhooks.h"
38 : #include "context.h"
39 : #include "pass_manager.h"
40 : #include "cfgloop.h"
41 : #include "tree-cfg.h"
42 : #include "tree-cfgcleanup.h"
43 : #include "tree-into-ssa.h"
44 : #include "gimple-iterator.h"
45 : #include "value-prof.h"
46 : #include "symbol-summary.h"
47 : #include "sreal.h"
48 : #include "ipa-cp.h"
49 : #include "ipa-prop.h"
50 : #include "ipa-fnsummary.h"
51 : #include "ipa-inline.h"
52 : #include "tree-inline.h"
53 : #include "auto-profile.h"
54 : #include "tree-pretty-print.h"
55 : #include "gimple-pretty-print.h"
56 : #include "output.h"
57 :
58 : /* The following routines implement AutoFDO optimization.
59 :
60 : This optimization uses sampling profiles to annotate basic block counts
61 : and uses heuristics to estimate branch probabilities.
62 :
63 : There are five phases in AutoFDO:
64 :
65 : Phase 1: At startup.
66 : Read profile from the profile data file.
67 : The following info is read from the profile datafile:
68 : * string_table: a map between function name and its index.
69 : * autofdo_source_profile: a map from function_instance name to
70 : function_instance. This is represented as a forest of
71 : function_instances.
72 : * WorkingSet: a histogram of how many instructions are covered for a
73 : given percentage of total cycles. This is describing the binary
74 : level information (not source level). This info is used to help
75 : decide if we want aggressive optimizations that could increase
76 : code footprint (e.g. loop unroll etc.)
77 : A function instance is an instance of function that could either be a
78 : standalone symbol, or a clone of a function that is inlined into another
79 : function.
80 :
81 : Phase 2: In afdo_offline pass.
82 : Remove function instances from other translation units
83 : and offline all cross-translation unit inlining done during train
84 : run compilation. This is necessary to not lose profiles with
85 : LTO train run.
86 :
87 : Phase 3: During early optimization.
88 : AFDO inline + value profile transformation.
89 : This happens during early optimization.
90 : During early inlining AFDO inliner is executed which
91 : uses autofdo_source_profile to find if a callsite is:
92 : * inlined in the profiled binary.
93 : * callee body is hot in the profiling run.
94 : If both conditions are satisfied, early inline will inline the callsite
95 : regardless of the code growth.
96 :
97 : Performing this early has the benefit of doing early optimizations
98 : before the real IPA passes and of getting more "context sensitivity" from
99 : the profile read. The profile of an inlined function may differ
100 : significantly from one inline instance to another and from the
101 : offline version.
102 :
103 : This is controlled by -fauto-profile-inlining and is independent
104 : of -fearly-inlining.
105 :
106 : Phase 4: In AFDO pass.
107 : Offline all functions that have been inlined in the
108 : train run but were inlined neither by early inlining nor by the AFDO
109 : inliner.
110 :
111 : Phase 5: In AFDO pass.
112 : Annotate control flow graph.
113 : * Annotate basic block count
114 : * Estimate branch probability
115 : * Use earlier static profile to fill in the gaps
116 : if AFDO profile is ambiguous
117 :
118 : After the above 5 phases, all profile is readily annotated on the GCC IR.
119 : AutoFDO tries to reuse all FDO infrastructure as much as possible to make
120 : use of the profile. E.g. it uses existing mechanism to calculate the basic
121 : block/edge frequency, as well as the cgraph node/edge count.
122 : */
123 :
124 : #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
125 :
126 : /* Profile counts determined by AFDO that are smaller than afdo_hot_bb_threshold
127 : are considered cold. */
128 : gcov_type afdo_hot_bb_threshold = -1;
129 :
130 : /* Return true if COUNT is possibly hot, i.e. the gcov value of its IPA count
      is at least afdo_hot_bb_threshold. The IPA count of COUNT must be
      initialized; this is verified by a checking assert. */
131 : bool
132 0 : maybe_hot_afdo_count_p (profile_count count)
133 : {
134 0 : gcc_checking_assert (count.ipa ().initialized_p ());
135 0 : return count.ipa ().to_gcov_type () >= afdo_hot_bb_threshold;
136 : }
137 :
138 : /* Return true if location of STMT may be expressed by debug info.
      Labels, nops, predictions and clobbers never qualify. */
139 :
140 : static bool
141 0 : stmt_loc_used_by_debug_info (gimple *stmt)
142 : {
143 : /* Of the debug statements only begin_stmt markers qualify: inline_entry
144 : and gimple_bind's locations are not output into debug output. */
145 0 : if (is_gimple_debug (stmt))
146 0 : return gimple_debug_begin_stmt_p (stmt);
147 0 : if (gimple_code (stmt) == GIMPLE_LABEL
148 : || gimple_code (stmt) == GIMPLE_NOP
149 : || gimple_code (stmt) == GIMPLE_PREDICT)
150 : return false;
151 0 : if (gimple_clobber_p (stmt))
152 0 : return false;
153 : return true;
154 : }
155 :
156 : namespace autofdo
157 : {
158 :
159 : /* Intermediate edge info used when propagating AutoFDO profile information.
160 : We can't use edge->count () directly since it is computed from the edge's
161 : probability, which is not yet decided during propagation. AFDO_EINFO (E)
      casts the aux field of edge E to an edge_info pointer. */
162 : #define AFDO_EINFO(e) ((class edge_info *) e->aux)
163 : class edge_info
164 : {
165 : public:
166 0 : edge_info () : count_ (profile_count::zero ().afdo ()), annotated_ (false) {}
167 0 : bool is_annotated () const { return annotated_; }
168 0 : void set_annotated () { annotated_ = true; }
169 0 : profile_count get_count () const { return count_; }
170 0 : void set_count (profile_count count) { count_ = count; }
171 : private:
      /* Count of the edge; starts out as profile_count::zero ().afdo (). */
172 : profile_count count_;
      /* True once set_annotated has been called. */
173 : bool annotated_;
174 : };
175 :
176 : /* Represent a source location: (function_decl, lineno). */
177 : struct decl_lineno
178 : {
      /* Function declaration the location is relative to. */
179 : tree decl;
180 : /* Relative location stored in auto-profile; get_inline_stack fills it with
      the result of get_combined_location for DECL. */
181 : unsigned int afdo_loc;
182 : /* Actual location afdo_loc was computed from; used to output diagnostics. */
183 : location_t location;
184 : };
185 :
186 : /* Represent an inline stack. Element 0 is the leaf (innermost) node. */
187 : typedef auto_vec<decl_lineno, 20> inline_stack;
188 :
189 : /* String array that stores names; string_table uses it for both symbol
      names and file names. */
190 : typedef auto_vec<const char *> string_vector;
191 :
192 : /* Map from function name's index in string_table to target's
193 : execution count. */
194 : typedef std::map<unsigned, gcov_type> icall_target_map;
195 :
196 : /* Set of gimple stmts. Used to track if the stmt has already been promoted
197 : to direct call. */
198 : typedef std::set<gimple *> stmt_set;
199 :
200 : /* Set and map, keyed by int name index, used to translate name indexes. */
201 : typedef hash_set<int_hash <int, -1, -2>> name_index_set;
202 : typedef hash_map<int_hash <int, -1, -2>, int> name_index_map;
203 :
204 : /* Represent count info of an inline stack. */
205 0 : class count_info
206 : {
207 : public:
208 : /* Sampled count of the inline stack. */
209 : gcov_type count;
210 :
211 : /* Map from indirect call target (name index, see icall_target_map) to its sample count. */
212 : icall_target_map targets;
213 :
214 : /* NOTE(review): this comment used to describe a flag telling whether
215 : the inline stack is already used in annotation; no such member is
216 : declared in this class. Each inline stack should only be used to
217 : annotate IR once. This will be enforced when instruction-level
218 : discriminator is supported. */
219 : };
220 :
221 : /* operator< for "const char *": orders strings by their contents (strcmp)
      rather than by pointer value. Used as the comparator of the std::map
      typedefs in string_table. */
222 : struct string_compare
223 : {
224 0 : bool operator()(const char *a, const char *b) const
225 : {
226 0 : return strcmp (a, b) < 0;
227 : }
228 : };
229 :
230 : /* Store the summary information for the profile. */
231 : struct summary_info
232 : {
233 : /* There are currently 16 hard-coded percentiles in the GCOV format. */
234 : static constexpr unsigned NUM_PERCENTILES = 16;
235 :
236 : /* The detailed summary is a histogram-based calculation of the minimum
237 : execution count required to belong to a given percentile of counts.
238 : One detailed_summary describes one percentile. */
239 : struct detailed_summary
240 : {
241 : /* The percentile that this represents (multiplied by 1,000,000). */
242 : uint32_t cutoff;
243 : /* The minimum execution count required to belong to this percentile. */
244 : uint64_t min_count;
245 : /* The number of samples which belong to this percentile. */
246 : uint64_t num_counts;
247 : };
248 :
249 : /* The sum of execution counts of all samples. */
250 : uint64_t total_count;
251 : /* The maximum individual count. */
252 : uint64_t max_count;
253 : /* The maximum head count across all functions. */
254 : uint64_t max_function_count;
255 : /* The number of lines that have samples. */
256 : uint64_t num_counts;
257 : /* The number of functions that have samples. */
258 : uint64_t num_functions;
259 : /* The percentile threshold information, NUM_PERCENTILES entries. */
260 : detailed_summary detailed_summaries[NUM_PERCENTILES];
261 :
262 : /* Read profile. Return TRUE on success. */
263 : bool read ();
264 :
265 : /* Get the minimum count required for percentile CUTOFF (presumably in the
      same units as detailed_summary::cutoff -- confirm at the definition). */
266 : uint64_t get_threshold_count (uint32_t cutoff);
267 : };
268 :
269 : /* Store string arrays (symbol names and file names), each indexed by string
      position in the array. */
270 : class string_table
271 : {
272 : public:
      /* NOTE(review): presumably the file name index used when a symbol has no
         known file name -- confirm against the users. */
273 : static const int unknown_filename = -1;
274 :
275 0 : string_table ()
276 0 : {}
277 :
278 : ~string_table ();
279 :
280 : /* For a given string, returns its index. */
281 : int get_index (const char *name) const;
282 :
283 : /* For a given decl, returns the index of the decl name. */
284 : int get_index_by_decl (tree decl) const;
285 :
286 : /* For a given index, returns the symbol name. */
287 : const char *get_symbol_name (int index) const;
288 :
289 : /* For a given index, returns the filename. */
290 : const char *get_filename (int index) const;
291 :
292 : /* For a given symbol name index, returns the filename index. */
293 : int get_filename_by_symbol (int index) const;
294 :
295 : /* For a given function name, returns the filename index. */
296 : int get_filename_by_symbol (const char *name) const;
297 :
298 : /* For a given filename, returns the index. */
299 : int get_filename_index (const char *name) const;
300 :
301 : /* Get the original name and file name index for a node. This will return the
302 : name from the current TU if there are multiple symbols that map to
303 : NAME. */
304 : std::pair<const char *, int> get_original_name (const char *name) const;
305 :
306 : /* Read profile, return TRUE on success. */
307 : bool read ();
308 :
309 : /* Return number of symbol name entries. */
310 0 : size_t num_entries () { return symbol_names_.length (); }
311 :
312 : /* Add new symbol name STRING (with an associated file name FILENAME_IDX) and
313 : return its index. */
314 : int add_symbol_name (const char *string, int filename_idx);
315 :
316 : /* Add new filename and return its index (returning the same if it already
317 : exists). */
318 : int add_filename (const char *name);
319 :
320 : /* Return cgraph node corresponding to given name index. */
321 : cgraph_node *get_cgraph_node (int);
322 : /* Return the vector of file names. */
323 : const string_vector& filenames () { return filenames_; }
324 : private:
      /* Maps keyed by string contents (compared with string_compare). */
325 : typedef std::map<const char *, unsigned, string_compare> string_index_map;
326 : typedef std::map<const char *, auto_vec<unsigned>, string_compare>
327 : clashing_name_map;
328 : typedef std::map<const char *, char *, string_compare> string_string_map;
329 : /* The symbol name and file name arrays. */
330 : string_vector symbol_names_;
331 : string_vector filenames_;
332 :
333 : string_index_map symbol_name_map_;
334 : string_index_map filename_map_;
335 : string_index_map symbol_to_filename_map_;
336 :
337 : string_string_map original_names_map_;
338 : clashing_name_map clashing_names_map_;
339 : };
340 :
341 : /* Descriptor for a function_instance which can be used to disambiguate it from
342 : other instances. This consists of the symbol name and the file name indices
343 : from string_table. */
344 :
345 : class function_instance_descriptor
346 : {
347 : /* The string_table index for the file name. */
348 : unsigned file_name_;
349 : /* The string_table index for the function name. */
350 : unsigned symbol_name_;
351 :
352 : public:
353 0 : unsigned file_name () const { return file_name_; }
354 0 : unsigned symbol_name () const { return symbol_name_; }
355 : /* Construct from indices; note that FILE_NAME comes before SYMBOL_NAME. */
356 0 : function_instance_descriptor (unsigned file_name, unsigned symbol_name)
357 0 : : file_name_ (file_name), symbol_name_ (symbol_name)
358 : {}
359 : /* Same for signed indices, which are converted to unsigned when stored. */
360 0 : function_instance_descriptor (int file_name, int symbol_name)
361 0 : : file_name_ (file_name), symbol_name_ (symbol_name)
362 : {}
363 : /* Replace the symbol name index with NEW_NAME. */
364 0 : void set_symbol_name (unsigned new_name) { symbol_name_ = new_name; }
365 : /* Order by file name index first, then by symbol name index. */
366 0 : bool operator< (const function_instance_descriptor &other) const
367 : {
368 0 : return file_name_ < other.file_name_
369 0 : || (file_name_ == other.file_name_
370 0 : && symbol_name_ < other.symbol_name_);
371 : }
372 : };
373 :
374 : /* Profile of a function instance:
375 : 1. total_count of the function.
376 : 2. head_count (entry basic block count) of the function (only valid when
377 : function is a top-level function_instance, i.e. it is the original copy
378 : instead of the inlined copy).
379 : 3. map from source location (decl_lineno) to profile (count_info).
380 : 4. map from callsite to callee function_instance. */
381 :
382 : class function_instance
383 : {
384 : public:
385 : typedef auto_vec<function_instance *> function_instance_stack;
386 :
387 : /* Read the profile and return a function_instance. Recursively read
388 : callsites to create nested function_instances too. STACK is used to track
389 : the recursive creation process. TOPLEVEL presumably distinguishes top-level
      instances from nested (inlined) ones -- confirm at the definition. */
390 : static function_instance *
391 : read_function_instance (function_instance_stack *stack, bool toplevel = true);
392 :
393 : /* Recursively deallocate all callsites (nested function_instances). */
394 : ~function_instance ();
395 :
396 : /* Accessors. The name accessors forward to the descriptor and return its
      unsigned indices as int. */
397 0 : int symbol_name () const { return descriptor_.symbol_name (); }
398 0 : int file_name () const { return descriptor_.file_name (); }
399 0 : void set_symbol_name (int index) { descriptor_.set_symbol_name (index); }
400 0 : function_instance_descriptor get_descriptor () const { return descriptor_; }
401 : /* Return the total sample count. */
402 : gcov_type
403 0 : total_count () const
404 : {
405 0 : return total_count_;
406 : }
407 :
408 : /* Return head count or -1 if unknown. */
409 : gcov_type
410 0 : head_count () const
411 : {
412 0 : return head_count_;
413 : }
414 : /* Return the timestamp (see timestamp_). */
415 : gcov_type
416 0 : timestamp () const
417 : {
418 0 : return timestamp_;
419 : }
420 : /* Set the timestamp. */
421 0 : void set_timestamp (gcov_type timestamp) { timestamp_ = timestamp; }
422 :
423 : /* Propagate timestamp from top-level function_instance to
424 : inlined instances. */
425 : void prop_timestamp ();
426 :
427 : /* Traverse callsites of the current function_instance to find one at the
428 : location of LINENO and callee name represented in DECL.
429 : LOCATION should match LINENO and is used to output diagnostics. */
430 : function_instance *get_function_instance_by_decl (unsigned lineno,
431 : tree decl,
432 : location_t location) const;
433 :
434 : /* Merge profile of clones. Note that cloning hasn't been performed when
435 : we annotate the CFG (at this stage). */
436 : void merge (function_instance *other,
437 : vec <function_instance *> &new_functions);
438 :
439 : /* Look for inline instances that were not realized and
440 : remove them while possibly merging them to offline variants. */
441 : void offline_if_not_realized (vec <function_instance *> &new_functions);
442 :
443 : /* Match function instance with gimple body. */
444 : bool match (cgraph_node *node, vec <function_instance *> &new_functions,
445 : name_index_map &to_symbol_name);
446 :
447 : /* Offline all inlined functions with name in SEEN.
448 : If new toplevel functions are created, add them to NEW_FUNCTIONS. */
449 : void offline_if_in_set (name_index_set &seen,
450 : vec <function_instance *> &new_functions);
451 :
452 : /* Walk inlined functions and remove those whose name is not
453 : in SEEN. */
454 :
455 : void remove_external_functions (name_index_set &seen,
456 : name_index_map &to_symbol_name,
457 : vec <function_instance *> &new_functions);
458 :
459 : /* Store the profile info for LOC in INFO. Return TRUE if profile info
460 : is found. */
461 : bool get_count_info (location_t loc, count_info *info) const;
462 :
463 : /* Read the inlined indirect call target profile for STMT in FN and store it
464 : in MAP, return the total count for all inlined indirect calls. */
465 : gcov_type find_icall_target_map (tree fn, gcall *stmt,
466 : icall_target_map *map) const;
467 :
468 : /* Remove inlined indirect call target profile for STMT in FN. */
469 : void remove_icall_target (tree fn, gcall *stmt);
470 :
471 : /* Mark LOC as annotated. */
472 : void mark_annotated (location_t loc);
473 : /* Dump routines writing to F, and a debugger entry point. */
474 : void dump (FILE *f, int indent = 0, bool nested = false) const;
475 :
476 : void dump_inline_stack (FILE *f) const;
477 :
478 : DEBUG_FUNCTION void debug () const;
479 :
480 : /* Mark function as removed from indir target list. */
481 : void
482 0 : remove_icall_target ()
483 : {
484 0 : removed_icall_target_ = true;
485 : }
486 :
487 : /* Return true if function is removed from indir target list. */
488 : bool
489 0 : removed_icall_target ()
490 : {
491 0 : return removed_icall_target_;
492 : }
493 :
494 : /* Set inlined_to pointer. An instance must not be inlined to itself. */
495 : void
496 0 : set_inlined_to (function_instance *inlined_to)
497 : {
498 0 : gcc_checking_assert (inlined_to != this);
499 0 : inlined_to_ = inlined_to;
500 0 : }
501 :
502 : /* Return pointer to the function instance this function is inlined
503 : to or NULL if it is outer instance. */
504 : function_instance *
505 0 : inlined_to () const
506 : {
507 0 : return inlined_to_;
508 : }
509 :
510 : /* Mark function as realized. */
511 : void
512 0 : set_realized ()
513 : {
514 0 : realized_ = true;
515 0 : }
516 :
517 : /* Return true if function is realized. */
518 : bool
519 0 : realized_p ()
520 : {
521 0 : return realized_;
522 : }
523 :
524 : /* Mark function as in_worklist. Only valid for a toplevel instance that
      is not in the worklist yet. */
525 : void
526 0 : set_in_worklist ()
527 : {
528 0 : gcc_checking_assert (!inlined_to_ && !in_worklist_p ());
529 0 : in_worklist_ = true;
530 0 : }
531 : /* Clear the in_worklist flag. Only valid for a toplevel instance that
      currently is in the worklist. */
532 : void
533 0 : clear_in_worklist ()
534 : {
535 0 : gcc_checking_assert (!inlined_to_ && in_worklist_p ());
536 0 : in_worklist_ = false;
537 0 : }
538 :
539 :
540 : /* Return true if function is in_worklist. */
541 : bool
542 0 : in_worklist_p ()
543 : {
544 0 : return in_worklist_;
545 : }
546 :
547 : /* Return corresponding cgraph node. */
548 : cgraph_node *get_cgraph_node ();
549 : /* Record location L of the function; the location must not be set yet. */
550 : void
551 0 : set_location (location_t l)
552 : {
553 0 : gcc_checking_assert (location_ == UNKNOWN_LOCATION);
554 0 : location_= l;
555 0 : }
556 : /* Return the location of the function. */
557 : location_t
558 0 : get_location ()
559 : {
560 0 : return location_;
561 : }
562 : /* Record location L of the call; L must be known and the call location
      must not be set yet. */
563 : void
564 0 : set_call_location (location_t l)
565 : {
566 0 : gcc_checking_assert (call_location_ == UNKNOWN_LOCATION
567 : && l != UNKNOWN_LOCATION);
568 0 : call_location_= l;
569 0 : }
570 : /* Return the location of the call. */
571 : location_t
572 0 : get_call_location ()
573 : {
574 0 : return call_location_;
575 : }
576 :
577 : /* Lookup count and warn about duplicates. */
578 : count_info *lookup_count (location_t loc, inline_stack &stack,
579 : cgraph_node *node);
580 : private:
581 : /* Callsite, represented as (decl_lineno, callee_function_name_index). */
582 : typedef std::pair<unsigned, unsigned> callsite;
583 :
584 : /* Map from callsite to callee function_instance. */
585 : typedef std::map<callsite, function_instance *> callsite_map;
586 : /* Private constructor. Note that SYMBOL_NAME precedes FILE_NAME here while
      the descriptor constructor takes them in the opposite order. */
587 0 : function_instance (unsigned symbol_name, unsigned file_name,
588 : gcov_type head_count)
589 0 : : descriptor_ (file_name, symbol_name), total_count_ (0),
590 0 : head_count_ (head_count), timestamp_ (0),
591 0 : removed_icall_target_ (false), realized_ (false), in_worklist_ (false),
592 0 : inlined_to_ (NULL), location_ (UNKNOWN_LOCATION),
593 0 : call_location_ (UNKNOWN_LOCATION)
594 : {
595 : }
596 :
597 : /* Map from source location (decl_lineno) to profile (count_info). */
598 : typedef std::map<unsigned, count_info> position_count_map;
599 :
600 : /* The indices into the string table identifying the function_instance. */
601 : function_instance_descriptor descriptor_;
602 :
603 : /* Total sample count. */
604 : gcov_type total_count_;
605 :
606 : /* Entry BB's sample count. */
607 : gcov_type head_count_;
608 :
609 : /* perf timestamp associated with first execution of function, which is
610 : used to compute node->tp_first_run. */
611 : gcov_type timestamp_;
612 :
613 : /* Map from callsite location to callee function_instance. */
614 : callsite_map callsites;
615 :
616 : /* Map from source location to count_info. */
617 : position_count_map pos_counts;
618 :
619 : /* True if function was removed from indir target list. */
620 : bool removed_icall_target_;
621 :
622 : /* True if function exists in IL. I.e. for toplevel instance we
623 : have corresponding symbol and for inline instance we inlined
624 : to it. */
625 : bool realized_;
626 :
627 : /* True if function is in worklist for merging/offlining. */
628 : bool in_worklist_;
629 :
630 : /* Pointer to outer function instance or NULL if this
631 : is a toplevel one. */
632 : function_instance *inlined_to_;
633 :
634 : /* Location of function and its call (in case it is inlined). */
635 : location_t location_, call_location_;
636 :
637 : /* Turn inline instance to offline. */
638 : static bool offline (function_instance *fn,
639 : vec <function_instance *> &new_functions);
640 :
641 : /* Helper routine for prop_timestamp. */
642 : void prop_timestamp_1 (gcov_type timestamp);
643 : };
644 :
645 : /* Profile for all functions. */
646 : class autofdo_source_profile
647 : {
648 : public:
      /* Allocate a profile and read it. Return the profile on success;
         otherwise delete it and return NULL. */
649 : static autofdo_source_profile *
650 0 : create ()
651 : {
652 0 : autofdo_source_profile *map = new autofdo_source_profile ();
653 :
654 0 : if (map->read ())
655 : return map;
656 0 : delete map;
657 0 : return NULL;
658 : }
659 :
660 : ~autofdo_source_profile ();
661 :
662 : /* For a given DECL, returns the top-level function_instance.
      NOTE(review): the meaning of the optional second argument is not visible
      in this declaration. */
663 : function_instance *get_function_instance_by_decl (tree decl, const char * = NULL) const;
664 :
665 : /* For a given DESCRIPTOR, return the matching instance if found. */
666 : function_instance *
667 : get_function_instance_by_descriptor (function_instance_descriptor) const;
668 : /* Add a function_instance to the profile (presumably into map_ -- confirm). */
669 : void add_function_instance (function_instance *);
670 :
671 : /* Find count_info for a given gimple STMT. If found, store the count_info
672 : in INFO and return true; otherwise return false.
673 : NODE can be used to specify particular inline clone. */
674 : bool get_count_info (gimple *stmt, count_info *info,
675 : cgraph_node *node = NULL) const;
676 :
677 : /* Find count_info for a given gimple location GIMPLE_LOC. If found,
678 : store the count_info in INFO and return true; otherwise return false.
679 : NODE can be used to specify particular inline clone. */
680 : bool get_count_info (location_t gimple_loc, count_info *info,
681 : cgraph_node *node = NULL) const;
682 :
683 : /* Find total count of the callee of EDGE. */
684 : gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
685 :
686 : /* Update value profile INFO for STMT within NODE from the inlined indirect
687 : callsite. Return true if INFO is updated. */
688 : bool update_inlined_ind_target (gcall *stmt, count_info *info,
689 : cgraph_node *node);
690 : /* NOTE(review): presumably removes the indirect call target profile for edge E -- confirm. */
691 : void remove_icall_target (cgraph_edge *e);
692 :
693 : /* Offline all functions not defined in the current translation unit. */
694 : void offline_external_functions ();
695 : /* NOTE(review): presumably offlines inline instances that were not realized -- confirm. */
696 : void offline_unrealized_inlines ();
697 :
698 : private:
699 : /* Map from pair of function_instance filename and symbol name (in
700 : string_table) to function_instance. */
701 : typedef std::map<function_instance_descriptor, function_instance *>
702 : name_function_instance_map;
703 : /* Private; instances are obtained through create (). */
704 0 : autofdo_source_profile () {}
705 :
706 : /* Read AutoFDO profile and return TRUE on success. */
707 : bool read ();
708 :
709 : /* Return the function_instance in the profile that corresponds to the
710 : inline STACK. */
711 : function_instance *
712 : get_function_instance_by_inline_stack (const inline_stack &stack) const;
713 :
714 : /* Find the matching function instance which has DESCRIPTOR as its
715 : descriptor. If not found, also try checking if an instance exists with the
716 : same name which has no associated filename. */
717 : name_function_instance_map::const_iterator find_iter_for_function_instance (
718 : function_instance_descriptor descriptor) const;
719 :
720 : /* Similar to the above, but return a pointer to the instance instead of an
721 : iterator. */
722 : function_instance *
723 : find_function_instance (function_instance_descriptor descriptor) const;
724 :
725 : /* Remove a function instance from the map. Returns true if the entry was
726 : actually deleted. */
727 : bool remove_function_instance (function_instance *inst);
728 : /* Toplevel function instances keyed by descriptor. */
729 : name_function_instance_map map_;
730 : /* NOTE(review): purpose not documented here; presumably instances whose descriptor clashed with an entry of map_ -- confirm. */
731 : auto_vec <function_instance *> duplicate_functions_;
732 : };
733 :
734 : /* Store the summary information from the GCOV file. */
735 : static summary_info *afdo_summary_info;
736 :
737 : /* Store the strings read from the profile data file. */
738 : static string_table *afdo_string_table;
739 :
740 : /* Store the AutoFDO source profile. */
741 : static autofdo_source_profile *afdo_source_profile;
742 :
743 : /* gcov_summary structure to store the profile_info. */
744 : static gcov_summary *afdo_profile_info;
745 :
746 : /* Map from timestamp -> tp_first_run.
747 :
748 : The purpose of this map is to map 64-bit timestamp values to (1..N) sorted
749 : by ascending order of timestamps and assign that to node->tp_first_run,
750 : since we don't need the full 64-bit range. */
751 : static std::map<gcov_type, int> timestamp_info_map;
752 :
753 : /* Scaling factor for afdo data. Compared to normal profile
754 : AFDO profile counts are much lower, depending on sampling
755 : frequency. We scale data up to reduce effects of roundoff
756 : errors. */
757 :
758 : static gcov_type afdo_count_scale = 1;
759 :
760 : /* Helper functions. */
761 :
762 : /* Return the original name of NAME: strip the suffix that starts
763 : with '.' for names that are generated after auto-profile pass.
764 : This is to match profiled names with the names in the IR at this stage.
765 : Note that we only have to strip suffix and not in the middle.
766 : With ALLOC true (the default) the result is a copy of NAME made by xstrdup
      and the caller is responsible for freeing RET. With ALLOC false NAME
      itself is modified in place and returned; this is used by the recursive
      call on the already duplicated string. */
767 :
768 : static char *
769 0 : get_original_name (const char *name, bool alloc = true)
770 : {
771 0 : char *ret = alloc ? xstrdup (name) : const_cast<char *> (name);
772 0 : char *last_dot = strrchr (ret, '.');
773 0 : if (last_dot == NULL)
774 : return ret;
      /* If the last suffix consists only of digits (or is empty), cut it off
         for now; it is restored below unless a known clone suffix precedes
         it. */
775 0 : bool only_digits = true;
776 : char *ptr = last_dot;
777 0 : while (*(++ptr) != 0)
778 0 : if (*ptr < '0' || *ptr > '9')
779 : {
780 : only_digits = false;
781 : break;
782 : }
783 0 : if (only_digits)
784 0 : *last_dot = 0;
785 0 : char *next_dot = strrchr (ret, '.');
786 : /* If there is no other dot, e.g. nested function such as foo.0, return foo.0. */
787 0 : if (next_dot == NULL)
788 : {
789 0 : *last_dot = '.';
790 0 : return ret;
791 : }
792 : /* Suffixes of clones that compiler generates after auto-profile.
      The text between NEXT_DOT and LAST_DOT must be exactly one of them.
      NOTE(review): when the last suffix is not numeric it was not cut off, so
      NEXT_DOT equals LAST_DOT, the length test compares against -1 and can
      never match; a name such as "foo.cold" therefore appears to be returned
      unchanged -- confirm this is intended. */
793 0 : const char *suffixes[] = {"isra", "constprop", "lto_priv", "part", "cold"};
794 0 : for (unsigned i = 0; i < sizeof (suffixes) / sizeof (const char *); ++i)
795 : {
796 0 : int len = strlen (suffixes[i]);
797 0 : if (len == last_dot - next_dot - 1
798 0 : && strncmp (next_dot + 1, suffixes[i], strlen (suffixes[i])) == 0)
799 : {
      /* Drop ".SUFFIX.N" and keep stripping the shortened name in place. */
800 0 : *next_dot = 0;
801 0 : return get_original_name (ret, false);
802 : }
803 : }
804 : /* Otherwise, it is for clones such as .omp_fn.N that were created before
805 : auto-profile and should be kept as they are. */
806 0 : *last_dot = '.';
807 0 : return ret;
808 : }
809 :
810 : /* Return the combined location, which is a 32bit integer in which
811 : higher 16 bits stores the line offset of LOC to the start lineno
812 : of DECL, The lower 16 bits stores the discriminator.
      A warning (OPT_Wauto_profile) is issued when the line offset or the
      discriminator is out of the encodable range; the value is then still
      computed from the out-of-range inputs. */
813 :
814 : static unsigned
815 0 : get_combined_location (location_t loc, tree decl)
816 : {
817 0 : bool warned = false;
818 : /* TODO: allow more bits for line and less bits for discriminator. */
819 0 : if ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) >= (1<<15)
820 0 : || (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) <= -(1<<15))
821 0 : warned = warning_at (loc, OPT_Wauto_profile,
822 : "auto-profile cannot encode offset %i "
823 : "that exceeds 16 bits",
824 0 : LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl));
      /* Point at DECL so the user can see what the offset is relative to. */
825 0 : if (warned)
826 0 : inform (DECL_SOURCE_LOCATION (decl), "location offset is related to");
827 0 : if ((unsigned)get_discriminator_from_loc (loc) >= (1u << 16))
828 0 : warning_at (loc, OPT_Wauto_profile,
829 : "auto-profile cannot encode discriminators "
830 : "that exceed 16 bits");
831 0 : return ((unsigned)(LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16)
832 0 : | get_discriminator_from_loc (loc);
833 : }
834 :
835 : /* Return the function decl of a given lexical BLOCK, taken from its abstract
      origin, or NULL_TREE if BLOCK is not the outer scope of an inlined
      function. */
836 :
837 : static tree
838 0 : get_function_decl_from_block (tree block)
839 : {
840 0 : if (!inlined_function_outer_scope_p (block))
841 : return NULL_TREE;
842 :
843 0 : return BLOCK_ABSTRACT_ORIGIN (block);
844 : }
845 :
846 : /* Dump combined location LOC to F: the upper 16 bits, followed by '.' and
      the lower 16 bits when the latter are nonzero. */
847 :
848 : static void
849 0 : dump_afdo_loc (FILE *f, unsigned loc)
850 : {
851 0 : if (loc & 65535)
852 0 : fprintf (f, "%i.%i", loc >> 16, loc & 65535);
853 : else
854 0 : fprintf (f, "%i", loc >> 16);
855 0 : }
856 :
857 : /* Return assembler name as in symbol table and DW_AT_linkage_name: ASMNAME
      with a leading '*' skipped, if there is one. Reports sorry () when the
      target has a non-empty user_label_prefix. */
858 :
859 : static const char *
860 0 : raw_symbol_name (const char *asmname)
861 : {
862 : /* If we start supporting user_label_prefixes, add_linkage_attr will also
863 : need to be fixed. */
864 0 : if (strlen (user_label_prefix))
865 0 : sorry ("auto-profile is not supported for targets with user label prefix");
866 0 : return asmname + (asmname[0] == '*');
867 : }
868 :
869 : /* Convenience wrapper that looks up the assembler name of DECL and passes
      it to the string overload. */
870 :
871 : static const char *
872 0 : raw_symbol_name (tree decl)
873 : {
874 0 : return raw_symbol_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
875 : }
876 :
877 : /* Dump STACK to F on one line: each entry is printed as "NAME:" followed by
      its afdo location, entries are separated by "; ". */
878 :
879 : static void
880 0 : dump_inline_stack (FILE *f, inline_stack *stack)
881 : {
882 0 : bool first = true;
883 0 : for (decl_lineno &p : *stack)
884 : {
885 0 : fprintf (f, "%s%s:",
886 : first ? "" : "; ",
887 : raw_symbol_name (p.decl));
888 0 : dump_afdo_loc (f, p.afdo_loc);
889 0 : first = false;
890 : }
891 0 : fprintf (f, "\n");
892 0 : }
893 :
894 : /* Store inline stack for location LOCUS in STACK, innermost entry first.
      FN is the function the outermost entry is attributed to. Nothing is
      pushed if LOCUS is unknown. */
895 :
896 : static void
897 0 : get_inline_stack (location_t locus, inline_stack *stack,
898 : tree fn = current_function_decl)
899 : {
900 0 : if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
901 : return;
902 :
903 0 : tree block = LOCATION_BLOCK (locus);
904 0 : if (block && TREE_CODE (block) == BLOCK)
905 : {
      /* Walk the enclosing blocks, starting at the supercontext of LOCUS's
         block; blocks without a known source location are skipped. */
906 0 : for (block = BLOCK_SUPERCONTEXT (block);
907 0 : block && (TREE_CODE (block) == BLOCK);
908 0 : block = BLOCK_SUPERCONTEXT (block))
909 : {
910 0 : location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
911 0 : if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
912 0 : continue;
913 :
      /* Push the current location relative to this block's function, then
         continue with the block's own source location. */
914 0 : tree decl = get_function_decl_from_block (block);
915 0 : stack->safe_push (
916 0 : {decl, get_combined_location (locus, decl), locus});
917 0 : locus = tmp_locus;
918 : }
919 : }
      /* The outermost entry is relative to FN. */
920 0 : stack->safe_push ({fn, get_combined_location (locus, fn), locus});
921 : }
922 :
923 : /* Same as get_inline_stack for a given node which may be
924 : an inline clone. If NODE is NULL, assume current_function_decl. */
925 : static void
926 0 : get_inline_stack_in_node (location_t locus, inline_stack *stack,
927 : cgraph_node *node)
928 : {
929 0 : if (!node)
930 0 : return get_inline_stack (locus, stack);
931 0 : do
932 : {
933 0 : get_inline_stack (locus, stack, node->decl);
934 : /* If caller is inlined, continue building stack. */
935 0 : if (!node->inlined_to)
936 : node = NULL;
937 : else
938 : {
939 0 : locus = gimple_location (node->callers->call_stmt);
940 0 : node = node->callers->caller;
941 : }
942 : }
943 0 : while (node);
944 : }
945 :
946 : /* Return combined location of LOCUS within BLOCK that is in
947 : function FN.
948 :
949 : This is a 32bit integer in which higher 16 bits stores the line offset of
950 : LOC to the start lineno of DECL, The lower 16 bits stores the
951 : discriminator. */
952 :
953 : static unsigned
954 0 : get_relative_location_for_locus (tree fn, tree block, location_t locus)
955 : {
956 0 : if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
957 : return -1;
958 :
959 0 : for (; block && (TREE_CODE (block) == BLOCK);
960 0 : block = BLOCK_SUPERCONTEXT (block))
961 0 : if (inlined_function_outer_scope_p (block))
962 0 : return get_combined_location (locus,
963 0 : get_function_decl_from_block (block));
964 0 : return get_combined_location (locus, fn);
965 : }
966 :
967 : /* Return combined location of STMT in function FN. */
968 :
969 : static unsigned
970 0 : get_relative_location_for_stmt (tree fn, gimple *stmt)
971 : {
972 0 : return get_relative_location_for_locus
973 0 : (fn, LOCATION_BLOCK (gimple_location (stmt)),
974 0 : gimple_location (stmt));
975 : }
976 :
977 : /* Return either the basename or the realpath for a given path based on
978 : PARAM_PROFILE_FUNC_INTERNAL_ID. */
979 :
980 : static const char *
981 0 : get_normalized_path (const char *path, bool from_gcov = false)
982 : {
983 0 : if (param_profile_func_internal_id == 1)
984 : /* The GCOV will already contain the entire path. It doesn't need to be
985 : normalized with lrealpath (). */
986 0 : return from_gcov ? path : lrealpath (path);
987 0 : return lbasename (path);
988 : }
989 :
990 : /* Member functions for summary_info. */
991 :
992 : bool
993 0 : summary_info::read ()
994 : {
995 0 : if (gcov_read_unsigned () != GCOV_TAG_AFDO_SUMMARY)
996 : return false;
997 :
998 0 : total_count = gcov_read_counter ();
999 0 : max_count = gcov_read_counter ();
1000 0 : max_function_count = gcov_read_counter ();
1001 0 : num_counts = gcov_read_counter ();
1002 0 : num_functions = gcov_read_counter ();
1003 0 : uint64_t num_detailed_summaries = gcov_read_counter ();
1004 0 : gcc_checking_assert (num_detailed_summaries == NUM_PERCENTILES);
1005 0 : for (uint64_t i = 0; i < num_detailed_summaries; i++)
1006 : {
1007 0 : detailed_summaries[i].cutoff = gcov_read_unsigned ();
1008 0 : detailed_summaries[i].min_count = gcov_read_counter ();
1009 0 : detailed_summaries[i].num_counts = gcov_read_counter ();
1010 : }
1011 :
1012 0 : return !gcov_is_error ();
1013 : }
1014 :
1015 : /* Get the minimum count required for percentile CUTOFF. */
1016 :
1017 : uint64_t
1018 0 : summary_info::get_threshold_count (uint32_t cutoff)
1019 : {
1020 : /* The cutoffs stored in the GCOV are fractions multiplied by 1,000,000. */
1021 0 : gcc_checking_assert (cutoff <= 1'000'000);
1022 0 : unsigned idx = 0;
1023 : /* Find the first cutoff at least as high as CUTOFF. */
1024 0 : for (; idx < NUM_PERCENTILES; idx++)
1025 0 : if (detailed_summaries[idx].cutoff >= cutoff)
1026 : break;
1027 0 : idx = std::min (NUM_PERCENTILES - 1, idx);
1028 0 : return detailed_summaries[idx].min_count;
1029 : }
1030 :
1031 : /* Member functions for string_table. */
1032 :
1033 : /* Deconstructor. */
1034 :
1035 0 : string_table::~string_table ()
1036 : {
1037 0 : for (unsigned i = 0; i < symbol_names_.length (); i++)
1038 0 : free (const_cast<char *> (symbol_names_[i]));
1039 0 : for (unsigned i = 0; i < filenames_.length (); i++)
1040 0 : free (const_cast<char *> (filenames_[i]));
1041 0 : for (auto it = original_names_map_.begin (); it != original_names_map_.end ();
1042 0 : it++)
1043 0 : free (it->second);
1044 0 : }
1045 :
1046 :
1047 : /* Return the index of a given function NAME. Return -1 if NAME is not
1048 : found in string table. */
1049 :
1050 : int
1051 0 : string_table::get_index (const char *name) const
1052 : {
1053 0 : if (name == NULL)
1054 : return -1;
1055 0 : string_index_map::const_iterator iter = symbol_name_map_.find (name);
1056 0 : if (iter == symbol_name_map_.end ())
1057 : return -1;
1058 :
1059 0 : return iter->second;
1060 : }
1061 :
1062 : /* Return the index of a given function DECL. Return -1 if DECL is not
1063 : found in string table. */
1064 :
1065 : int
1066 0 : string_table::get_index_by_decl (tree decl) const
1067 : {
1068 0 : const char *name = raw_symbol_name (decl);
1069 0 : int ret = get_index (name);
1070 0 : if (ret != -1)
1071 : return ret;
1072 0 : if (DECL_FROM_INLINE (decl))
1073 0 : return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
1074 :
1075 : return -1;
1076 : }
1077 :
1078 : /* Return the function name of a given INDEX. */
1079 :
1080 : const char *
1081 0 : string_table::get_symbol_name (int index) const
1082 : {
1083 0 : if (index <= 0 || index >= (int) symbol_names_.length ())
1084 0 : fatal_error (UNKNOWN_LOCATION,
1085 : "auto-profile contains invalid symbol name index %d", index);
1086 :
1087 0 : return symbol_names_[index];
1088 : }
1089 :
1090 : /* For a given index, returns the string. */
1091 :
1092 : const char *
1093 0 : string_table::get_filename (int index) const
1094 : {
1095 : /* There may not be any file name for some functions, ignore them. */
1096 0 : if (index == string_table::unknown_filename)
1097 : return "<unknown>";
1098 :
1099 0 : if (index < 0 || index >= (int) filenames_.length ())
1100 0 : fatal_error (UNKNOWN_LOCATION,
1101 : "auto-profile contains invalid filename index %d", index);
1102 :
1103 0 : return filenames_[index];
1104 : }
1105 :
1106 : /* For a given symbol name index, returns the filename index. */
1107 :
1108 : int
1109 0 : string_table::get_filename_by_symbol (int index) const
1110 : {
1111 0 : return get_filename_by_symbol (get_symbol_name (index));
1112 : }
1113 :
1114 : /* For a given function name, returns the filename index. */
1115 :
1116 : int
1117 0 : string_table::get_filename_by_symbol (const char *name) const
1118 : {
1119 0 : auto it = symbol_to_filename_map_.find (name);
1120 0 : if (it != symbol_to_filename_map_.end () && it->second < filenames_.length ())
1121 0 : return it->second;
1122 : return string_table::unknown_filename;
1123 : }
1124 :
1125 : /* For a given filename, returns the index. */
1126 :
1127 : int
1128 0 : string_table::get_filename_index (const char *name) const
1129 : {
1130 0 : auto iter = filename_map_.find (name);
1131 0 : return iter == filename_map_.end () ? string_table::unknown_filename
1132 0 : : iter->second;
1133 : }
1134 :
1135 : /* Get the original name and file name index for a node. This will return the
1136 : name from the current TU if there are multiple symbols that map to
1137 : NAME. */
1138 :
1139 : std::pair<const char *, int>
1140 0 : string_table::get_original_name (const char *name) const
1141 : {
1142 : /* Check if the un-prefixed name differs from the actual name. */
1143 0 : auto stripped = original_names_map_.find (name);
1144 :
1145 : /* The original name for the symbol is its name, i.e. there are no
1146 : suffixes. */
1147 0 : if (stripped == original_names_map_.end ())
1148 0 : return {name, get_filename_by_symbol (name)};
1149 :
1150 : /* Figure out if a clash exists. */
1151 0 : auto clash = clashing_names_map_.find (stripped->second);
1152 0 : gcc_checking_assert (clash != clashing_names_map_.end ());
1153 :
1154 : /* Try to find a function from the current TU. */
1155 0 : gcc_checking_assert (clash->second.length () >= 1);
1156 0 : symtab_node *n
1157 0 : = cgraph_node::get_for_asmname (get_identifier (stripped->second));
1158 0 : if (n && is_a<cgraph_node *> (n))
1159 0 : for (cgraph_node *cn = dyn_cast<cgraph_node *> (n); cn;)
1160 : {
1161 : /* Check if there is a symbol in the current TU that has the same name
1162 : as in the GCOV. */
1163 0 : for (auto name : clash->second)
1164 : {
1165 0 : int filename_idx = get_filename_by_symbol (name);
1166 0 : if (cn->definition && cn->has_gimple_body_p ()
1167 0 : && !strcmp (get_normalized_path (DECL_SOURCE_FILE (cn->decl)),
1168 : get_filename (filename_idx)))
1169 0 : return {stripped->second, filename_idx};
1170 : }
1171 0 : cn = dyn_cast<cgraph_node *> (cn->next_sharing_asm_name);
1172 : }
1173 :
1174 : /* No match found. Just stick to the current symbol and return the stripped
1175 : name. */
1176 0 : return {stripped->second, get_filename_by_symbol (name)};
1177 : }
1178 :
1179 : /* Add new symbol name STRING (with an associated file name FILENAME_IDX) and
1180 : return its index. */
1181 :
1182 : int
1183 0 : string_table::add_symbol_name (const char *string, int filename_idx)
1184 : {
1185 0 : gcc_checking_assert (
1186 : filename_idx == string_table::unknown_filename
1187 : || (filename_idx >= 0 && filename_idx < (int) filenames_.length ()));
1188 0 : symbol_names_.safe_push (string);
1189 0 : symbol_name_map_[symbol_names_.last ()] = symbol_names_.length () - 1;
1190 0 : symbol_to_filename_map_[symbol_names_.last ()] = filename_idx;
1191 0 : return symbol_names_.length () - 1;
1192 : }
1193 :
1194 : /* Add new filename and return its index (returning the same if it already
1195 : exists). */
1196 :
1197 : int
1198 0 : string_table::add_filename (const char *name)
1199 : {
1200 0 : auto it = filename_map_.find (name);
1201 0 : if (it != filename_map_.end ())
1202 0 : return it->second;
1203 0 : filenames_.safe_push (xstrdup (name));
1204 0 : return filenames_.length () - 1;
1205 : }
1206 :
1207 : /* Read the string table. Return TRUE if reading is successful. */
1208 :
1209 : bool
1210 0 : string_table::read ()
1211 : {
1212 0 : if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
1213 : return false;
1214 : /* Skip the length of the section. */
1215 0 : gcov_read_unsigned ();
1216 : /* Read in the file name table. */
1217 0 : unsigned file_num = gcov_read_unsigned ();
1218 0 : filenames_.reserve (file_num);
1219 0 : for (unsigned i = 0; i < file_num; i++)
1220 : {
1221 0 : const char *filename = gcov_read_string ();
1222 0 : filenames_.quick_push (xstrdup (get_normalized_path (filename, true)));
1223 0 : filename_map_[filenames_.last ()] = i;
1224 0 : free (const_cast<char *> (filename));
1225 0 : if (gcov_is_error ())
1226 : return false;
1227 : }
1228 : /* Read in the function name -> file name table. */
1229 0 : unsigned string_num = gcov_read_unsigned ();
1230 0 : symbol_names_.reserve (string_num);
1231 0 : for (unsigned i = 0; i < string_num; i++)
1232 : {
1233 0 : symbol_names_.quick_push (const_cast<char *> (gcov_read_string ()));
1234 0 : symbol_name_map_[symbol_names_.last ()] = i;
1235 0 : unsigned filename_idx = gcov_read_unsigned ();
1236 0 : symbol_to_filename_map_[symbol_names_.last ()] = filename_idx;
1237 0 : char *original = const_cast<char *> (
1238 0 : autofdo::get_original_name (symbol_names_.last ()));
1239 0 : if (strcmp (original, symbol_names_.last ()))
1240 : {
1241 : /* Take ownership of ORIGINAL. */
1242 0 : original_names_map_[symbol_names_.last ()] = original;
1243 0 : clashing_names_map_[original].safe_push (i);
1244 : /* It is possible that a public symbol with the stripped name exists.
1245 : If it does exist, add it as well. */
1246 0 : auto publik = symbol_name_map_.find (original);
1247 0 : if (publik != symbol_name_map_.end ()
1248 0 : && clashing_names_map_.find (publik->first)
1249 0 : == clashing_names_map_.end ())
1250 0 : clashing_names_map_[publik->first].safe_push (publik->second);
1251 : }
1252 : else
1253 : /* There are no suffixes to remove. */
1254 0 : free (original);
1255 :
1256 0 : if (gcov_is_error ())
1257 0 : return false;
1258 : }
1259 0 : return true;
1260 : }
1261 :
1262 : /* Return cgraph node corresponding to given NAME_INDEX,
1263 : NULL if unavailable. */
1264 : cgraph_node *
1265 0 : string_table::get_cgraph_node (int name_index)
1266 : {
1267 0 : const char *sname = get_symbol_name (name_index);
1268 :
1269 0 : symtab_node *n = cgraph_node::get_for_asmname (get_identifier (sname));
1270 0 : for (;n; n = n->next_sharing_asm_name)
1271 0 : if (cgraph_node *cn = dyn_cast <cgraph_node *> (n))
1272 0 : if (cn->definition && cn->has_gimple_body_p ())
1273 : return cn;
1274 : return NULL;
1275 : }
1276 :
1277 : /* Return corresponding cgraph node. */
1278 :
1279 : cgraph_node *
1280 0 : function_instance::get_cgraph_node ()
1281 : {
1282 0 : return afdo_string_table->get_cgraph_node (symbol_name ());
1283 : }
1284 :
1285 : /* Member functions for function_instance. */
1286 :
1287 0 : function_instance::~function_instance ()
1288 : {
1289 0 : gcc_assert (!in_worklist_p ());
1290 0 : for (callsite_map::iterator iter = callsites.begin ();
1291 0 : iter != callsites.end (); ++iter)
1292 0 : delete iter->second;
1293 0 : }
1294 :
1295 : /* Propagate timestamp TS of function_instance to inlined instances if it's
1296 : not already set. */
1297 :
1298 : void
1299 0 : function_instance::prop_timestamp_1 (gcov_type ts)
1300 : {
1301 0 : if (!timestamp () && total_count () > 0)
1302 0 : set_timestamp (ts);
1303 0 : for (auto it = callsites.begin (); it != callsites.end (); ++it)
1304 0 : it->second->prop_timestamp_1 (ts);
1305 0 : }
1306 :
1307 : void
1308 0 : function_instance::prop_timestamp (void)
1309 : {
1310 0 : prop_timestamp_1 (timestamp ());
1311 0 : }
1312 :
1313 : /* Traverse callsites of the current function_instance to find one at the
1314 : location of LINENO and callee name represented in DECL. */
1315 :
1316 : function_instance *
1317 0 : function_instance::get_function_instance_by_decl (unsigned lineno,
1318 : tree decl,
1319 : location_t location) const
1320 : {
1321 0 : int func_name_idx = afdo_string_table->get_index_by_decl (decl);
1322 0 : if (func_name_idx != -1)
1323 : {
1324 0 : callsite_map::const_iterator ret
1325 0 : = callsites.find (std::make_pair (lineno, func_name_idx));
1326 0 : if (ret != callsites.end ())
1327 0 : return ret->second;
1328 : }
1329 0 : if (DECL_FROM_INLINE (decl))
1330 : {
1331 0 : function_instance
1332 0 : *ret = get_function_instance_by_decl (lineno,
1333 0 : DECL_ABSTRACT_ORIGIN (decl),
1334 : location);
1335 0 : return ret;
1336 : }
1337 0 : if (dump_enabled_p ())
1338 : {
1339 0 : for (auto const &iter : callsites)
1340 0 : if (iter.first.first == lineno)
1341 0 : dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS,
1342 0 : dump_user_location_t::from_location_t (location),
1343 : "auto-profile has mismatched function name %s"
1344 : " instead of %s at loc %i:%i",
1345 : afdo_string_table->get_symbol_name (
1346 0 : iter.first.second),
1347 : raw_symbol_name (decl), lineno >> 16,
1348 : lineno & 65535);
1349 : }
1350 :
1351 : return NULL;
1352 : }
1353 :
1354 : /* Merge profile of OTHER to THIS. Note that cloning hasn't been performed
1355 : when we annotate the CFG (at this stage). */
1356 :
1357 : void
1358 0 : function_instance::merge (function_instance *other,
1359 : vec <function_instance *> &new_functions)
1360 : {
1361 : /* Do not merge to itself and only merge functions of same name. */
1362 0 : gcc_checking_assert (other != this
1363 : && other->symbol_name () == symbol_name ());
1364 :
1365 0 : if (file_name () != other->file_name ())
1366 : return;
1367 :
1368 0 : total_count_ += other->total_count_;
1369 0 : if (other->total_count () && total_count () && other->head_count () == -1)
1370 0 : head_count_ = -1;
1371 0 : else if (head_count_ != -1)
1372 0 : head_count_ += other->head_count_;
1373 :
1374 : /* While merging timestamps, set the one that occurs earlier. */
1375 0 : if (timestamp () == 0
1376 0 : || (other->timestamp () > 0
1377 0 : && other->timestamp () < timestamp ()))
1378 0 : set_timestamp (other->timestamp ());
1379 :
1380 : bool changed = true;
1381 :
1382 0 : while (changed)
1383 : {
1384 0 : changed = false;
1385 : /* If both function instances agree on particular inlined function,
1386 : merge profiles. Otherwise offline the instance. */
1387 0 : for (callsite_map::const_iterator iter = other->callsites.begin ();
1388 0 : iter != other->callsites.end ();)
1389 0 : if (callsites.count (iter->first) == 0)
1390 : {
1391 0 : function_instance *f = iter->second;
1392 0 : if (dump_file)
1393 : {
1394 0 : fprintf (dump_file, " Mismatch in inlined functions;"
1395 : " offlining in merge source:");
1396 0 : f->dump_inline_stack (dump_file);
1397 0 : fprintf (dump_file, "\n");
1398 : }
1399 : /* We already merged outer part of the function accounting
1400 : the inlined call; compensate. */
1401 0 : for (function_instance *s = this; s; s = s->inlined_to ())
1402 : {
1403 0 : s->total_count_ -= f->total_count ();
1404 0 : gcc_checking_assert (s->total_count_ >= 0);
1405 : }
1406 0 : other->callsites.erase (iter);
1407 0 : function_instance::offline (f, new_functions);
1408 : /* Start from beginning as merging might have offlined
1409 : some functions in the case of recursive inlining. */
1410 0 : iter = other->callsites.begin ();
1411 : }
1412 : else
1413 0 : ++iter;
1414 0 : for (callsite_map::const_iterator iter = callsites.begin ();
1415 0 : iter != callsites.end ();)
1416 0 : if (other->callsites.count (iter->first) == 0)
1417 : {
1418 0 : function_instance *f = iter->second;
1419 0 : if (dump_file)
1420 : {
1421 0 : fprintf (dump_file, " Mismatch in inlined functions;"
1422 : " offlining in merge destination:");
1423 0 : f->dump_inline_stack (dump_file);
1424 0 : fprintf (dump_file, "\n");
1425 : }
1426 0 : callsites.erase (iter);
1427 0 : function_instance::offline (f, new_functions);
1428 0 : iter = callsites.begin ();
1429 0 : changed = true;
1430 : }
1431 : else
1432 0 : ++iter;
1433 : }
1434 0 : for (callsite_map::const_iterator iter = other->callsites.begin ();
1435 0 : iter != other->callsites.end (); ++iter)
1436 : {
1437 0 : if (dump_file)
1438 : {
1439 0 : fprintf (dump_file, " Merging profile for inlined function\n"
1440 : " from: ");
1441 0 : iter->second->dump_inline_stack (dump_file);
1442 0 : fprintf (dump_file, " total:%" PRIu64 "\n to : ",
1443 0 : (int64_t)iter->second->total_count ());
1444 0 : callsites[iter->first]->dump_inline_stack (dump_file);
1445 0 : fprintf (dump_file, " total:%" PRIu64 "\n",
1446 0 : (int64_t)callsites[iter->first]->total_count ());
1447 : }
1448 :
1449 0 : callsites[iter->first]->merge (iter->second, new_functions);
1450 : }
1451 :
1452 0 : for (position_count_map::const_iterator iter = other->pos_counts.begin ();
1453 0 : iter != other->pos_counts.end (); ++iter)
1454 0 : if (pos_counts.count (iter->first) == 0)
1455 0 : pos_counts[iter->first] = iter->second;
1456 : else
1457 : {
1458 0 : pos_counts[iter->first].count += iter->second.count;
1459 0 : for (icall_target_map::const_iterator titer
1460 0 : = iter->second.targets.begin ();
1461 0 : titer != iter->second.targets.end (); ++titer)
1462 0 : if (pos_counts[iter->first].targets.count (titer->first) == 0)
1463 0 : pos_counts[iter->first].targets[titer->first]
1464 0 : = titer->second;
1465 : else
1466 0 : pos_counts[iter->first].targets[titer->first]
1467 0 : += titer->second;
1468 : }
1469 : }
1470 :
1471 : /* Make inline function FN offline.
1472 : If toplevel function of same name already exists, then merge profiles.
1473 : Otherwise turn FN toplevel. Return true if new toplevel function
1474 : was introduced.
1475 : If new toplevel functions are created and NEW_FUNCTIONS != NULL,
1476 : add them to NEW_FUNCTIONS.
1477 :
1478 : TODO: When offlining indirect call we lose information about the
1479 : call target. It should be possible to add it into
1480 : targets histogram. */
1481 :
1482 : bool
1483 0 : function_instance::offline (function_instance *fn,
1484 : vec <function_instance *> &new_functions)
1485 : {
1486 0 : gcc_checking_assert (fn->inlined_to ());
1487 0 : for (function_instance *s = fn->inlined_to (); s; s = s->inlined_to ())
1488 : {
1489 0 : s->total_count_ -= fn->total_count ();
1490 0 : gcc_checking_assert (s->total_count_ >= 0);
1491 : }
1492 0 : function_instance *to
1493 0 : = afdo_source_profile->get_function_instance_by_descriptor (
1494 : fn->get_descriptor ());
1495 0 : fn->set_inlined_to (NULL);
1496 : /* If there is offline function of same name, we need to merge profile.
1497 : Delay this by adding function to a worklist so we do not run into
1498 : problem with recursive inlining. */
1499 0 : if (to)
1500 : {
1501 0 : if (fn->in_worklist_p ())
1502 : return false;
1503 0 : fn->set_in_worklist ();
1504 0 : new_functions.safe_push (fn);
1505 0 : if (dump_file)
1506 : {
1507 0 : fprintf (dump_file, " Recoding duplicate: ");
1508 0 : to->dump_inline_stack (dump_file);
1509 0 : fprintf (dump_file, "\n");
1510 : }
1511 0 : return true;
1512 : }
1513 0 : if (dump_file)
1514 : {
1515 0 : fprintf (dump_file, " Added as offline instance: ");
1516 0 : fn->dump_inline_stack (dump_file);
1517 0 : fprintf (dump_file, "\n");
1518 : }
1519 0 : if (fn->total_count ())
1520 0 : fn->head_count_ = -1;
1521 0 : afdo_source_profile->add_function_instance (fn);
1522 0 : fn->set_in_worklist ();
1523 0 : new_functions.safe_push (fn);
1524 0 : return true;
1525 : }
1526 :
1527 : /* Offline all inlined functions with name in SEEN.
1528 : If new toplevel functions are created, add them to NEW_FUNCTIONS. */
1529 :
1530 : void
1531 0 : function_instance::offline_if_in_set (name_index_set &seen,
1532 : vec <function_instance *> &new_functions)
1533 : {
1534 0 : for (callsite_map::const_iterator iter = callsites.begin ();
1535 0 : iter != callsites.end ();)
1536 0 : if (seen.contains (iter->first.second))
1537 : {
1538 0 : function_instance *f = iter->second;
1539 0 : if (dump_file)
1540 : {
1541 0 : fprintf (dump_file, "Offlining function inlined to other module: ");
1542 0 : f->dump_inline_stack (dump_file);
1543 0 : fprintf (dump_file, "\n");
1544 : }
1545 0 : iter = callsites.erase (iter);
1546 0 : function_instance::offline (f, new_functions);
1547 : /* Start from beginning as merging might have offlined
1548 : some functions in the case of recursive inlining. */
1549 0 : iter = callsites.begin ();
1550 : }
1551 : else
1552 : {
1553 0 : iter->second->offline_if_in_set (seen, new_functions);
1554 0 : ++iter;
1555 : }
1556 0 : }
1557 :
1558 : /* Try to check if inlined_fn can correspond to a call of function N.
1559 : Return non-zero if it corresponds and 2 if renaming was done. */
1560 :
1561 : static int
1562 0 : match_with_target (cgraph_node *n,
1563 : gimple *stmt,
1564 : function_instance *inlined_fn,
1565 : cgraph_node *orig_callee)
1566 : {
1567 0 : cgraph_node *callee = orig_callee->ultimate_alias_target ();
1568 0 : const char *symbol_name = raw_symbol_name (callee->decl);
1569 0 : const char *name
1570 0 : = afdo_string_table->get_symbol_name (inlined_fn->symbol_name ());
1571 0 : if (strcmp (name, symbol_name))
1572 : {
1573 0 : int i;
1574 0 : bool in_suffix = false;
1575 0 : for (i = 0; i; i++)
1576 : {
1577 : if (name[i] != symbol_name[i])
1578 : break;
1579 : if (name[i] == '.')
1580 : in_suffix = true;
1581 : }
1582 : /* Accept dwarf names and stripped suffixes. */
1583 0 : if (!strcmp (lang_hooks.dwarf_name (callee->decl, 0),
1584 : afdo_string_table->get_symbol_name (
1585 : inlined_fn->symbol_name ()))
1586 0 : || (!name[i] && symbol_name[i] == '.') || in_suffix)
1587 : {
1588 0 : int index = afdo_string_table->get_index (symbol_name);
1589 0 : if (index == -1)
1590 0 : index = afdo_string_table->add_symbol_name (
1591 0 : xstrdup (symbol_name),
1592 : afdo_string_table->add_filename (
1593 0 : get_normalized_path (DECL_SOURCE_FILE (callee->decl))));
1594 0 : if (dump_file)
1595 0 : fprintf (dump_file,
1596 : " Renaming inlined call target %s to %s\n",
1597 : name, symbol_name);
1598 0 : inlined_fn->set_symbol_name (index);
1599 0 : return 2;
1600 : }
1601 : /* Only warn about declarations. It is possible that the function
1602 : is declared as alias in other module and we inlined cross-module. */
1603 0 : if (callee->definition
1604 0 : && warning (OPT_Wauto_profile,
1605 : "auto-profile of %q+F contains inlined "
1606 : "function with symbol name %s instead of symbol name %s",
1607 : n->decl, name, symbol_name))
1608 0 : inform (gimple_location (stmt), "corresponding call");
1609 0 : return 0;
1610 : }
1611 : return 1;
1612 : }
1613 :
1614 : static void
1615 0 : dump_stmt (gimple *stmt, count_info *info, function_instance *inlined_fn,
1616 : inline_stack &stack)
1617 : {
1618 0 : if (dump_file)
1619 : {
1620 0 : fprintf (dump_file, " ");
1621 0 : if (!stack.length ())
1622 0 : fprintf (dump_file, " ");
1623 : else
1624 : {
1625 0 : gcc_checking_assert (stack.length () == 1);
1626 0 : fprintf (dump_file, "%5i", stack[0].afdo_loc >> 16);
1627 0 : if (stack[0].afdo_loc & 65535)
1628 0 : fprintf (dump_file, ".%-5i", stack[0].afdo_loc & 65535);
1629 : else
1630 0 : fprintf (dump_file, " ");
1631 0 : if (info)
1632 0 : fprintf (dump_file, "%9" PRIu64 " ", (int64_t)info->count);
1633 0 : else if (inlined_fn)
1634 0 : fprintf (dump_file, " inlined ");
1635 : else
1636 0 : fprintf (dump_file, " no info ");
1637 : }
1638 0 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1639 : }
1640 0 : }
1641 :
1642 : /* Lookup count and warn about duplicates. */
1643 : count_info *
1644 0 : function_instance::lookup_count (location_t loc, inline_stack &stack,
1645 : cgraph_node *node)
1646 : {
1647 0 : gcc_checking_assert (stack.length () < 2);
1648 0 : if (stack.length ())
1649 : {
1650 0 : int c = pos_counts.count (stack[0].afdo_loc);
1651 0 : if (c > 1
1652 : && warning (OPT_Wauto_profile,
1653 : "duplicated count information"
1654 : " in auto-profile of %q+F"
1655 : " with relative location %i discriminator %i",
1656 : node->decl, stack[0].afdo_loc >> 16,
1657 : stack[0].afdo_loc & 65535))
1658 : inform (loc, "corresponding source location");
1659 0 : if (c)
1660 0 : return &pos_counts[stack[0].afdo_loc];
1661 : }
1662 : return NULL;
1663 : }
1664 :
1665 : /* Mark expr locations as used. */
1666 : void
1667 0 : mark_expr_locations (function_instance *f, tree t, cgraph_node *node,
1668 : hash_set<const count_info *> &counts)
1669 : {
1670 0 : inline_stack stack;
1671 0 : return;
1672 : if (!t)
1673 : return;
1674 : do
1675 : {
1676 : get_inline_stack_in_node (EXPR_LOCATION (t), &stack, node);
1677 : /* FIXME: EXPR_LOCATION does not always originate from current
1678 : function. */
1679 : if (stack.length () > 1)
1680 : break;
1681 : count_info *info = f->lookup_count (EXPR_LOCATION (t), stack, node);
1682 : if (info)
1683 : counts.add (info);
1684 : if (handled_component_p (t))
1685 : t = TREE_OPERAND (t, 0);
1686 : else
1687 : break;
1688 : }
1689 : while (true);
1690 0 : }
1691 :
1692 : /* Match function instance with gimple body.
1693 : Report mismatches, attempt to fix them if possible and remove data we will
1694 : not use.
1695 :
1696 : Set location and call_location so we can output diagnostics and know what
1697 : functions was already matched. */
1698 :
1699 : bool
1700 0 : function_instance::match (cgraph_node *node,
1701 : vec <function_instance *> &new_functions,
1702 : name_index_map &to_symbol_name)
1703 : {
1704 0 : if (get_location () != UNKNOWN_LOCATION)
1705 : return false;
1706 0 : set_location (DECL_SOURCE_LOCATION (node->decl));
1707 0 : if (dump_file)
1708 : {
1709 0 : fprintf (dump_file,
1710 : "\nMatching gimple function %s with auto profile: ",
1711 : node->dump_name ());
1712 0 : dump_inline_stack (dump_file);
1713 0 : fprintf (dump_file, "\n");
1714 : }
1715 0 : basic_block bb;
1716 : /* Sets used to track if entires in auto-profile are useful. */
1717 0 : hash_set<const count_info *> counts;
1718 0 : hash_set<const count_info *> targets;
1719 0 : hash_set<const function_instance *> functions;
1720 0 : hash_set<const function_instance *> functions_to_offline;
1721 :
1722 : /* We try to fill in lost disciminator if there is unique call
1723 : with given line number. This map is used to record them. */
1724 0 : hash_map<int_hash <int, -1, -2>,auto_vec <gcall *>> lineno_to_call;
1725 0 : bool lineno_to_call_computed = false;
1726 :
1727 0 : for (tree arg = DECL_ARGUMENTS (node->decl); arg; arg = DECL_CHAIN (arg))
1728 : {
1729 0 : inline_stack stack;
1730 :
1731 0 : get_inline_stack_in_node (DECL_SOURCE_LOCATION (arg), &stack, node);
1732 0 : count_info *info = lookup_count (DECL_SOURCE_LOCATION (arg), stack, node);
1733 0 : if (stack.length () && dump_file)
1734 : {
1735 0 : gcc_checking_assert (stack.length () == 1);
1736 0 : fprintf (dump_file, "%5i", stack[0].afdo_loc >> 16);
1737 0 : if (stack[0].afdo_loc & 65535)
1738 0 : fprintf (dump_file, " .%-5i arg", stack[0].afdo_loc & 65535);
1739 : else
1740 0 : fprintf (dump_file, " arg ");
1741 0 : print_generic_expr (dump_file, arg);
1742 0 : fprintf (dump_file, "\n");
1743 : }
1744 0 : if (info)
1745 0 : counts.add (info);
1746 0 : }
1747 0 : FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
1748 : {
1749 0 : if (dump_file)
1750 0 : fprintf (dump_file, " basic block %i\n", bb->index);
1751 0 : for (gphi_iterator gpi = gsi_start_phis (bb);
1752 0 : !gsi_end_p (gpi);
1753 0 : gsi_next (&gpi))
1754 : {
1755 0 : gphi *phi = gpi.phi ();
1756 0 : inline_stack stack;
1757 :
1758 : /* We do not assign discriminators to PHI nodes.
1759 : In case we every start using them, we wil need to
1760 : update tree-cfg.cc::assign_discriminators. */
1761 0 : gcc_assert (gimple_location (phi) == UNKNOWN_LOCATION);
1762 0 : get_inline_stack_in_node (gimple_location (phi), &stack, node);
1763 0 : count_info *info = lookup_count (gimple_location (phi), stack, node);
1764 0 : gcc_assert (!info);
1765 0 : dump_stmt (phi, info, NULL, stack);
1766 0 : counts.add (info);
1767 0 : for (edge e : bb->succs)
1768 : {
1769 0 : location_t phi_loc
1770 0 : = gimple_phi_arg_location_from_edge (phi, e);
1771 0 : inline_stack stack;
1772 0 : get_inline_stack_in_node (phi_loc, &stack, node);
1773 0 : count_info *info = lookup_count (phi_loc, stack, node);
1774 0 : if (info)
1775 0 : counts.add (info);
1776 0 : gcc_checking_assert (stack.length () < 2);
1777 0 : mark_expr_locations (this,
1778 : gimple_phi_arg_def_from_edge (phi, e),
1779 : node, counts);
1780 0 : }
1781 0 : }
1782 : /* TODO: goto locuses are not used for BB annotation. */
1783 0 : for (edge e : bb->succs)
1784 : {
1785 0 : inline_stack stack;
1786 0 : get_inline_stack_in_node (e->goto_locus, &stack, node);
1787 0 : count_info *info = lookup_count (e->goto_locus, stack, node);
1788 0 : if (info)
1789 0 : counts.add (info);
1790 0 : }
1791 0 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
1792 0 : !gsi_end_p (gsi); gsi_next (&gsi))
1793 : {
1794 0 : inline_stack stack;
1795 0 : gimple *stmt = gsi_stmt (gsi);
1796 0 : get_inline_stack_in_node (gimple_location (stmt), &stack, node);
1797 :
1798 0 : count_info *info = lookup_count (gimple_location (stmt), stack, node);
1799 0 : if (info)
1800 0 : counts.add (info);
1801 0 : for (unsigned int op = 0; op < gimple_num_ops (stmt); op++)
1802 0 : mark_expr_locations (this, gimple_op (stmt, op), node, counts);
1803 0 : if (gimple_code (stmt) == GIMPLE_CALL)
1804 : {
1805 0 : function_instance *inlined_fn = NULL;
1806 0 : function_instance *inlined_fn_nodisc = NULL;
1807 : /* Lookup callsite. */
1808 0 : if (stack.length ())
1809 : {
1810 0 : int c = 0;
1811 0 : int cnodis = 0;
1812 0 : for (auto const &iter : callsites)
1813 0 : if (iter.first.first == stack[0].afdo_loc)
1814 : {
1815 0 : if (!c)
1816 0 : inlined_fn = iter.second;
1817 0 : c++;
1818 : }
1819 : /* Discriminators are sometimes lost; try to find the
1820 : call without discriminator info. */
1821 0 : else if (iter.first.first == (stack[0].afdo_loc & ~65535))
1822 : {
1823 0 : if (!cnodis)
1824 0 : inlined_fn_nodisc = iter.second;
1825 0 : cnodis++;
1826 : }
1827 0 : if ((c > 1 || (!c && cnodis > 1))
1828 0 : && warning (OPT_Wauto_profile,
1829 : "duplicated callsite in auto-profile of %q+F"
1830 : " with relative location %i,"
1831 : " discriminator %i",
1832 0 : node->decl, stack[0].afdo_loc >> 16,
1833 0 : stack[0].afdo_loc & 65535))
1834 0 : inform (gimple_location (stmt), "corresponding call");
1835 0 : if (inlined_fn && info && info->targets.size ()
1836 0 : && warning (OPT_Wauto_profile,
1837 : "both call targets and inline callsite"
1838 : " information is present in auto-profile"
1839 : " of function %q+F with relative location"
1840 : " %i, discriminator %i",
1841 0 : node->decl, stack[0].afdo_loc >> 16,
1842 0 : stack[0].afdo_loc & 65535))
1843 0 : inform (gimple_location (stmt), "corresponding call");
1844 0 : tree callee = gimple_call_fndecl (stmt);
1845 0 : cgraph_node *callee_node;
1846 0 : unsigned int loc = stack[0].afdo_loc;
1847 0 : bool lost_discriminator = false;
1848 0 : if (!inlined_fn && inlined_fn_nodisc)
1849 : {
1850 0 : if (!lineno_to_call_computed)
1851 : {
1852 0 : basic_block bb2;
1853 0 : FOR_EACH_BB_FN (bb2,
1854 : DECL_STRUCT_FUNCTION (node->decl))
1855 0 : for (gimple_stmt_iterator gsi2
1856 0 : = gsi_start_bb (bb2);
1857 0 : !gsi_end_p (gsi2); gsi_next (&gsi2))
1858 0 : if (gcall *call
1859 0 : = dyn_cast <gcall *> (gsi_stmt (gsi2)))
1860 : {
1861 0 : inline_stack stack2;
1862 0 : get_inline_stack_in_node
1863 0 : (gimple_location (call),
1864 : &stack2, node);
1865 0 : if (stack2.length ())
1866 0 : lineno_to_call.get_or_insert
1867 0 : (stack2[0].afdo_loc >> 16).safe_push (call);
1868 0 : }
1869 : lineno_to_call_computed = true;
1870 : }
1871 : /* If we can determine lost discriminator uniquely,
1872 : use it. */
1873 0 : if (lineno_to_call.get
1874 0 : (stack[0].afdo_loc >> 16)->length () == 1)
1875 : {
1876 0 : if (warning (OPT_Wauto_profile,
1877 : "auto-profile of %q+F seem to contain"
1878 : " lost discriminator %i for"
1879 : " call of %s at relative location %i",
1880 : node->decl, loc & 65535,
1881 : afdo_string_table->get_symbol_name (
1882 : inlined_fn_nodisc->symbol_name ()),
1883 : loc >> 16))
1884 0 : inform (gimple_location (stmt),
1885 : "corresponding call");
1886 0 : inlined_fn = inlined_fn_nodisc;
1887 0 : if (dump_file)
1888 0 : fprintf (dump_file, " Lost discriminator %i\n",
1889 : loc & 65535);
1890 0 : loc = loc & ~65535;
1891 : }
1892 : lost_discriminator = true;
1893 : }
1894 0 : if (callee && (callee_node = cgraph_node::get (callee)))
1895 : {
1896 0 : if (inlined_fn)
1897 : {
1898 0 : int old_name = inlined_fn->symbol_name ();
1899 0 : int r = match_with_target (node, stmt, inlined_fn,
1900 : callee_node);
1901 0 : if (r == 2)
1902 : {
1903 0 : auto iter = callsites.find ({loc, old_name});
1904 0 : gcc_checking_assert (
1905 : old_name != inlined_fn->symbol_name ()
1906 : && iter != callsites.end ()
1907 : && iter->second == inlined_fn);
1908 0 : callsite key2 = {stack[0].afdo_loc,
1909 0 : inlined_fn->symbol_name ()};
1910 0 : callsites.erase (iter);
1911 0 : callsites[key2] = inlined_fn;
1912 : }
1913 0 : if (r)
1914 0 : functions.add (inlined_fn);
1915 : else
1916 0 : functions_to_offline.add (inlined_fn);
1917 : }
1918 :
1919 0 : if (info && info->targets.size () > 1)
1920 0 : warning_at (gimple_location (stmt), OPT_Wauto_profile,
1921 : "auto-profile of %q+F contains multiple"
1922 : " targets for a direct call with relative"
1923 : " location %i, discriminator %i",
1924 0 : node->decl, stack[0].afdo_loc >> 16,
1925 0 : stack[0].afdo_loc & 65535);
1926 : /* We do not need target profile for direct calls. */
1927 0 : if (info)
1928 0 : info->targets.clear ();
1929 : }
1930 : else
1931 : {
1932 0 : if (inlined_fn
1933 0 : && inlined_fn->get_call_location ()
1934 : != UNKNOWN_LOCATION)
1935 : {
1936 0 : if (warning (OPT_Wauto_profile,
1937 : "function contains two calls of the same"
1938 : " relative location +%i,"
1939 : " discriminator %i,"
1940 : " that leads to lost auto-profile",
1941 : loc >> 16,
1942 : loc & 65535))
1943 : {
1944 0 : inform (gimple_location (stmt),
1945 : "location of the first call");
1946 0 : inform (inlined_fn->get_call_location (),
1947 : "location of the second call");
1948 : }
1949 0 : if (dump_file)
1950 0 : fprintf (dump_file,
1951 : " Duplicated call location\n");
1952 0 : inlined_fn = NULL;
1953 : }
1954 0 : if (inlined_fn)
1955 : {
1956 0 : inlined_fn->set_call_location
1957 0 : (gimple_location (stmt));
1958 : /* Do renaming if needed so we can look up
1959 : cgraph node and recurse into inlined function. */
1960 0 : int *newn
1961 0 : = to_symbol_name.get (inlined_fn->symbol_name ());
1962 0 : gcc_checking_assert (
1963 : !newn || *newn != inlined_fn->symbol_name ());
1964 0 : if (newn || lost_discriminator)
1965 : {
1966 0 : auto iter = callsites.find (
1967 0 : {loc, inlined_fn->symbol_name ()});
1968 0 : gcc_checking_assert (iter != callsites.end ()
1969 : && iter->second
1970 : == inlined_fn);
1971 0 : callsite key2
1972 0 : = {stack[0].afdo_loc,
1973 0 : newn ? *newn : inlined_fn->symbol_name ()};
1974 0 : callsites.erase (iter);
1975 0 : callsites[key2] = inlined_fn;
1976 0 : inlined_fn->set_symbol_name (
1977 0 : newn ? *newn : inlined_fn->symbol_name ());
1978 : }
1979 0 : functions.add (inlined_fn);
1980 : }
1981 0 : if (info)
1982 0 : targets.add (info);
1983 : }
1984 : }
1985 0 : dump_stmt (stmt, info, inlined_fn, stack);
1986 : }
1987 : else
1988 0 : dump_stmt (stmt, info, NULL, stack);
1989 0 : }
1990 : }
1991 0 : bool warned = false;
1992 0 : for (auto &iter : pos_counts)
1993 0 : if (iter.second.targets.size ()
1994 0 : && counts.contains (&iter.second)
1995 0 : && !targets.contains (&iter.second))
1996 : {
1997 0 : if (!warned)
1998 0 : warned = warning_at
1999 0 : (DECL_SOURCE_LOCATION (node->decl),
2000 0 : OPT_Wauto_profile,
2001 : "auto-profile of %q+F contains indirect call targets"
2002 : " not associated with an indirect call statement",
2003 : node->decl);
2004 0 : if (warned)
2005 0 : inform (DECL_SOURCE_LOCATION (node->decl),
2006 : "count %" PRIu64
2007 : " with relative location +%i, discriminator %i",
2008 0 : iter.second.count, iter.first >> 16, iter.first & 65535);
2009 0 : if (dump_file)
2010 : {
2011 0 : fprintf (dump_file, "Removing targets of ");
2012 0 : dump_afdo_loc (dump_file, iter.first);
2013 0 : fprintf (dump_file, "\n");
2014 : }
2015 0 : iter.second.targets.clear ();
2016 : }
2017 0 : warned = false;
2018 : /* Profile sometimes contains extra location for start or end of function
2019 : (prologue, epilogue).
2020 : TODO: If present, perhaps it can be used to determine entry block
2021 : and exit block counts. */
2022 0 : unsigned int end_location = get_combined_location
2023 0 : (DECL_STRUCT_FUNCTION (node->decl)->function_end_locus, node->decl);
2024 0 : unsigned int start_location = get_combined_location
2025 0 : (DECL_STRUCT_FUNCTION (node->decl)->function_start_locus, node->decl);
2026 : /* When outputting code to builtins location we use line number 0.
2027 : create_gcov is stupid and happily computes offsets across files.
2028 : Silently ignore it. */
2029 0 : unsigned int zero_location
2030 0 : = ((unsigned)(1-DECL_SOURCE_LINE (node->decl))) << 16;
2031 0 : for (position_count_map::const_iterator iter = pos_counts.begin ();
2032 0 : iter != pos_counts.end ();)
2033 0 : if (!counts.contains (&iter->second))
2034 : {
2035 0 : if (iter->first != end_location
2036 : && iter->first != start_location
2037 : && (iter->first & 65535) != zero_location
2038 : && iter->first
2039 : /* FIXME: dwarf5 does not represent inline stack of debug
2040 : statements and consequently create_gcov is sometimes
2041 : mixing up statements from other functions. Do not warn
2042 : user about this until this problem is solved.
2043 : We still write info into dump file. */
2044 : && 0)
2045 : {
2046 : if (!warned)
2047 : warned = warning_at (DECL_SOURCE_LOCATION (node->decl),
2048 : OPT_Wauto_profile,
2049 : "auto-profile of %q+F contains extra statements",
2050 : node->decl);
2051 : if (warned)
2052 : inform (DECL_SOURCE_LOCATION (node->decl),
2053 : "count %" PRIu64 " with relative location +%i,"
2054 : " discriminator %i",
2055 : iter->second.count, iter->first >> 16,
2056 : iter->first & 65535);
2057 : if ((iter->first >> 16) > (end_location >> 16) && warned)
2058 : inform (DECL_SOURCE_LOCATION (node->decl),
2059 : "location is after end of function");
2060 : }
2061 0 : if (dump_file)
2062 : {
2063 0 : fprintf (dump_file, "Removing unmatched count ");
2064 0 : dump_afdo_loc (dump_file, iter->first);
2065 0 : fprintf (dump_file, ":%" PRIu64, iter->second.count);
2066 0 : for (auto &titer : iter->second.targets)
2067 0 : fprintf (dump_file, " %s:%" PRIu64,
2068 0 : afdo_string_table->get_symbol_name (titer.first),
2069 0 : (int64_t) titer.second);
2070 0 : fprintf (dump_file, "\n");
2071 : }
2072 0 : iter = pos_counts.erase (iter);
2073 : }
2074 : else
2075 0 : iter++;
2076 0 : warned = false;
2077 0 : for (callsite_map::const_iterator iter = callsites.begin ();
2078 0 : iter != callsites.end ();)
2079 0 : if (!functions.contains (iter->second))
2080 : {
2081 0 : function_instance *f = iter->second;
2082 : /* If we did not see the corresponding statement, warn. */
2083 0 : if (!functions_to_offline.contains (iter->second))
2084 : {
2085 0 : if (!warned)
2086 0 : warned = warning_at (DECL_SOURCE_LOCATION (node->decl),
2087 0 : OPT_Wauto_profile,
2088 : "auto-profile of %q+F contains"
2089 : " extra callsites",
2090 : node->decl);
2091 0 : if (warned)
2092 0 : inform (DECL_SOURCE_LOCATION (node->decl),
2093 : "call of %s with total count %" PRId64
2094 : ", relative location +%i, discriminator %i",
2095 0 : afdo_string_table->get_symbol_name (iter->first.second),
2096 0 : iter->second->total_count (), iter->first.first >> 16,
2097 0 : iter->first.first & 65535);
2098 0 : if ((iter->first.first >> 16) > (end_location >> 16) && warned)
2099 0 : inform (DECL_SOURCE_LOCATION (node->decl),
2100 : "location is after end of function");
2101 0 : if (dump_file)
2102 : {
2103 0 : fprintf (dump_file,
2104 : "Offlining inline with no corresponding gimple stmt ");
2105 0 : f->dump_inline_stack (dump_file);
2106 0 : fprintf (dump_file, "\n");
2107 : }
2108 : }
2109 0 : else if (dump_file)
2110 : {
2111 0 : fprintf (dump_file,
2112 : "Offlining mismatched inline ");
2113 0 : f->dump_inline_stack (dump_file);
2114 0 : fprintf (dump_file, "\n");
2115 : }
2116 0 : callsites.erase (iter);
2117 0 : offline (f, new_functions);
2118 0 : iter = callsites.begin ();
2119 : }
2120 : else
2121 0 : iter++;
2122 0 : for (auto &iter : callsites)
2123 0 : if (cgraph_node *n = iter.second->get_cgraph_node ())
2124 0 : iter.second->match (n, new_functions, to_symbol_name);
2125 0 : return true;
2126 0 : }
2127 :
2128 : /* Walk inlined functions and if their name is not in SEEN
2129 : remove it. Also rename function names as given by
2130 : to_symbol_name map. */
2131 :
2132 : void
2133 0 : function_instance::remove_external_functions
2134 : (name_index_set &seen,
2135 : name_index_map &to_symbol_name,
2136 : vec <function_instance *> &new_functions)
2137 : {
2138 0 : auto_vec <callsite, 20> to_rename;
2139 :
2140 0 : for (callsite_map::const_iterator iter = callsites.begin ();
2141 0 : iter != callsites.end ();)
2142 0 : if (!seen.contains (iter->first.second))
2143 : {
2144 0 : function_instance *f = iter->second;
2145 0 : if (dump_file)
2146 : {
2147 0 : fprintf (dump_file, " Removing external inline: ");
2148 0 : f->dump_inline_stack (dump_file);
2149 0 : fprintf (dump_file, "\n");
2150 : }
2151 0 : iter = callsites.erase (iter);
2152 0 : f->set_inlined_to (NULL);
2153 0 : f->offline_if_in_set (seen, new_functions);
2154 0 : delete f;
2155 : }
2156 : else
2157 : {
2158 0 : gcc_checking_assert ((int) iter->first.second
2159 : == iter->second->symbol_name ());
2160 0 : int *newn = iter->second->get_call_location () == UNKNOWN_LOCATION
2161 0 : ? to_symbol_name.get (iter->first.second)
2162 : : NULL;
2163 0 : if (newn)
2164 : {
2165 0 : gcc_checking_assert (iter->second->inlined_to ());
2166 0 : to_rename.safe_push (iter->first);
2167 : }
2168 0 : iter->second->remove_external_functions
2169 0 : (seen, to_symbol_name, new_functions);
2170 0 : ++iter;
2171 : }
2172 0 : for (auto &key : to_rename)
2173 : {
2174 0 : auto iter = callsites.find (key);
2175 0 : callsite key2 = key;
2176 0 : key2.second = *to_symbol_name.get (key.second);
2177 0 : iter->second->set_symbol_name (key2.second);
2178 0 : callsites.erase (iter);
2179 0 : callsites[key2] = iter->second;
2180 : }
2181 0 : auto_vec <int, 20> target_to_rename;
2182 0 : for (auto &iter : pos_counts)
2183 : {
2184 0 : for (auto const &titer : iter.second.targets)
2185 : {
2186 0 : int *ren = to_symbol_name.get (titer.first);
2187 0 : if (ren)
2188 0 : target_to_rename.safe_push (titer.first);
2189 : }
2190 0 : while (target_to_rename.length ())
2191 : {
2192 0 : int key = target_to_rename.pop ();
2193 0 : int key2 = *to_symbol_name.get (key);
2194 0 : auto i = iter.second.targets.find (key);
2195 0 : if (iter.second.targets.count (key2) == 0)
2196 0 : iter.second.targets[key2] = i->second;
2197 : else
2198 0 : iter.second.targets[key2] += i->second;
2199 0 : iter.second.targets.erase (i);
2200 : }
2201 : }
2202 0 : }
2203 :
2204 : /* Look for inline instances that was not realized and
2205 : remove them while possibly merging them to offline variants. */
2206 :
2207 : void
2208 0 : function_instance::offline_if_not_realized
2209 : (vec <function_instance *> &new_functions)
2210 : {
2211 0 : for (callsite_map::const_iterator iter = callsites.begin ();
2212 0 : iter != callsites.end ();)
2213 0 : if (!iter->second->realized_p ())
2214 : {
2215 0 : function_instance *f = iter->second;
2216 0 : if (dump_file)
2217 : {
2218 0 : fprintf (dump_file, "Offlining unrealized inline ");
2219 0 : f->dump_inline_stack (dump_file);
2220 0 : fprintf (dump_file, "\n");
2221 : }
2222 0 : iter = callsites.erase (iter);
2223 0 : offline (f, new_functions);
2224 : }
2225 : else
2226 : {
2227 0 : iter->second->offline_if_not_realized (new_functions);
2228 0 : ++iter;
2229 : }
2230 0 : }
2231 :
2232 : /* Dump instance to F indented by INDENT. */
2233 :
2234 : void
2235 0 : function_instance::dump (FILE *f, int indent, bool nested) const
2236 : {
2237 0 : if (!nested)
2238 0 : fprintf (f, "%*s%s total:%" PRIu64 " head:%" PRId64 "\n", indent, "",
2239 : afdo_string_table->get_symbol_name (symbol_name ()),
2240 0 : (int64_t) total_count (), (int64_t) head_count ());
2241 : else
2242 0 : fprintf (f, " total:%" PRIu64 "\n", (int64_t)total_count ());
2243 0 : for (auto const &iter : pos_counts)
2244 : {
2245 0 : fprintf (f, "%*s", indent + 2, "");
2246 0 : dump_afdo_loc (f, iter.first);
2247 0 : fprintf (f, ": %" PRIu64, (int64_t)iter.second.count);
2248 :
2249 0 : for (auto const &titer : iter.second.targets)
2250 0 : fprintf (f, " %s:%" PRIu64,
2251 0 : afdo_string_table->get_symbol_name (titer.first),
2252 0 : (int64_t) titer.second);
2253 0 : fprintf (f,"\n");
2254 : }
2255 0 : for (auto const &iter : callsites)
2256 : {
2257 0 : fprintf (f, "%*s", indent + 2, "");
2258 0 : dump_afdo_loc (f, iter.first.first);
2259 0 : fprintf (f, ": %s",
2260 0 : afdo_string_table->get_symbol_name (iter.first.second));
2261 0 : iter.second->dump (f, indent + 2, true);
2262 0 : gcc_checking_assert ((int) iter.first.second
2263 : == iter.second->symbol_name ());
2264 : }
2265 0 : }
2266 :
2267 : /* Dump inline path. */
2268 :
2269 : void
2270 0 : function_instance::dump_inline_stack (FILE *f) const
2271 : {
2272 0 : auto_vec <callsite, 20> stack;
2273 0 : const function_instance *p = this, *s = inlined_to ();
2274 0 : while (s)
2275 : {
2276 0 : bool found = false;
2277 0 : for (callsite_map::const_iterator iter = s->callsites.begin ();
2278 0 : iter != s->callsites.end (); ++iter)
2279 0 : if (iter->second == p)
2280 : {
2281 0 : gcc_checking_assert (
2282 : !found && (int) iter->first.second == p->symbol_name ());
2283 0 : stack.safe_push ({iter->first.first, s->symbol_name ()});
2284 0 : found = true;
2285 : }
2286 0 : gcc_checking_assert (found);
2287 0 : p = s;
2288 0 : s = s->inlined_to ();
2289 : }
2290 0 : for (callsite &s: stack)
2291 : {
2292 0 : fprintf (f, "%s:", afdo_string_table->get_symbol_name (s.second));
2293 0 : dump_afdo_loc (f, s.first);
2294 0 : fprintf (f, " ");
2295 : }
2296 0 : fprintf (f, "%s", afdo_string_table->get_symbol_name (symbol_name ()));
2297 0 : }
2298 :
2299 : /* Dump instance to stderr. */
2300 :
2301 : void
2302 0 : function_instance::debug () const
2303 : {
2304 0 : dump (stderr);
2305 0 : }
2306 :
2307 : /* Return profile info for LOC in INFO. */
2308 :
2309 : bool
2310 0 : function_instance::get_count_info (location_t loc, count_info *info) const
2311 : {
2312 0 : position_count_map::const_iterator iter = pos_counts.find (loc);
2313 0 : if (iter == pos_counts.end ())
2314 : return false;
2315 0 : *info = iter->second;
2316 0 : return true;
2317 : }
2318 :
2319 : /* Read the inlined indirect call target profile for STMT and store it in
2320 : MAP, return the total count for all inlined indirect calls. */
2321 :
2322 : gcov_type
2323 0 : function_instance::find_icall_target_map (tree fn, gcall *stmt,
2324 : icall_target_map *map) const
2325 : {
2326 0 : gcov_type ret = 0;
2327 0 : unsigned stmt_offset = get_relative_location_for_stmt (fn, stmt);
2328 :
2329 0 : for (callsite_map::const_iterator iter = callsites.begin ();
2330 0 : iter != callsites.end (); ++iter)
2331 : {
2332 0 : unsigned callee = iter->second->symbol_name ();
2333 : /* Check if callsite location match the stmt. */
2334 0 : if (iter->first.first != stmt_offset
2335 0 : || iter->second->removed_icall_target ())
2336 0 : continue;
2337 0 : struct cgraph_node *node = cgraph_node::get_for_asmname (
2338 : get_identifier (afdo_string_table->get_symbol_name (callee)));
2339 0 : if (node == NULL)
2340 0 : continue;
2341 0 : (*map)[callee] = iter->second->total_count () * afdo_count_scale;
2342 0 : ret += iter->second->total_count () * afdo_count_scale;
2343 : }
2344 0 : return ret;
2345 : }
2346 :
2347 : /* Remove the inlined indirect call target profile for STMT. */
2348 :
2349 : void
2350 0 : function_instance::remove_icall_target (tree fn, gcall *stmt)
2351 : {
2352 0 : unsigned stmt_offset = get_relative_location_for_stmt (fn, stmt);
2353 0 : int n = 0;
2354 :
2355 0 : for (auto iter : callsites)
2356 0 : if (iter.first.first == stmt_offset)
2357 : {
2358 0 : iter.second->remove_icall_target ();
2359 0 : n++;
2360 : }
2361 : /* TODO: If we add support for multiple targets, we may want to
2362 : remove only those we succesfully inlined. */
2363 0 : gcc_assert (n);
2364 0 : }
2365 :
2366 : /* Offline all functions not defined in the current unit.
2367 : We will not be able to early inline them.
2368 : Doing so early will get VPT decisions more realistic. */
2369 :
2370 : void
2371 0 : autofdo_source_profile::offline_external_functions ()
2372 : {
2373 : /* First check all available definitions and mark their names as
2374 : visible. */
2375 0 : cgraph_node *node;
2376 0 : name_index_set seen;
2377 0 : name_index_map to_symbol_name;
2378 0 : size_t last_name;
2379 :
2380 : /* Add renames erasing suffixes produced by late clones, such as
2381 : .isra, .ipcp. */
2382 0 : for (size_t i = 1; i < afdo_string_table->num_entries (); i++)
2383 : {
2384 0 : const char *n1 = afdo_string_table->get_symbol_name (i);
2385 0 : std::pair<const char *, int> name_filename
2386 0 : = afdo_string_table->get_original_name (n1);
2387 0 : const char *n2 = name_filename.first;
2388 0 : if (!strcmp (n1, n2))
2389 : {
2390 : /* Watch for duplicate entries.
2391 : This seems to happen in practice and may be useful to distinguish
2392 : multiple static symbols of the same name, but we do not realy
2393 : have a way to differentiate them in get_symbol_name lookup. */
2394 0 : int index = afdo_string_table->get_index (n1);
2395 0 : if (index != (int)i)
2396 : {
2397 0 : if (dump_file)
2398 0 : fprintf (dump_file,
2399 : "string table in auto-profile contains"
2400 : " duplicated name %s\n", n1);
2401 0 : to_symbol_name.put (i, index);
2402 : }
2403 0 : continue;
2404 0 : }
2405 0 : if (dump_file)
2406 0 : fprintf (dump_file, "Adding rename removing clone suffixes %s -> %s\n",
2407 : n1, n2);
2408 0 : int index = afdo_string_table->get_index (n2);
2409 0 : if (index == -1)
2410 0 : index = afdo_string_table->add_symbol_name (xstrdup (n2),
2411 : name_filename.second);
2412 0 : to_symbol_name.put (i, index);
2413 : }
2414 0 : last_name = afdo_string_table->num_entries ();
2415 0 : FOR_EACH_DEFINED_FUNCTION (node)
2416 : {
2417 0 : const char *name = raw_symbol_name (node->decl);
2418 0 : const char *dwarf_name = lang_hooks.dwarf_name (node->decl, 0);
2419 0 : int index = afdo_string_table->get_index (name);
2420 :
2421 : /* Inline function may be identified by its dwarf names;
2422 : rename them to symbol names. With LTO dwarf names are
2423 : lost in free_lange_data. */
2424 0 : if (strcmp (name, dwarf_name))
2425 : {
2426 0 : int index2 = afdo_string_table->get_index (dwarf_name);
2427 0 : if (index2 != -1)
2428 : {
2429 0 : if (index == -1)
2430 0 : index = afdo_string_table->add_symbol_name (
2431 0 : xstrdup (name),
2432 : afdo_string_table->add_filename (
2433 0 : get_normalized_path (DECL_SOURCE_FILE (node->decl))));
2434 0 : if (dump_file)
2435 : {
2436 0 : fprintf (dump_file, "Adding dwarf->symbol rename %s -> %s\n",
2437 : afdo_string_table->get_symbol_name (index2), name);
2438 0 : if (to_symbol_name.get (index2))
2439 0 : fprintf (dump_file, "Dwarf name is not unique");
2440 : }
2441 0 : to_symbol_name.put (index2, index);
2442 0 : seen.add (index2);
2443 : }
2444 : }
2445 0 : if (index != -1)
2446 : {
2447 0 : if (dump_file)
2448 0 : fprintf (dump_file, "%s is defined in node %s\n",
2449 : afdo_string_table->get_symbol_name (index),
2450 : node->dump_name ());
2451 0 : seen.add (index);
2452 : }
2453 : else
2454 : {
2455 0 : if (dump_file)
2456 : {
2457 0 : if (dwarf_name && strcmp (dwarf_name, name))
2458 0 : fprintf (dump_file,
2459 : "Node %s not in auto profile (%s neither %s)\n",
2460 : node->dump_name (),
2461 : name,
2462 : dwarf_name);
2463 : else
2464 0 : fprintf (dump_file,
2465 : "Node %s (symbol %s) not in auto profile\n",
2466 : node->dump_name (),
2467 : name);
2468 : }
2469 : }
2470 : }
2471 :
2472 0 : for (auto iter : to_symbol_name)
2473 : {
2474 : /* In case dwarf name was duplicated and later renamed,
2475 : handle both. No more than one hop should be needed. */
2476 0 : int *newn = to_symbol_name.get (iter.second);
2477 0 : if (newn)
2478 0 : iter.second = *newn;
2479 0 : gcc_checking_assert (!to_symbol_name.get (iter.second));
2480 0 : if (seen.contains (iter.second))
2481 0 : seen.add (iter.first);
2482 : }
2483 :
2484 : /* Now process all toplevel (offline) function instances.
2485 :
2486 : If instance has no definition in this translation unit,
2487 : first offline all inlined functions which are defined here
2488 : (so we do not lose profile due to cross-module inlining
2489 : done by link-time optimizers).
2490 :
2491 : If instance has a definition, look into all inlined functions
2492 : and remove external ones (result of cross-module inlining).
2493 :
2494 : TODO: after early-inlining we ought to offline all functions
2495 : that were not inlined. */
2496 0 : vec <function_instance *>&fns = duplicate_functions_;
2497 0 : auto_vec <function_instance *, 20>fns2;
2498 : /* Populate worklist with all functions to process. Processing
2499 : may introduce new functions by offlining. */
2500 0 : for (auto &function : map_)
2501 : {
2502 0 : function.second->set_in_worklist ();
2503 0 : fns.safe_push (function.second);
2504 : }
2505 :
2506 : /* There are two worklists. First all functions needs to be matched
2507 : with gimple body and only then we want to do merging, since matching
2508 : should be done on unmodified profile and merging works better if
2509 : mismatches are already resolved both in source and destination. */
2510 0 : while (fns.length () || fns2.length ())
2511 : {
2512 : /* In case renaming introduced new name, keep seen up to date. */
2513 0 : for (; last_name < afdo_string_table->num_entries (); last_name++)
2514 : {
2515 0 : const char *name = afdo_string_table->get_symbol_name (last_name);
2516 0 : symtab_node *n
2517 0 : = afdo_string_table->get_cgraph_node (last_name);
2518 0 : if (dump_file)
2519 0 : fprintf (dump_file, "New name %s %s\n", name,
2520 : n ? "wth corresponding definition"
2521 : : "with no corresponding definition");
2522 0 : if (n)
2523 0 : seen.add (last_name);
2524 : }
2525 0 : if (fns.length ())
2526 : {
2527 0 : function_instance *f = fns.pop ();
2528 0 : if (f->get_location () == UNKNOWN_LOCATION)
2529 : {
2530 0 : int index = f->symbol_name ();
2531 0 : int *newn = to_symbol_name.get (index);
2532 0 : if (newn)
2533 : {
2534 0 : if (find_function_instance (f->get_descriptor ()) == f)
2535 0 : remove_function_instance (f);
2536 0 : f->set_symbol_name (*newn);
2537 0 : if (!find_function_instance (f->get_descriptor ()))
2538 0 : add_function_instance (f);
2539 : }
2540 0 : if (cgraph_node *n = f->get_cgraph_node ())
2541 : {
2542 0 : gcc_checking_assert (seen.contains (f->symbol_name ()));
2543 0 : f->match (n, fns, to_symbol_name);
2544 : }
2545 : }
2546 0 : fns2.safe_push (f);
2547 : }
2548 : else
2549 : {
2550 0 : function_instance *f = fns2.pop ();
2551 0 : int index = f->symbol_name ();
2552 0 : gcc_checking_assert (f->in_worklist_p ());
2553 :
2554 : /* If map has different function_instance of same name, then
2555 : this is a duplicated entry which needs to be merged. */
2556 0 : function_instance *index_inst
2557 0 : = find_function_instance (f->get_descriptor ());
2558 0 : if (index_inst && index_inst != f)
2559 : {
2560 0 : if (dump_file)
2561 : {
2562 0 : fprintf (dump_file, "Merging duplicate instance: ");
2563 0 : f->dump_inline_stack (dump_file);
2564 0 : fprintf (dump_file, "\n");
2565 : }
2566 0 : index_inst->merge (f, fns);
2567 0 : gcc_checking_assert (!f->inlined_to ());
2568 0 : f->clear_in_worklist ();
2569 0 : delete f;
2570 : }
2571 : /* If name was not seen in the symbol table, remove it. */
2572 0 : else if (!seen.contains (index))
2573 : {
2574 0 : f->offline_if_in_set (seen, fns);
2575 0 : f->clear_in_worklist ();
2576 0 : if (dump_file)
2577 0 : fprintf (dump_file, "Removing external %s\n",
2578 : afdo_string_table->get_symbol_name (
2579 : f->symbol_name ()));
2580 0 : if (index_inst == f)
2581 0 : remove_function_instance (f);
2582 0 : delete f;
2583 : }
2584 : /* If this is offline function instance seen in this
2585 : translation unit offline external inlines and possibly
2586 : rename from dwarf name. */
2587 : else
2588 : {
2589 0 : f->remove_external_functions (seen, to_symbol_name, fns);
2590 0 : f->clear_in_worklist ();
2591 : }
2592 : }
2593 : }
2594 0 : if (dump_file)
2595 0 : for (auto const &function : map_)
2596 : {
2597 0 : seen.contains (function.second->symbol_name ());
2598 0 : function.second->dump (dump_file);
2599 : }
2600 0 : }
2601 :
2602 : /* Walk scope block BLOCK and mark all inlined functions as realized. */
2603 :
2604 : static void
2605 0 : walk_block (tree fn, function_instance *s, tree block)
2606 : {
2607 0 : if (inlined_function_outer_scope_p (block))
2608 : {
2609 0 : unsigned loc = get_relative_location_for_locus
2610 0 : (fn, BLOCK_SUPERCONTEXT (block),
2611 0 : BLOCK_SOURCE_LOCATION (block));
2612 0 : function_instance *ns
2613 : = s->get_function_instance_by_decl
2614 0 : (loc, BLOCK_ABSTRACT_ORIGIN (block),
2615 0 : BLOCK_SOURCE_LOCATION (block));
2616 0 : if (!ns)
2617 : {
2618 0 : if (dump_file)
2619 : {
2620 0 : fprintf (dump_file, " Failed to find inlined instance:");
2621 0 : s->dump_inline_stack (dump_file);
2622 0 : fprintf (dump_file, ":");
2623 0 : dump_afdo_loc (dump_file, loc);
2624 0 : fprintf (dump_file, " %s\n",
2625 0 : raw_symbol_name (BLOCK_ABSTRACT_ORIGIN (block)));
2626 : }
2627 0 : return;
2628 : }
2629 0 : s = ns;
2630 0 : if (dump_file)
2631 : {
2632 0 : fprintf (dump_file, " Marking realized inline: ");
2633 0 : s->dump_inline_stack (dump_file);
2634 0 : fprintf (dump_file, "\n");
2635 : }
2636 0 : s->set_realized ();
2637 : }
2638 0 : for (tree t = BLOCK_SUBBLOCKS (block); t ; t = BLOCK_CHAIN (t))
2639 0 : walk_block (fn, s, t);
2640 : }
2641 :
2642 : /* Offline all inline functions that are not marked as realized.
2643 : This will merge their profile into offline versions where available.
2644 : Also remove all functions we will no longer use. */
2645 :
2646 : void
2647 0 : autofdo_source_profile::offline_unrealized_inlines ()
2648 : {
2649 0 : auto_vec <function_instance *>fns;
2650 : /* Populate worklist with all functions to process. Processing
2651 : may introduce new functions by offlining. */
2652 0 : for (auto const &function : map_)
2653 : {
2654 0 : fns.safe_push (function.second);
2655 0 : function.second->set_in_worklist ();
2656 : }
2657 0 : while (fns.length ())
2658 : {
2659 0 : function_instance *f = fns.pop ();
2660 0 : int index = f->symbol_name ();
2661 0 : function_instance *index_inst
2662 0 : = find_function_instance (f->get_descriptor ());
2663 0 : bool in_map = index_inst != nullptr;
2664 0 : if (in_map)
2665 0 : if (cgraph_node *n = f->get_cgraph_node ())
2666 : {
2667 0 : if (dump_file)
2668 0 : fprintf (dump_file, "Marking realized %s\n",
2669 : afdo_string_table->get_symbol_name (index));
2670 0 : f->set_realized ();
2671 0 : if (DECL_INITIAL (n->decl)
2672 0 : && DECL_INITIAL (n->decl) != error_mark_node)
2673 0 : walk_block (n->decl, f, DECL_INITIAL (n->decl));
2674 : }
2675 0 : f->offline_if_not_realized (fns);
2676 0 : gcc_checking_assert ((in_map || !f->realized_p ())
2677 : && f->in_worklist_p ());
2678 :
2679 : /* If this is duplicated instance, merge it into one in map. */
2680 0 : if (in_map && index_inst != f)
2681 : {
2682 0 : if (dump_file)
2683 : {
2684 0 : fprintf (dump_file, "Merging duplicate instance: ");
2685 0 : f->dump_inline_stack (dump_file);
2686 0 : fprintf (dump_file, "\n");
2687 : }
2688 0 : index_inst->merge (f, fns);
2689 0 : f->clear_in_worklist ();
2690 0 : gcc_checking_assert (!f->inlined_to ());
2691 0 : delete f;
2692 : }
2693 : /* If function is not in symbol table, remove it. */
2694 0 : else if (!f->realized_p ())
2695 : {
2696 0 : if (dump_file)
2697 0 : fprintf (dump_file, "Removing optimized out function %s\n",
2698 : afdo_string_table->get_symbol_name (f->symbol_name ()));
2699 0 : if (in_map)
2700 0 : remove_function_instance (index_inst);
2701 0 : f->clear_in_worklist ();
2702 0 : delete f;
2703 : }
2704 : else
2705 0 : f->clear_in_worklist ();
2706 : }
2707 0 : if (dump_file)
2708 0 : for (auto const &function : map_)
2709 0 : function.second->dump (dump_file);
2710 0 : }
2711 :
2712 : /* Read the profile and create a function_instance with head count as
2713 : HEAD_COUNT. Recursively read callsites to create nested function_instances
2714 : too. STACK is used to track the recursive creation process. */
2715 :
2716 : /* function instance profile format:
2717 :
2718 : ENTRY_COUNT: 8 bytes
2719 : TIMESTAMP: 8 bytes (only for toplevel symbols)
2720 : NAME_INDEX: 4 bytes
2721 : NUM_POS_COUNTS: 4 bytes
2722 : NUM_CALLSITES: 4 bytes
2723 : POS_COUNT_1:
2724 : POS_1_OFFSET: 4 bytes
2725 : NUM_TARGETS: 4 bytes
2726 : COUNT: 8 bytes
2727 : TARGET_1:
2728 : VALUE_PROFILE_TYPE: 4 bytes
2729 : TARGET_IDX: 8 bytes
2730 : COUNT: 8 bytes
2731 : TARGET_2
2732 : ...
2733 : TARGET_n
2734 : POS_COUNT_2
2735 : ...
2736 : POS_COUNT_N
2737 : CALLSITE_1:
2738 : CALLSITE_1_OFFSET: 4 bytes
2739 : FUNCTION_INSTANCE_PROFILE (nested)
2740 : CALLSITE_2
2741 : ...
2742 : CALLSITE_n. */
2743 :
2744 : function_instance *
2745 0 : function_instance::read_function_instance (function_instance_stack *stack,
2746 : bool toplevel)
2747 : {
2748 0 : gcov_type_unsigned timestamp = 0;
2749 0 : gcov_type head_count = -1;
2750 0 : if (toplevel)
2751 : {
2752 0 : head_count = gcov_read_counter ();
2753 0 : timestamp = (gcov_type_unsigned) gcov_read_counter ();
2754 : }
2755 0 : unsigned name = gcov_read_unsigned ();
2756 0 : unsigned num_pos_counts = gcov_read_unsigned ();
2757 0 : unsigned num_callsites = gcov_read_unsigned ();
2758 0 : function_instance *s
2759 : = new function_instance (name,
2760 0 : afdo_string_table->get_filename_by_symbol (name),
2761 0 : head_count);
2762 0 : if (timestamp > 0)
2763 0 : s->set_timestamp (timestamp);
2764 0 : if (!stack->is_empty ())
2765 0 : s->set_inlined_to (stack->last ());
2766 0 : stack->safe_push (s);
2767 :
2768 0 : for (unsigned i = 0; i < num_pos_counts; i++)
2769 : {
2770 0 : unsigned offset = gcov_read_unsigned ();
2771 0 : unsigned num_targets = gcov_read_unsigned ();
2772 0 : gcov_type count = gcov_read_counter ();
2773 0 : s->pos_counts[offset].count = count;
2774 :
2775 0 : for (unsigned j = 0; j < stack->length (); j++)
2776 0 : (*stack)[j]->total_count_ += count;
2777 0 : for (unsigned j = 0; j < num_targets; j++)
2778 : {
2779 : /* Only indirect call target histogram is supported now. */
2780 0 : gcov_read_unsigned ();
2781 0 : gcov_type target_idx = gcov_read_counter ();
2782 0 : s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
2783 : }
2784 : }
2785 0 : for (unsigned i = 0; i < num_callsites; i++)
2786 : {
2787 0 : unsigned offset = gcov_read_unsigned ();
2788 0 : function_instance *callee_function_instance
2789 0 : = read_function_instance (stack, false);
2790 0 : s->callsites[std::make_pair (offset,
2791 0 : callee_function_instance->symbol_name ())]
2792 0 : = callee_function_instance;
2793 : }
2794 0 : stack->pop ();
2795 0 : return s;
2796 : }
2797 :
2798 : /* Member functions for autofdo_source_profile. */
2799 :
2800 0 : autofdo_source_profile::~autofdo_source_profile ()
2801 : {
2802 0 : for (name_function_instance_map::const_iterator iter = map_.begin ();
2803 0 : iter != map_.end (); ++iter)
2804 0 : delete iter->second;
2805 0 : }
2806 :
2807 : /* For a given DECL, returns the top-level function_instance. */
2808 :
2809 : function_instance *
2810 0 : autofdo_source_profile::get_function_instance_by_decl (tree decl, const char *filename) const
2811 : {
2812 0 : if (!filename)
2813 0 : filename = get_normalized_path (DECL_SOURCE_FILE (decl));
2814 0 : int index = afdo_string_table->get_index_by_decl (decl);
2815 0 : if (index == -1)
2816 : return NULL;
2817 :
2818 0 : function_instance_descriptor descriptor (
2819 0 : afdo_string_table->get_filename_index (filename), index);
2820 0 : return find_function_instance (descriptor);
2821 : }
2822 :
2823 : /* For a given DESCRIPTOR, return the matching instance if found. */
2824 :
2825 : function_instance *
2826 0 : autofdo_source_profile::get_function_instance_by_descriptor (
2827 : function_instance_descriptor descriptor) const
2828 : {
2829 0 : return find_function_instance (descriptor);
2830 : }
2831 :
2832 : /* Add function instance FN. */
2833 :
2834 : void
2835 0 : autofdo_source_profile::add_function_instance (function_instance *fn)
2836 : {
2837 0 : gcc_checking_assert (map_.find (fn->get_descriptor ()) == map_.end ());
2838 0 : map_[fn->get_descriptor ()] = fn;
2839 0 : }
2840 :
2841 : /* Find count_info for a given gimple STMT. If found, store the count_info
2842 : in INFO and return true; otherwise return false. */
2843 :
2844 : bool
2845 0 : autofdo_source_profile::get_count_info (gimple *stmt, count_info *info,
2846 : cgraph_node *node) const
2847 : {
2848 0 : gcc_checking_assert (stmt_loc_used_by_debug_info (stmt));
2849 0 : return get_count_info (gimple_location (stmt), info, node);
2850 : }
2851 :
2852 : bool
2853 0 : autofdo_source_profile::get_count_info (location_t gimple_loc,
2854 : count_info *info,
2855 : cgraph_node *node) const
2856 : {
2857 0 : if (LOCATION_LOCUS (gimple_loc) == cfun->function_end_locus)
2858 : return false;
2859 :
2860 0 : inline_stack stack;
2861 0 : get_inline_stack_in_node (gimple_loc, &stack, node);
2862 0 : if (stack.length () == 0)
2863 : return false;
2864 0 : function_instance *s = get_function_instance_by_inline_stack (stack);
2865 0 : if (s == NULL)
2866 : return false;
2867 0 : return s->get_count_info (stack[0].afdo_loc, info);
2868 0 : }
2869 :
2870 : /* Update value profile INFO for STMT from the inlined indirect callsite.
2871 : Return true if INFO is updated. */
2872 :
2873 : bool
2874 0 : autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
2875 : count_info *info,
2876 : cgraph_node *node)
2877 : {
2878 0 : if (dump_file)
2879 : {
2880 0 : fprintf (dump_file, "Checking indirect call -> direct call ");
2881 0 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2882 : }
2883 :
2884 0 : if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
2885 : {
2886 0 : if (dump_file)
2887 0 : fprintf (dump_file, " bad locus (function end)\n");
2888 0 : return false;
2889 : }
2890 :
2891 0 : count_info old_info;
2892 0 : get_count_info (stmt, &old_info, node);
2893 0 : gcov_type total = 0;
2894 0 : for (icall_target_map::const_iterator iter = old_info.targets.begin ();
2895 0 : iter != old_info.targets.end (); ++iter)
2896 0 : total += iter->second;
2897 0 : total *= afdo_count_scale;
2898 :
2899 : /* Program behavior changed, original promoted (and inlined) target is not
2900 : hot any more. Will avoid promote the original target.
2901 :
2902 : To check if original promoted target is still hot, we check the total
2903 : count of the unpromoted targets (stored in TOTAL). If a callsite count
2904 : (stored in INFO) is smaller than half of the total count, the original
2905 : promoted target is considered not hot any more. */
2906 0 : if (info->count < total / 2)
2907 : {
2908 0 : if (dump_file)
2909 0 : fprintf (dump_file, " not hot anymore %ld < %ld",
2910 : (long)info->count,
2911 : (long)total /2);
2912 0 : return false;
2913 : }
2914 :
2915 0 : inline_stack stack;
2916 0 : get_inline_stack_in_node (gimple_location (stmt), &stack, node);
2917 0 : if (stack.length () == 0)
2918 : {
2919 0 : if (dump_file)
2920 0 : fprintf (dump_file, " no inline stack\n");
2921 0 : return false;
2922 : }
2923 0 : function_instance *s = get_function_instance_by_inline_stack (stack);
2924 0 : if (s == NULL)
2925 : {
2926 0 : if (dump_file)
2927 : {
2928 0 : fprintf (dump_file, " function not found in inline stack:");
2929 0 : dump_inline_stack (dump_file, &stack);
2930 : }
2931 0 : return false;
2932 : }
2933 0 : icall_target_map map;
2934 0 : if (s->find_icall_target_map (node ? node->decl
2935 : : current_function_decl,
2936 : stmt, &map) == 0)
2937 : {
2938 0 : if (dump_file)
2939 : {
2940 0 : fprintf (dump_file, " no target map for stack: ");
2941 0 : dump_inline_stack (dump_file, &stack);
2942 : }
2943 0 : return false;
2944 : }
2945 0 : for (icall_target_map::const_iterator iter = map.begin ();
2946 0 : iter != map.end (); ++iter)
2947 0 : info->targets[iter->first] = iter->second;
2948 0 : if (dump_file)
2949 : {
2950 0 : fprintf (dump_file, " looks good; stack:");
2951 0 : dump_inline_stack (dump_file, &stack);
2952 : }
2953 : return true;
2954 0 : }
2955 :
2956 : void
2957 0 : autofdo_source_profile::remove_icall_target (cgraph_edge *e)
2958 : {
2959 0 : autofdo::inline_stack stack;
2960 0 : autofdo::get_inline_stack_in_node (gimple_location (e->call_stmt),
2961 : &stack, e->caller);
2962 0 : autofdo::function_instance *s
2963 0 : = get_function_instance_by_inline_stack (stack);
2964 0 : s->remove_icall_target (e->caller->decl, e->call_stmt);
2965 0 : }
2966 :
2967 : /* Find total count of the callee of EDGE. */
2968 :
2969 : gcov_type
2970 0 : autofdo_source_profile::get_callsite_total_count (
2971 : struct cgraph_edge *edge) const
2972 : {
2973 0 : inline_stack stack;
2974 0 : stack.safe_push ({edge->callee->decl, 0, UNKNOWN_LOCATION});
2975 :
2976 0 : get_inline_stack_in_node (gimple_location (edge->call_stmt), &stack,
2977 : edge->caller);
2978 0 : if (dump_file)
2979 : {
2980 0 : if (!edge->caller->inlined_to)
2981 0 : fprintf (dump_file, "Looking up afdo profile for call %s -> %s stack:",
2982 0 : edge->caller->dump_name (), edge->callee->dump_name ());
2983 : else
2984 0 : fprintf (dump_file, "Looking up afdo profile for call %s -> %s transitively %s stack:",
2985 0 : edge->caller->dump_name (), edge->callee->dump_name (),
2986 : edge->caller->inlined_to->dump_name ());
2987 0 : dump_inline_stack (dump_file, &stack);
2988 : }
2989 :
2990 0 : function_instance *s = get_function_instance_by_inline_stack (stack);
2991 0 : if (s == NULL)
2992 : {
2993 0 : if (dump_file)
2994 0 : fprintf (dump_file, "No function instance found\n");
2995 0 : return 0;
2996 : }
2997 0 : if (afdo_string_table->get_index_by_decl (edge->callee->decl)
2998 0 : != s->symbol_name ())
2999 : {
3000 0 : if (dump_file)
3001 0 : fprintf (dump_file, "Mismatched name of callee %s and profile %s\n",
3002 0 : raw_symbol_name (edge->callee->decl),
3003 : afdo_string_table->get_symbol_name (s->symbol_name ()));
3004 0 : return 0;
3005 : }
3006 :
3007 0 : return s->total_count () * afdo_count_scale;
3008 0 : }
3009 :
3010 : /* Read AutoFDO profile and returns TRUE on success. */
3011 :
3012 : /* source profile format:
3013 :
3014 : GCOV_TAG_AFDO_FUNCTION: 4 bytes
3015 : LENGTH: 4 bytes
3016 : NUM_FUNCTIONS: 4 bytes
3017 : FUNCTION_INSTANCE_1
3018 : FUNCTION_INSTANCE_2
3019 : ...
3020 : FUNCTION_INSTANCE_N. */
3021 :
3022 : bool
3023 0 : autofdo_source_profile::read ()
3024 : {
3025 0 : if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
3026 : {
3027 0 : inform (UNKNOWN_LOCATION, "Not expected TAG.");
3028 0 : return false;
3029 : }
3030 :
3031 0 : gcc_checking_assert (!afdo_source_profile);
3032 0 : afdo_source_profile = this;
3033 :
3034 : /* Skip the length of the section. */
3035 0 : gcov_read_unsigned ();
3036 :
3037 : /* Read in the function/callsite profile, and store it in local
3038 : data structure. */
3039 0 : unsigned function_num = gcov_read_unsigned ();
3040 0 : for (unsigned i = 0; i < function_num; i++)
3041 : {
3042 0 : function_instance::function_instance_stack stack;
3043 0 : function_instance *s
3044 0 : = function_instance::read_function_instance (&stack);
3045 :
3046 0 : if (find_function_instance (s->get_descriptor ()) == nullptr)
3047 0 : add_function_instance (s);
3048 : else
3049 0 : fatal_error (UNKNOWN_LOCATION,
3050 : "auto-profile contains duplicated function instance %s",
3051 : afdo_string_table->get_symbol_name (s->symbol_name ()));
3052 0 : s->prop_timestamp ();
3053 0 : timestamp_info_map.insert({s->timestamp (), 0});
3054 0 : }
3055 :
3056 : /* timestamp_info_map is std::map with timestamp as key,
3057 : so it's already sorted in ascending order wrt timestamps.
3058 : This loop maps function with lowest timestamp to 1, and so on.
3059 : In afdo_annotate_cfg, node->tp_first_run is then set to corresponding
3060 : tp_first_run value. */
3061 :
3062 0 : int tp_first_run = 1;
3063 0 : for (auto &p : timestamp_info_map)
3064 0 : p.second = tp_first_run++;
3065 :
3066 0 : afdo_profile_info->sum_max = afdo_summary_info->max_count;
3067 : /* Scale up the profile, but leave some bits in case some counts gets
3068 : bigger than sum_max eventually. */
3069 0 : if (afdo_profile_info->sum_max)
3070 0 : afdo_count_scale
3071 0 : = MAX (((gcov_type)1 << (profile_count::n_bits - 10))
3072 : / afdo_profile_info->sum_max, 1);
3073 0 : afdo_profile_info->cutoff *= afdo_count_scale;
3074 : /* Derive the hot count threshold from the profile summary. */
3075 0 : afdo_hot_bb_threshold = afdo_summary_info->get_threshold_count (
3076 0 : param_hot_bb_count_ws_permille * 1000)
3077 0 : * afdo_count_scale;
3078 0 : set_hot_bb_threshold (afdo_hot_bb_threshold);
3079 0 : if (dump_file)
3080 0 : fprintf (dump_file,
3081 : "Max count in profile %" PRIu64 "\n"
3082 : "Setting scale %" PRIu64 "\n"
3083 : "Scaled max count %" PRIu64 "\n"
3084 : "Cutoff %" PRIu64 "\n"
3085 : "Unscaled hot count threshold %" PRIu64 "\n"
3086 : "Hot count threshold %" PRIu64 "\n\n",
3087 : (int64_t) afdo_profile_info->sum_max, (int64_t) afdo_count_scale,
3088 0 : (int64_t) (afdo_profile_info->sum_max * afdo_count_scale),
3089 0 : (int64_t) afdo_profile_info->cutoff,
3090 0 : (int64_t) afdo_summary_info->get_threshold_count (
3091 0 : param_hot_bb_count_ws_permille * 1000),
3092 : (int64_t) afdo_hot_bb_threshold);
3093 0 : afdo_profile_info->sum_max *= afdo_count_scale;
3094 0 : return true;
3095 : }
3096 :
3097 : /* Return the function_instance in the profile that correspond to the
3098 : inline STACK. */
3099 :
3100 : function_instance *
3101 0 : autofdo_source_profile::get_function_instance_by_inline_stack (
3102 : const inline_stack &stack) const
3103 : {
3104 0 : function_instance_descriptor descriptor (
3105 : afdo_string_table->get_filename_index (
3106 0 : get_normalized_path (DECL_SOURCE_FILE (stack[stack.length () - 1].decl))),
3107 0 : afdo_string_table->get_index_by_decl (stack[stack.length () - 1].decl));
3108 0 : function_instance *s = find_function_instance (descriptor);
3109 :
3110 0 : if (s == NULL)
3111 : {
3112 0 : if (dump_file)
3113 0 : fprintf (dump_file, "No offline instance for %s\n",
3114 0 : raw_symbol_name (stack[stack.length () - 1].decl));
3115 0 : return NULL;
3116 : }
3117 :
3118 0 : for (unsigned i = stack.length () - 1; i > 0; i--)
3119 : {
3120 0 : s = s->get_function_instance_by_decl (stack[i].afdo_loc,
3121 0 : stack[i - 1].decl,
3122 0 : stack[i].location);
3123 0 : if (s == NULL)
3124 : {
3125 : /* afdo inliner extends the stack by last entry with unknown
3126 : location while checking if function was inlined during train run.
3127 : We do not want to print diagnostics about every function
3128 : which is not inlined. */
3129 : if (s && dump_enabled_p () && stack[i].location != UNKNOWN_LOCATION)
3130 : dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS,
3131 : dump_user_location_t::from_location_t
3132 : (stack[i].location),
3133 : "auto-profile has no inlined function instance "
3134 : "for inlined call of %s at relative "
3135 : " location +%i, discriminator %i\n",
3136 : raw_symbol_name (stack[i - 1].decl),
3137 : stack[i].afdo_loc >> 16,
3138 : stack[i].afdo_loc & 65535);
3139 : return NULL;
3140 : }
3141 : }
3142 : return s;
3143 : }
3144 :
3145 : /* Find the matching function instance which has DESCRIPTOR as its
3146 : descriptor. If not found, also try checking if an instance exists with the
3147 : same name which has no associated filename. */
3148 :
3149 : autofdo_source_profile::name_function_instance_map::const_iterator
3150 0 : autofdo_source_profile::find_iter_for_function_instance (
3151 : function_instance_descriptor descriptor) const
3152 : {
3153 0 : auto it = map_.find (descriptor);
3154 :
3155 : /* Try searching for the symbol not having a filename if it isn't found. */
3156 0 : if (it == map_.end ())
3157 0 : it = map_.find (
3158 0 : function_instance_descriptor (string_table::unknown_filename,
3159 0 : (int) descriptor.symbol_name ()));
3160 0 : return it;
3161 : }
3162 :
3163 : /* Similar to the above, but return a pointer to the instance instead of an
3164 : iterator. */
3165 :
3166 : function_instance *
3167 0 : autofdo_source_profile::find_function_instance (
3168 : function_instance_descriptor descriptor) const
3169 : {
3170 0 : auto it = find_iter_for_function_instance (descriptor);
3171 0 : return it == map_.end () ? NULL : it->second;
3172 : }
3173 :
3174 : /* Remove a function instance from the map. Returns true if the entry was
3175 : actually deleted. */
3176 :
3177 : bool
3178 0 : autofdo_source_profile::remove_function_instance (function_instance *inst)
3179 : {
3180 0 : auto iter = find_iter_for_function_instance (inst->get_descriptor ());
3181 0 : if (iter != map_.end ())
3182 : {
3183 0 : map_.erase (iter);
3184 0 : return true;
3185 : }
3186 : return false;
3187 : }
3188 :
3189 : /* Module profile is only used by LIPO. Here we simply ignore it. */
3190 :
3191 : static void
3192 0 : fake_read_autofdo_module_profile ()
3193 : {
3194 : /* Read in the module info. */
3195 0 : gcov_read_unsigned ();
3196 :
3197 : /* Skip the length of the section. */
3198 0 : gcov_read_unsigned ();
3199 :
3200 : /* Read in the file name table. */
3201 0 : unsigned total_module_num = gcov_read_unsigned ();
3202 0 : gcc_assert (total_module_num == 0);
3203 0 : }
3204 :
3205 : /* Read data from profile data file. */
3206 :
3207 : static void
3208 0 : read_profile (void)
3209 : {
3210 0 : if (gcov_open (auto_profile_file, 1) == 0)
3211 : {
3212 0 : error ("cannot open profile file %s", auto_profile_file);
3213 0 : return;
3214 : }
3215 :
3216 0 : if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
3217 : {
3218 0 : error ("AutoFDO profile magic number does not match");
3219 0 : return;
3220 : }
3221 :
3222 : /* Skip the version number. */
3223 0 : unsigned version = gcov_read_unsigned ();
3224 0 : if (version != AUTO_PROFILE_VERSION)
3225 : {
3226 0 : error ("AutoFDO profile version %u does not match %u",
3227 : version, AUTO_PROFILE_VERSION);
3228 0 : return;
3229 : }
3230 :
3231 : /* Skip the empty integer. */
3232 0 : gcov_read_unsigned ();
3233 :
3234 : /* summary_info. */
3235 0 : afdo_summary_info = new summary_info ();
3236 0 : if (!afdo_summary_info->read ())
3237 : {
3238 0 : error ("cannot read summary information from %s", auto_profile_file);
3239 0 : return;
3240 : }
3241 :
3242 : /* string_table. */
3243 0 : afdo_string_table = new string_table ();
3244 0 : if (!afdo_string_table->read ())
3245 : {
3246 0 : error ("cannot read string table from %s", auto_profile_file);
3247 0 : return;
3248 : }
3249 :
3250 : /* autofdo_source_profile. */
3251 0 : afdo_source_profile = autofdo_source_profile::create ();
3252 0 : if (afdo_source_profile == NULL
3253 0 : || gcov_is_error ())
3254 : {
3255 0 : error ("cannot read function profile from %s", auto_profile_file);
3256 0 : delete afdo_source_profile;
3257 0 : afdo_source_profile = NULL;
3258 0 : return;
3259 : }
3260 :
3261 : /* autofdo_module_profile. */
3262 0 : fake_read_autofdo_module_profile ();
3263 0 : if (gcov_is_error ())
3264 : {
3265 0 : error ("cannot read module profile from %s", auto_profile_file);
3266 0 : return;
3267 : }
3268 : }
3269 :
3270 : /* From AutoFDO profiles, find values inside STMT for that we want to measure
3271 : histograms for indirect-call optimization.
3272 :
3273 : This function is actually served for 2 purposes:
3274 : * before annotation, we need to mark histogram, promote and inline
3275 : * after annotation, we just need to mark, and let follow-up logic to
3276 : decide if it needs to promote and inline. */
3277 :
3278 : static bool
3279 0 : afdo_indirect_call (gcall *stmt, const icall_target_map &map,
3280 : bool transform, cgraph_edge *indirect_edge)
3281 : {
3282 0 : tree callee;
3283 :
3284 0 : if (map.size () == 0)
3285 : {
3286 0 : if (dump_file)
3287 0 : fprintf (dump_file, "No targets found\n");
3288 0 : return false;
3289 : }
3290 0 : if (!stmt)
3291 : {
3292 0 : if (dump_file)
3293 0 : fprintf (dump_file, "No call statement\n");
3294 0 : return false;
3295 : }
3296 0 : if (gimple_call_internal_p (stmt))
3297 : {
3298 0 : if (dump_file)
3299 0 : fprintf (dump_file, "Internal call\n");
3300 0 : return false;
3301 : }
3302 0 : if (gimple_call_fndecl (stmt) != NULL_TREE)
3303 : {
3304 0 : if (dump_file)
3305 0 : fprintf (dump_file, "Call is already direct\n");
3306 0 : return false;
3307 : }
3308 :
3309 0 : gcov_type total = 0;
3310 0 : icall_target_map::const_iterator max_iter = map.end ();
3311 :
3312 0 : for (icall_target_map::const_iterator iter = map.begin ();
3313 0 : iter != map.end (); ++iter)
3314 : {
3315 0 : total += iter->second;
3316 0 : if (max_iter == map.end () || max_iter->second < iter->second)
3317 : max_iter = iter;
3318 : }
3319 0 : total *= afdo_count_scale;
3320 0 : struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
3321 0 : get_identifier (afdo_string_table->get_symbol_name (max_iter->first)));
3322 0 : if (direct_call == NULL)
3323 : {
3324 0 : if (dump_file)
3325 0 : fprintf (dump_file, "Failed to find cgraph node for %s\n",
3326 0 : afdo_string_table->get_symbol_name (max_iter->first));
3327 0 : return false;
3328 : }
3329 :
3330 0 : callee = gimple_call_fn (stmt);
3331 :
3332 0 : if (!transform)
3333 : {
3334 0 : if (!direct_call->profile_id)
3335 : {
3336 0 : if (dump_file)
3337 0 : fprintf (dump_file, "No profile id\n");
3338 0 : return false;
3339 : }
3340 0 : histogram_value hist = gimple_alloc_histogram_value (
3341 : cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
3342 0 : hist->n_counters = 4;
3343 0 : hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
3344 0 : gimple_add_histogram_value (cfun, stmt, hist);
3345 :
3346 : /* Total counter */
3347 0 : hist->hvalue.counters[0] = total;
3348 : /* Number of value/counter pairs */
3349 0 : hist->hvalue.counters[1] = 1;
3350 : /* Value */
3351 0 : hist->hvalue.counters[2] = direct_call->profile_id;
3352 : /* Counter */
3353 0 : hist->hvalue.counters[3] = max_iter->second * afdo_count_scale;
3354 :
3355 0 : if (!direct_call->profile_id)
3356 : {
3357 0 : if (dump_file)
3358 0 : fprintf (dump_file, "Histogram attached\n");
3359 0 : return false;
3360 : }
3361 : return false;
3362 : }
3363 :
3364 0 : if (dump_file)
3365 : {
3366 0 : fprintf (dump_file, "Indirect call -> direct call ");
3367 0 : print_generic_expr (dump_file, callee, TDF_SLIM);
3368 0 : fprintf (dump_file, " => ");
3369 0 : print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
3370 : }
3371 :
3372 0 : if (!direct_call->definition)
3373 : {
3374 0 : if (dump_file)
3375 0 : fprintf (dump_file, " no definition available\n");
3376 0 : return false;
3377 : }
3378 :
3379 0 : if (dump_file)
3380 : {
3381 0 : fprintf (dump_file, " transformation on insn ");
3382 0 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3383 0 : fprintf (dump_file, "\n");
3384 : }
3385 :
3386 0 : indirect_edge->make_speculative
3387 0 : (direct_call,
3388 0 : gimple_bb (stmt)->count.apply_scale (99, 100));
3389 0 : return true;
3390 : }
3391 :
3392 : /* From AutoFDO profiles, find values inside STMT for that we want to measure
3393 : histograms and adds them to list VALUES. */
3394 :
3395 : static bool
3396 0 : afdo_vpt (gcall *gs, const icall_target_map &map,
3397 : bool transform, cgraph_edge *indirect_edge)
3398 : {
3399 0 : return afdo_indirect_call (gs, map, transform, indirect_edge);
3400 : }
3401 :
3402 : typedef std::set<basic_block> bb_set;
3403 :
3404 : static bool
3405 0 : is_bb_annotated (const basic_block bb, const bb_set &annotated)
3406 : {
3407 0 : if (annotated.find (bb) != annotated.end ())
3408 : {
3409 0 : gcc_checking_assert (bb->count.quality () == AFDO
3410 : || !bb->count.nonzero_p ());
3411 : return true;
3412 : }
3413 0 : gcc_checking_assert (bb->count.quality () != AFDO
3414 : || !bb->count.nonzero_p ());
3415 : return false;
3416 : }
3417 :
3418 : static void
3419 0 : set_bb_annotated (basic_block bb, bb_set *annotated)
3420 : {
3421 0 : gcc_checking_assert (bb->count.quality () == AFDO
3422 : || !bb->count.nonzero_p ());
3423 0 : annotated->insert (bb);
3424 0 : }
3425 :
3426 : /* Update COUNT by known autofdo count C. */
3427 : static void
3428 0 : update_count_by_afdo_count (profile_count *count, gcov_type c)
3429 : {
3430 0 : if (c)
3431 0 : *count = profile_count::from_gcov_type (c).afdo ();
3432 : /* In case we have guessed profile which is already zero, preserve
3433 : quality info. */
3434 0 : else if (count->nonzero_p ()
3435 0 : || count->quality () == GUESSED
3436 0 : || count->quality () == GUESSED_LOCAL)
3437 0 : *count = profile_count::zero ().afdo ();
3438 0 : }
3439 :
3440 : /* Update COUNT by known autofdo count C. */
3441 : static void
3442 0 : update_count_by_afdo_count (profile_count *count, profile_count c)
3443 : {
3444 0 : if (c.nonzero_p ())
3445 0 : *count = c;
3446 : /* In case we have guessed profile which is already zero, preserve
3447 : quality info. */
3448 0 : else if (count->nonzero_p ()
3449 0 : || count->quality () < c.quality ())
3450 0 : *count = c;
3451 0 : }
3452 :
3453 : /* Try to determine unscaled count of edge E.
3454 : Return -1 if nothing is known. */
3455 :
3456 : static gcov_type
3457 0 : afdo_unscaled_edge_count (edge e)
3458 : {
3459 0 : gcov_type max_count = -1;
3460 0 : basic_block bb_succ = e->dest;
3461 0 : count_info info;
3462 0 : if (afdo_source_profile->get_count_info (e->goto_locus, &info))
3463 : {
3464 0 : if (info.count > max_count)
3465 : max_count = info.count;
3466 0 : if (dump_file && info.count)
3467 : {
3468 0 : fprintf (dump_file,
3469 : " goto location of edge %i->%i with count %" PRIu64"\n",
3470 0 : e->src->index, e->dest->index, (int64_t)info.count);
3471 : }
3472 : }
3473 0 : for (gphi_iterator gpi = gsi_start_phis (bb_succ);
3474 0 : !gsi_end_p (gpi); gsi_next (&gpi))
3475 : {
3476 0 : gphi *phi = gpi.phi ();
3477 0 : location_t phi_loc
3478 0 : = gimple_phi_arg_location_from_edge (phi, e);
3479 0 : if (afdo_source_profile->get_count_info (phi_loc, &info))
3480 : {
3481 0 : if (info.count > max_count)
3482 : max_count = info.count;
3483 0 : if (dump_file && info.count)
3484 : {
3485 0 : fprintf (dump_file,
3486 : " phi op of edge %i->%i with count %" PRIu64": ",
3487 0 : e->src->index, e->dest->index, (int64_t)info.count);
3488 0 : print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
3489 : }
3490 : }
3491 : }
3492 0 : return max_count;
3493 0 : }
3494 :
3495 : /* For a given BB, set its execution count. Attach value profile if a stmt
3496 : is not in PROMOTED, because we only want to promote an indirect call once.
3497 : Return TRUE if BB is annotated. */
3498 :
3499 : static bool
3500 0 : afdo_set_bb_count (basic_block bb, hash_set <basic_block> &zero_bbs)
3501 : {
3502 0 : gimple_stmt_iterator gsi;
3503 0 : gcov_type max_count = -1;
3504 0 : if (dump_file)
3505 0 : fprintf (dump_file, " Looking up AFDO count of bb %i\n", bb->index);
3506 :
3507 0 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3508 : {
3509 0 : count_info info;
3510 0 : gimple *stmt = gsi_stmt (gsi);
3511 0 : if (!stmt_loc_used_by_debug_info (stmt))
3512 0 : continue;
3513 0 : if (afdo_source_profile->get_count_info (stmt, &info))
3514 : {
3515 0 : if (info.count > max_count)
3516 : max_count = info.count;
3517 0 : if (dump_file)
3518 : {
3519 0 : fprintf (dump_file, " count %" PRIu64 " in stmt: ",
3520 : (int64_t)info.count);
3521 0 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3522 : }
3523 0 : gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
3524 : /* TODO; if inlined early and indirect call was not optimized out,
3525 : we will end up speculating again. Early inliner should remove
3526 : all targets for edges it speculated into safely. */
3527 0 : if (call
3528 0 : && info.targets.size () > 0)
3529 0 : afdo_vpt (call, info.targets, false, NULL);
3530 : }
3531 0 : }
3532 :
3533 0 : if (max_count == -1 && single_succ_p (bb))
3534 0 : max_count = afdo_unscaled_edge_count (single_succ_edge (bb));
3535 :
3536 0 : if (max_count == -1)
3537 : return false;
3538 :
3539 0 : if (max_count)
3540 : {
3541 0 : update_count_by_afdo_count (&bb->count, max_count * afdo_count_scale);
3542 0 : if (dump_file)
3543 0 : fprintf (dump_file,
3544 : " Annotated bb %i with count %" PRId64
3545 : ", scaled to %" PRId64 "\n",
3546 : bb->index, (int64_t)max_count,
3547 0 : (int64_t)(max_count * afdo_count_scale));
3548 0 : return true;
3549 : }
3550 : else
3551 : {
3552 0 : if (dump_file)
3553 0 : fprintf (dump_file,
3554 : " bb %i has statements with 0 count\n", bb->index);
3555 0 : zero_bbs.add (bb);
3556 : }
3557 0 : return false;
3558 : }
3559 :
3560 : /* BB1 and BB2 are in an equivalent class iff:
3561 : 1. BB1 dominates BB2.
3562 : 2. BB2 post-dominates BB1.
3563 : 3. BB1 and BB2 are in the same loop nest.
3564 : This function finds the equivalent class for each basic block, and
3565 : stores a pointer to the first BB in its equivalent class. Meanwhile,
3566 : set bb counts for the same equivalent class to be idenical. Update
3567 : ANNOTATED_BB for the first BB in its equivalent class. */
3568 :
3569 : static void
3570 0 : afdo_find_equiv_class (bb_set *annotated_bb)
3571 : {
3572 0 : basic_block bb;
3573 :
3574 0 : FOR_ALL_BB_FN (bb, cfun)
3575 0 : bb->aux = NULL;
3576 :
3577 0 : FOR_ALL_BB_FN (bb, cfun)
3578 : {
3579 0 : if (bb->aux != NULL)
3580 0 : continue;
3581 0 : bb->aux = bb;
3582 0 : for (basic_block bb1 : get_dominated_by (CDI_DOMINATORS, bb))
3583 0 : if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
3584 0 : && bb1->loop_father == bb->loop_father)
3585 : {
3586 0 : bb1->aux = bb;
3587 0 : if (is_bb_annotated (bb1, *annotated_bb)
3588 0 : && (!is_bb_annotated (bb, *annotated_bb)
3589 0 : || bb1->count > bb->count))
3590 : {
3591 0 : if (dump_file)
3592 : {
3593 0 : fprintf (dump_file,
3594 : " Copying count of bb %i to bb %i; count is:",
3595 : bb1->index,
3596 : bb->index);
3597 0 : bb1->count.dump (dump_file);
3598 0 : fprintf (dump_file, "\n");
3599 : }
3600 0 : update_count_by_afdo_count (&bb->count, bb1->count);
3601 0 : set_bb_annotated (bb, annotated_bb);
3602 : }
3603 0 : }
3604 :
3605 0 : for (basic_block bb1 : get_dominated_by (CDI_POST_DOMINATORS, bb))
3606 0 : if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
3607 0 : && bb1->loop_father == bb->loop_father)
3608 : {
3609 0 : bb1->aux = bb;
3610 0 : if (is_bb_annotated (bb1, *annotated_bb)
3611 0 : && (!is_bb_annotated (bb, *annotated_bb)
3612 0 : || bb1->count > bb->count))
3613 : {
3614 0 : if (dump_file)
3615 : {
3616 0 : fprintf (dump_file,
3617 : " Copying count of bb %i to bb %i; count is:",
3618 : bb1->index,
3619 : bb->index);
3620 0 : bb1->count.dump (dump_file);
3621 0 : fprintf (dump_file, "\n");
3622 : }
3623 0 : update_count_by_afdo_count (&bb->count, bb1->count);
3624 0 : set_bb_annotated (bb, annotated_bb);
3625 : }
3626 0 : }
3627 : }
3628 0 : }
3629 :
3630 : /* If a basic block's count is known, and only one of its in/out edges' count
3631 : is unknown, its count can be calculated. Meanwhile, if all of the in/out
3632 : edges' counts are known, then the basic block's unknown count can also be
3633 : calculated. Also, if a block has a single predecessor or successor, the block's
3634 : count can be propagated to that predecessor or successor.
3635 : IS_SUCC is true if out edges of a basic blocks are examined.
3636 : Update ANNOTATED_BB accordingly.
3637 : Return TRUE if any basic block/edge count is changed. */
3638 :
3639 : static bool
3640 0 : afdo_propagate_edge (bool is_succ, bb_set *annotated_bb)
3641 : {
3642 0 : basic_block bb;
3643 0 : bool changed = false;
3644 :
3645 0 : FOR_EACH_BB_FN (bb, cfun)
3646 : {
3647 0 : edge e, unknown_edge = NULL;
3648 0 : edge_iterator ei;
3649 0 : int num_unknown_edges = 0;
3650 0 : int num_edges = 0;
3651 0 : profile_count total_known_count = profile_count::zero ().afdo ();
3652 :
3653 0 : FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
3654 : {
3655 0 : gcc_assert (AFDO_EINFO (e) != NULL);
3656 0 : if (! AFDO_EINFO (e)->is_annotated ())
3657 0 : num_unknown_edges++, unknown_edge = e;
3658 : else
3659 0 : total_known_count += AFDO_EINFO (e)->get_count ();
3660 0 : num_edges++;
3661 : }
3662 0 : if (dump_file)
3663 : {
3664 0 : fprintf (dump_file, "bb %i %s propagating %s edges %i, "
3665 : "unknown edges %i, known count ",
3666 : bb->index,
3667 0 : is_bb_annotated (bb, *annotated_bb) ? "(annotated)" : "",
3668 : is_succ ? "successors" : "predecessors", num_edges,
3669 : num_unknown_edges);
3670 0 : total_known_count.dump (dump_file);
3671 0 : fprintf (dump_file, " bb count ");
3672 0 : bb->count.dump (dump_file);
3673 0 : fprintf (dump_file, "\n");
3674 : }
3675 :
3676 : /* Be careful not to annotate block with no successor in special cases. */
3677 0 : if (num_unknown_edges == 0 && num_edges
3678 0 : && !is_bb_annotated (bb, *annotated_bb))
3679 : {
3680 0 : if (dump_file)
3681 : {
3682 0 : fprintf (dump_file, " Annotating bb %i with count ", bb->index);
3683 0 : total_known_count.dump (dump_file);
3684 0 : fprintf (dump_file, "\n");
3685 : }
3686 0 : update_count_by_afdo_count (&bb->count, total_known_count);
3687 0 : set_bb_annotated (bb, annotated_bb);
3688 0 : changed = true;
3689 : }
3690 0 : else if (is_bb_annotated (bb, *annotated_bb)
3691 : /* We do not want to consider 0 (afdo) > 0 (precise) */
3692 0 : && total_known_count.nonzero_p ()
3693 0 : && bb->count < total_known_count)
3694 : {
3695 0 : if (dump_file)
3696 : {
3697 0 : fprintf (dump_file, " Increasing bb %i count from ",
3698 : bb->index);
3699 0 : bb->count.dump (dump_file);
3700 0 : fprintf (dump_file, " to ");
3701 0 : total_known_count.dump (dump_file);
3702 0 : fprintf (dump_file, " hoping to mitigate afdo inconsistency\n");
3703 : }
3704 0 : bb->count = total_known_count;
3705 0 : changed = true;
3706 : }
3707 0 : else if (num_unknown_edges == 1 && is_bb_annotated (bb, *annotated_bb))
3708 : {
3709 0 : if (bb->count > total_known_count)
3710 : {
3711 0 : profile_count new_count = bb->count - total_known_count;
3712 0 : AFDO_EINFO (unknown_edge)->set_count (new_count);
3713 : }
3714 : else
3715 0 : AFDO_EINFO (unknown_edge)->set_count
3716 0 : (profile_count::zero ().afdo ());
3717 0 : if (dump_file)
3718 : {
3719 0 : fprintf (dump_file, " Annotated edge %i->%i with count ",
3720 0 : unknown_edge->src->index, unknown_edge->dest->index);
3721 0 : AFDO_EINFO (unknown_edge)->get_count ().dump (dump_file);
3722 0 : fprintf (dump_file, "\n");
3723 : }
3724 0 : AFDO_EINFO (unknown_edge)->set_annotated ();
3725 0 : changed = true;
3726 : }
3727 0 : else if (num_unknown_edges > 1
3728 0 : && is_bb_annotated (bb, *annotated_bb)
3729 0 : && (total_known_count >= bb->count || !bb->count.nonzero_p ()))
3730 : {
3731 0 : FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
3732 : {
3733 0 : gcc_assert (AFDO_EINFO (e) != NULL);
3734 0 : if (! AFDO_EINFO (e)->is_annotated ())
3735 : {
3736 0 : AFDO_EINFO (e)->set_count
3737 0 : (profile_count::zero ().afdo ());
3738 0 : AFDO_EINFO (e)->set_annotated ();
3739 0 : if (dump_file)
3740 : {
3741 0 : fprintf (dump_file, " Annotated edge %i->%i with count ",
3742 0 : e->src->index, e->dest->index);
3743 0 : AFDO_EINFO (unknown_edge)->get_count ().dump (dump_file);
3744 0 : fprintf (dump_file, "\n");
3745 : }
3746 : }
3747 : }
3748 : }
3749 0 : else if (num_unknown_edges == 0
3750 0 : && is_bb_annotated (bb, *annotated_bb)
3751 0 : && (is_succ ? single_succ_p (bb) : single_pred_p (bb)))
3752 : {
3753 0 : edge e = is_succ ? single_succ_edge (bb) : single_pred_edge (bb);
3754 0 : if (AFDO_EINFO (e)->is_annotated ()
3755 0 : && AFDO_EINFO (e)->get_count () < bb->count)
3756 : {
3757 0 : if (dump_file)
3758 : {
3759 0 : fprintf (dump_file, " Increasing edge %i->%i count from ",
3760 0 : e->src->index, e->dest->index);
3761 0 : AFDO_EINFO (e)->get_count ().dump (dump_file);
3762 0 : fprintf (dump_file, " to ");
3763 0 : bb->count.dump (dump_file);
3764 0 : fprintf (dump_file, " hoping to mitigate afdo inconsistency\n");
3765 : }
3766 0 : AFDO_EINFO (e)->set_count (bb->count);
3767 0 : changed = true;
3768 : }
3769 : }
3770 : }
3771 0 : return changed;
3772 : }
3773 :
3774 : /* Special propagation for circuit expressions. Because GCC translates
3775 : control flow into data flow for circuit expressions. E.g.
3776 : BB1:
3777 : if (a && b)
3778 : BB2
3779 : else
3780 : BB3
3781 :
3782 : will be translated into:
3783 :
3784 : BB1:
3785 : if (a)
3786 : goto BB.t1
3787 : else
3788 : goto BB.t3
3789 : BB.t1:
3790 : if (b)
3791 : goto BB.t2
3792 : else
3793 : goto BB.t3
3794 : BB.t2:
3795 : goto BB.t3
3796 : BB.t3:
3797 : tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
3798 : if (tmp)
3799 : goto BB2
3800 : else
3801 : goto BB3
3802 :
3803 : In this case, we need to propagate through PHI to determine the edge
3804 : count of BB1->BB.t1, BB.t1->BB.t2. */
3805 :
3806 : static void
3807 0 : afdo_propagate_circuit (const bb_set &annotated_bb)
3808 : {
3809 0 : basic_block bb;
3810 0 : FOR_ALL_BB_FN (bb, cfun)
3811 : {
3812 0 : gimple *def_stmt;
3813 0 : tree cmp_rhs, cmp_lhs;
3814 0 : gimple *cmp_stmt = last_nondebug_stmt (bb);
3815 0 : edge e;
3816 0 : edge_iterator ei;
3817 :
3818 0 : if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
3819 0 : continue;
3820 0 : cmp_rhs = gimple_cond_rhs (cmp_stmt);
3821 0 : cmp_lhs = gimple_cond_lhs (cmp_stmt);
3822 0 : if (!TREE_CONSTANT (cmp_rhs)
3823 0 : || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
3824 0 : continue;
3825 0 : if (TREE_CODE (cmp_lhs) != SSA_NAME)
3826 0 : continue;
3827 0 : if (!is_bb_annotated (bb, annotated_bb))
3828 0 : continue;
3829 0 : def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
3830 0 : while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
3831 0 : && gimple_assign_single_p (def_stmt)
3832 0 : && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
3833 0 : def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
3834 0 : if (!def_stmt)
3835 0 : continue;
3836 0 : gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
3837 0 : if (!phi_stmt)
3838 0 : continue;
3839 0 : FOR_EACH_EDGE (e, ei, bb->succs)
3840 : {
3841 0 : unsigned i, total = 0;
3842 0 : edge only_one;
3843 0 : bool check_value_one = (((integer_onep (cmp_rhs))
3844 0 : ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
3845 0 : ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
3846 0 : if (! AFDO_EINFO (e)->is_annotated ())
3847 0 : continue;
3848 0 : for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
3849 : {
3850 0 : tree val = gimple_phi_arg_def (phi_stmt, i);
3851 0 : edge ep = gimple_phi_arg_edge (phi_stmt, i);
3852 :
3853 0 : if (!TREE_CONSTANT (val)
3854 0 : || !(integer_zerop (val) || integer_onep (val)))
3855 0 : continue;
3856 0 : if (check_value_one ^ integer_onep (val))
3857 0 : continue;
3858 0 : total++;
3859 0 : only_one = ep;
3860 0 : if (! (AFDO_EINFO (e)->get_count ()).nonzero_p ()
3861 0 : && ! AFDO_EINFO (ep)->is_annotated ())
3862 : {
3863 0 : AFDO_EINFO (ep)->set_count (profile_count::zero ().afdo ());
3864 0 : AFDO_EINFO (ep)->set_annotated ();
3865 : }
3866 : }
3867 0 : if (total == 1 && ! AFDO_EINFO (only_one)->is_annotated ())
3868 : {
3869 0 : AFDO_EINFO (only_one)->set_count (AFDO_EINFO (e)->get_count ());
3870 0 : AFDO_EINFO (only_one)->set_annotated ();
3871 : }
3872 : }
3873 : }
3874 0 : }
3875 :
3876 : /* Propagate the basic block count and edge count on the control flow
3877 : graph. We do the propagation iteratively until stabilize. */
3878 :
3879 : static void
3880 0 : afdo_propagate (bb_set *annotated_bb)
3881 : {
3882 0 : bool changed = true;
3883 0 : int i = 0;
3884 :
3885 0 : basic_block bb;
3886 0 : FOR_ALL_BB_FN (bb, cfun)
3887 0 : if (!is_bb_annotated (bb, *annotated_bb)
3888 0 : && is_bb_annotated ((basic_block)bb->aux, *annotated_bb))
3889 : {
3890 0 : update_count_by_afdo_count (&bb->count, ((basic_block)bb->aux)->count);
3891 0 : set_bb_annotated (bb, annotated_bb);
3892 0 : if (dump_file)
3893 : {
3894 0 : fprintf (dump_file,
3895 : " Copying count of bb %i to bb %i; count is:",
3896 0 : ((basic_block)bb->aux)->index,
3897 : bb->index);
3898 0 : bb->count.dump (dump_file);
3899 0 : fprintf (dump_file, "\n");
3900 : }
3901 : }
3902 :
3903 0 : while (changed && i++ < 100)
3904 : {
3905 0 : changed = false;
3906 :
3907 0 : if (afdo_propagate_edge (true, annotated_bb))
3908 : changed = true;
3909 0 : if (afdo_propagate_edge (false, annotated_bb))
3910 0 : changed = true;
3911 0 : afdo_propagate_circuit (*annotated_bb);
3912 : }
3913 0 : if (dump_file)
3914 0 : fprintf (dump_file, "Propagation took %i iterations %s\n",
3915 : i, changed ? "; iteration limit reached\n" : "");
3916 0 : }
3917 :
3918 : /* qsort comparator of sreals. */
3919 : static int
3920 0 : cmp (const void *a, const void *b)
3921 : {
3922 0 : if (*(const sreal *)a < *(const sreal *)b)
3923 : return 1;
3924 0 : if (*(const sreal *)a > *(const sreal *)b)
3925 0 : return -1;
3926 : return 0;
3927 : }
3928 :
3929 : /* To scale a connected component of graph we collect desired scales of
3930 : basic blocks on the boundary and then compute a robust average. */
3931 :
3932 : struct scale
3933 : {
3934 : /* Scale desired. */
3935 : sreal scale;
3936 : /* Weight for averaging computed from execution count of the edge
3937 : scale originates from. */
3938 : uint64_t weight;
3939 : };
3940 :
3941 : /* Add scale ORIG/ANNOTATED to SCALES. */
3942 :
3943 : static void
3944 0 : add_scale (vec <scale> *scales, profile_count annotated, profile_count orig)
3945 : {
3946 0 : if (dump_file)
3947 : {
3948 0 : orig.dump (dump_file);
3949 0 : fprintf (dump_file, " should be ");
3950 0 : annotated.dump (dump_file);
3951 0 : fprintf (dump_file, "\n");
3952 : }
3953 0 : if (orig.nonzero_p ())
3954 : {
3955 0 : sreal scale
3956 0 : = annotated.guessed_local ()
3957 0 : .to_sreal_scale (orig);
3958 0 : if (dump_file)
3959 0 : fprintf (dump_file, " adding scale %.16f, weight %" PRId64 "\n",
3960 0 : scale.to_double (), annotated.value () + 1);
3961 0 : scales->safe_push ({scale, annotated.value () + 1});
3962 : }
3963 0 : }
3964 :
3965 : /* Scale counts of all basic blocks in BBS by SCALE and convert them to
3966 : IPA quality. */
3967 :
3968 : static void
3969 0 : scale_bbs (const vec <basic_block> &bbs, sreal scale)
3970 : {
3971 0 : if (dump_file)
3972 0 : fprintf (dump_file, " Scaling by %.16f\n", scale.to_double ());
3973 0 : for (basic_block b : bbs)
3974 0 : if (!(b->count == profile_count::zero ())
3975 0 : && b->count.initialized_p ())
3976 : {
3977 0 : profile_count o = b->count;
3978 0 : b->count = b->count.force_guessed () * scale;
3979 :
3980 : /* If we scaled to 0, make it auto-fdo since that is treated
3981 : less agressively. */
3982 0 : if (!b->count.nonzero_p () && o.nonzero_p ())
3983 0 : b->count = profile_count::zero ().afdo ();
3984 0 : if (dump_file)
3985 : {
3986 0 : fprintf (dump_file, " bb %i count updated ", b->index);
3987 0 : o.dump (dump_file);
3988 0 : fprintf (dump_file, " -> ");
3989 0 : b->count.dump (dump_file);
3990 0 : fprintf (dump_file, "\n");
3991 : }
3992 : }
3993 0 : }
3994 :
3995 : /* Determine scaling factor by taking robust average of SCALES
3996 : and taking into account limits.
3997 : MAX_COUNT is maximal guessed count to be scaled while MAC_COUNT_IN_FN
3998 : is maximal count in function determined by auto-fdo. */
3999 :
4000 : sreal
4001 0 : determine_scale (vec <scale> *scales, profile_count max_count,
4002 : profile_count max_count_in_fn)
4003 : {
4004 0 : scales->qsort (cmp);
4005 :
4006 0 : uint64_t overall_weight = 0;
4007 0 : for (scale &e : *scales)
4008 0 : overall_weight += e.weight;
4009 :
4010 0 : uint64_t cummulated = 0, weight_sum = 0;
4011 0 : sreal scale_sum = 0;
4012 0 : for (scale &e : *scales)
4013 : {
4014 0 : uint64_t prev = cummulated;
4015 0 : cummulated += e.weight;
4016 0 : if (cummulated >= overall_weight / 4
4017 0 : && prev <= 3 * overall_weight / 4)
4018 : {
4019 0 : scale_sum += e.scale * e.weight;
4020 0 : weight_sum += e.weight;
4021 0 : if (dump_file)
4022 0 : fprintf (dump_file, " accounting scale %.16f, weight %" PRId64 "\n",
4023 : e.scale.to_double (), e.weight);
4024 : }
4025 0 : else if (dump_file)
4026 0 : fprintf (dump_file, " ignoring scale %.16f, weight %" PRId64 "\n",
4027 : e.scale.to_double (), e.weight);
4028 : }
4029 0 : sreal scale = scale_sum / (sreal)weight_sum;
4030 :
4031 : /* Avoid scaled regions to have very large counts.
4032 : Otherwise they may dominate ipa-profile's histogram computing cutoff
4033 : of hot basic blocks. */
4034 0 : if (max_count * scale > max_count_in_fn.guessed_local ().apply_scale (128, 1))
4035 : {
4036 0 : if (dump_file)
4037 : {
4038 0 : fprintf (dump_file, "Scaling by %.16f produces max count ",
4039 : scale.to_double ());
4040 0 : (max_count * scale).dump (dump_file);
4041 0 : fprintf (dump_file, " that exceeds max count in fn ");
4042 0 : max_count_in_fn.dump (dump_file);
4043 0 : fprintf (dump_file, "; capping\n");
4044 : }
4045 0 : scale = max_count_in_fn.guessed_local ().to_sreal_scale (max_count);
4046 : }
4047 0 : return scale;
4048 : }
4049 :
4050 : /* Scale profile of the whole function to approximately match auto-profile. */
4051 :
4052 : bool
4053 0 : scale_bb_profile ()
4054 : {
4055 0 : const function_instance *s
4056 : = afdo_source_profile->get_function_instance_by_decl
4057 0 : (current_function_decl);
4058 :
4059 : /* In the first pass only store non-zero counts. */
4060 0 : gcov_type head_count = s->head_count () * autofdo::afdo_count_scale;
4061 0 : hash_set <basic_block> zero_bbs;
4062 0 : auto_vec <basic_block, 20> bbs (n_basic_blocks_for_fn (cfun));
4063 0 : auto_vec <scale, 20> scales;
4064 0 : basic_block bb;
4065 0 : profile_count max_count = profile_count::zero ();
4066 0 : profile_count max_count_in_fn = profile_count::zero ();
4067 0 : bbs.quick_push (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4068 0 : bbs.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
4069 0 : if (head_count > 0)
4070 : {
4071 0 : profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
4072 0 : max_count = entry_count;
4073 0 : update_count_by_afdo_count (&entry_count, head_count);
4074 0 : max_count_in_fn = entry_count;
4075 0 : add_scale (&scales, entry_count, ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
4076 : }
4077 0 : FOR_EACH_BB_FN (bb, cfun)
4078 : {
4079 0 : profile_count cnt = bb->count;
4080 0 : bbs.safe_push (bb);
4081 0 : max_count = profile_count::max_prefer_initialized (max_count, cnt);
4082 0 : if (afdo_set_bb_count (bb, zero_bbs))
4083 : {
4084 0 : std::swap (cnt, bb->count);
4085 0 : max_count_in_fn
4086 0 : = profile_count::max_prefer_initialized (max_count_in_fn, cnt);
4087 0 : add_scale (&scales, cnt, bb->count);
4088 : }
4089 : }
4090 0 : if (scales.length ())
4091 : {
4092 0 : sreal scale = determine_scale (&scales, max_count, max_count_in_fn);
4093 0 : scale_bbs (bbs, scale);
4094 0 : return true;
4095 : }
4096 : return false;
4097 0 : }
4098 :
4099 : /* In case given basic block was fully optimized out, AutoFDO
4100 : will have no data about it. In this case try to preserve static profile.
4101 : Identify connected components (in undirected form of CFG) which has
4102 : no annotations at all. Look at thir boundaries and try to determine
4103 : scaling factor and scale. */
4104 :
4105 : void
4106 0 : afdo_adjust_guessed_profile (bb_set *annotated_bb)
4107 : {
4108 : /* Basic blocks of connected component currently processed. */
4109 0 : auto_vec <basic_block, 20> bbs (n_basic_blocks_for_fn (cfun));
4110 : /* Scale factors found. */
4111 0 : auto_vec <scale, 20> scales;
4112 0 : auto_vec <basic_block, 20> stack (n_basic_blocks_for_fn (cfun));
4113 :
4114 0 : basic_block seed_bb;
4115 0 : unsigned int component_id = 1;
4116 :
4117 : /* Map from basic block to its component.
4118 : 0 is used for univisited BBs,
4119 : 1 means that BB is annotated,
4120 : >=2 is an id of the component BB belongs to. */
4121 0 : auto_vec <unsigned int, 20> component;
4122 0 : component.safe_grow (last_basic_block_for_fn (cfun));
4123 0 : profile_count max_count_in_fn = profile_count::zero ();
4124 0 : FOR_ALL_BB_FN (seed_bb, cfun)
4125 0 : if (is_bb_annotated (seed_bb, *annotated_bb))
4126 : {
4127 0 : component[seed_bb->index] = 1;
4128 0 : max_count_in_fn
4129 0 : = profile_count::max_prefer_initialized (max_count_in_fn, seed_bb->count);
4130 : }
4131 : else
4132 0 : component[seed_bb->index] = 0;
4133 0 : FOR_ALL_BB_FN (seed_bb, cfun)
4134 0 : if (!component[seed_bb->index])
4135 : {
4136 0 : stack.quick_push (seed_bb);
4137 0 : component_id++;
4138 0 : bbs.truncate (0);
4139 0 : scales.truncate (0);
4140 0 : component[seed_bb->index] = component_id;
4141 0 : profile_count max_count = profile_count::zero ();
4142 :
4143 : /* Identify connected component starting in BB. */
4144 0 : if (dump_file)
4145 0 : fprintf (dump_file, "Starting connected component in bb %i\n",
4146 : seed_bb->index);
4147 0 : do
4148 : {
4149 0 : basic_block b = stack.pop ();
4150 :
4151 0 : bbs.quick_push (b);
4152 0 : max_count = profile_count::max_prefer_initialized (max_count, b->count);
4153 :
4154 0 : for (edge e: b->preds)
4155 0 : if (!component[e->src->index])
4156 : {
4157 0 : stack.quick_push (e->src);
4158 0 : component[e->src->index] = component_id;
4159 : }
4160 0 : for (edge e: b->succs)
4161 0 : if (!component[e->dest->index])
4162 : {
4163 0 : stack.quick_push (e->dest);
4164 0 : component[e->dest->index] = component_id;
4165 : }
4166 : }
4167 0 : while (!stack.is_empty ());
4168 :
4169 : /* If all blocks in components has 0 count, we do not need
4170 : to scale, only we must convert to IPA quality. */
4171 0 : if (!max_count.nonzero_p ())
4172 : {
4173 0 : if (dump_file)
4174 0 : fprintf (dump_file, " All counts are 0; scale = 1\n");
4175 0 : scale_bbs (bbs, 1);
4176 0 : continue;
4177 : }
4178 :
4179 : /* Now visit the component and try to figure out its desired
4180 : frequency. */
4181 0 : for (basic_block b : bbs)
4182 : {
4183 0 : if (dump_file)
4184 : {
4185 0 : fprintf (dump_file, " visiting bb %i with count ", b->index);
4186 0 : b->count.dump (dump_file);
4187 0 : fprintf (dump_file, "\n");
4188 : }
4189 0 : if (!b->count.nonzero_p ())
4190 0 : continue;
4191 : /* Sum of counts of annotated edges into B. */
4192 0 : profile_count annotated_count = profile_count::zero ();
4193 : /* Sum of counts of edges into B with source in current
4194 : component. */
4195 0 : profile_count current_component_count = profile_count::zero ();
4196 0 : bool boundary = false;
4197 :
4198 0 : for (edge e: b->preds)
4199 0 : if (AFDO_EINFO (e)->is_annotated ())
4200 : {
4201 0 : if (dump_file)
4202 : {
4203 0 : fprintf (dump_file, " Annotated pred edge to %i "
4204 0 : "with count ", e->src->index);
4205 0 : AFDO_EINFO (e)->get_count ().dump (dump_file);
4206 0 : fprintf (dump_file, "\n");
4207 : }
4208 0 : boundary = true;
4209 0 : annotated_count += AFDO_EINFO (e)->get_count ();
4210 : }
4211 : /* If source is anotated, combine with static
4212 : probability prediction.
4213 : TODO: We can do better in case some of edges out are
4214 : annotated and distribute only remaining count out of BB. */
4215 0 : else if (is_bb_annotated (e->src, *annotated_bb))
4216 : {
4217 0 : boundary = true;
4218 0 : if (dump_file)
4219 : {
4220 0 : fprintf (dump_file, " Annotated predecessor %i "
4221 : "with count ", e->src->index);
4222 0 : e->src->count.dump (dump_file);
4223 0 : fprintf (dump_file, " edge count using static profile ");
4224 0 : e->count ().dump (dump_file);
4225 0 : fprintf (dump_file, "\n");
4226 : }
4227 0 : annotated_count += e->count ();
4228 : }
4229 : else
4230 : {
4231 0 : current_component_count += e->count ();
4232 0 : gcc_checking_assert (component[e->src->index] == component_id);
4233 : }
4234 0 : if (boundary && current_component_count.initialized_p ())
4235 : {
4236 0 : if (dump_file)
4237 0 : fprintf (dump_file, " bb %i in count ", b->index);
4238 0 : add_scale (&scales,
4239 : annotated_count,
4240 : b->count - current_component_count);
4241 : }
4242 0 : for (edge e: b->succs)
4243 0 : if (AFDO_EINFO (e)->is_annotated ())
4244 : {
4245 0 : if (dump_file)
4246 0 : fprintf (dump_file, " edge %i->%i count ",
4247 0 : b->index, e->dest->index);
4248 0 : add_scale (&scales, AFDO_EINFO (e)->get_count (), e->count ());
4249 : }
4250 0 : else if (is_bb_annotated (e->dest, *annotated_bb))
4251 : {
4252 0 : profile_count annotated_count = e->dest->count;
4253 0 : profile_count out_count = profile_count::zero ();
4254 0 : bool ok = true;
4255 :
4256 0 : for (edge e2: e->dest->preds)
4257 0 : if (AFDO_EINFO (e2)->is_annotated ())
4258 0 : annotated_count -= AFDO_EINFO (e2)->get_count ();
4259 0 : else if (component[e2->src->index] == component_id)
4260 0 : out_count += e2->count ();
4261 0 : else if (is_bb_annotated (e2->src, *annotated_bb))
4262 0 : annotated_count -= e2->count ();
4263 0 : else if (e2->probability.nonzero_p ())
4264 : {
4265 : ok = false;
4266 : break;
4267 : }
4268 0 : if (!ok)
4269 0 : continue;
4270 0 : if (dump_file)
4271 0 : fprintf (dump_file,
4272 : " edge %i->%i has annotated successor; count ",
4273 0 : b->index, e->dest->index);
4274 0 : add_scale (&scales, annotated_count, e->count ());
4275 : }
4276 :
4277 : }
4278 :
4279 : /* If we failed to find annotated entry or exit edge,
4280 : look for exit edges and scale profile so the dest
4281 : BB get all flow it needs. This is imprecise because
4282 : the edge is not annotated and thus BB has more than
4283 : one such predecessor. */
4284 0 : if (!scales.length ())
4285 0 : for (basic_block b : bbs)
4286 0 : if (b->count.nonzero_p ())
4287 0 : for (edge e: b->succs)
4288 0 : if (is_bb_annotated (e->dest, *annotated_bb))
4289 : {
4290 0 : profile_count annotated_count = e->dest->count;
4291 0 : for (edge e2: e->dest->preds)
4292 0 : if (AFDO_EINFO (e2)->is_annotated ())
4293 0 : annotated_count -= AFDO_EINFO (e2)->get_count ();
4294 0 : if (dump_file)
4295 0 : fprintf (dump_file,
4296 : " edge %i->%i has annotated successor;"
4297 : " upper bound count ",
4298 0 : b->index, e->dest->index);
4299 0 : add_scale (&scales, annotated_count, e->count ());
4300 : }
4301 0 : if (!scales.length ())
4302 : {
4303 0 : if (dump_file)
4304 0 : fprintf (dump_file,
4305 : " Can not determine count from the boundary; giving up\n");
4306 0 : continue;
4307 : }
4308 0 : gcc_checking_assert (scales.length ());
4309 0 : sreal scale = determine_scale (&scales, max_count, max_count_in_fn);
4310 0 : scale_bbs (bbs, scale);
4311 : }
4312 0 : }
4313 :
4314 : /* Propagate counts on control flow graph and calculate branch
4315 : probabilities. */
4316 :
4317 : static void
4318 0 : afdo_calculate_branch_prob (bb_set *annotated_bb)
4319 : {
4320 0 : edge e;
4321 0 : edge_iterator ei;
4322 0 : basic_block bb;
4323 :
4324 0 : FOR_ALL_BB_FN (bb, cfun)
4325 : {
4326 0 : gcc_assert (bb->aux == NULL);
4327 0 : FOR_EACH_EDGE (e, ei, bb->succs)
4328 : {
4329 0 : gcc_assert (e->aux == NULL);
4330 0 : e->aux = new edge_info ();
4331 0 : gcov_type c = afdo_unscaled_edge_count (e);
4332 0 : if (c == 0 && e->count () == profile_count::zero ())
4333 : {
4334 0 : AFDO_EINFO (e)->set_count (profile_count::zero ());
4335 0 : if (dump_file)
4336 0 : fprintf (dump_file,
4337 : " Annotating edge %i->%i with count 0;"
4338 : " static profile aggress",
4339 0 : e->src->index, e->dest->index);
4340 : }
4341 0 : else if (c > 0)
4342 : {
4343 0 : AFDO_EINFO (e)->set_count
4344 0 : (profile_count::from_gcov_type
4345 0 : (c * autofdo::afdo_count_scale).afdo ());
4346 0 : if (dump_file)
4347 : {
4348 0 : fprintf (dump_file,
4349 : " Annotating edge %i->%i with count ",
4350 0 : e->src->index, e->dest->index);
4351 0 : AFDO_EINFO (e)->get_count ().dump (dump_file);
4352 0 : fprintf (dump_file, "\n");
4353 : }
4354 : }
4355 : }
4356 : }
4357 :
4358 0 : afdo_find_equiv_class (annotated_bb);
4359 0 : afdo_propagate (annotated_bb);
4360 :
4361 0 : FOR_EACH_BB_FN (bb, cfun)
4362 0 : if (is_bb_annotated (bb, *annotated_bb))
4363 : {
4364 0 : bool all_known = true;
4365 0 : profile_count total_count = profile_count::zero ().afdo ();
4366 :
4367 0 : FOR_EACH_EDGE (e, ei, bb->succs)
4368 : {
4369 0 : gcc_assert (AFDO_EINFO (e) != NULL);
4370 0 : if (! AFDO_EINFO (e)->is_annotated ())
4371 : {
4372 : /* If by static profile this edge never happens,
4373 : still propagate the rest. */
4374 0 : if (e->probability.nonzero_p ())
4375 : {
4376 : all_known = false;
4377 : break;
4378 : }
4379 : }
4380 : else
4381 0 : total_count += AFDO_EINFO (e)->get_count ();
4382 : }
4383 0 : if (!all_known || !total_count.nonzero_p ())
4384 0 : continue;
4385 0 : if (dump_file)
4386 : {
4387 0 : fprintf (dump_file, "Total count of bb %i is ", bb->index);
4388 0 : total_count.dump (dump_file);
4389 0 : fprintf (dump_file, "\n");
4390 : }
4391 :
4392 0 : FOR_EACH_EDGE (e, ei, bb->succs)
4393 0 : if (AFDO_EINFO (e)->is_annotated ())
4394 : {
4395 0 : profile_count cnt = AFDO_EINFO (e)->get_count ();
4396 : /* If probability is 1, preserve reliable static prediction
4397 : (This is, for example the case of single fallthru edge
4398 : or single fallthru plus unlikely EH edge.) */
4399 0 : if (cnt == total_count
4400 0 : && e->probability == profile_probability::always ())
4401 : ;
4402 0 : else if (cnt.nonzero_p ())
4403 0 : e->probability
4404 0 : = cnt.probability_in (total_count);
4405 : /* If probability is zero, preserve reliable static
4406 : prediction. */
4407 0 : else if (e->probability.nonzero_p ()
4408 0 : || e->probability.quality () == GUESSED)
4409 0 : e->probability = profile_probability::never ().afdo ();
4410 0 : if (dump_file)
4411 : {
4412 0 : fprintf (dump_file, " probability of edge %i->%i"
4413 : " with count ",
4414 0 : e->src->index, e->dest->index);
4415 0 : cnt.dump (dump_file);
4416 0 : fprintf (dump_file, " set to ");
4417 0 : e->probability.dump (dump_file);
4418 0 : fprintf (dump_file, "\n");
4419 : }
4420 : }
4421 : }
4422 0 : afdo_adjust_guessed_profile (annotated_bb);
4423 0 : FOR_ALL_BB_FN (bb, cfun)
4424 : {
4425 0 : bb->aux = NULL;
4426 0 : FOR_EACH_EDGE (e, ei, bb->succs)
4427 0 : if (AFDO_EINFO (e) != NULL)
4428 : {
4429 0 : delete AFDO_EINFO (e);
4430 0 : e->aux = NULL;
4431 : }
4432 : }
4433 0 : }
4434 :
4435 : /* Annotate auto profile to the control flow graph. */
4436 :
4437 : static void
4438 0 : afdo_annotate_cfg (void)
4439 : {
4440 0 : basic_block bb;
4441 0 : bb_set annotated_bb;
4442 0 : const function_instance *s
4443 0 : = afdo_source_profile->get_function_instance_by_decl (
4444 : current_function_decl);
4445 :
4446 : /* FIXME: This is a workaround for sourcefile tracking, if afdo_string_table
4447 : ends up with empty filename or incorrect filename for the function and
4448 : should be removed once issues with sourcefile tracking get fixed. */
4449 0 : if (s == NULL)
4450 0 : for (unsigned i = 0; i < afdo_string_table->filenames ().length (); i++)
4451 : {
4452 0 : s = afdo_source_profile->get_function_instance_by_decl (current_function_decl, afdo_string_table->filenames()[i]);
4453 0 : if (s)
4454 : break;
4455 : }
4456 :
4457 0 : if (s == NULL)
4458 : {
4459 0 : if (dump_file)
4460 0 : fprintf (dump_file, "No afdo profile for %s\n",
4461 0 : cgraph_node::get (current_function_decl)->dump_name ());
4462 : /* create_gcov only dumps symbols with some samples in them.
4463 : This means that we get nonempty zero_bbs only if some
4464 : nonzero counts in profile were not matched with statements. */
4465 0 : if (!flag_profile_partial_training
4466 0 : && !param_auto_profile_reorder_only)
4467 : {
4468 0 : FOR_ALL_BB_FN (bb, cfun)
4469 0 : if (bb->count.quality () == GUESSED_LOCAL)
4470 0 : bb->count = bb->count.global0afdo ();
4471 0 : update_max_bb_count ();
4472 : }
4473 0 : return;
4474 : }
4475 :
4476 0 : auto ts_it = timestamp_info_map.find (s->timestamp ());
4477 0 : if (ts_it != timestamp_info_map.end ())
4478 : {
4479 0 : cgraph_node *node = cgraph_node::get (current_function_decl);
4480 0 : node->tp_first_run = ts_it->second;
4481 :
4482 0 : if (dump_file)
4483 0 : fprintf (dump_file, "Setting %s->tp_first_run to %d\n",
4484 : node->asm_name (), node->tp_first_run);
4485 : }
4486 :
4487 0 : if (param_auto_profile_reorder_only)
4488 : return;
4489 :
4490 0 : calculate_dominance_info (CDI_POST_DOMINATORS);
4491 0 : calculate_dominance_info (CDI_DOMINATORS);
4492 0 : loop_optimizer_init (0);
4493 :
4494 0 : if (dump_file)
4495 : {
4496 0 : fprintf (dump_file, "\n\nAnnotating BB profile of %s\n",
4497 0 : cgraph_node::get (current_function_decl)->dump_name ());
4498 0 : fprintf (dump_file, "\n");
4499 0 : s->dump (dump_file);
4500 0 : fprintf (dump_file, "\n");
4501 : }
4502 0 : bool profile_found = false;
4503 0 : hash_set <basic_block> zero_bbs;
4504 0 : gcov_type head_count = s->head_count () * autofdo::afdo_count_scale;
4505 :
4506 0 : if (!param_auto_profile_bbs)
4507 : {
4508 0 : if (scale_bb_profile ())
4509 : return;
4510 : }
4511 : else
4512 : {
4513 : /* In the first pass only store non-zero counts. */
4514 0 : profile_found = head_count > 0;
4515 0 : FOR_EACH_BB_FN (bb, cfun)
4516 : {
4517 0 : if (afdo_set_bb_count (bb, zero_bbs))
4518 : {
4519 0 : if (bb->count.quality () == AFDO)
4520 : {
4521 0 : gcc_assert (bb->count.nonzero_p ());
4522 : profile_found = true;
4523 : }
4524 0 : set_bb_annotated (bb, &annotated_bb);
4525 : }
4526 : }
4527 : }
4528 : /* Exit without clobbering static profile if there was no
4529 : non-zero count. */
4530 0 : if (!profile_found)
4531 : {
4532 : /* create_gcov only dumps symbols with some samples in them.
4533 : This means that we get nonempty zero_bbs only if some
4534 : nonzero counts in profile were not matched with statements.
4535 : ??? We can adjust create_gcov to also recordinfo
4536 : about function with no samples. Then we can distinguish
4537 : between lost profiles which should be kept local and
4538 : real functions with 0 samples during train run. */
4539 0 : if (zero_bbs.is_empty ())
4540 : {
4541 0 : if (dump_file)
4542 0 : fprintf (dump_file, "No afdo samples found"
4543 : "; Setting global count to afdo0\n");
4544 : }
4545 : else
4546 : {
4547 0 : if (dump_file)
4548 0 : fprintf (dump_file, "Setting global count to afdo0\n");
4549 : }
4550 0 : if (!flag_profile_partial_training)
4551 : {
4552 0 : FOR_ALL_BB_FN (bb, cfun)
4553 0 : if (bb->count.quality () == GUESSED_LOCAL)
4554 0 : bb->count = bb->count.global0afdo ();
4555 0 : update_max_bb_count ();
4556 : }
4557 :
4558 0 : loop_optimizer_finalize ();
4559 0 : free_dominance_info (CDI_DOMINATORS);
4560 0 : free_dominance_info (CDI_POST_DOMINATORS);
4561 0 : return;
4562 : }
4563 : /* We try to preserve static profile for BBs with 0
4564 : afdo samples, but if even static profile agrees with 0,
4565 : consider it final so propagation works better. */
4566 0 : for (basic_block bb : zero_bbs)
4567 0 : if (!bb->count.nonzero_p ())
4568 : {
4569 0 : update_count_by_afdo_count (&bb->count, 0);
4570 0 : set_bb_annotated (bb, &annotated_bb);
4571 0 : if (dump_file)
4572 : {
4573 0 : fprintf (dump_file, " Annotating bb %i with count ", bb->index);
4574 0 : bb->count.dump (dump_file);
4575 0 : fprintf (dump_file,
4576 : " (has 0 count in both static and afdo profile)\n");
4577 : }
4578 : }
4579 :
4580 : /* Update profile. */
4581 0 : if (head_count > 0)
4582 : {
4583 0 : update_count_by_afdo_count (&ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
4584 : head_count);
4585 0 : set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun), &annotated_bb);
4586 0 : if (!is_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, annotated_bb)
4587 0 : || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
4588 0 : > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
4589 : {
4590 0 : ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
4591 0 : = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
4592 0 : set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb,
4593 : &annotated_bb);
4594 : }
4595 0 : if (!is_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun), annotated_bb)
4596 0 : || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
4597 0 : > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
4598 : {
4599 0 : EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
4600 0 : = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
4601 0 : set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
4602 : }
4603 : }
4604 :
4605 : /* Calculate, propagate count and probability information on CFG. */
4606 0 : afdo_calculate_branch_prob (&annotated_bb);
4607 :
4608 : /* If we failed to turn some of original guessed profile to global,
4609 : set basic blocks uninitialized. */
4610 0 : FOR_ALL_BB_FN (bb, cfun)
4611 0 : if (!bb->count.ipa_p ())
4612 : {
4613 : /* We skip annotating entry profile if it is 0
4614 : in hope to be able to determine it better from the
4615 : static profile.
4616 :
4617 : Now we know we can not derive it from other info,
4618 : so set it since it is better than UNKNOWN. */
4619 0 : if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
4620 0 : bb->count = profile_count::zero ().afdo ();
4621 : else
4622 0 : bb->count = profile_count::uninitialized ();
4623 0 : if (dump_file)
4624 0 : fprintf (dump_file, " Unknown count of bb %i\n", bb->index);
4625 0 : cfun->cfg->full_profile = false;
4626 : }
4627 :
4628 0 : cgraph_node::get (current_function_decl)->count
4629 0 : = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
4630 0 : update_max_bb_count ();
4631 0 : profile_status_for_fn (cfun) = PROFILE_READ;
4632 0 : if (flag_value_profile_transformations)
4633 : {
4634 0 : gimple_value_profile_transformations ();
4635 0 : free_dominance_info (CDI_DOMINATORS);
4636 0 : free_dominance_info (CDI_POST_DOMINATORS);
4637 0 : update_ssa (TODO_update_ssa);
4638 : }
4639 :
4640 0 : loop_optimizer_finalize ();
4641 0 : free_dominance_info (CDI_DOMINATORS);
4642 0 : free_dominance_info (CDI_POST_DOMINATORS);
4643 0 : }
4644 :
4645 : /* Use AutoFDO profile to annotate the control flow graph.
4646 : Return the todo flag. */
4647 :
4648 : static unsigned int
4649 0 : auto_profile (void)
4650 : {
4651 0 : struct cgraph_node *node;
4652 :
4653 0 : if (symtab->state == FINISHED || !afdo_source_profile)
4654 : return 0;
4655 :
4656 0 : init_node_map (true);
4657 0 : profile_info = autofdo::afdo_profile_info;
4658 0 : afdo_source_profile->offline_unrealized_inlines ();
4659 :
4660 0 : FOR_EACH_FUNCTION (node)
4661 : {
4662 0 : if (!gimple_has_body_p (node->decl))
4663 0 : continue;
4664 :
4665 : /* Don't profile functions produced for builtin stuff. */
4666 0 : if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
4667 0 : continue;
4668 :
4669 0 : push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4670 :
4671 : /* Local pure-const may imply need to fixup the cfg.
4672 : This is similar to what is done in tree-profile.cc. */
4673 0 : if ((execute_fixup_cfg () & TODO_cleanup_cfg))
4674 0 : cleanup_tree_cfg ();
4675 :
4676 0 : autofdo::afdo_annotate_cfg ();
4677 0 : compute_function_frequency ();
4678 :
4679 0 : free_dominance_info (CDI_DOMINATORS);
4680 0 : free_dominance_info (CDI_POST_DOMINATORS);
4681 0 : cgraph_edge::rebuild_edges ();
4682 0 : pop_cfun ();
4683 : }
4684 :
4685 : return 0;
4686 : }
4687 : } /* namespace autofdo. */
4688 :
4689 : /* Read the profile from the profile data file. */
4690 :
4691 : void
4692 0 : read_autofdo_file (void)
4693 : {
4694 0 : if (auto_profile_file == NULL)
4695 0 : auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
4696 :
4697 0 : autofdo::afdo_profile_info = XNEW (gcov_summary);
4698 0 : autofdo::afdo_profile_info->runs = 1;
4699 0 : autofdo::afdo_profile_info->sum_max = 0;
4700 0 : autofdo::afdo_profile_info->cutoff = 1;
4701 :
4702 : /* Read the profile from the profile file. */
4703 0 : autofdo::read_profile ();
4704 0 : }
4705 :
4706 : /* Free the resources. */
4707 :
4708 : void
4709 0 : end_auto_profile (void)
4710 : {
4711 0 : delete autofdo::afdo_source_profile;
4712 0 : delete autofdo::afdo_string_table;
4713 0 : delete autofdo::afdo_summary_info;
4714 0 : profile_info = NULL;
4715 0 : }
4716 :
4717 : /* Returns TRUE if EDGE is hot enough to be inlined early. */
4718 :
4719 : bool
4720 0 : afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
4721 : {
4722 0 : gcov_type count
4723 0 : = autofdo::afdo_source_profile->get_callsite_total_count (edge);
4724 :
4725 0 : if (count > 0)
4726 : {
4727 0 : bool is_hot;
4728 0 : profile_count pcount = profile_count::from_gcov_type (count).afdo ();
4729 0 : is_hot = maybe_hot_afdo_count_p (pcount);
4730 0 : if (dump_file)
4731 : {
4732 0 : fprintf (dump_file, "Call %s -> %s has %s afdo profile count ",
4733 0 : edge->caller->dump_name (), edge->callee->dump_name (),
4734 : is_hot ? "hot" : "cold");
4735 0 : pcount.dump (dump_file);
4736 0 : fprintf (dump_file, "\n");
4737 : }
4738 0 : return is_hot;
4739 : }
4740 :
4741 : return false;
4742 : }
4743 :
4744 : /* Do indirect call promotion during early inlining to make the
4745 : IR match the profiled binary before actual annotation.
4746 :
4747 : This is needed because an indirect call might have been promoted
4748 : and inlined in the profiled binary. If we do not promote and
4749 : inline these indirect calls before annotation, the profile for
4750 : these promoted functions will be lost.
4751 :
4752 : e.g. foo() --indirect_call--> bar()
4753 : In profiled binary, the callsite is promoted and inlined, making
4754 : the profile look like:
4755 :
4756 : foo: {
4757 : loc_foo_1: count_1
4758 : bar@loc_foo_2: {
4759 : loc_bar_1: count_2
4760 : loc_bar_2: count_3
4761 : }
4762 : }
4763 :
4764 : Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
4765 : If we perform annotation on it, the profile inside bar@loc_foo2
4766 : will be wasted.
4767 :
4768 : To avoid this, we promote loc_foo_2 and inline the promoted bar
4769 : function before annotation, so the profile inside bar@loc_foo2
4770 : will be useful. */
4771 :
4772 : bool
4773 0 : afdo_vpt_for_early_inline (cgraph_node *node)
4774 : {
4775 0 : if (!node->indirect_calls)
4776 : return false;
4777 0 : bool changed = false;
4778 0 : cgraph_node *outer = node->inlined_to ? node->inlined_to : node;
4779 0 : if (autofdo::afdo_source_profile->get_function_instance_by_decl
4780 0 : (outer->decl) == NULL)
4781 : return false;
4782 0 : for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
4783 : {
4784 0 : gcov_type bb_count = 0;
4785 0 : autofdo::count_info info;
4786 0 : basic_block bb = gimple_bb (e->call_stmt);
4787 :
4788 : /* TODO: This is quadratic; cache the value. */
4789 0 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
4790 0 : !gsi_end_p (gsi); gsi_next (&gsi))
4791 : {
4792 0 : gimple *stmt = gsi_stmt (gsi);
4793 0 : if (!stmt_loc_used_by_debug_info (stmt))
4794 0 : continue;
4795 0 : autofdo::count_info info;
4796 0 : if (autofdo::afdo_source_profile->get_count_info (stmt, &info, node))
4797 0 : bb_count = MAX (bb_count, info.count);
4798 0 : }
4799 0 : autofdo::afdo_source_profile->get_count_info (e->call_stmt, &info, node);
4800 0 : info.count = bb_count;
4801 0 : if (!autofdo::afdo_source_profile->update_inlined_ind_target
4802 0 : (e->call_stmt, &info, node))
4803 0 : continue;
4804 0 : changed |= autofdo::afdo_vpt (e->call_stmt, info.targets, true, e);
4805 0 : }
4806 : return changed;
4807 : }
4808 :
4809 : /* If speculation used during early inline, remove the target
4810 : so we do not speculate the indirect edge again during afdo pass. */
4811 :
4812 : void
4813 0 : remove_afdo_speculative_target (cgraph_edge *e)
4814 : {
4815 0 : autofdo::afdo_source_profile->remove_icall_target (e);
4816 0 : }
4817 :
4818 : namespace
4819 : {
4820 :
4821 : const pass_data pass_data_ipa_auto_profile = {
4822 : SIMPLE_IPA_PASS, "afdo", /* name */
4823 : OPTGROUP_NONE, /* optinfo_flags */
4824 : TV_IPA_AUTOFDO, /* tv_id */
4825 : 0, /* properties_required */
4826 : 0, /* properties_provided */
4827 : 0, /* properties_destroyed */
4828 : 0, /* todo_flags_start */
4829 : 0, /* todo_flags_finish */
4830 : };
4831 :
4832 : class pass_ipa_auto_profile : public simple_ipa_opt_pass
4833 : {
4834 : public:
4835 288775 : pass_ipa_auto_profile (gcc::context *ctxt)
4836 577550 : : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
4837 : {
4838 : }
4839 :
4840 : /* opt_pass methods: */
4841 : bool
4842 232449 : gate (function *) final override
4843 : {
4844 232449 : return flag_auto_profile;
4845 : }
4846 : unsigned int
4847 0 : execute (function *) final override
4848 : {
4849 0 : return autofdo::auto_profile ();
4850 : }
4851 : }; // class pass_ipa_auto_profile
4852 :
4853 : } // anon namespace
4854 :
4855 : simple_ipa_opt_pass *
4856 288775 : make_pass_ipa_auto_profile (gcc::context *ctxt)
4857 : {
4858 288775 : return new pass_ipa_auto_profile (ctxt);
4859 : }
4860 :
4861 : namespace
4862 : {
4863 :
4864 : const pass_data pass_data_ipa_auto_profile_offline = {
4865 : SIMPLE_IPA_PASS, "afdo_offline", /* name */
4866 : OPTGROUP_NONE, /* optinfo_flags */
4867 : TV_IPA_AUTOFDO_OFFLINE, /* tv_id */
4868 : 0, /* properties_required */
4869 : 0, /* properties_provided */
4870 : 0, /* properties_destroyed */
4871 : 0, /* todo_flags_start */
4872 : 0, /* todo_flags_finish */
4873 : };
4874 :
4875 : class pass_ipa_auto_profile_offline : public simple_ipa_opt_pass
4876 : {
4877 : public:
4878 288775 : pass_ipa_auto_profile_offline (gcc::context *ctxt)
4879 577550 : : simple_ipa_opt_pass (pass_data_ipa_auto_profile_offline, ctxt)
4880 : {
4881 : }
4882 :
4883 : /* opt_pass methods: */
4884 : bool
4885 232449 : gate (function *) final override
4886 : {
4887 232449 : return flag_auto_profile;
4888 : }
4889 : unsigned int
4890 0 : execute (function *) final override
4891 : {
4892 0 : read_autofdo_file ();
4893 0 : if (autofdo::afdo_source_profile)
4894 0 : autofdo::afdo_source_profile->offline_external_functions ();
4895 0 : return 0;
4896 : }
4897 : }; // class pass_ipa_auto_profile
4898 :
4899 : } // anon namespace
4900 :
4901 : simple_ipa_opt_pass *
4902 288775 : make_pass_ipa_auto_profile_offline (gcc::context *ctxt)
4903 : {
4904 288775 : return new pass_ipa_auto_profile_offline (ctxt);
4905 : }
|