Line data Source code
1 : /* Read and annotate call graph profile from the auto profile data file.
2 : Copyright (C) 2014-2026 Free Software Foundation, Inc.
3 : Contributed by Dehao Chen (dehao@google.com)
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "config.h"
22 : #define INCLUDE_MAP
23 : #define INCLUDE_SET
24 : #include "system.h"
25 : #include "coretypes.h"
26 : #include "backend.h"
27 : #include "tree.h"
28 : #include "gimple.h"
29 : #include "predict.h"
30 : #include "alloc-pool.h"
31 : #include "tree-pass.h"
32 : #include "ssa.h"
33 : #include "cgraph.h"
34 : #include "gcov-io.h"
35 : #include "diagnostic-core.h"
36 : #include "profile.h"
37 : #include "langhooks.h"
38 : #include "context.h"
39 : #include "pass_manager.h"
40 : #include "cfgloop.h"
41 : #include "tree-cfg.h"
42 : #include "tree-cfgcleanup.h"
43 : #include "tree-into-ssa.h"
44 : #include "gimple-iterator.h"
45 : #include "value-prof.h"
46 : #include "symbol-summary.h"
47 : #include "sreal.h"
48 : #include "ipa-cp.h"
49 : #include "ipa-prop.h"
50 : #include "ipa-fnsummary.h"
51 : #include "ipa-inline.h"
52 : #include "tree-inline.h"
53 : #include "auto-profile.h"
54 : #include "tree-pretty-print.h"
55 : #include "gimple-pretty-print.h"
56 : #include "output.h"
57 :
58 : /* The following routines implement AutoFDO optimization.
59 :
60 : This optimization uses sampling profiles to annotate basic block counts
61 : and uses heuristics to estimate branch probabilities.
62 :
63 : There are five phases in AutoFDO:
64 :
65 : Phase 1: At startup.
66 : Read profile from the profile data file.
67 : The following info is read from the profile datafile:
68 : * string_table: a map between function name and its index.
69 : * autofdo_source_profile: a map from function_instance name to
70 : function_instance. This is represented as a forest of
71 : function_instances.
72 : * WorkingSet: a histogram of how many instructions are covered for a
73 : given percentage of total cycles. This is describing the binary
74 : level information (not source level). This info is used to help
75 : decide if we want aggressive optimizations that could increase
76 : code footprint (e.g. loop unroll etc.)
77 : A function instance is an instance of function that could either be a
78 : standalone symbol, or a clone of a function that is inlined into another
79 : function.
80 :
81 : Phase 2: In afdo_offline pass.
82 : Remove function instances from other translation units
83 : and offline all cross-translation unit inlining done during train
84 : run compilation. This is necessary to not lose profiles with
85 : LTO train run.
86 :
87 : Phase 3: During early optimization.
88 : AFDO inline + value profile transformation.
89 : This happens during early optimization.
90 : During early inlining AFDO inliner is executed which
91 : uses autofdo_source_profile to find if a callsite is:
92 : * inlined in the profiled binary.
93 : * callee body is hot in the profiling run.
94 : If both conditions are satisfied, early inline will inline the callsite
95 : regardless of the code growth.
96 :
97 : Performing this early has benefit of doing early optimizations
98 : before read IPA passes and getting more "context sensitivity" of
99 : the profile read. Profile of inlined functions may differ
100 : significantly from one inline instance to another and from the
101 : offline version.
102 :
103 : This is controlled by -fauto-profile-inlining and is independent
104 : of -fearly-inlining.
105 :
106 : Phase 4: In AFDO pass.
107 : Offline all functions that have been inlined in the
108 : train run but were not inlined in early inlining nor AFDO
109 : inline.
110 :
111 : Phase 5: In AFDO pass.
112 : Annotate control flow graph.
113 : * Annotate basic block count
114 : * Estimate branch probability
115 : * Use earlier static profile to fill in the gaps
116 : if AFDO profile is ambiguous
117 :
118 : After the above 5 phases, all profile is readily annotated on the GCC IR.
119 : AutoFDO tries to reuse all FDO infrastructure as much as possible to make
120 : use of the profile. E.g. it uses existing mechanism to calculate the basic
121 : block/edge frequency, as well as the cgraph node/edge count.
122 : */
123 :
124 : #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
125 :
126 : /* Profile counts determined by AFDO smaller than afdo_hot_bb_threshold are
127 : considered cold. */
128 : gcov_type afdo_hot_bb_threshold = -1;
129 :
130 : /* Return true if COUNT is possibly hot. */
131 : bool
132 0 : maybe_hot_afdo_count_p (profile_count count)
133 : {
134 0 : gcc_checking_assert (count.ipa ().initialized_p ());
135 0 : return count.ipa ().to_gcov_type () >= afdo_hot_bb_threshold;
136 : }
137 :
138 : /* Return true if location of STMT may be expressed by debug info. */
139 :
140 : static bool
141 0 : stmt_loc_used_by_debug_info (gimple *stmt)
142 : {
143 : /* Only inline_entry and gimple_bind's locations
144 : are not output into debug output. */
145 0 : if (is_gimple_debug (stmt))
146 0 : return gimple_debug_begin_stmt_p (stmt);
147 0 : if (gimple_code (stmt) == GIMPLE_LABEL
148 : || gimple_code (stmt) == GIMPLE_NOP
149 : || gimple_code (stmt) == GIMPLE_PREDICT)
150 : return false;
151 0 : if (gimple_clobber_p (stmt))
152 0 : return false;
153 : return true;
154 : }
155 :
156 : namespace autofdo
157 : {
158 :
159 : /* Intermediate edge info used when propagating AutoFDO profile information.
160 : We can't edge->count() directly since it's computed from edge's probability
161 : while probability is yet not decided during propagation. */
162 : #define AFDO_EINFO(e) ((class edge_info *) e->aux)
163 : class edge_info
164 : {
165 : public:
166 0 : edge_info () : count_ (profile_count::zero ().afdo ()), annotated_ (false) {}
167 0 : bool is_annotated () const { return annotated_; }
168 0 : void set_annotated () { annotated_ = true; }
169 0 : profile_count get_count () const { return count_; }
170 0 : void set_count (profile_count count) { count_ = count; }
171 : private:
172 : profile_count count_;
173 : bool annotated_;
174 : };
175 :
176 : /* Represent a source location: (function_decl, lineno). */
177 : struct decl_lineno
178 : {
179 : tree decl;
180 : /* Relative locations stored in auto-profile. */
181 : unsigned int afdo_loc;
182 : /* Actual location afdo_loc was computed from used to output diagnostics. */
183 : location_t location;
184 : };
185 :
186 : /* Represent an inline stack. vector[0] is the leaf node. */
187 : typedef auto_vec<decl_lineno, 20> inline_stack;
188 :
189 : /* String array that stores function names. */
190 : typedef auto_vec<const char *> string_vector;
191 :
192 : /* Map from function name's index in string_table to target's
193 : execution count. */
194 : typedef std::map<unsigned, gcov_type> icall_target_map;
195 :
196 : /* Set of gimple stmts. Used to track if the stmt has already been promoted
197 : to direct call. */
198 : typedef std::set<gimple *> stmt_set;
199 :
200 : /* Set and map used to translate name indexes. */
201 : typedef hash_set<int_hash <int, -1, -2>> name_index_set;
202 : typedef hash_map<int_hash <int, -1, -2>, int> name_index_map;
203 :
204 : /* Represent count info of an inline stack. */
205 0 : class count_info
206 : {
207 : public:
208 : /* Sampled count of the inline stack. */
209 : gcov_type count;
210 :
211 : /* Map from indirect call target to its sample count. */
212 : icall_target_map targets;
213 :
214 : /* Whether this inline stack is already used in annotation.
215 :
216 : Each inline stack should only be used to annotate IR once.
217 : This will be enforced when instruction-level discriminator
218 : is supported. */
219 : };
220 :
/* Ordering predicate for "const char *" keys: A sorts before B when strcmp
   reports A as smaller.  */
struct string_compare
{
  bool operator()(const char *a, const char *b) const
  {
    const int order = strcmp (a, b);
    return order < 0;
  }
};
229 :
/* Summary information for the profile.  */
struct summary_info
{
  /* There are currently 16 hard-coded percentiles in the GCOV format.  */
  static constexpr unsigned NUM_PERCENTILES = 16;

  /* One entry of the detailed summary, a histogram-based calculation of the
     minimum execution count required to belong to a certain set of
     percentile of counts.  */
  struct detailed_summary
  {
    /* The percentile that this represents (multiplied by 1,000,000).  */
    uint32_t cutoff;
    /* The minimum execution count required to belong to this percentile.  */
    uint64_t min_count;
    /* The number of samples which belong to this percentile.  */
    uint64_t num_counts;
  };

  /* The sum of execution counts of all samples.  */
  uint64_t total_count;
  /* The maximum individual count.  */
  uint64_t max_count;
  /* The maximum head count across all functions.  */
  uint64_t max_function_count;
  /* The number of lines that have samples.  */
  uint64_t num_counts;
  /* The number of functions that have samples.  */
  uint64_t num_functions;
  /* The percentile threshold information, one entry per percentile.  */
  detailed_summary detailed_summaries[NUM_PERCENTILES];

  /* Read profile.  Return TRUE on success.  */
  bool read ();

  /* Get the minimum count required for percentile CUTOFF.  */
  uint64_t get_threshold_count (uint32_t cutoff);
};
268 :
269 : /* Store a string array, indexed by string position in the array. */
270 : class string_table
271 : {
272 : public:
273 : static const int unknown_filename = -1;
274 :
275 0 : string_table ()
276 0 : {}
277 :
278 : ~string_table ();
279 :
280 : /* For a given string, returns its index. */
281 : int get_index (const char *name) const;
282 :
283 : /* For a given decl, returns the index of the decl name. */
284 : int get_index_by_decl (tree decl) const;
285 :
286 : /* For a given index, returns the symbol name. */
287 : const char *get_symbol_name (int index) const;
288 :
289 : /* For a given index, returns the filename. */
290 : const char *get_filename (int index) const;
291 :
292 : /* For a given symbol name index, returns the filename index. */
293 : int get_filename_by_symbol (int index) const;
294 :
295 : /* For a given function name, returns the filename index. */
296 : int get_filename_by_symbol (const char *name) const;
297 :
298 : /* For a given filename, returns the index. */
299 : int get_filename_index (const char *name) const;
300 :
301 : /* Get the original name and file name index for a node. This will return the
302 : name from the current TU if there are multiple symbols that map to
303 : NAME. */
304 : std::pair<const char *, int> get_original_name (const char *name) const;
305 :
306 : /* Read profile, return TRUE on success. */
307 : bool read ();
308 :
309 : /* Return number of entries. */
310 0 : size_t num_entries () { return symbol_names_.length (); }
311 :
312 : /* Add new symbol name STRING (with an associated file name FILENAME_IDX) and
313 : return its index. */
314 : int add_symbol_name (const char *string, int filename_idx);
315 :
316 : /* Add new filename and return its index (returning the same if it already
317 : exists). */
318 : int add_filename (const char *name);
319 :
320 : /* Return cgraph node corresponding to given name index. */
321 : cgraph_node *get_cgraph_node (int);
322 :
323 : const string_vector& filenames () { return filenames_; }
324 : private:
325 : typedef std::map<const char *, unsigned, string_compare> string_index_map;
326 : typedef std::map<const char *, auto_vec<unsigned>, string_compare>
327 : clashing_name_map;
328 : typedef std::map<const char *, char *, string_compare> string_string_map;
329 :
330 : string_vector symbol_names_;
331 : string_vector filenames_;
332 :
333 : string_index_map symbol_name_map_;
334 : string_index_map filename_map_;
335 : string_index_map symbol_to_filename_map_;
336 :
337 : string_string_map original_names_map_;
338 : clashing_name_map clashing_names_map_;
339 : };
340 :
/* Key identifying a function_instance so that it can be told apart from
   other instances.  It pairs the string_table indices of the file name and
   of the symbol name, and orders by file name first, then symbol name.  */

class function_instance_descriptor
{
  /* The string_table index for the file name.  */
  unsigned file_name_;
  /* The string_table index for the function name.  */
  unsigned symbol_name_;

public:
  unsigned file_name () const { return file_name_; }
  unsigned symbol_name () const { return symbol_name_; }

  function_instance_descriptor (unsigned file_name, unsigned symbol_name)
    : file_name_ (file_name), symbol_name_ (symbol_name)
  {}

  /* Same as above for signed indices; the values are converted to
     unsigned.  */
  function_instance_descriptor (int file_name, int symbol_name)
    : file_name_ (file_name), symbol_name_ (symbol_name)
  {}

  void set_symbol_name (unsigned new_name) { symbol_name_ = new_name; }

  /* Lexicographic comparison on (file name, symbol name).  */
  bool operator< (const function_instance_descriptor &other) const
  {
    if (file_name_ != other.file_name_)
      return file_name_ < other.file_name_;
    return symbol_name_ < other.symbol_name_;
  }
};
373 :
374 : /* Profile of a function instance:
375 : 1. total_count of the function.
376 : 2. head_count (entry basic block count) of the function (only valid when
377 : function is a top-level function_instance, i.e. it is the original copy
378 : instead of the inlined copy).
379 : 3. map from source location (decl_lineno) to profile (count_info).
380 : 4. map from callsite to callee function_instance. */
381 :
382 : class function_instance
383 : {
384 : public:
385 : typedef auto_vec<function_instance *> function_instance_stack;
386 :
387 : /* Read the profile and return a function_instance with head count as
388 : HEAD_COUNT. Recursively read callsites to create nested function_instances
389 : too. STACK is used to track the recursive creation process. */
390 : static function_instance *
391 : read_function_instance (function_instance_stack *stack, bool toplevel = true);
392 :
393 : /* Recursively deallocate all callsites (nested function_instances). */
394 : ~function_instance ();
395 :
396 : /* Accessors. */
397 0 : int symbol_name () const { return descriptor_.symbol_name (); }
398 0 : int file_name () const { return descriptor_.file_name (); }
399 0 : void set_symbol_name (int index) { descriptor_.set_symbol_name (index); }
400 0 : function_instance_descriptor get_descriptor () const { return descriptor_; }
401 :
402 : gcov_type
403 0 : total_count () const
404 : {
405 0 : return total_count_;
406 : }
407 :
408 : /* Return head count or -1 if unknown. */
409 : gcov_type
410 0 : head_count () const
411 : {
412 0 : return head_count_;
413 : }
414 :
415 : gcov_type
416 0 : timestamp () const
417 : {
418 0 : return timestamp_;
419 : }
420 :
421 0 : void set_timestamp (gcov_type timestamp) { timestamp_ = timestamp; }
422 :
423 : /* Propagate timestamp from top-level function_instance to
424 : inlined instances. */
425 : void prop_timestamp ();
426 :
427 : /* Traverse callsites of the current function_instance to find one at the
428 : location of LINENO and callee name represented in DECL.
429 : LOCATION should match LINENO and is used to output diagnostics. */
430 : function_instance *get_function_instance_by_decl (unsigned lineno,
431 : tree decl,
432 : location_t location) const;
433 :
434 : /* Merge profile of clones. Note that cloning hasn't been performed when
435 : we annotate the CFG (at this stage). */
436 : void merge (function_instance *other,
437 : vec <function_instance *> &new_functions);
438 :
439 : /* Look for inline instances that was not realized and
440 : remove them while possibly merging them to offline variants. */
441 : void offline_if_not_realized (vec <function_instance *> &new_functions);
442 :
443 : /* Match function instance with gimple body. */
444 : bool match (cgraph_node *node, vec <function_instance *> &new_functions,
445 : name_index_map &to_symbol_name);
446 :
447 : /* Offline all inlined functions with name in SEEN.
448 : If new toplevel functions are created, add them to NEW_FUNCTIONS. */
449 : void offline_if_in_set (name_index_set &seen,
450 : vec <function_instance *> &new_functions);
451 :
452 : /* Walk inlined functions and if their name is not in SEEN
453 : remove it. */
454 :
455 : void remove_external_functions (name_index_set &seen,
456 : name_index_map &to_symbol_name,
457 : vec <function_instance *> &new_functions);
458 :
459 : /* Store the profile info for LOC in INFO. Return TRUE if profile info
460 : is found. */
461 : bool get_count_info (location_t loc, count_info *info) const;
462 :
463 : /* Read the inlined indirect call target profile for STMT in FN and store it
464 : in MAP, return the total count for all inlined indirect calls. */
465 : gcov_type find_icall_target_map (tree fn, gcall *stmt,
466 : icall_target_map *map) const;
467 :
468 : /* Remove inlined indirect call target profile for STMT in FN. */
469 : void remove_icall_target (tree fn, gcall *stmt);
470 :
471 : /* Mark LOC as annotated. */
472 : void mark_annotated (location_t loc);
473 :
474 : void dump (FILE *f, int indent = 0, bool nested = false) const;
475 :
476 : void dump_inline_stack (FILE *f) const;
477 :
478 : DEBUG_FUNCTION void debug () const;
479 :
480 : /* Mark function as removed from indir target list. */
481 : void
482 0 : remove_icall_target ()
483 : {
484 0 : removed_icall_target_ = true;
485 : }
486 :
487 : /* Return true if function is removed from indir target list. */
488 : bool
489 0 : removed_icall_target ()
490 : {
491 0 : return removed_icall_target_;
492 : }
493 :
494 : /* Set inlined_to pointer. */
495 : void
496 0 : set_inlined_to (function_instance *inlined_to)
497 : {
498 0 : gcc_checking_assert (inlined_to != this);
499 0 : inlined_to_ = inlined_to;
500 0 : }
501 :
502 : /* Return pointer to the function instance this function is inlined
503 : to or NULL if it is outer instance. */
504 : function_instance *
505 0 : inlined_to () const
506 : {
507 0 : return inlined_to_;
508 : }
509 :
510 : /* Mark function as realized. */
511 : void
512 0 : set_realized ()
513 : {
514 0 : realized_ = true;
515 0 : }
516 :
517 : /* Return true if function is realized. */
518 : bool
519 0 : realized_p ()
520 : {
521 0 : return realized_;
522 : }
523 :
524 : /* Mark function as in_worklist. */
525 : void
526 0 : set_in_worklist ()
527 : {
528 0 : gcc_checking_assert (!inlined_to_ && !in_worklist_p ());
529 0 : in_worklist_ = true;
530 0 : }
531 :
532 : void
533 0 : clear_in_worklist ()
534 : {
535 0 : gcc_checking_assert (!inlined_to_ && in_worklist_p ());
536 0 : in_worklist_ = false;
537 0 : }
538 :
539 :
540 : /* Return true if function is in_worklist. */
541 : bool
542 0 : in_worklist_p ()
543 : {
544 0 : return in_worklist_;
545 : }
546 :
547 : /* Return corresponding cgraph node. */
548 : cgraph_node *get_cgraph_node ();
549 :
550 : void
551 0 : set_location (location_t l)
552 : {
553 0 : gcc_checking_assert (location_ == UNKNOWN_LOCATION);
554 0 : location_= l;
555 0 : }
556 :
557 : location_t
558 0 : get_location ()
559 : {
560 0 : return location_;
561 : }
562 :
563 : void
564 0 : set_call_location (location_t l)
565 : {
566 0 : gcc_checking_assert (call_location_ == UNKNOWN_LOCATION
567 : && l != UNKNOWN_LOCATION);
568 0 : call_location_= l;
569 0 : }
570 :
571 : location_t
572 0 : get_call_location ()
573 : {
574 0 : return call_location_;
575 : }
576 :
577 : /* Lookup count and warn about duplicates. */
578 : count_info *lookup_count (location_t loc, inline_stack &stack,
579 : cgraph_node *node);
580 : private:
581 : /* Callsite, represented as (decl_lineno, callee_function_name_index). */
582 : typedef std::pair<unsigned, unsigned> callsite;
583 :
584 : /* Map from callsite to callee function_instance. */
585 : typedef std::map<callsite, function_instance *> callsite_map;
586 :
587 0 : function_instance (unsigned symbol_name, unsigned file_name,
588 : gcov_type head_count)
589 0 : : descriptor_ (file_name, symbol_name), total_count_ (0),
590 0 : head_count_ (head_count), timestamp_ (0),
591 0 : removed_icall_target_ (false), realized_ (false), in_worklist_ (false),
592 0 : inlined_to_ (NULL), location_ (UNKNOWN_LOCATION),
593 0 : call_location_ (UNKNOWN_LOCATION)
594 : {
595 : }
596 :
597 : /* Map from source location (decl_lineno) to profile (count_info). */
598 : typedef std::map<unsigned, count_info> position_count_map;
599 :
600 : /* The indices into the string table identifying the function_instance. */
601 : function_instance_descriptor descriptor_;
602 :
603 : /* Total sample count. */
604 : gcov_type total_count_;
605 :
606 : /* Entry BB's sample count. */
607 : gcov_type head_count_;
608 :
609 : /* perf timestamp associated with first execution of function, which is
610 : used to compute node->tp_first_run. */
611 : gcov_type timestamp_;
612 :
613 : /* Map from callsite location to callee function_instance. */
614 : callsite_map callsites;
615 :
616 : /* Map from source location to count_info. */
617 : position_count_map pos_counts;
618 :
619 : /* True if function was removed from indir target list. */
620 : bool removed_icall_target_;
621 :
622 : /* True if function exists in IL. I.e. for toplevel instance we
623 : have corresponding symbol and for inline instance we inlined
624 : to it. */
625 : bool realized_;
626 :
627 : /* True if function is in worklist for merging/offlining. */
628 : bool in_worklist_;
629 :
630 : /* Pointer to outer function instance or NULL if this
631 : is a toplevel one. */
632 : function_instance *inlined_to_;
633 :
634 : /* Location of function and its call (in case it is inlined). */
635 : location_t location_, call_location_;
636 :
637 : /* Turn inline instance to offline. */
638 : static bool offline (function_instance *fn,
639 : vec <function_instance *> &new_functions);
640 :
641 : /* Helper routine for prop_timestamp. */
642 : void prop_timestamp_1 (gcov_type timestamp);
643 : };
644 :
645 : /* Profile for all functions. */
646 : class autofdo_source_profile
647 : {
648 : public:
649 : static autofdo_source_profile *
650 0 : create ()
651 : {
652 0 : autofdo_source_profile *map = new autofdo_source_profile ();
653 :
654 0 : if (map->read ())
655 : return map;
656 0 : delete map;
657 0 : return NULL;
658 : }
659 :
660 : ~autofdo_source_profile ();
661 :
662 : /* For a given DECL, returns the top-level function_instance. */
663 : function_instance *get_function_instance_by_decl (tree decl, const char * = NULL) const;
664 :
665 : /* For a given DESCRIPTOR, return the matching instance if found. */
666 : function_instance *
667 : get_function_instance_by_descriptor (function_instance_descriptor) const;
668 :
669 : void add_function_instance (function_instance *);
670 :
671 : /* Find count_info for a given gimple STMT. If found, store the count_info
672 : in INFO and return true; otherwise return false.
673 : NODE can be used to specify particular inline clone. */
674 : bool get_count_info (gimple *stmt, count_info *info,
675 : cgraph_node *node = NULL) const;
676 :
677 : /* Find count_info for a given gimple location GIMPLE_LOC. If found,
678 : store the count_info in INFO and return true; otherwise return false.
679 : NODE can be used to specify particular inline clone. */
680 : bool get_count_info (location_t gimple_loc, count_info *info,
681 : cgraph_node *node = NULL) const;
682 :
683 : /* Find total count of the callee of EDGE. */
684 : gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
685 :
686 : /* Update value profile INFO for STMT within NODE from the inlined indirect
687 : callsite. Return true if INFO is updated. */
688 : bool update_inlined_ind_target (gcall *stmt, count_info *info,
689 : cgraph_node *node);
690 :
691 : void remove_icall_target (cgraph_edge *e);
692 :
693 : /* Offline all functions not defined in the current translation unit. */
694 : void offline_external_functions ();
695 :
696 : void offline_unrealized_inlines ();
697 :
698 : private:
699 : /* Map from pair of function_instance filename and symbol name (in
700 : string_table) to function_instance. */
701 : typedef std::map<function_instance_descriptor, function_instance *>
702 : name_function_instance_map;
703 :
704 0 : autofdo_source_profile () {}
705 :
706 : /* Read AutoFDO profile and returns TRUE on success. */
707 : bool read ();
708 :
709 : /* Return the function_instance in the profile that correspond to the
710 : inline STACK. */
711 : function_instance *
712 : get_function_instance_by_inline_stack (const inline_stack &stack) const;
713 :
714 : /* Find the matching function instance which has DESCRIPTOR as its
715 : descriptor. If not found, also try checking if an instance exists with the
716 : same name which has no associated filename. */
717 : name_function_instance_map::const_iterator find_iter_for_function_instance (
718 : function_instance_descriptor descriptor) const;
719 :
720 : /* Similar to the above, but return a pointer to the instance instead of an
721 : iterator. */
722 : function_instance *
723 : find_function_instance (function_instance_descriptor descriptor) const;
724 :
725 : /* Remove a function instance from the map. Returns true if the entry was
726 : actually deleted. */
727 : bool remove_function_instance (function_instance *inst);
728 :
729 : name_function_instance_map map_;
730 :
731 : auto_vec <function_instance *> duplicate_functions_;
732 : };
733 :
734 : /* Store the summary information from the GCOV file. */
735 : static summary_info *afdo_summary_info;
736 :
737 : /* Store the strings read from the profile data file. */
738 : static string_table *afdo_string_table;
739 :
740 : /* Store the AutoFDO source profile. */
741 : static autofdo_source_profile *afdo_source_profile;
742 :
743 : /* gcov_summary structure to store the profile_info. */
744 : static gcov_summary *afdo_profile_info;
745 :
746 : /* Map from timestamp -> tp_first_run.
747 :
748 : The purpose of this map is to map 64-bit timestamp values to (1..N) sorted
749 : by ascending order of timestamps and assign that to node->tp_first_run,
750 : since we don't need the full 64-bit range. */
751 : static std::map<gcov_type, int> timestamp_info_map;
752 :
753 : /* Scaling factor for afdo data. Compared to normal profile
754 : AFDO profile counts are much lower, depending on sampling
755 : frequency. We scale data up to reduce effects of roundoff
756 : errors. */
757 :
758 : static gcov_type afdo_count_scale = 1;
759 :
760 : /* Helper functions. */
761 :
762 : /* Return the original name of NAME: strip the suffix that starts
763 : with '.' for names that are generated after auto-profile pass.
764 : This is to match profiled names with the names in the IR at this stage.
765 : Note that we only have to strip suffix and not in the middle.
766 : Caller is responsible for freeing RET. */
767 :
768 : static char *
769 0 : get_original_name (const char *name, bool alloc = true)
770 : {
771 0 : char *ret = alloc ? xstrdup (name) : const_cast<char *> (name);
772 0 : char *last_dot = strrchr (ret, '.');
773 0 : if (last_dot == NULL)
774 : return ret;
775 0 : bool only_digits = true;
776 : char *ptr = last_dot;
777 0 : while (*(++ptr) != 0)
778 0 : if (*ptr < '0' || *ptr > '9')
779 : {
780 : only_digits = false;
781 : break;
782 : }
783 0 : if (only_digits)
784 0 : *last_dot = 0;
785 0 : char *next_dot = strrchr (ret, '.');
786 : /* if nested function such as foo.0, return foo.0 */
787 0 : if (next_dot == NULL)
788 : {
789 0 : *last_dot = '.';
790 0 : return ret;
791 : }
792 : /* Suffixes of clones that compiler generates after auto-profile. */
793 0 : const char *suffixes[] = {"isra", "constprop", "lto_priv", "part", "cold"};
794 0 : for (unsigned i = 0; i < sizeof (suffixes) / sizeof (const char *); ++i)
795 : {
796 0 : int len = strlen (suffixes[i]);
797 0 : if (len == last_dot - next_dot - 1
798 0 : && strncmp (next_dot + 1, suffixes[i], strlen (suffixes[i])) == 0)
799 : {
800 0 : *next_dot = 0;
801 0 : return get_original_name (ret, false);
802 : }
803 : }
804 : /* Otherwise, it is for clones such as .omp_fn.N that was done before
805 : auto-profile and should be kept as it is. */
806 0 : *last_dot = '.';
807 0 : return ret;
808 : }
809 :
810 : /* Return the combined location, which is a 32bit integer in which
811 : higher 16 bits stores the line offset of LOC to the start lineno
812 : of DECL, The lower 16 bits stores the discriminator. */
813 :
814 : static unsigned
815 0 : get_combined_location (location_t loc, tree decl)
816 : {
817 0 : bool warned = false;
818 : /* TODO: allow more bits for line and less bits for discriminator. */
819 0 : if ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) >= (1<<15)
820 0 : || (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) <= -(1<<15))
821 0 : warned = warning_at (loc, OPT_Wauto_profile,
822 : "auto-profile cannot encode offset %i "
823 : "that exceeds 16 bytes",
824 0 : LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl));
825 0 : if (warned)
826 0 : inform (DECL_SOURCE_LOCATION (decl), "location offset is related to");
827 0 : if ((unsigned)get_discriminator_from_loc (loc) >= (1u << 16))
828 0 : warning_at (loc, OPT_Wauto_profile,
829 : "auto-profile cannot encode discriminators "
830 : "that exceeds 16 bytes");
831 0 : return ((unsigned)(LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16)
832 0 : | get_discriminator_from_loc (loc);
833 : }
834 :
835 : /* Return the function decl of a given lexical BLOCK. */
836 :
837 : static tree
838 0 : get_function_decl_from_block (tree block)
839 : {
840 0 : if (!inlined_function_outer_scope_p (block))
841 : return NULL_TREE;
842 :
843 0 : return BLOCK_ABSTRACT_ORIGIN (block);
844 : }
845 :
/* Dump LOC to F.  LOC is a combined location: the upper 16 bits are printed
   first and, when the lower 16 bits are nonzero, they follow after a '.'.
   Both parts are unsigned, so they are printed with %u.  */

static void
dump_afdo_loc (FILE *f, unsigned loc)
{
  const unsigned upper_part = loc >> 16;
  const unsigned lower_part = loc & 65535;

  if (lower_part)
    fprintf (f, "%u.%u", upper_part, lower_part);
  else
    fprintf (f, "%u", upper_part);
}
856 :
857 : /* Return assembler name as in symbol table and DW_AT_linkage_name. */
858 :
859 : static const char *
860 0 : raw_symbol_name (const char *asmname)
861 : {
862 : /* If we start supporting user_label_prefixes, add_linkage_attr will also
863 : need to be fixed. */
864 0 : if (strlen (user_label_prefix))
865 0 : sorry ("auto-profile is not supported for targets with user label prefix");
866 0 : return asmname + (asmname[0] == '*');
867 : }
868 :
869 : /* Convenience wrapper that looks up assembler name. */
870 :
871 : static const char *
872 0 : raw_symbol_name (tree decl)
873 : {
874 0 : return raw_symbol_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
875 : }
876 :
877 : /* Dump STACK to F. */
878 :
879 : static void
880 0 : dump_inline_stack (FILE *f, inline_stack *stack)
881 : {
882 0 : bool first = true;
883 0 : for (decl_lineno &p : *stack)
884 : {
885 0 : fprintf (f, "%s%s:",
886 : first ? "" : "; ",
887 : raw_symbol_name (p.decl));
888 0 : dump_afdo_loc (f, p.afdo_loc);
889 0 : first = false;
890 : }
891 0 : fprintf (f, "\n");
892 0 : }
893 :
894 : /* Store inline stack for STMT in STACK. */
895 :
896 : static void
897 0 : get_inline_stack (location_t locus, inline_stack *stack,
898 : tree fn = current_function_decl)
899 : {
900 0 : if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
901 : return;
902 :
903 0 : tree block = LOCATION_BLOCK (locus);
904 0 : if (block && TREE_CODE (block) == BLOCK)
905 : {
906 0 : for (block = BLOCK_SUPERCONTEXT (block);
907 0 : block && (TREE_CODE (block) == BLOCK);
908 0 : block = BLOCK_SUPERCONTEXT (block))
909 : {
910 0 : location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
911 0 : if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
912 0 : continue;
913 :
914 0 : tree decl = get_function_decl_from_block (block);
915 0 : stack->safe_push (
916 0 : {decl, get_combined_location (locus, decl), locus});
917 0 : locus = tmp_locus;
918 : }
919 : }
920 0 : stack->safe_push ({fn, get_combined_location (locus, fn), locus});
921 : }
922 :
923 : /* Same as get_inline_stack for a given node which may be
924 : an inline clone. If NODE is NULL, assume current_function_decl. */
925 : static void
926 0 : get_inline_stack_in_node (location_t locus, inline_stack *stack,
927 : cgraph_node *node)
928 : {
929 0 : if (!node)
930 0 : return get_inline_stack (locus, stack);
931 0 : do
932 : {
933 0 : get_inline_stack (locus, stack, node->decl);
934 : /* If caller is inlined, continue building stack. */
935 0 : if (!node->inlined_to)
936 : node = NULL;
937 : else
938 : {
939 0 : locus = gimple_location (node->callers->call_stmt);
940 0 : node = node->callers->caller;
941 : }
942 : }
943 0 : while (node);
944 : }
945 :
946 : /* Return combined location of LOCUS within BLOCK that is in
947 : function FN.
948 :
949 : This is a 32bit integer in which higher 16 bits stores the line offset of
950 : LOC to the start lineno of DECL, The lower 16 bits stores the
951 : discriminator. */
952 :
953 : static unsigned
954 0 : get_relative_location_for_locus (tree fn, tree block, location_t locus)
955 : {
956 0 : if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
957 : return -1;
958 :
959 0 : for (; block && (TREE_CODE (block) == BLOCK);
960 0 : block = BLOCK_SUPERCONTEXT (block))
961 0 : if (inlined_function_outer_scope_p (block))
962 0 : return get_combined_location (locus,
963 0 : get_function_decl_from_block (block));
964 0 : return get_combined_location (locus, fn);
965 : }
966 :
967 : /* Return combined location of STMT in function FN. */
968 :
969 : static unsigned
970 0 : get_relative_location_for_stmt (tree fn, gimple *stmt)
971 : {
972 0 : return get_relative_location_for_locus
973 0 : (fn, LOCATION_BLOCK (gimple_location (stmt)),
974 0 : gimple_location (stmt));
975 : }
976 :
977 : /* Return either the basename or the realpath for a given path based on
978 : PARAM_PROFILE_FUNC_INTERNAL_ID. */
979 :
980 : static const char *
981 0 : get_normalized_path (const char *path, bool from_gcov = false)
982 : {
983 0 : if (param_profile_func_internal_id == 1)
984 : /* The GCOV will already contain the entire path. It doesn't need to be
985 : normalized with lrealpath (). */
986 0 : return from_gcov ? path : lrealpath (path);
987 0 : return lbasename (path);
988 : }
989 :
990 : /* Member functions for summary_info. */
991 :
992 : bool
993 0 : summary_info::read ()
994 : {
995 0 : if (gcov_read_unsigned () != GCOV_TAG_AFDO_SUMMARY)
996 : return false;
997 :
998 0 : total_count = gcov_read_counter ();
999 0 : max_count = gcov_read_counter ();
1000 0 : max_function_count = gcov_read_counter ();
1001 0 : num_counts = gcov_read_counter ();
1002 0 : num_functions = gcov_read_counter ();
1003 0 : uint64_t num_detailed_summaries = gcov_read_counter ();
1004 0 : gcc_checking_assert (num_detailed_summaries == NUM_PERCENTILES);
1005 0 : for (uint64_t i = 0; i < num_detailed_summaries; i++)
1006 : {
1007 0 : detailed_summaries[i].cutoff = gcov_read_unsigned ();
1008 0 : detailed_summaries[i].min_count = gcov_read_counter ();
1009 0 : detailed_summaries[i].num_counts = gcov_read_counter ();
1010 : }
1011 :
1012 0 : return !gcov_is_error ();
1013 : }
1014 :
1015 : /* Get the minimum count required for percentile CUTOFF. */
1016 :
1017 : uint64_t
1018 0 : summary_info::get_threshold_count (uint32_t cutoff)
1019 : {
1020 : /* The cutoffs stored in the GCOV are fractions multiplied by 1,000,000. */
1021 0 : gcc_checking_assert (cutoff <= 1'000'000);
1022 0 : unsigned idx = 0;
1023 : /* Find the first cutoff at least as high as CUTOFF. */
1024 0 : for (; idx < NUM_PERCENTILES; idx++)
1025 0 : if (detailed_summaries[idx].cutoff >= cutoff)
1026 : break;
1027 0 : idx = std::min (NUM_PERCENTILES - 1, idx);
1028 0 : return detailed_summaries[idx].min_count;
1029 : }
1030 :
1031 : /* Member functions for string_table. */
1032 :
1033 : /* Deconstructor. */
1034 :
1035 0 : string_table::~string_table ()
1036 : {
1037 0 : for (unsigned i = 0; i < symbol_names_.length (); i++)
1038 0 : free (const_cast<char *> (symbol_names_[i]));
1039 0 : for (unsigned i = 0; i < filenames_.length (); i++)
1040 0 : free (const_cast<char *> (filenames_[i]));
1041 0 : for (auto it = original_names_map_.begin (); it != original_names_map_.end ();
1042 0 : it++)
1043 0 : free (it->second);
1044 0 : }
1045 :
1046 :
1047 : /* Return the index of a given function NAME. Return -1 if NAME is not
1048 : found in string table. */
1049 :
1050 : int
1051 0 : string_table::get_index (const char *name) const
1052 : {
1053 0 : if (name == NULL)
1054 : return -1;
1055 0 : string_index_map::const_iterator iter = symbol_name_map_.find (name);
1056 0 : if (iter == symbol_name_map_.end ())
1057 : return -1;
1058 :
1059 0 : return iter->second;
1060 : }
1061 :
1062 : /* Return the index of a given function DECL. Return -1 if DECL is not
1063 : found in string table. */
1064 :
1065 : int
1066 0 : string_table::get_index_by_decl (tree decl) const
1067 : {
1068 0 : const char *name = raw_symbol_name (decl);
1069 0 : int ret = get_index (name);
1070 0 : if (ret != -1)
1071 : return ret;
1072 0 : if (DECL_FROM_INLINE (decl))
1073 0 : return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
1074 :
1075 : return -1;
1076 : }
1077 :
1078 : /* Return the function name of a given INDEX. */
1079 :
1080 : const char *
1081 0 : string_table::get_symbol_name (int index) const
1082 : {
1083 0 : if (index <= 0 || index >= (int) symbol_names_.length ())
1084 0 : fatal_error (UNKNOWN_LOCATION,
1085 : "auto-profile contains invalid symbol name index %d", index);
1086 :
1087 0 : return symbol_names_[index];
1088 : }
1089 :
1090 : /* For a given index, returns the string. */
1091 :
1092 : const char *
1093 0 : string_table::get_filename (int index) const
1094 : {
1095 : /* There may not be any file name for some functions, ignore them. */
1096 0 : if (index == string_table::unknown_filename)
1097 : return "<unknown>";
1098 :
1099 0 : if (index < 0 || index >= (int) filenames_.length ())
1100 0 : fatal_error (UNKNOWN_LOCATION,
1101 : "auto-profile contains invalid filename index %d", index);
1102 :
1103 0 : return filenames_[index];
1104 : }
1105 :
1106 : /* For a given symbol name index, returns the filename index. */
1107 :
1108 : int
1109 0 : string_table::get_filename_by_symbol (int index) const
1110 : {
1111 0 : return get_filename_by_symbol (get_symbol_name (index));
1112 : }
1113 :
1114 : /* For a given function name, returns the filename index. */
1115 :
1116 : int
1117 0 : string_table::get_filename_by_symbol (const char *name) const
1118 : {
1119 0 : auto it = symbol_to_filename_map_.find (name);
1120 0 : if (it != symbol_to_filename_map_.end () && it->second < filenames_.length ())
1121 0 : return it->second;
1122 : return string_table::unknown_filename;
1123 : }
1124 :
1125 : /* For a given filename, returns the index. */
1126 :
1127 : int
1128 0 : string_table::get_filename_index (const char *name) const
1129 : {
1130 0 : auto iter = filename_map_.find (name);
1131 0 : return iter == filename_map_.end () ? string_table::unknown_filename
1132 0 : : iter->second;
1133 : }
1134 :
1135 : /* Get the original name and file name index for a node. This will return the
1136 : name from the current TU if there are multiple symbols that map to
1137 : NAME. */
1138 :
1139 : std::pair<const char *, int>
1140 0 : string_table::get_original_name (const char *name) const
1141 : {
1142 : /* Check if the un-prefixed name differs from the actual name. */
1143 0 : auto stripped = original_names_map_.find (name);
1144 :
1145 : /* The original name for the symbol is its name, i.e. there are no
1146 : suffixes. */
1147 0 : if (stripped == original_names_map_.end ())
1148 0 : return {name, get_filename_by_symbol (name)};
1149 :
1150 : /* Figure out if a clash exists. */
1151 0 : auto clash = clashing_names_map_.find (stripped->second);
1152 0 : gcc_checking_assert (clash != clashing_names_map_.end ());
1153 :
1154 : /* Try to find a function from the current TU. */
1155 0 : gcc_checking_assert (clash->second.length () >= 1);
1156 0 : symtab_node *n
1157 0 : = cgraph_node::get_for_asmname (get_identifier (stripped->second));
1158 0 : if (n && is_a<cgraph_node *> (n))
1159 0 : for (cgraph_node *cn = dyn_cast<cgraph_node *> (n); cn;)
1160 : {
1161 : /* Check if there is a symbol in the current TU that has the same name
1162 : as in the GCOV. */
1163 0 : for (auto name : clash->second)
1164 : {
1165 0 : int filename_idx = get_filename_by_symbol (name);
1166 0 : if (cn->definition && cn->has_gimple_body_p ()
1167 0 : && !strcmp (get_normalized_path (DECL_SOURCE_FILE (cn->decl)),
1168 : get_filename (filename_idx)))
1169 0 : return {stripped->second, filename_idx};
1170 : }
1171 0 : cn = dyn_cast<cgraph_node *> (cn->next_sharing_asm_name);
1172 : }
1173 :
1174 : /* No match found. Just stick to the current symbol and return the stripped
1175 : name. */
1176 0 : return {stripped->second, get_filename_by_symbol (name)};
1177 : }
1178 :
1179 : /* Add new symbol name STRING (with an associated file name FILENAME_IDX) and
1180 : return its index. */
1181 :
1182 : int
1183 0 : string_table::add_symbol_name (const char *string, int filename_idx)
1184 : {
1185 0 : gcc_checking_assert (
1186 : filename_idx == string_table::unknown_filename
1187 : || (filename_idx >= 0 && filename_idx < (int) filenames_.length ()));
1188 0 : symbol_names_.safe_push (string);
1189 0 : symbol_name_map_[symbol_names_.last ()] = symbol_names_.length () - 1;
1190 0 : symbol_to_filename_map_[symbol_names_.last ()] = filename_idx;
1191 0 : return symbol_names_.length () - 1;
1192 : }
1193 :
1194 : /* Add new filename and return its index (returning the same if it already
1195 : exists). */
1196 :
1197 : int
1198 0 : string_table::add_filename (const char *name)
1199 : {
1200 0 : auto it = filename_map_.find (name);
1201 0 : if (it != filename_map_.end ())
1202 0 : return it->second;
1203 0 : filenames_.safe_push (xstrdup (name));
1204 0 : return filenames_.length () - 1;
1205 : }
1206 :
1207 : /* Read the string table. Return TRUE if reading is successful. */
1208 :
1209 : bool
1210 0 : string_table::read ()
1211 : {
1212 0 : if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
1213 : return false;
1214 : /* Skip the length of the section. */
1215 0 : gcov_read_unsigned ();
1216 : /* Read in the file name table. */
1217 0 : unsigned file_num = gcov_read_unsigned ();
1218 0 : filenames_.reserve (file_num);
1219 0 : for (unsigned i = 0; i < file_num; i++)
1220 : {
1221 0 : const char *filename = gcov_read_string ();
1222 0 : filenames_.quick_push (xstrdup (get_normalized_path (filename, true)));
1223 0 : filename_map_[filenames_.last ()] = i;
1224 0 : free (const_cast<char *> (filename));
1225 0 : if (gcov_is_error ())
1226 : return false;
1227 : }
1228 : /* Read in the function name -> file name table. */
1229 0 : unsigned string_num = gcov_read_unsigned ();
1230 0 : symbol_names_.reserve (string_num);
1231 0 : for (unsigned i = 0; i < string_num; i++)
1232 : {
1233 0 : symbol_names_.quick_push (const_cast<char *> (gcov_read_string ()));
1234 0 : symbol_name_map_[symbol_names_.last ()] = i;
1235 0 : unsigned filename_idx = gcov_read_unsigned ();
1236 0 : symbol_to_filename_map_[symbol_names_.last ()] = filename_idx;
1237 0 : char *original = const_cast<char *> (
1238 0 : autofdo::get_original_name (symbol_names_.last ()));
1239 0 : if (strcmp (original, symbol_names_.last ()))
1240 : {
1241 : /* Take ownership of ORIGINAL. */
1242 0 : original_names_map_[symbol_names_.last ()] = original;
1243 0 : clashing_names_map_[original].safe_push (i);
1244 : /* It is possible that a public symbol with the stripped name exists.
1245 : If it does exist, add it as well. */
1246 0 : auto publik = symbol_name_map_.find (original);
1247 0 : if (publik != symbol_name_map_.end ()
1248 0 : && clashing_names_map_.find (publik->first)
1249 0 : == clashing_names_map_.end ())
1250 0 : clashing_names_map_[publik->first].safe_push (publik->second);
1251 : }
1252 : else
1253 : /* There are no suffixes to remove. */
1254 0 : free (original);
1255 :
1256 0 : if (gcov_is_error ())
1257 0 : return false;
1258 : }
1259 0 : return true;
1260 : }
1261 :
1262 : /* Return cgraph node corresponding to given NAME_INDEX,
1263 : NULL if unavailable. */
1264 : cgraph_node *
1265 0 : string_table::get_cgraph_node (int name_index)
1266 : {
1267 0 : const char *sname = get_symbol_name (name_index);
1268 :
1269 0 : symtab_node *n = cgraph_node::get_for_asmname (get_identifier (sname));
1270 0 : for (;n; n = n->next_sharing_asm_name)
1271 0 : if (cgraph_node *cn = dyn_cast <cgraph_node *> (n))
1272 0 : if (cn->definition && cn->has_gimple_body_p ())
1273 : return cn;
1274 : return NULL;
1275 : }
1276 :
1277 : /* Return corresponding cgraph node. */
1278 :
1279 : cgraph_node *
1280 0 : function_instance::get_cgraph_node ()
1281 : {
1282 0 : return afdo_string_table->get_cgraph_node (symbol_name ());
1283 : }
1284 :
1285 : /* Member functions for function_instance. */
1286 :
1287 0 : function_instance::~function_instance ()
1288 : {
1289 0 : gcc_assert (!in_worklist_p ());
1290 0 : for (callsite_map::iterator iter = callsites.begin ();
1291 0 : iter != callsites.end (); ++iter)
1292 0 : delete iter->second;
1293 0 : }
1294 :
1295 : /* Propagate timestamp TS of function_instance to inlined instances if it's
1296 : not already set. */
1297 :
1298 : void
1299 0 : function_instance::prop_timestamp_1 (gcov_type ts)
1300 : {
1301 0 : if (!timestamp () && total_count () > 0)
1302 0 : set_timestamp (ts);
1303 0 : for (auto it = callsites.begin (); it != callsites.end (); ++it)
1304 0 : it->second->prop_timestamp_1 (ts);
1305 0 : }
1306 :
1307 : void
1308 0 : function_instance::prop_timestamp (void)
1309 : {
1310 0 : prop_timestamp_1 (timestamp ());
1311 0 : }
1312 :
1313 : /* Traverse callsites of the current function_instance to find one at the
1314 : location of LINENO and callee name represented in DECL. */
1315 :
1316 : function_instance *
1317 0 : function_instance::get_function_instance_by_decl (unsigned lineno,
1318 : tree decl,
1319 : location_t location) const
1320 : {
1321 0 : int func_name_idx = afdo_string_table->get_index_by_decl (decl);
1322 0 : if (func_name_idx != -1)
1323 : {
1324 0 : callsite_map::const_iterator ret
1325 0 : = callsites.find (std::make_pair (lineno, func_name_idx));
1326 0 : if (ret != callsites.end ())
1327 0 : return ret->second;
1328 : }
1329 0 : if (DECL_FROM_INLINE (decl))
1330 : {
1331 0 : function_instance
1332 0 : *ret = get_function_instance_by_decl (lineno,
1333 0 : DECL_ABSTRACT_ORIGIN (decl),
1334 : location);
1335 0 : return ret;
1336 : }
1337 0 : if (dump_enabled_p ())
1338 : {
1339 0 : for (auto const &iter : callsites)
1340 0 : if (iter.first.first == lineno)
1341 0 : dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS,
1342 0 : dump_user_location_t::from_location_t (location),
1343 : "auto-profile has mismatched function name %s"
1344 : " instead of %s at loc %i:%i",
1345 : afdo_string_table->get_symbol_name (
1346 0 : iter.first.second),
1347 : raw_symbol_name (decl), lineno >> 16,
1348 : lineno & 65535);
1349 : }
1350 :
1351 : return NULL;
1352 : }
1353 :
1354 : /* Merge profile of OTHER to THIS. Note that cloning hasn't been performed
1355 : when we annotate the CFG (at this stage). */
1356 :
1357 : void
1358 0 : function_instance::merge (function_instance *other,
1359 : vec <function_instance *> &new_functions)
1360 : {
1361 : /* Do not merge to itself and only merge functions of same name. */
1362 0 : gcc_checking_assert (other != this
1363 : && other->symbol_name () == symbol_name ());
1364 :
1365 0 : if (file_name () != other->file_name ())
1366 : return;
1367 :
1368 0 : total_count_ += other->total_count_;
1369 0 : if (other->total_count () && total_count () && other->head_count () == -1)
1370 0 : head_count_ = -1;
1371 0 : else if (head_count_ != -1)
1372 0 : head_count_ += other->head_count_;
1373 :
1374 : /* While merging timestamps, set the one that occurs earlier. */
1375 0 : if (other->timestamp () < timestamp ())
1376 0 : set_timestamp (other->timestamp ());
1377 :
1378 : bool changed = true;
1379 :
1380 0 : while (changed)
1381 : {
1382 0 : changed = false;
1383 : /* If both function instances agree on particular inlined function,
1384 : merge profiles. Otherwise offline the instance. */
1385 0 : for (callsite_map::const_iterator iter = other->callsites.begin ();
1386 0 : iter != other->callsites.end ();)
1387 0 : if (callsites.count (iter->first) == 0)
1388 : {
1389 0 : function_instance *f = iter->second;
1390 0 : if (dump_file)
1391 : {
1392 0 : fprintf (dump_file, " Mismatch in inlined functions;"
1393 : " offlining in merge source:");
1394 0 : f->dump_inline_stack (dump_file);
1395 0 : fprintf (dump_file, "\n");
1396 : }
1397 : /* We already merged outer part of the function accounting
1398 : the inlined call; compensate. */
1399 0 : for (function_instance *s = this; s; s = s->inlined_to ())
1400 : {
1401 0 : s->total_count_ -= f->total_count ();
1402 0 : gcc_checking_assert (s->total_count_ >= 0);
1403 : }
1404 0 : other->callsites.erase (iter);
1405 0 : function_instance::offline (f, new_functions);
1406 : /* Start from beginning as merging might have offlined
1407 : some functions in the case of recursive inlining. */
1408 0 : iter = other->callsites.begin ();
1409 : }
1410 : else
1411 0 : ++iter;
1412 0 : for (callsite_map::const_iterator iter = callsites.begin ();
1413 0 : iter != callsites.end ();)
1414 0 : if (other->callsites.count (iter->first) == 0)
1415 : {
1416 0 : function_instance *f = iter->second;
1417 0 : if (dump_file)
1418 : {
1419 0 : fprintf (dump_file, " Mismatch in inlined functions;"
1420 : " offlining in merge destination:");
1421 0 : f->dump_inline_stack (dump_file);
1422 0 : fprintf (dump_file, "\n");
1423 : }
1424 0 : callsites.erase (iter);
1425 0 : function_instance::offline (f, new_functions);
1426 0 : iter = callsites.begin ();
1427 0 : changed = true;
1428 : }
1429 : else
1430 0 : ++iter;
1431 : }
1432 0 : for (callsite_map::const_iterator iter = other->callsites.begin ();
1433 0 : iter != other->callsites.end (); ++iter)
1434 : {
1435 0 : if (dump_file)
1436 : {
1437 0 : fprintf (dump_file, " Merging profile for inlined function\n"
1438 : " from: ");
1439 0 : iter->second->dump_inline_stack (dump_file);
1440 0 : fprintf (dump_file, " total:%" PRIu64 "\n to : ",
1441 0 : (int64_t)iter->second->total_count ());
1442 0 : callsites[iter->first]->dump_inline_stack (dump_file);
1443 0 : fprintf (dump_file, " total:%" PRIu64 "\n",
1444 0 : (int64_t)callsites[iter->first]->total_count ());
1445 : }
1446 :
1447 0 : callsites[iter->first]->merge (iter->second, new_functions);
1448 : }
1449 :
1450 0 : for (position_count_map::const_iterator iter = other->pos_counts.begin ();
1451 0 : iter != other->pos_counts.end (); ++iter)
1452 0 : if (pos_counts.count (iter->first) == 0)
1453 0 : pos_counts[iter->first] = iter->second;
1454 : else
1455 : {
1456 0 : pos_counts[iter->first].count += iter->second.count;
1457 0 : for (icall_target_map::const_iterator titer
1458 0 : = iter->second.targets.begin ();
1459 0 : titer != iter->second.targets.end (); ++titer)
1460 0 : if (pos_counts[iter->first].targets.count (titer->first) == 0)
1461 0 : pos_counts[iter->first].targets[titer->first]
1462 0 : = titer->second;
1463 : else
1464 0 : pos_counts[iter->first].targets[titer->first]
1465 0 : += titer->second;
1466 : }
1467 : }
1468 :
1469 : /* Make inline function FN offline.
1470 : If toplevel function of same name already exists, then merge profiles.
1471 : Otherwise turn FN toplevel. Return true if new toplevel function
1472 : was introduced.
1473 : If new toplevel functions are created and NEW_FUNCTIONS != NULL,
1474 : add them to NEW_FUNCTIONS.
1475 :
1476 : TODO: When offlining indirect call we lose information about the
1477 : call target. It should be possible to add it into
1478 : targets histogram. */
1479 :
1480 : bool
1481 0 : function_instance::offline (function_instance *fn,
1482 : vec <function_instance *> &new_functions)
1483 : {
1484 0 : gcc_checking_assert (fn->inlined_to ());
1485 0 : for (function_instance *s = fn->inlined_to (); s; s = s->inlined_to ())
1486 : {
1487 0 : s->total_count_ -= fn->total_count ();
1488 0 : gcc_checking_assert (s->total_count_ >= 0);
1489 : }
1490 0 : function_instance *to
1491 0 : = afdo_source_profile->get_function_instance_by_descriptor (
1492 : fn->get_descriptor ());
1493 0 : fn->set_inlined_to (NULL);
1494 : /* If there is offline function of same name, we need to merge profile.
1495 : Delay this by adding function to a worklist so we do not run into
1496 : problem with recursive inlining. */
1497 0 : if (to)
1498 : {
1499 0 : if (fn->in_worklist_p ())
1500 : return false;
1501 0 : fn->set_in_worklist ();
1502 0 : new_functions.safe_push (fn);
1503 0 : if (dump_file)
1504 : {
1505 0 : fprintf (dump_file, " Recoding duplicate: ");
1506 0 : to->dump_inline_stack (dump_file);
1507 0 : fprintf (dump_file, "\n");
1508 : }
1509 0 : return true;
1510 : }
1511 0 : if (dump_file)
1512 : {
1513 0 : fprintf (dump_file, " Added as offline instance: ");
1514 0 : fn->dump_inline_stack (dump_file);
1515 0 : fprintf (dump_file, "\n");
1516 : }
1517 0 : if (fn->total_count ())
1518 0 : fn->head_count_ = -1;
1519 0 : afdo_source_profile->add_function_instance (fn);
1520 0 : fn->set_in_worklist ();
1521 0 : new_functions.safe_push (fn);
1522 0 : return true;
1523 : }
1524 :
1525 : /* Offline all inlined functions with name in SEEN.
1526 : If new toplevel functions are created, add them to NEW_FUNCTIONS. */
1527 :
1528 : void
1529 0 : function_instance::offline_if_in_set (name_index_set &seen,
1530 : vec <function_instance *> &new_functions)
1531 : {
1532 0 : for (callsite_map::const_iterator iter = callsites.begin ();
1533 0 : iter != callsites.end ();)
1534 0 : if (seen.contains (iter->first.second))
1535 : {
1536 0 : function_instance *f = iter->second;
1537 0 : if (dump_file)
1538 : {
1539 0 : fprintf (dump_file, "Offlining function inlined to other module: ");
1540 0 : f->dump_inline_stack (dump_file);
1541 0 : fprintf (dump_file, "\n");
1542 : }
1543 0 : iter = callsites.erase (iter);
1544 0 : function_instance::offline (f, new_functions);
1545 : /* Start from beginning as merging might have offlined
1546 : some functions in the case of recursive inlining. */
1547 0 : iter = callsites.begin ();
1548 : }
1549 : else
1550 : {
1551 0 : iter->second->offline_if_in_set (seen, new_functions);
1552 0 : ++iter;
1553 : }
1554 0 : }
1555 :
1556 : /* Try to check if inlined_fn can correspond to a call of function N.
1557 : Return non-zero if it corresponds and 2 if renaming was done. */
1558 :
1559 : static int
1560 0 : match_with_target (cgraph_node *n,
1561 : gimple *stmt,
1562 : function_instance *inlined_fn,
1563 : cgraph_node *orig_callee)
1564 : {
1565 0 : cgraph_node *callee = orig_callee->ultimate_alias_target ();
1566 0 : const char *symbol_name = raw_symbol_name (callee->decl);
1567 0 : const char *name
1568 0 : = afdo_string_table->get_symbol_name (inlined_fn->symbol_name ());
1569 0 : if (strcmp (name, symbol_name))
1570 : {
1571 0 : int i;
1572 0 : bool in_suffix = false;
1573 0 : for (i = 0; i; i++)
1574 : {
1575 : if (name[i] != symbol_name[i])
1576 : break;
1577 : if (name[i] == '.')
1578 : in_suffix = true;
1579 : }
1580 : /* Accept dwarf names and stripped suffixes. */
1581 0 : if (!strcmp (lang_hooks.dwarf_name (callee->decl, 0),
1582 : afdo_string_table->get_symbol_name (
1583 : inlined_fn->symbol_name ()))
1584 0 : || (!name[i] && symbol_name[i] == '.') || in_suffix)
1585 : {
1586 0 : int index = afdo_string_table->get_index (symbol_name);
1587 0 : if (index == -1)
1588 0 : index = afdo_string_table->add_symbol_name (
1589 0 : xstrdup (symbol_name),
1590 : afdo_string_table->add_filename (
1591 0 : get_normalized_path (DECL_SOURCE_FILE (callee->decl))));
1592 0 : if (dump_file)
1593 0 : fprintf (dump_file,
1594 : " Renaming inlined call target %s to %s\n",
1595 : name, symbol_name);
1596 0 : inlined_fn->set_symbol_name (index);
1597 0 : return 2;
1598 : }
1599 : /* Only warn about declarations. It is possible that the function
1600 : is declared as alias in other module and we inlined cross-module. */
1601 0 : if (callee->definition
1602 0 : && warning (OPT_Wauto_profile,
1603 : "auto-profile of %q+F contains inlined "
1604 : "function with symbol name %s instead of symbol name %s",
1605 : n->decl, name, symbol_name))
1606 0 : inform (gimple_location (stmt), "corresponding call");
1607 0 : return 0;
1608 : }
1609 : return 1;
1610 : }
1611 :
1612 : static void
1613 0 : dump_stmt (gimple *stmt, count_info *info, function_instance *inlined_fn,
1614 : inline_stack &stack)
1615 : {
1616 0 : if (dump_file)
1617 : {
1618 0 : fprintf (dump_file, " ");
1619 0 : if (!stack.length ())
1620 0 : fprintf (dump_file, " ");
1621 : else
1622 : {
1623 0 : gcc_checking_assert (stack.length () == 1);
1624 0 : fprintf (dump_file, "%5i", stack[0].afdo_loc >> 16);
1625 0 : if (stack[0].afdo_loc & 65535)
1626 0 : fprintf (dump_file, ".%-5i", stack[0].afdo_loc & 65535);
1627 : else
1628 0 : fprintf (dump_file, " ");
1629 0 : if (info)
1630 0 : fprintf (dump_file, "%9" PRIu64 " ", (int64_t)info->count);
1631 0 : else if (inlined_fn)
1632 0 : fprintf (dump_file, " inlined ");
1633 : else
1634 0 : fprintf (dump_file, " no info ");
1635 : }
1636 0 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1637 : }
1638 0 : }
1639 :
1640 : /* Lookup count and warn about duplicates. */
1641 : count_info *
1642 0 : function_instance::lookup_count (location_t loc, inline_stack &stack,
1643 : cgraph_node *node)
1644 : {
1645 0 : gcc_checking_assert (stack.length () < 2);
1646 0 : if (stack.length ())
1647 : {
1648 0 : int c = pos_counts.count (stack[0].afdo_loc);
1649 0 : if (c > 1
1650 : && warning (OPT_Wauto_profile,
1651 : "duplicated count information"
1652 : " in auto-profile of %q+F"
1653 : " with relative location %i discriminator %i",
1654 : node->decl, stack[0].afdo_loc >> 16,
1655 : stack[0].afdo_loc & 65535))
1656 : inform (loc, "corresponding source location");
1657 0 : if (c)
1658 0 : return &pos_counts[stack[0].afdo_loc];
1659 : }
1660 : return NULL;
1661 : }
1662 :
1663 : /* Mark expr locations as used. */
1664 : void
1665 0 : mark_expr_locations (function_instance *f, tree t, cgraph_node *node,
1666 : hash_set<const count_info *> &counts)
1667 : {
1668 0 : inline_stack stack;
1669 0 : return;
1670 : if (!t)
1671 : return;
1672 : do
1673 : {
1674 : get_inline_stack_in_node (EXPR_LOCATION (t), &stack, node);
1675 : /* FIXME: EXPR_LOCATION does not always originate from current
1676 : function. */
1677 : if (stack.length () > 1)
1678 : break;
1679 : count_info *info = f->lookup_count (EXPR_LOCATION (t), stack, node);
1680 : if (info)
1681 : counts.add (info);
1682 : if (handled_component_p (t))
1683 : t = TREE_OPERAND (t, 0);
1684 : else
1685 : break;
1686 : }
1687 : while (true);
1688 0 : }
1689 :
1690 : /* Match function instance with gimple body.
1691 : Report mismatches, attempt to fix them if possible and remove data we will
1692 : not use.
1693 :
1694 : Set location and call_location so we can output diagnostics and know what
1695 : functions was already matched. */
1696 :
1697 : bool
1698 0 : function_instance::match (cgraph_node *node,
1699 : vec <function_instance *> &new_functions,
1700 : name_index_map &to_symbol_name)
1701 : {
1702 0 : if (get_location () != UNKNOWN_LOCATION)
1703 : return false;
1704 0 : set_location (DECL_SOURCE_LOCATION (node->decl));
1705 0 : if (dump_file)
1706 : {
1707 0 : fprintf (dump_file,
1708 : "\nMatching gimple function %s with auto profile: ",
1709 : node->dump_name ());
1710 0 : dump_inline_stack (dump_file);
1711 0 : fprintf (dump_file, "\n");
1712 : }
1713 0 : basic_block bb;
1714 : /* Sets used to track if entires in auto-profile are useful. */
1715 0 : hash_set<const count_info *> counts;
1716 0 : hash_set<const count_info *> targets;
1717 0 : hash_set<const function_instance *> functions;
1718 0 : hash_set<const function_instance *> functions_to_offline;
1719 :
1720 : /* We try to fill in lost disciminator if there is unique call
1721 : with given line number. This map is used to record them. */
1722 0 : hash_map<int_hash <int, -1, -2>,auto_vec <gcall *>> lineno_to_call;
1723 0 : bool lineno_to_call_computed = false;
1724 :
1725 0 : for (tree arg = DECL_ARGUMENTS (node->decl); arg; arg = DECL_CHAIN (arg))
1726 : {
1727 0 : inline_stack stack;
1728 :
1729 0 : get_inline_stack_in_node (DECL_SOURCE_LOCATION (arg), &stack, node);
1730 0 : count_info *info = lookup_count (DECL_SOURCE_LOCATION (arg), stack, node);
1731 0 : if (stack.length () && dump_file)
1732 : {
1733 0 : gcc_checking_assert (stack.length () == 1);
1734 0 : fprintf (dump_file, "%5i", stack[0].afdo_loc >> 16);
1735 0 : if (stack[0].afdo_loc & 65535)
1736 0 : fprintf (dump_file, " .%-5i arg", stack[0].afdo_loc & 65535);
1737 : else
1738 0 : fprintf (dump_file, " arg ");
1739 0 : print_generic_expr (dump_file, arg);
1740 0 : fprintf (dump_file, "\n");
1741 : }
1742 0 : if (info)
1743 0 : counts.add (info);
1744 0 : }
1745 0 : FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
1746 : {
1747 0 : if (dump_file)
1748 0 : fprintf (dump_file, " basic block %i\n", bb->index);
1749 0 : for (gphi_iterator gpi = gsi_start_phis (bb);
1750 0 : !gsi_end_p (gpi);
1751 0 : gsi_next (&gpi))
1752 : {
1753 0 : gphi *phi = gpi.phi ();
1754 0 : inline_stack stack;
1755 :
1756 : /* We do not assign discriminators to PHI nodes.
1757 : In case we every start using them, we wil need to
1758 : update tree-cfg.cc::assign_discriminators. */
1759 0 : gcc_assert (gimple_location (phi) == UNKNOWN_LOCATION);
1760 0 : get_inline_stack_in_node (gimple_location (phi), &stack, node);
1761 0 : count_info *info = lookup_count (gimple_location (phi), stack, node);
1762 0 : gcc_assert (!info);
1763 0 : dump_stmt (phi, info, NULL, stack);
1764 0 : counts.add (info);
1765 0 : for (edge e : bb->succs)
1766 : {
1767 0 : location_t phi_loc
1768 0 : = gimple_phi_arg_location_from_edge (phi, e);
1769 0 : inline_stack stack;
1770 0 : get_inline_stack_in_node (phi_loc, &stack, node);
1771 0 : count_info *info = lookup_count (phi_loc, stack, node);
1772 0 : if (info)
1773 0 : counts.add (info);
1774 0 : gcc_checking_assert (stack.length () < 2);
1775 0 : mark_expr_locations (this,
1776 : gimple_phi_arg_def_from_edge (phi, e),
1777 : node, counts);
1778 0 : }
1779 0 : }
1780 : /* TODO: goto locuses are not used for BB annotation. */
1781 0 : for (edge e : bb->succs)
1782 : {
1783 0 : inline_stack stack;
1784 0 : get_inline_stack_in_node (e->goto_locus, &stack, node);
1785 0 : count_info *info = lookup_count (e->goto_locus, stack, node);
1786 0 : if (info)
1787 0 : counts.add (info);
1788 0 : }
1789 0 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
1790 0 : !gsi_end_p (gsi); gsi_next (&gsi))
1791 : {
1792 0 : inline_stack stack;
1793 0 : gimple *stmt = gsi_stmt (gsi);
1794 0 : get_inline_stack_in_node (gimple_location (stmt), &stack, node);
1795 :
1796 0 : count_info *info = lookup_count (gimple_location (stmt), stack, node);
1797 0 : if (info)
1798 0 : counts.add (info);
1799 0 : for (unsigned int op = 0; op < gimple_num_ops (stmt); op++)
1800 0 : mark_expr_locations (this, gimple_op (stmt, op), node, counts);
1801 0 : if (gimple_code (stmt) == GIMPLE_CALL)
1802 : {
1803 0 : function_instance *inlined_fn = NULL;
1804 0 : function_instance *inlined_fn_nodisc = NULL;
1805 : /* Lookup callsite. */
1806 0 : if (stack.length ())
1807 : {
1808 0 : int c = 0;
1809 0 : int cnodis = 0;
1810 0 : for (auto const &iter : callsites)
1811 0 : if (iter.first.first == stack[0].afdo_loc)
1812 : {
1813 0 : if (!c)
1814 0 : inlined_fn = iter.second;
1815 0 : c++;
1816 : }
1817 : /* Discriminators are sometimes lost; try to find the
1818 : call without discriminator info. */
1819 0 : else if (iter.first.first == (stack[0].afdo_loc & ~65535))
1820 : {
1821 0 : if (!cnodis)
1822 0 : inlined_fn_nodisc = iter.second;
1823 0 : cnodis++;
1824 : }
1825 0 : if ((c > 1 || (!c && cnodis > 1))
1826 0 : && warning (OPT_Wauto_profile,
1827 : "duplicated callsite in auto-profile of %q+F"
1828 : " with relative location %i,"
1829 : " discriminator %i",
1830 0 : node->decl, stack[0].afdo_loc >> 16,
1831 0 : stack[0].afdo_loc & 65535))
1832 0 : inform (gimple_location (stmt), "corresponding call");
1833 0 : if (inlined_fn && info && info->targets.size ()
1834 0 : && warning (OPT_Wauto_profile,
1835 : "both call targets and inline callsite"
1836 : " information is present in auto-profile"
1837 : " of function %q+F with relative location"
1838 : " %i, discriminator %i",
1839 0 : node->decl, stack[0].afdo_loc >> 16,
1840 0 : stack[0].afdo_loc & 65535))
1841 0 : inform (gimple_location (stmt), "corresponding call");
1842 0 : tree callee = gimple_call_fndecl (stmt);
1843 0 : cgraph_node *callee_node;
1844 0 : unsigned int loc = stack[0].afdo_loc;
1845 0 : bool lost_discriminator = false;
1846 0 : if (!inlined_fn && inlined_fn_nodisc)
1847 : {
1848 0 : if (!lineno_to_call_computed)
1849 : {
1850 0 : basic_block bb2;
1851 0 : FOR_EACH_BB_FN (bb2,
1852 : DECL_STRUCT_FUNCTION (node->decl))
1853 0 : for (gimple_stmt_iterator gsi2
1854 0 : = gsi_start_bb (bb2);
1855 0 : !gsi_end_p (gsi2); gsi_next (&gsi2))
1856 0 : if (gcall *call
1857 0 : = dyn_cast <gcall *> (gsi_stmt (gsi2)))
1858 : {
1859 0 : inline_stack stack2;
1860 0 : get_inline_stack_in_node
1861 0 : (gimple_location (call),
1862 : &stack2, node);
1863 0 : if (stack2.length ())
1864 0 : lineno_to_call.get_or_insert
1865 0 : (stack2[0].afdo_loc >> 16).safe_push (call);
1866 0 : }
1867 : lineno_to_call_computed = true;
1868 : }
1869 : /* If we can determine lost discriminator uniquely,
1870 : use it. */
1871 0 : if (lineno_to_call.get
1872 0 : (stack[0].afdo_loc >> 16)->length () == 1)
1873 : {
1874 0 : if (warning (OPT_Wauto_profile,
1875 : "auto-profile of %q+F seem to contain"
1876 : " lost discriminator %i for"
1877 : " call of %s at relative location %i",
1878 : node->decl, loc & 65535,
1879 : afdo_string_table->get_symbol_name (
1880 : inlined_fn_nodisc->symbol_name ()),
1881 : loc >> 16))
1882 0 : inform (gimple_location (stmt),
1883 : "corresponding call");
1884 0 : inlined_fn = inlined_fn_nodisc;
1885 0 : if (dump_file)
1886 0 : fprintf (dump_file, " Lost discriminator %i\n",
1887 : loc & 65535);
1888 0 : loc = loc & ~65535;
1889 : }
1890 : lost_discriminator = true;
1891 : }
1892 0 : if (callee && (callee_node = cgraph_node::get (callee)))
1893 : {
1894 0 : if (inlined_fn)
1895 : {
1896 0 : int old_name = inlined_fn->symbol_name ();
1897 0 : int r = match_with_target (node, stmt, inlined_fn,
1898 : callee_node);
1899 0 : if (r == 2)
1900 : {
1901 0 : auto iter = callsites.find ({loc, old_name});
1902 0 : gcc_checking_assert (
1903 : old_name != inlined_fn->symbol_name ()
1904 : && iter != callsites.end ()
1905 : && iter->second == inlined_fn);
1906 0 : callsite key2 = {stack[0].afdo_loc,
1907 0 : inlined_fn->symbol_name ()};
1908 0 : callsites.erase (iter);
1909 0 : callsites[key2] = inlined_fn;
1910 : }
1911 0 : if (r)
1912 0 : functions.add (inlined_fn);
1913 : else
1914 0 : functions_to_offline.add (inlined_fn);
1915 : }
1916 :
1917 0 : if (info && info->targets.size () > 1)
1918 0 : warning_at (gimple_location (stmt), OPT_Wauto_profile,
1919 : "auto-profile of %q+F contains multiple"
1920 : " targets for a direct call with relative"
1921 : " location %i, discriminator %i",
1922 0 : node->decl, stack[0].afdo_loc >> 16,
1923 0 : stack[0].afdo_loc & 65535);
1924 : /* We do not need target profile for direct calls. */
1925 0 : if (info)
1926 0 : info->targets.clear ();
1927 : }
1928 : else
1929 : {
1930 0 : if (inlined_fn
1931 0 : && inlined_fn->get_call_location ()
1932 : != UNKNOWN_LOCATION)
1933 : {
1934 0 : if (warning (OPT_Wauto_profile,
1935 : "function contains two calls of the same"
1936 : " relative location +%i,"
1937 : " discriminator %i,"
1938 : " that leads to lost auto-profile",
1939 : loc >> 16,
1940 : loc & 65535))
1941 : {
1942 0 : inform (gimple_location (stmt),
1943 : "location of the first call");
1944 0 : inform (inlined_fn->get_call_location (),
1945 : "location of the second call");
1946 : }
1947 0 : if (dump_file)
1948 0 : fprintf (dump_file,
1949 : " Duplicated call location\n");
1950 0 : inlined_fn = NULL;
1951 : }
1952 0 : if (inlined_fn)
1953 : {
1954 0 : inlined_fn->set_call_location
1955 0 : (gimple_location (stmt));
1956 : /* Do renaming if needed so we can look up
1957 : cgraph node and recurse into inlined function. */
1958 0 : int *newn
1959 0 : = to_symbol_name.get (inlined_fn->symbol_name ());
1960 0 : gcc_checking_assert (
1961 : !newn || *newn != inlined_fn->symbol_name ());
1962 0 : if (newn || lost_discriminator)
1963 : {
1964 0 : auto iter = callsites.find (
1965 0 : {loc, inlined_fn->symbol_name ()});
1966 0 : gcc_checking_assert (iter != callsites.end ()
1967 : && iter->second
1968 : == inlined_fn);
1969 0 : callsite key2
1970 0 : = {stack[0].afdo_loc,
1971 0 : newn ? *newn : inlined_fn->symbol_name ()};
1972 0 : callsites.erase (iter);
1973 0 : callsites[key2] = inlined_fn;
1974 0 : inlined_fn->set_symbol_name (
1975 0 : newn ? *newn : inlined_fn->symbol_name ());
1976 : }
1977 0 : functions.add (inlined_fn);
1978 : }
1979 0 : if (info)
1980 0 : targets.add (info);
1981 : }
1982 : }
1983 0 : dump_stmt (stmt, info, inlined_fn, stack);
1984 : }
1985 : else
1986 0 : dump_stmt (stmt, info, NULL, stack);
1987 0 : }
1988 : }
1989 0 : bool warned = false;
1990 0 : for (auto &iter : pos_counts)
1991 0 : if (iter.second.targets.size ()
1992 0 : && counts.contains (&iter.second)
1993 0 : && !targets.contains (&iter.second))
1994 : {
1995 0 : if (!warned)
1996 0 : warned = warning_at
1997 0 : (DECL_SOURCE_LOCATION (node->decl),
1998 0 : OPT_Wauto_profile,
1999 : "auto-profile of %q+F contains indirect call targets"
2000 : " not associated with an indirect call statement",
2001 : node->decl);
2002 0 : if (warned)
2003 0 : inform (DECL_SOURCE_LOCATION (node->decl),
2004 : "count %" PRIu64
2005 : " with relative location +%i, discriminator %i",
2006 0 : iter.second.count, iter.first >> 16, iter.first & 65535);
2007 0 : if (dump_file)
2008 : {
2009 0 : fprintf (dump_file, "Removing targets of ");
2010 0 : dump_afdo_loc (dump_file, iter.first);
2011 0 : fprintf (dump_file, "\n");
2012 : }
2013 0 : iter.second.targets.clear ();
2014 : }
2015 0 : warned = false;
2016 : /* Profile sometimes contains extra location for start or end of function
2017 : (prologue, epilogue).
2018 : TODO: If present, perhaps it can be used to determine entry block
2019 : and exit block counts. */
2020 0 : unsigned int end_location = get_combined_location
2021 0 : (DECL_STRUCT_FUNCTION (node->decl)->function_end_locus, node->decl);
2022 0 : unsigned int start_location = get_combined_location
2023 0 : (DECL_STRUCT_FUNCTION (node->decl)->function_start_locus, node->decl);
2024 : /* When outputting code to builtins location we use line number 0.
2025 : create_gcov is stupid and happily computes offsets across files.
2026 : Silently ignore it. */
2027 0 : unsigned int zero_location
2028 0 : = ((unsigned)(1-DECL_SOURCE_LINE (node->decl))) << 16;
2029 0 : for (position_count_map::const_iterator iter = pos_counts.begin ();
2030 0 : iter != pos_counts.end ();)
2031 0 : if (!counts.contains (&iter->second))
2032 : {
2033 0 : if (iter->first != end_location
2034 : && iter->first != start_location
2035 : && (iter->first & 65535) != zero_location
2036 : && iter->first
2037 : /* FIXME: dwarf5 does not represent inline stack of debug
2038 : statements and consequently create_gcov is sometimes
2039 : mixing up statements from other functions. Do not warn
2040 : user about this until this problem is solved.
2041 : We still write info into dump file. */
2042 : && 0)
2043 : {
2044 : if (!warned)
2045 : warned = warning_at (DECL_SOURCE_LOCATION (node->decl),
2046 : OPT_Wauto_profile,
2047 : "auto-profile of %q+F contains extra statements",
2048 : node->decl);
2049 : if (warned)
2050 : inform (DECL_SOURCE_LOCATION (node->decl),
2051 : "count %" PRIu64 " with relative location +%i,"
2052 : " discriminator %i",
2053 : iter->second.count, iter->first >> 16,
2054 : iter->first & 65535);
2055 : if ((iter->first >> 16) > (end_location >> 16) && warned)
2056 : inform (DECL_SOURCE_LOCATION (node->decl),
2057 : "location is after end of function");
2058 : }
2059 0 : if (dump_file)
2060 : {
2061 0 : fprintf (dump_file, "Removing unmatched count ");
2062 0 : dump_afdo_loc (dump_file, iter->first);
2063 0 : fprintf (dump_file, ":%" PRIu64, iter->second.count);
2064 0 : for (auto &titer : iter->second.targets)
2065 0 : fprintf (dump_file, " %s:%" PRIu64,
2066 0 : afdo_string_table->get_symbol_name (titer.first),
2067 0 : (int64_t) titer.second);
2068 0 : fprintf (dump_file, "\n");
2069 : }
2070 0 : iter = pos_counts.erase (iter);
2071 : }
2072 : else
2073 0 : iter++;
2074 0 : warned = false;
2075 0 : for (callsite_map::const_iterator iter = callsites.begin ();
2076 0 : iter != callsites.end ();)
2077 0 : if (!functions.contains (iter->second))
2078 : {
2079 0 : function_instance *f = iter->second;
2080 : /* If we did not see the corresponding statement, warn. */
2081 0 : if (!functions_to_offline.contains (iter->second))
2082 : {
2083 0 : if (!warned)
2084 0 : warned = warning_at (DECL_SOURCE_LOCATION (node->decl),
2085 0 : OPT_Wauto_profile,
2086 : "auto-profile of %q+F contains"
2087 : " extra callsites",
2088 : node->decl);
2089 0 : if (warned)
2090 0 : inform (DECL_SOURCE_LOCATION (node->decl),
2091 : "call of %s with total count %" PRId64
2092 : ", relative location +%i, discriminator %i",
2093 0 : afdo_string_table->get_symbol_name (iter->first.second),
2094 0 : iter->second->total_count (), iter->first.first >> 16,
2095 0 : iter->first.first & 65535);
2096 0 : if ((iter->first.first >> 16) > (end_location >> 16) && warned)
2097 0 : inform (DECL_SOURCE_LOCATION (node->decl),
2098 : "location is after end of function");
2099 0 : if (dump_file)
2100 : {
2101 0 : fprintf (dump_file,
2102 : "Offlining inline with no corresponding gimple stmt ");
2103 0 : f->dump_inline_stack (dump_file);
2104 0 : fprintf (dump_file, "\n");
2105 : }
2106 : }
2107 0 : else if (dump_file)
2108 : {
2109 0 : fprintf (dump_file,
2110 : "Offlining mismatched inline ");
2111 0 : f->dump_inline_stack (dump_file);
2112 0 : fprintf (dump_file, "\n");
2113 : }
2114 0 : callsites.erase (iter);
2115 0 : offline (f, new_functions);
2116 0 : iter = callsites.begin ();
2117 : }
2118 : else
2119 0 : iter++;
2120 0 : for (auto &iter : callsites)
2121 0 : if (cgraph_node *n = iter.second->get_cgraph_node ())
2122 0 : iter.second->match (n, new_functions, to_symbol_name);
2123 0 : return true;
2124 0 : }
2125 :
2126 : /* Walk inlined functions and if their name is not in SEEN
2127 : remove it. Also rename function names as given by
2128 : to_symbol_name map. */
2129 :
2130 : void
2131 0 : function_instance::remove_external_functions
2132 : (name_index_set &seen,
2133 : name_index_map &to_symbol_name,
2134 : vec <function_instance *> &new_functions)
2135 : {
2136 0 : auto_vec <callsite, 20> to_rename;
2137 :
2138 0 : for (callsite_map::const_iterator iter = callsites.begin ();
2139 0 : iter != callsites.end ();)
2140 0 : if (!seen.contains (iter->first.second))
2141 : {
2142 0 : function_instance *f = iter->second;
2143 0 : if (dump_file)
2144 : {
2145 0 : fprintf (dump_file, " Removing external inline: ");
2146 0 : f->dump_inline_stack (dump_file);
2147 0 : fprintf (dump_file, "\n");
2148 : }
2149 0 : iter = callsites.erase (iter);
2150 0 : f->set_inlined_to (NULL);
2151 0 : f->offline_if_in_set (seen, new_functions);
2152 0 : delete f;
2153 : }
2154 : else
2155 : {
2156 0 : gcc_checking_assert ((int) iter->first.second
2157 : == iter->second->symbol_name ());
2158 0 : int *newn = iter->second->get_call_location () == UNKNOWN_LOCATION
2159 0 : ? to_symbol_name.get (iter->first.second)
2160 : : NULL;
2161 0 : if (newn)
2162 : {
2163 0 : gcc_checking_assert (iter->second->inlined_to ());
2164 0 : to_rename.safe_push (iter->first);
2165 : }
2166 0 : iter->second->remove_external_functions
2167 0 : (seen, to_symbol_name, new_functions);
2168 0 : ++iter;
2169 : }
2170 0 : for (auto &key : to_rename)
2171 : {
2172 0 : auto iter = callsites.find (key);
2173 0 : callsite key2 = key;
2174 0 : key2.second = *to_symbol_name.get (key.second);
2175 0 : iter->second->set_symbol_name (key2.second);
2176 0 : callsites.erase (iter);
2177 0 : callsites[key2] = iter->second;
2178 : }
2179 0 : auto_vec <int, 20> target_to_rename;
2180 0 : for (auto &iter : pos_counts)
2181 : {
2182 0 : for (auto const &titer : iter.second.targets)
2183 : {
2184 0 : int *ren = to_symbol_name.get (titer.first);
2185 0 : if (ren)
2186 0 : target_to_rename.safe_push (titer.first);
2187 : }
2188 0 : while (target_to_rename.length ())
2189 : {
2190 0 : int key = target_to_rename.pop ();
2191 0 : int key2 = *to_symbol_name.get (key);
2192 0 : auto i = iter.second.targets.find (key);
2193 0 : if (iter.second.targets.count (key2) == 0)
2194 0 : iter.second.targets[key2] = i->second;
2195 : else
2196 0 : iter.second.targets[key2] += i->second;
2197 0 : iter.second.targets.erase (i);
2198 : }
2199 : }
2200 0 : }
2201 :
2202 : /* Look for inline instances that was not realized and
2203 : remove them while possibly merging them to offline variants. */
2204 :
2205 : void
2206 0 : function_instance::offline_if_not_realized
2207 : (vec <function_instance *> &new_functions)
2208 : {
2209 0 : for (callsite_map::const_iterator iter = callsites.begin ();
2210 0 : iter != callsites.end ();)
2211 0 : if (!iter->second->realized_p ())
2212 : {
2213 0 : function_instance *f = iter->second;
2214 0 : if (dump_file)
2215 : {
2216 0 : fprintf (dump_file, "Offlining unrealized inline ");
2217 0 : f->dump_inline_stack (dump_file);
2218 0 : fprintf (dump_file, "\n");
2219 : }
2220 0 : iter = callsites.erase (iter);
2221 0 : offline (f, new_functions);
2222 : }
2223 : else
2224 : {
2225 0 : iter->second->offline_if_not_realized (new_functions);
2226 0 : ++iter;
2227 : }
2228 0 : }
2229 :
2230 : /* Dump instance to F indented by INDENT. */
2231 :
2232 : void
2233 0 : function_instance::dump (FILE *f, int indent, bool nested) const
2234 : {
2235 0 : if (!nested)
2236 0 : fprintf (f, "%*s%s total:%" PRIu64 " head:%" PRId64 "\n", indent, "",
2237 : afdo_string_table->get_symbol_name (symbol_name ()),
2238 0 : (int64_t) total_count (), (int64_t) head_count ());
2239 : else
2240 0 : fprintf (f, " total:%" PRIu64 "\n", (int64_t)total_count ());
2241 0 : for (auto const &iter : pos_counts)
2242 : {
2243 0 : fprintf (f, "%*s", indent + 2, "");
2244 0 : dump_afdo_loc (f, iter.first);
2245 0 : fprintf (f, ": %" PRIu64, (int64_t)iter.second.count);
2246 :
2247 0 : for (auto const &titer : iter.second.targets)
2248 0 : fprintf (f, " %s:%" PRIu64,
2249 0 : afdo_string_table->get_symbol_name (titer.first),
2250 0 : (int64_t) titer.second);
2251 0 : fprintf (f,"\n");
2252 : }
2253 0 : for (auto const &iter : callsites)
2254 : {
2255 0 : fprintf (f, "%*s", indent + 2, "");
2256 0 : dump_afdo_loc (f, iter.first.first);
2257 0 : fprintf (f, ": %s",
2258 0 : afdo_string_table->get_symbol_name (iter.first.second));
2259 0 : iter.second->dump (f, indent + 2, true);
2260 0 : gcc_checking_assert ((int) iter.first.second
2261 : == iter.second->symbol_name ());
2262 : }
2263 0 : }
2264 :
2265 : /* Dump inline path. */
2266 :
2267 : void
2268 0 : function_instance::dump_inline_stack (FILE *f) const
2269 : {
2270 0 : auto_vec <callsite, 20> stack;
2271 0 : const function_instance *p = this, *s = inlined_to ();
2272 0 : while (s)
2273 : {
2274 0 : bool found = false;
2275 0 : for (callsite_map::const_iterator iter = s->callsites.begin ();
2276 0 : iter != s->callsites.end (); ++iter)
2277 0 : if (iter->second == p)
2278 : {
2279 0 : gcc_checking_assert (
2280 : !found && (int) iter->first.second == p->symbol_name ());
2281 0 : stack.safe_push ({iter->first.first, s->symbol_name ()});
2282 0 : found = true;
2283 : }
2284 0 : gcc_checking_assert (found);
2285 0 : p = s;
2286 0 : s = s->inlined_to ();
2287 : }
2288 0 : for (callsite &s: stack)
2289 : {
2290 0 : fprintf (f, "%s:", afdo_string_table->get_symbol_name (s.second));
2291 0 : dump_afdo_loc (f, s.first);
2292 0 : fprintf (f, " ");
2293 : }
2294 0 : fprintf (f, "%s", afdo_string_table->get_symbol_name (symbol_name ()));
2295 0 : }
2296 :
2297 : /* Dump instance to stderr. */
2298 :
2299 : void
2300 0 : function_instance::debug () const
2301 : {
2302 0 : dump (stderr);
2303 0 : }
2304 :
2305 : /* Return profile info for LOC in INFO. */
2306 :
2307 : bool
2308 0 : function_instance::get_count_info (location_t loc, count_info *info) const
2309 : {
2310 0 : position_count_map::const_iterator iter = pos_counts.find (loc);
2311 0 : if (iter == pos_counts.end ())
2312 : return false;
2313 0 : *info = iter->second;
2314 0 : return true;
2315 : }
2316 :
2317 : /* Read the inlined indirect call target profile for STMT and store it in
2318 : MAP, return the total count for all inlined indirect calls. */
2319 :
2320 : gcov_type
2321 0 : function_instance::find_icall_target_map (tree fn, gcall *stmt,
2322 : icall_target_map *map) const
2323 : {
2324 0 : gcov_type ret = 0;
2325 0 : unsigned stmt_offset = get_relative_location_for_stmt (fn, stmt);
2326 :
2327 0 : for (callsite_map::const_iterator iter = callsites.begin ();
2328 0 : iter != callsites.end (); ++iter)
2329 : {
2330 0 : unsigned callee = iter->second->symbol_name ();
2331 : /* Check if callsite location match the stmt. */
2332 0 : if (iter->first.first != stmt_offset
2333 0 : || iter->second->removed_icall_target ())
2334 0 : continue;
2335 0 : struct cgraph_node *node = cgraph_node::get_for_asmname (
2336 : get_identifier (afdo_string_table->get_symbol_name (callee)));
2337 0 : if (node == NULL)
2338 0 : continue;
2339 0 : (*map)[callee] = iter->second->total_count () * afdo_count_scale;
2340 0 : ret += iter->second->total_count () * afdo_count_scale;
2341 : }
2342 0 : return ret;
2343 : }
2344 :
2345 : /* Remove the inlined indirect call target profile for STMT. */
2346 :
2347 : void
2348 0 : function_instance::remove_icall_target (tree fn, gcall *stmt)
2349 : {
2350 0 : unsigned stmt_offset = get_relative_location_for_stmt (fn, stmt);
2351 0 : int n = 0;
2352 :
2353 0 : for (auto iter : callsites)
2354 0 : if (iter.first.first == stmt_offset)
2355 : {
2356 0 : iter.second->remove_icall_target ();
2357 0 : n++;
2358 : }
2359 : /* TODO: If we add support for multiple targets, we may want to
2360 : remove only those we succesfully inlined. */
2361 0 : gcc_assert (n);
2362 0 : }
2363 :
2364 : /* Offline all functions not defined in the current unit.
2365 : We will not be able to early inline them.
2366 : Doing so early will get VPT decisions more realistic. */
2367 :
2368 : void
2369 0 : autofdo_source_profile::offline_external_functions ()
2370 : {
2371 : /* First check all available definitions and mark their names as
2372 : visible. */
2373 0 : cgraph_node *node;
2374 0 : name_index_set seen;
2375 0 : name_index_map to_symbol_name;
2376 0 : size_t last_name;
2377 :
2378 : /* Add renames erasing suffixes produced by late clones, such as
2379 : .isra, .ipcp. */
2380 0 : for (size_t i = 1; i < afdo_string_table->num_entries (); i++)
2381 : {
2382 0 : const char *n1 = afdo_string_table->get_symbol_name (i);
2383 0 : std::pair<const char *, int> name_filename
2384 0 : = afdo_string_table->get_original_name (n1);
2385 0 : const char *n2 = name_filename.first;
2386 0 : if (!strcmp (n1, n2))
2387 : {
2388 : /* Watch for duplicate entries.
2389 : This seems to happen in practice and may be useful to distinguish
2390 : multiple static symbols of the same name, but we do not realy
2391 : have a way to differentiate them in get_symbol_name lookup. */
2392 0 : int index = afdo_string_table->get_index (n1);
2393 0 : if (index != (int)i)
2394 : {
2395 0 : if (dump_file)
2396 0 : fprintf (dump_file,
2397 : "string table in auto-profile contains"
2398 : " duplicated name %s\n", n1);
2399 0 : to_symbol_name.put (i, index);
2400 : }
2401 0 : continue;
2402 0 : }
2403 0 : if (dump_file)
2404 0 : fprintf (dump_file, "Adding rename removing clone suffixes %s -> %s\n",
2405 : n1, n2);
2406 0 : int index = afdo_string_table->get_index (n2);
2407 0 : if (index == -1)
2408 0 : index = afdo_string_table->add_symbol_name (xstrdup (n2),
2409 : name_filename.second);
2410 0 : to_symbol_name.put (i, index);
2411 : }
2412 0 : last_name = afdo_string_table->num_entries ();
2413 0 : FOR_EACH_DEFINED_FUNCTION (node)
2414 : {
2415 0 : const char *name = raw_symbol_name (node->decl);
2416 0 : const char *dwarf_name = lang_hooks.dwarf_name (node->decl, 0);
2417 0 : int index = afdo_string_table->get_index (name);
2418 :
2419 : /* Inline function may be identified by its dwarf names;
2420 : rename them to symbol names. With LTO dwarf names are
2421 : lost in free_lange_data. */
2422 0 : if (strcmp (name, dwarf_name))
2423 : {
2424 0 : int index2 = afdo_string_table->get_index (dwarf_name);
2425 0 : if (index2 != -1)
2426 : {
2427 0 : if (index == -1)
2428 0 : index = afdo_string_table->add_symbol_name (
2429 0 : xstrdup (name),
2430 : afdo_string_table->add_filename (
2431 0 : get_normalized_path (DECL_SOURCE_FILE (node->decl))));
2432 0 : if (dump_file)
2433 : {
2434 0 : fprintf (dump_file, "Adding dwarf->symbol rename %s -> %s\n",
2435 : afdo_string_table->get_symbol_name (index2), name);
2436 0 : if (to_symbol_name.get (index2))
2437 0 : fprintf (dump_file, "Dwarf name is not unique");
2438 : }
2439 0 : to_symbol_name.put (index2, index);
2440 0 : seen.add (index2);
2441 : }
2442 : }
2443 0 : if (index != -1)
2444 : {
2445 0 : if (dump_file)
2446 0 : fprintf (dump_file, "%s is defined in node %s\n",
2447 : afdo_string_table->get_symbol_name (index),
2448 : node->dump_name ());
2449 0 : seen.add (index);
2450 : }
2451 : else
2452 : {
2453 0 : if (dump_file)
2454 : {
2455 0 : if (dwarf_name && strcmp (dwarf_name, name))
2456 0 : fprintf (dump_file,
2457 : "Node %s not in auto profile (%s neither %s)\n",
2458 : node->dump_name (),
2459 : name,
2460 : dwarf_name);
2461 : else
2462 0 : fprintf (dump_file,
2463 : "Node %s (symbol %s) not in auto profile\n",
2464 : node->dump_name (),
2465 : name);
2466 : }
2467 : }
2468 : }
2469 :
2470 0 : for (auto iter : to_symbol_name)
2471 : {
2472 : /* In case dwarf name was duplicated and later renamed,
2473 : handle both. No more than one hop should be needed. */
2474 0 : int *newn = to_symbol_name.get (iter.second);
2475 0 : if (newn)
2476 0 : iter.second = *newn;
2477 0 : gcc_checking_assert (!to_symbol_name.get (iter.second));
2478 0 : if (seen.contains (iter.second))
2479 0 : seen.add (iter.first);
2480 : }
2481 :
2482 : /* Now process all toplevel (offline) function instances.
2483 :
2484 : If instance has no definition in this translation unit,
2485 : first offline all inlined functions which are defined here
2486 : (so we do not lose profile due to cross-module inlining
2487 : done by link-time optimizers).
2488 :
2489 : If instance has a definition, look into all inlined functions
2490 : and remove external ones (result of cross-module inlining).
2491 :
2492 : TODO: after early-inlining we ought to offline all functions
2493 : that were not inlined. */
2494 0 : vec <function_instance *>&fns = duplicate_functions_;
2495 0 : auto_vec <function_instance *, 20>fns2;
2496 : /* Populate worklist with all functions to process. Processing
2497 : may introduce new functions by offlining. */
2498 0 : for (auto &function : map_)
2499 : {
2500 0 : function.second->set_in_worklist ();
2501 0 : fns.safe_push (function.second);
2502 : }
2503 :
2504 : /* There are two worklists. First all functions needs to be matched
2505 : with gimple body and only then we want to do merging, since matching
2506 : should be done on unmodified profile and merging works better if
2507 : mismatches are already resolved both in source and destination. */
2508 0 : while (fns.length () || fns2.length ())
2509 : {
2510 : /* In case renaming introduced new name, keep seen up to date. */
2511 0 : for (; last_name < afdo_string_table->num_entries (); last_name++)
2512 : {
2513 0 : const char *name = afdo_string_table->get_symbol_name (last_name);
2514 0 : symtab_node *n
2515 0 : = afdo_string_table->get_cgraph_node (last_name);
2516 0 : if (dump_file)
2517 0 : fprintf (dump_file, "New name %s %s\n", name,
2518 : n ? "wth corresponding definition"
2519 : : "with no corresponding definition");
2520 0 : if (n)
2521 0 : seen.add (last_name);
2522 : }
2523 0 : if (fns.length ())
2524 : {
2525 0 : function_instance *f = fns.pop ();
2526 0 : if (f->get_location () == UNKNOWN_LOCATION)
2527 : {
2528 0 : int index = f->symbol_name ();
2529 0 : int *newn = to_symbol_name.get (index);
2530 0 : if (newn)
2531 : {
2532 0 : if (find_function_instance (f->get_descriptor ()) == f)
2533 0 : remove_function_instance (f);
2534 0 : f->set_symbol_name (*newn);
2535 0 : if (!find_function_instance (f->get_descriptor ()))
2536 0 : add_function_instance (f);
2537 : }
2538 0 : if (cgraph_node *n = f->get_cgraph_node ())
2539 : {
2540 0 : gcc_checking_assert (seen.contains (f->symbol_name ()));
2541 0 : f->match (n, fns, to_symbol_name);
2542 : }
2543 : }
2544 0 : fns2.safe_push (f);
2545 : }
2546 : else
2547 : {
2548 0 : function_instance *f = fns2.pop ();
2549 0 : int index = f->symbol_name ();
2550 0 : gcc_checking_assert (f->in_worklist_p ());
2551 :
2552 : /* If map has different function_instance of same name, then
2553 : this is a duplicated entry which needs to be merged. */
2554 0 : function_instance *index_inst
2555 0 : = find_function_instance (f->get_descriptor ());
2556 0 : if (index_inst && index_inst != f)
2557 : {
2558 0 : if (dump_file)
2559 : {
2560 0 : fprintf (dump_file, "Merging duplicate instance: ");
2561 0 : f->dump_inline_stack (dump_file);
2562 0 : fprintf (dump_file, "\n");
2563 : }
2564 0 : index_inst->merge (f, fns);
2565 0 : gcc_checking_assert (!f->inlined_to ());
2566 0 : f->clear_in_worklist ();
2567 0 : delete f;
2568 : }
2569 : /* If name was not seen in the symbol table, remove it. */
2570 0 : else if (!seen.contains (index))
2571 : {
2572 0 : f->offline_if_in_set (seen, fns);
2573 0 : f->clear_in_worklist ();
2574 0 : if (dump_file)
2575 0 : fprintf (dump_file, "Removing external %s\n",
2576 : afdo_string_table->get_symbol_name (
2577 : f->symbol_name ()));
2578 0 : if (index_inst == f)
2579 0 : remove_function_instance (f);
2580 0 : delete f;
2581 : }
2582 : /* If this is offline function instance seen in this
2583 : translation unit offline external inlines and possibly
2584 : rename from dwarf name. */
2585 : else
2586 : {
2587 0 : f->remove_external_functions (seen, to_symbol_name, fns);
2588 0 : f->clear_in_worklist ();
2589 : }
2590 : }
2591 : }
2592 0 : if (dump_file)
2593 0 : for (auto const &function : map_)
2594 : {
2595 0 : seen.contains (function.second->symbol_name ());
2596 0 : function.second->dump (dump_file);
2597 : }
2598 0 : }
2599 :
2600 : /* Walk scope block BLOCK and mark all inlined functions as realized. */
2601 :
2602 : static void
2603 0 : walk_block (tree fn, function_instance *s, tree block)
2604 : {
2605 0 : if (inlined_function_outer_scope_p (block))
2606 : {
2607 0 : unsigned loc = get_relative_location_for_locus
2608 0 : (fn, BLOCK_SUPERCONTEXT (block),
2609 0 : BLOCK_SOURCE_LOCATION (block));
2610 0 : function_instance *ns
2611 : = s->get_function_instance_by_decl
2612 0 : (loc, BLOCK_ABSTRACT_ORIGIN (block),
2613 0 : BLOCK_SOURCE_LOCATION (block));
2614 0 : if (!ns)
2615 : {
2616 0 : if (dump_file)
2617 : {
2618 0 : fprintf (dump_file, " Failed to find inlined instance:");
2619 0 : s->dump_inline_stack (dump_file);
2620 0 : fprintf (dump_file, ":");
2621 0 : dump_afdo_loc (dump_file, loc);
2622 0 : fprintf (dump_file, " %s\n",
2623 0 : raw_symbol_name (BLOCK_ABSTRACT_ORIGIN (block)));
2624 : }
2625 0 : return;
2626 : }
2627 0 : s = ns;
2628 0 : if (dump_file)
2629 : {
2630 0 : fprintf (dump_file, " Marking realized inline: ");
2631 0 : s->dump_inline_stack (dump_file);
2632 0 : fprintf (dump_file, "\n");
2633 : }
2634 0 : s->set_realized ();
2635 : }
2636 0 : for (tree t = BLOCK_SUBBLOCKS (block); t ; t = BLOCK_CHAIN (t))
2637 0 : walk_block (fn, s, t);
2638 : }
2639 :
2640 : /* Offline all inline functions that are not marked as realized.
2641 : This will merge their profile into offline versions where available.
2642 : Also remove all functions we will no longer use. */
2643 :
2644 : void
2645 0 : autofdo_source_profile::offline_unrealized_inlines ()
2646 : {
2647 0 : auto_vec <function_instance *>fns;
2648 : /* Populate worklist with all functions to process. Processing
2649 : may introduce new functions by offlining. */
2650 0 : for (auto const &function : map_)
2651 : {
2652 0 : fns.safe_push (function.second);
2653 0 : function.second->set_in_worklist ();
2654 : }
2655 0 : while (fns.length ())
2656 : {
2657 0 : function_instance *f = fns.pop ();
2658 0 : int index = f->symbol_name ();
2659 0 : function_instance *index_inst
2660 0 : = find_function_instance (f->get_descriptor ());
2661 0 : bool in_map = index_inst != nullptr;
2662 0 : if (in_map)
2663 0 : if (cgraph_node *n = f->get_cgraph_node ())
2664 : {
2665 0 : if (dump_file)
2666 0 : fprintf (dump_file, "Marking realized %s\n",
2667 : afdo_string_table->get_symbol_name (index));
2668 0 : f->set_realized ();
2669 0 : if (DECL_INITIAL (n->decl)
2670 0 : && DECL_INITIAL (n->decl) != error_mark_node)
2671 0 : walk_block (n->decl, f, DECL_INITIAL (n->decl));
2672 : }
2673 0 : f->offline_if_not_realized (fns);
2674 0 : gcc_checking_assert ((in_map || !f->realized_p ())
2675 : && f->in_worklist_p ());
2676 :
2677 : /* If this is duplicated instance, merge it into one in map. */
2678 0 : if (in_map && index_inst != f)
2679 : {
2680 0 : if (dump_file)
2681 : {
2682 0 : fprintf (dump_file, "Merging duplicate instance: ");
2683 0 : f->dump_inline_stack (dump_file);
2684 0 : fprintf (dump_file, "\n");
2685 : }
2686 0 : index_inst->merge (f, fns);
2687 0 : f->clear_in_worklist ();
2688 0 : gcc_checking_assert (!f->inlined_to ());
2689 0 : delete f;
2690 : }
2691 : /* If function is not in symbol table, remove it. */
2692 0 : else if (!f->realized_p ())
2693 : {
2694 0 : if (dump_file)
2695 0 : fprintf (dump_file, "Removing optimized out function %s\n",
2696 : afdo_string_table->get_symbol_name (f->symbol_name ()));
2697 0 : if (in_map)
2698 0 : remove_function_instance (index_inst);
2699 0 : f->clear_in_worklist ();
2700 0 : delete f;
2701 : }
2702 : else
2703 0 : f->clear_in_worklist ();
2704 : }
2705 0 : if (dump_file)
2706 0 : for (auto const &function : map_)
2707 0 : function.second->dump (dump_file);
2708 0 : }
2709 :
2710 : /* Read the profile and create a function_instance with head count as
2711 : HEAD_COUNT. Recursively read callsites to create nested function_instances
2712 : too. STACK is used to track the recursive creation process. */
2713 :
2714 : /* function instance profile format:
2715 :
2716 : ENTRY_COUNT: 8 bytes
2717 : TIMESTAMP: 8 bytes (only for toplevel symbols)
2718 : NAME_INDEX: 4 bytes
2719 : NUM_POS_COUNTS: 4 bytes
2720 : NUM_CALLSITES: 4 bytes
2721 : POS_COUNT_1:
2722 : POS_1_OFFSET: 4 bytes
2723 : NUM_TARGETS: 4 bytes
2724 : COUNT: 8 bytes
2725 : TARGET_1:
2726 : VALUE_PROFILE_TYPE: 4 bytes
2727 : TARGET_IDX: 8 bytes
2728 : COUNT: 8 bytes
2729 : TARGET_2
2730 : ...
2731 : TARGET_n
2732 : POS_COUNT_2
2733 : ...
2734 : POS_COUNT_N
2735 : CALLSITE_1:
2736 : CALLSITE_1_OFFSET: 4 bytes
2737 : FUNCTION_INSTANCE_PROFILE (nested)
2738 : CALLSITE_2
2739 : ...
2740 : CALLSITE_n. */
2741 :
2742 : function_instance *
2743 0 : function_instance::read_function_instance (function_instance_stack *stack,
2744 : bool toplevel)
2745 : {
2746 0 : gcov_type_unsigned timestamp = 0;
2747 0 : gcov_type head_count = -1;
2748 0 : if (toplevel)
2749 : {
2750 0 : head_count = gcov_read_counter ();
2751 0 : timestamp = (gcov_type_unsigned) gcov_read_counter ();
2752 : }
2753 0 : unsigned name = gcov_read_unsigned ();
2754 0 : unsigned num_pos_counts = gcov_read_unsigned ();
2755 0 : unsigned num_callsites = gcov_read_unsigned ();
2756 0 : function_instance *s
2757 : = new function_instance (name,
2758 0 : afdo_string_table->get_filename_by_symbol (name),
2759 0 : head_count);
2760 0 : if (timestamp > 0)
2761 0 : s->set_timestamp (timestamp);
2762 0 : if (!stack->is_empty ())
2763 0 : s->set_inlined_to (stack->last ());
2764 0 : stack->safe_push (s);
2765 :
2766 0 : for (unsigned i = 0; i < num_pos_counts; i++)
2767 : {
2768 0 : unsigned offset = gcov_read_unsigned ();
2769 0 : unsigned num_targets = gcov_read_unsigned ();
2770 0 : gcov_type count = gcov_read_counter ();
2771 0 : s->pos_counts[offset].count = count;
2772 :
2773 0 : for (unsigned j = 0; j < stack->length (); j++)
2774 0 : (*stack)[j]->total_count_ += count;
2775 0 : for (unsigned j = 0; j < num_targets; j++)
2776 : {
2777 : /* Only indirect call target histogram is supported now. */
2778 0 : gcov_read_unsigned ();
2779 0 : gcov_type target_idx = gcov_read_counter ();
2780 0 : s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
2781 : }
2782 : }
2783 0 : for (unsigned i = 0; i < num_callsites; i++)
2784 : {
2785 0 : unsigned offset = gcov_read_unsigned ();
2786 0 : function_instance *callee_function_instance
2787 0 : = read_function_instance (stack, false);
2788 0 : s->callsites[std::make_pair (offset,
2789 0 : callee_function_instance->symbol_name ())]
2790 0 : = callee_function_instance;
2791 : }
2792 0 : stack->pop ();
2793 0 : return s;
2794 : }
2795 :
2796 : /* Member functions for autofdo_source_profile. */
2797 :
2798 0 : autofdo_source_profile::~autofdo_source_profile ()
2799 : {
2800 0 : for (name_function_instance_map::const_iterator iter = map_.begin ();
2801 0 : iter != map_.end (); ++iter)
2802 0 : delete iter->second;
2803 0 : }
2804 :
2805 : /* For a given DECL, returns the top-level function_instance. */
2806 :
2807 : function_instance *
2808 0 : autofdo_source_profile::get_function_instance_by_decl (tree decl, const char *filename) const
2809 : {
2810 0 : if (!filename)
2811 0 : filename = get_normalized_path (DECL_SOURCE_FILE (decl));
2812 0 : int index = afdo_string_table->get_index_by_decl (decl);
2813 0 : if (index == -1)
2814 : return NULL;
2815 :
2816 0 : function_instance_descriptor descriptor (
2817 0 : afdo_string_table->get_filename_index (filename), index);
2818 0 : return find_function_instance (descriptor);
2819 : }
2820 :
2821 : /* For a given DESCRIPTOR, return the matching instance if found. */
2822 :
2823 : function_instance *
2824 0 : autofdo_source_profile::get_function_instance_by_descriptor (
2825 : function_instance_descriptor descriptor) const
2826 : {
2827 0 : return find_function_instance (descriptor);
2828 : }
2829 :
2830 : /* Add function instance FN. */
2831 :
2832 : void
2833 0 : autofdo_source_profile::add_function_instance (function_instance *fn)
2834 : {
2835 0 : gcc_checking_assert (map_.find (fn->get_descriptor ()) == map_.end ());
2836 0 : map_[fn->get_descriptor ()] = fn;
2837 0 : }
2838 :
2839 : /* Find count_info for a given gimple STMT. If found, store the count_info
2840 : in INFO and return true; otherwise return false. */
2841 :
2842 : bool
2843 0 : autofdo_source_profile::get_count_info (gimple *stmt, count_info *info,
2844 : cgraph_node *node) const
2845 : {
2846 0 : gcc_checking_assert (stmt_loc_used_by_debug_info (stmt));
2847 0 : return get_count_info (gimple_location (stmt), info, node);
2848 : }
2849 :
2850 : bool
2851 0 : autofdo_source_profile::get_count_info (location_t gimple_loc,
2852 : count_info *info,
2853 : cgraph_node *node) const
2854 : {
2855 0 : if (LOCATION_LOCUS (gimple_loc) == cfun->function_end_locus)
2856 : return false;
2857 :
2858 0 : inline_stack stack;
2859 0 : get_inline_stack_in_node (gimple_loc, &stack, node);
2860 0 : if (stack.length () == 0)
2861 : return false;
2862 0 : function_instance *s = get_function_instance_by_inline_stack (stack);
2863 0 : if (s == NULL)
2864 : return false;
2865 0 : return s->get_count_info (stack[0].afdo_loc, info);
2866 0 : }
2867 :
2868 : /* Update value profile INFO for STMT from the inlined indirect callsite.
2869 : Return true if INFO is updated. */
2870 :
2871 : bool
2872 0 : autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
2873 : count_info *info,
2874 : cgraph_node *node)
2875 : {
2876 0 : if (dump_file)
2877 : {
2878 0 : fprintf (dump_file, "Checking indirect call -> direct call ");
2879 0 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2880 : }
2881 :
2882 0 : if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
2883 : {
2884 0 : if (dump_file)
2885 0 : fprintf (dump_file, " bad locus (function end)\n");
2886 0 : return false;
2887 : }
2888 :
2889 0 : count_info old_info;
2890 0 : get_count_info (stmt, &old_info, node);
2891 0 : gcov_type total = 0;
2892 0 : for (icall_target_map::const_iterator iter = old_info.targets.begin ();
2893 0 : iter != old_info.targets.end (); ++iter)
2894 0 : total += iter->second;
2895 0 : total *= afdo_count_scale;
2896 :
2897 : /* Program behavior changed, original promoted (and inlined) target is not
2898 : hot any more. Will avoid promote the original target.
2899 :
2900 : To check if original promoted target is still hot, we check the total
2901 : count of the unpromoted targets (stored in TOTAL). If a callsite count
2902 : (stored in INFO) is smaller than half of the total count, the original
2903 : promoted target is considered not hot any more. */
2904 0 : if (info->count < total / 2)
2905 : {
2906 0 : if (dump_file)
2907 0 : fprintf (dump_file, " not hot anymore %ld < %ld",
2908 : (long)info->count,
2909 : (long)total /2);
2910 0 : return false;
2911 : }
2912 :
2913 0 : inline_stack stack;
2914 0 : get_inline_stack_in_node (gimple_location (stmt), &stack, node);
2915 0 : if (stack.length () == 0)
2916 : {
2917 0 : if (dump_file)
2918 0 : fprintf (dump_file, " no inline stack\n");
2919 0 : return false;
2920 : }
2921 0 : function_instance *s = get_function_instance_by_inline_stack (stack);
2922 0 : if (s == NULL)
2923 : {
2924 0 : if (dump_file)
2925 : {
2926 0 : fprintf (dump_file, " function not found in inline stack:");
2927 0 : dump_inline_stack (dump_file, &stack);
2928 : }
2929 0 : return false;
2930 : }
2931 0 : icall_target_map map;
2932 0 : if (s->find_icall_target_map (node ? node->decl
2933 : : current_function_decl,
2934 : stmt, &map) == 0)
2935 : {
2936 0 : if (dump_file)
2937 : {
2938 0 : fprintf (dump_file, " no target map for stack: ");
2939 0 : dump_inline_stack (dump_file, &stack);
2940 : }
2941 0 : return false;
2942 : }
2943 0 : for (icall_target_map::const_iterator iter = map.begin ();
2944 0 : iter != map.end (); ++iter)
2945 0 : info->targets[iter->first] = iter->second;
2946 0 : if (dump_file)
2947 : {
2948 0 : fprintf (dump_file, " looks good; stack:");
2949 0 : dump_inline_stack (dump_file, &stack);
2950 : }
2951 : return true;
2952 0 : }
2953 :
2954 : void
2955 0 : autofdo_source_profile::remove_icall_target (cgraph_edge *e)
2956 : {
2957 0 : autofdo::inline_stack stack;
2958 0 : autofdo::get_inline_stack_in_node (gimple_location (e->call_stmt),
2959 : &stack, e->caller);
2960 0 : autofdo::function_instance *s
2961 0 : = get_function_instance_by_inline_stack (stack);
2962 0 : s->remove_icall_target (e->caller->decl, e->call_stmt);
2963 0 : }
2964 :
2965 : /* Find total count of the callee of EDGE. */
2966 :
2967 : gcov_type
2968 0 : autofdo_source_profile::get_callsite_total_count (
2969 : struct cgraph_edge *edge) const
2970 : {
2971 0 : inline_stack stack;
2972 0 : stack.safe_push ({edge->callee->decl, 0, UNKNOWN_LOCATION});
2973 :
2974 0 : get_inline_stack_in_node (gimple_location (edge->call_stmt), &stack,
2975 : edge->caller);
2976 0 : if (dump_file)
2977 : {
2978 0 : if (!edge->caller->inlined_to)
2979 0 : fprintf (dump_file, "Looking up afdo profile for call %s -> %s stack:",
2980 0 : edge->caller->dump_name (), edge->callee->dump_name ());
2981 : else
2982 0 : fprintf (dump_file, "Looking up afdo profile for call %s -> %s transitively %s stack:",
2983 0 : edge->caller->dump_name (), edge->callee->dump_name (),
2984 : edge->caller->inlined_to->dump_name ());
2985 0 : dump_inline_stack (dump_file, &stack);
2986 : }
2987 :
2988 0 : function_instance *s = get_function_instance_by_inline_stack (stack);
2989 0 : if (s == NULL)
2990 : {
2991 0 : if (dump_file)
2992 0 : fprintf (dump_file, "No function instance found\n");
2993 0 : return 0;
2994 : }
2995 0 : if (afdo_string_table->get_index_by_decl (edge->callee->decl)
2996 0 : != s->symbol_name ())
2997 : {
2998 0 : if (dump_file)
2999 0 : fprintf (dump_file, "Mismatched name of callee %s and profile %s\n",
3000 0 : raw_symbol_name (edge->callee->decl),
3001 : afdo_string_table->get_symbol_name (s->symbol_name ()));
3002 0 : return 0;
3003 : }
3004 :
3005 0 : return s->total_count () * afdo_count_scale;
3006 0 : }
3007 :
/* Read the AutoFDO source profile and return TRUE on success.  */
3009 :
3010 : /* source profile format:
3011 :
3012 : GCOV_TAG_AFDO_FUNCTION: 4 bytes
3013 : LENGTH: 4 bytes
3014 : NUM_FUNCTIONS: 4 bytes
3015 : FUNCTION_INSTANCE_1
3016 : FUNCTION_INSTANCE_2
3017 : ...
3018 : FUNCTION_INSTANCE_N. */
3019 :
3020 : bool
3021 0 : autofdo_source_profile::read ()
3022 : {
3023 0 : if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
3024 : {
3025 0 : inform (UNKNOWN_LOCATION, "Not expected TAG.");
3026 0 : return false;
3027 : }
3028 :
3029 0 : gcc_checking_assert (!afdo_source_profile);
3030 0 : afdo_source_profile = this;
3031 :
3032 : /* Skip the length of the section. */
3033 0 : gcov_read_unsigned ();
3034 :
3035 : /* Read in the function/callsite profile, and store it in local
3036 : data structure. */
3037 0 : unsigned function_num = gcov_read_unsigned ();
3038 0 : for (unsigned i = 0; i < function_num; i++)
3039 : {
3040 0 : function_instance::function_instance_stack stack;
3041 0 : function_instance *s
3042 0 : = function_instance::read_function_instance (&stack);
3043 :
3044 0 : if (find_function_instance (s->get_descriptor ()) == nullptr)
3045 0 : add_function_instance (s);
3046 : else
3047 0 : fatal_error (UNKNOWN_LOCATION,
3048 : "auto-profile contains duplicated function instance %s",
3049 : afdo_string_table->get_symbol_name (s->symbol_name ()));
3050 0 : s->prop_timestamp ();
3051 0 : timestamp_info_map.insert({s->timestamp (), 0});
3052 0 : }
3053 :
3054 : /* timestamp_info_map is std::map with timestamp as key,
3055 : so it's already sorted in ascending order wrt timestamps.
3056 : This loop maps function with lowest timestamp to 1, and so on.
3057 : In afdo_annotate_cfg, node->tp_first_run is then set to corresponding
3058 : tp_first_run value. */
3059 :
3060 0 : int tp_first_run = 1;
3061 0 : for (auto &p : timestamp_info_map)
3062 0 : p.second = tp_first_run++;
3063 :
3064 0 : afdo_profile_info->sum_max = afdo_summary_info->max_count;
3065 : /* Scale up the profile, but leave some bits in case some counts gets
3066 : bigger than sum_max eventually. */
3067 0 : if (afdo_profile_info->sum_max)
3068 0 : afdo_count_scale
3069 0 : = MAX (((gcov_type)1 << (profile_count::n_bits - 10))
3070 : / afdo_profile_info->sum_max, 1);
3071 0 : afdo_profile_info->cutoff *= afdo_count_scale;
3072 : /* Derive the hot count threshold from the profile summary. */
3073 0 : afdo_hot_bb_threshold = afdo_summary_info->get_threshold_count (
3074 0 : param_hot_bb_count_ws_permille * 1000)
3075 0 : * afdo_count_scale;
3076 0 : set_hot_bb_threshold (afdo_hot_bb_threshold);
3077 0 : if (dump_file)
3078 0 : fprintf (dump_file,
3079 : "Max count in profile %" PRIu64 "\n"
3080 : "Setting scale %" PRIu64 "\n"
3081 : "Scaled max count %" PRIu64 "\n"
3082 : "Cutoff %" PRIu64 "\n"
3083 : "Unscaled hot count threshold %" PRIu64 "\n"
3084 : "Hot count threshold %" PRIu64 "\n\n",
3085 : (int64_t) afdo_profile_info->sum_max, (int64_t) afdo_count_scale,
3086 0 : (int64_t) (afdo_profile_info->sum_max * afdo_count_scale),
3087 0 : (int64_t) afdo_profile_info->cutoff,
3088 0 : (int64_t) afdo_summary_info->get_threshold_count (
3089 0 : param_hot_bb_count_ws_permille * 1000),
3090 : (int64_t) afdo_hot_bb_threshold);
3091 0 : afdo_profile_info->sum_max *= afdo_count_scale;
3092 0 : return true;
3093 : }
3094 :
3095 : /* Return the function_instance in the profile that correspond to the
3096 : inline STACK. */
3097 :
3098 : function_instance *
3099 0 : autofdo_source_profile::get_function_instance_by_inline_stack (
3100 : const inline_stack &stack) const
3101 : {
3102 0 : function_instance_descriptor descriptor (
3103 : afdo_string_table->get_filename_index (
3104 0 : get_normalized_path (DECL_SOURCE_FILE (stack[stack.length () - 1].decl))),
3105 0 : afdo_string_table->get_index_by_decl (stack[stack.length () - 1].decl));
3106 0 : function_instance *s = find_function_instance (descriptor);
3107 :
3108 0 : if (s == NULL)
3109 : {
3110 0 : if (dump_file)
3111 0 : fprintf (dump_file, "No offline instance for %s\n",
3112 0 : raw_symbol_name (stack[stack.length () - 1].decl));
3113 0 : return NULL;
3114 : }
3115 :
3116 0 : for (unsigned i = stack.length () - 1; i > 0; i--)
3117 : {
3118 0 : s = s->get_function_instance_by_decl (stack[i].afdo_loc,
3119 0 : stack[i - 1].decl,
3120 0 : stack[i].location);
3121 0 : if (s == NULL)
3122 : {
3123 : /* afdo inliner extends the stack by last entry with unknown
3124 : location while checking if function was inlined during train run.
3125 : We do not want to print diagnostics about every function
3126 : which is not inlined. */
3127 : if (s && dump_enabled_p () && stack[i].location != UNKNOWN_LOCATION)
3128 : dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS,
3129 : dump_user_location_t::from_location_t
3130 : (stack[i].location),
3131 : "auto-profile has no inlined function instance "
3132 : "for inlined call of %s at relative "
3133 : " location +%i, discriminator %i\n",
3134 : raw_symbol_name (stack[i - 1].decl),
3135 : stack[i].afdo_loc >> 16,
3136 : stack[i].afdo_loc & 65535);
3137 : return NULL;
3138 : }
3139 : }
3140 : return s;
3141 : }
3142 :
3143 : /* Find the matching function instance which has DESCRIPTOR as its
3144 : descriptor. If not found, also try checking if an instance exists with the
3145 : same name which has no associated filename. */
3146 :
3147 : autofdo_source_profile::name_function_instance_map::const_iterator
3148 0 : autofdo_source_profile::find_iter_for_function_instance (
3149 : function_instance_descriptor descriptor) const
3150 : {
3151 0 : auto it = map_.find (descriptor);
3152 :
3153 : /* Try searching for the symbol not having a filename if it isn't found. */
3154 0 : if (it == map_.end ())
3155 0 : it = map_.find (
3156 0 : function_instance_descriptor (string_table::unknown_filename,
3157 0 : (int) descriptor.symbol_name ()));
3158 0 : return it;
3159 : }
3160 :
3161 : /* Similar to the above, but return a pointer to the instance instead of an
3162 : iterator. */
3163 :
3164 : function_instance *
3165 0 : autofdo_source_profile::find_function_instance (
3166 : function_instance_descriptor descriptor) const
3167 : {
3168 0 : auto it = find_iter_for_function_instance (descriptor);
3169 0 : return it == map_.end () ? NULL : it->second;
3170 : }
3171 :
3172 : /* Remove a function instance from the map. Returns true if the entry was
3173 : actually deleted. */
3174 :
3175 : bool
3176 0 : autofdo_source_profile::remove_function_instance (function_instance *inst)
3177 : {
3178 0 : auto iter = find_iter_for_function_instance (inst->get_descriptor ());
3179 0 : if (iter != map_.end ())
3180 : {
3181 0 : map_.erase (iter);
3182 0 : return true;
3183 : }
3184 : return false;
3185 : }
3186 :
3187 : /* Module profile is only used by LIPO. Here we simply ignore it. */
3188 :
3189 : static void
3190 0 : fake_read_autofdo_module_profile ()
3191 : {
3192 : /* Read in the module info. */
3193 0 : gcov_read_unsigned ();
3194 :
3195 : /* Skip the length of the section. */
3196 0 : gcov_read_unsigned ();
3197 :
3198 : /* Read in the file name table. */
3199 0 : unsigned total_module_num = gcov_read_unsigned ();
3200 0 : gcc_assert (total_module_num == 0);
3201 0 : }
3202 :
3203 : /* Read data from profile data file. */
3204 :
3205 : static void
3206 0 : read_profile (void)
3207 : {
3208 0 : if (gcov_open (auto_profile_file, 1) == 0)
3209 : {
3210 0 : error ("cannot open profile file %s", auto_profile_file);
3211 0 : return;
3212 : }
3213 :
3214 0 : if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
3215 : {
3216 0 : error ("AutoFDO profile magic number does not match");
3217 0 : return;
3218 : }
3219 :
3220 : /* Skip the version number. */
3221 0 : unsigned version = gcov_read_unsigned ();
3222 0 : if (version != AUTO_PROFILE_VERSION)
3223 : {
3224 0 : error ("AutoFDO profile version %u does not match %u",
3225 : version, AUTO_PROFILE_VERSION);
3226 0 : return;
3227 : }
3228 :
3229 : /* Skip the empty integer. */
3230 0 : gcov_read_unsigned ();
3231 :
3232 : /* summary_info. */
3233 0 : afdo_summary_info = new summary_info ();
3234 0 : if (!afdo_summary_info->read ())
3235 : {
3236 0 : error ("cannot read summary information from %s", auto_profile_file);
3237 0 : return;
3238 : }
3239 :
3240 : /* string_table. */
3241 0 : afdo_string_table = new string_table ();
3242 0 : if (!afdo_string_table->read ())
3243 : {
3244 0 : error ("cannot read string table from %s", auto_profile_file);
3245 0 : return;
3246 : }
3247 :
3248 : /* autofdo_source_profile. */
3249 0 : afdo_source_profile = autofdo_source_profile::create ();
3250 0 : if (afdo_source_profile == NULL
3251 0 : || gcov_is_error ())
3252 : {
3253 0 : error ("cannot read function profile from %s", auto_profile_file);
3254 0 : delete afdo_source_profile;
3255 0 : afdo_source_profile = NULL;
3256 0 : return;
3257 : }
3258 :
3259 : /* autofdo_module_profile. */
3260 0 : fake_read_autofdo_module_profile ();
3261 0 : if (gcov_is_error ())
3262 : {
3263 0 : error ("cannot read module profile from %s", auto_profile_file);
3264 0 : return;
3265 : }
3266 : }
3267 :
3268 : /* From AutoFDO profiles, find values inside STMT for that we want to measure
3269 : histograms for indirect-call optimization.
3270 :
3271 : This function is actually served for 2 purposes:
3272 : * before annotation, we need to mark histogram, promote and inline
3273 : * after annotation, we just need to mark, and let follow-up logic to
3274 : decide if it needs to promote and inline. */
3275 :
3276 : static bool
3277 0 : afdo_indirect_call (gcall *stmt, const icall_target_map &map,
3278 : bool transform, cgraph_edge *indirect_edge)
3279 : {
3280 0 : tree callee;
3281 :
3282 0 : if (map.size () == 0)
3283 : {
3284 0 : if (dump_file)
3285 0 : fprintf (dump_file, "No targets found\n");
3286 0 : return false;
3287 : }
3288 0 : if (!stmt)
3289 : {
3290 0 : if (dump_file)
3291 0 : fprintf (dump_file, "No call statement\n");
3292 0 : return false;
3293 : }
3294 0 : if (gimple_call_internal_p (stmt))
3295 : {
3296 0 : if (dump_file)
3297 0 : fprintf (dump_file, "Internal call\n");
3298 0 : return false;
3299 : }
3300 0 : if (gimple_call_fndecl (stmt) != NULL_TREE)
3301 : {
3302 0 : if (dump_file)
3303 0 : fprintf (dump_file, "Call is already direct\n");
3304 0 : return false;
3305 : }
3306 :
3307 0 : gcov_type total = 0;
3308 0 : icall_target_map::const_iterator max_iter = map.end ();
3309 :
3310 0 : for (icall_target_map::const_iterator iter = map.begin ();
3311 0 : iter != map.end (); ++iter)
3312 : {
3313 0 : total += iter->second;
3314 0 : if (max_iter == map.end () || max_iter->second < iter->second)
3315 : max_iter = iter;
3316 : }
3317 0 : total *= afdo_count_scale;
3318 0 : struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
3319 0 : get_identifier (afdo_string_table->get_symbol_name (max_iter->first)));
3320 0 : if (direct_call == NULL)
3321 : {
3322 0 : if (dump_file)
3323 0 : fprintf (dump_file, "Failed to find cgraph node for %s\n",
3324 0 : afdo_string_table->get_symbol_name (max_iter->first));
3325 0 : return false;
3326 : }
3327 :
3328 0 : callee = gimple_call_fn (stmt);
3329 :
3330 0 : if (!transform)
3331 : {
3332 0 : if (!direct_call->profile_id)
3333 : {
3334 0 : if (dump_file)
3335 0 : fprintf (dump_file, "No profile id\n");
3336 0 : return false;
3337 : }
3338 0 : histogram_value hist = gimple_alloc_histogram_value (
3339 : cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
3340 0 : hist->n_counters = 4;
3341 0 : hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
3342 0 : gimple_add_histogram_value (cfun, stmt, hist);
3343 :
3344 : /* Total counter */
3345 0 : hist->hvalue.counters[0] = total;
3346 : /* Number of value/counter pairs */
3347 0 : hist->hvalue.counters[1] = 1;
3348 : /* Value */
3349 0 : hist->hvalue.counters[2] = direct_call->profile_id;
3350 : /* Counter */
3351 0 : hist->hvalue.counters[3] = max_iter->second * afdo_count_scale;
3352 :
3353 0 : if (!direct_call->profile_id)
3354 : {
3355 0 : if (dump_file)
3356 0 : fprintf (dump_file, "Histogram attached\n");
3357 0 : return false;
3358 : }
3359 : return false;
3360 : }
3361 :
3362 0 : if (dump_file)
3363 : {
3364 0 : fprintf (dump_file, "Indirect call -> direct call ");
3365 0 : print_generic_expr (dump_file, callee, TDF_SLIM);
3366 0 : fprintf (dump_file, " => ");
3367 0 : print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
3368 : }
3369 :
3370 0 : if (!direct_call->definition)
3371 : {
3372 0 : if (dump_file)
3373 0 : fprintf (dump_file, " no definition available\n");
3374 0 : return false;
3375 : }
3376 :
3377 0 : if (dump_file)
3378 : {
3379 0 : fprintf (dump_file, " transformation on insn ");
3380 0 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3381 0 : fprintf (dump_file, "\n");
3382 : }
3383 :
3384 0 : indirect_edge->make_speculative
3385 0 : (direct_call,
3386 0 : gimple_bb (stmt)->count.apply_scale (99, 100));
3387 0 : return true;
3388 : }
3389 :
3390 : /* From AutoFDO profiles, find values inside STMT for that we want to measure
3391 : histograms and adds them to list VALUES. */
3392 :
3393 : static bool
3394 0 : afdo_vpt (gcall *gs, const icall_target_map &map,
3395 : bool transform, cgraph_edge *indirect_edge)
3396 : {
3397 0 : return afdo_indirect_call (gs, map, transform, indirect_edge);
3398 : }
3399 :
3400 : typedef std::set<basic_block> bb_set;
3401 :
3402 : static bool
3403 0 : is_bb_annotated (const basic_block bb, const bb_set &annotated)
3404 : {
3405 0 : if (annotated.find (bb) != annotated.end ())
3406 : {
3407 0 : gcc_checking_assert (bb->count.quality () == AFDO
3408 : || !bb->count.nonzero_p ());
3409 : return true;
3410 : }
3411 0 : gcc_checking_assert (bb->count.quality () != AFDO
3412 : || !bb->count.nonzero_p ());
3413 : return false;
3414 : }
3415 :
3416 : static void
3417 0 : set_bb_annotated (basic_block bb, bb_set *annotated)
3418 : {
3419 0 : gcc_checking_assert (bb->count.quality () == AFDO
3420 : || !bb->count.nonzero_p ());
3421 0 : annotated->insert (bb);
3422 0 : }
3423 :
3424 : /* Update COUNT by known autofdo count C. */
3425 : static void
3426 0 : update_count_by_afdo_count (profile_count *count, gcov_type c)
3427 : {
3428 0 : if (c)
3429 0 : *count = profile_count::from_gcov_type (c).afdo ();
3430 : /* In case we have guessed profile which is already zero, preserve
3431 : quality info. */
3432 0 : else if (count->nonzero_p ()
3433 0 : || count->quality () == GUESSED
3434 0 : || count->quality () == GUESSED_LOCAL)
3435 0 : *count = profile_count::zero ().afdo ();
3436 0 : }
3437 :
3438 : /* Update COUNT by known autofdo count C. */
3439 : static void
3440 0 : update_count_by_afdo_count (profile_count *count, profile_count c)
3441 : {
3442 0 : if (c.nonzero_p ())
3443 0 : *count = c;
3444 : /* In case we have guessed profile which is already zero, preserve
3445 : quality info. */
3446 0 : else if (count->nonzero_p ()
3447 0 : || count->quality () < c.quality ())
3448 0 : *count = c;
3449 0 : }
3450 :
3451 : /* Try to determine unscaled count of edge E.
3452 : Return -1 if nothing is known. */
3453 :
3454 : static gcov_type
3455 0 : afdo_unscaled_edge_count (edge e)
3456 : {
3457 0 : gcov_type max_count = -1;
3458 0 : basic_block bb_succ = e->dest;
3459 0 : count_info info;
3460 0 : if (afdo_source_profile->get_count_info (e->goto_locus, &info))
3461 : {
3462 0 : if (info.count > max_count)
3463 : max_count = info.count;
3464 0 : if (dump_file && info.count)
3465 : {
3466 0 : fprintf (dump_file,
3467 : " goto location of edge %i->%i with count %" PRIu64"\n",
3468 0 : e->src->index, e->dest->index, (int64_t)info.count);
3469 : }
3470 : }
3471 0 : for (gphi_iterator gpi = gsi_start_phis (bb_succ);
3472 0 : !gsi_end_p (gpi); gsi_next (&gpi))
3473 : {
3474 0 : gphi *phi = gpi.phi ();
3475 0 : location_t phi_loc
3476 0 : = gimple_phi_arg_location_from_edge (phi, e);
3477 0 : if (afdo_source_profile->get_count_info (phi_loc, &info))
3478 : {
3479 0 : if (info.count > max_count)
3480 : max_count = info.count;
3481 0 : if (dump_file && info.count)
3482 : {
3483 0 : fprintf (dump_file,
3484 : " phi op of edge %i->%i with count %" PRIu64": ",
3485 0 : e->src->index, e->dest->index, (int64_t)info.count);
3486 0 : print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
3487 : }
3488 : }
3489 : }
3490 0 : return max_count;
3491 0 : }
3492 :
3493 : /* For a given BB, set its execution count. Attach value profile if a stmt
3494 : is not in PROMOTED, because we only want to promote an indirect call once.
3495 : Return TRUE if BB is annotated. */
3496 :
3497 : static bool
3498 0 : afdo_set_bb_count (basic_block bb, hash_set <basic_block> &zero_bbs)
3499 : {
3500 0 : gimple_stmt_iterator gsi;
3501 0 : gcov_type max_count = -1;
3502 0 : if (dump_file)
3503 0 : fprintf (dump_file, " Looking up AFDO count of bb %i\n", bb->index);
3504 :
3505 0 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3506 : {
3507 0 : count_info info;
3508 0 : gimple *stmt = gsi_stmt (gsi);
3509 0 : if (!stmt_loc_used_by_debug_info (stmt))
3510 0 : continue;
3511 0 : if (afdo_source_profile->get_count_info (stmt, &info))
3512 : {
3513 0 : if (info.count > max_count)
3514 : max_count = info.count;
3515 0 : if (dump_file)
3516 : {
3517 0 : fprintf (dump_file, " count %" PRIu64 " in stmt: ",
3518 : (int64_t)info.count);
3519 0 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3520 : }
3521 0 : gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
3522 : /* TODO; if inlined early and indirect call was not optimized out,
3523 : we will end up speculating again. Early inliner should remove
3524 : all targets for edges it speculated into safely. */
3525 0 : if (call
3526 0 : && info.targets.size () > 0)
3527 0 : afdo_vpt (call, info.targets, false, NULL);
3528 : }
3529 0 : }
3530 :
3531 0 : if (max_count == -1 && single_succ_p (bb))
3532 0 : max_count = afdo_unscaled_edge_count (single_succ_edge (bb));
3533 :
3534 0 : if (max_count == -1)
3535 : return false;
3536 :
3537 0 : if (max_count)
3538 : {
3539 0 : update_count_by_afdo_count (&bb->count, max_count * afdo_count_scale);
3540 0 : if (dump_file)
3541 0 : fprintf (dump_file,
3542 : " Annotated bb %i with count %" PRId64
3543 : ", scaled to %" PRId64 "\n",
3544 : bb->index, (int64_t)max_count,
3545 0 : (int64_t)(max_count * afdo_count_scale));
3546 0 : return true;
3547 : }
3548 : else
3549 : {
3550 0 : if (dump_file)
3551 0 : fprintf (dump_file,
3552 : " bb %i has statements with 0 count\n", bb->index);
3553 0 : zero_bbs.add (bb);
3554 : }
3555 0 : return false;
3556 : }
3557 :
3558 : /* BB1 and BB2 are in an equivalent class iff:
3559 : 1. BB1 dominates BB2.
3560 : 2. BB2 post-dominates BB1.
3561 : 3. BB1 and BB2 are in the same loop nest.
3562 : This function finds the equivalent class for each basic block, and
3563 : stores a pointer to the first BB in its equivalent class. Meanwhile,
3564 : set bb counts for the same equivalent class to be idenical. Update
3565 : ANNOTATED_BB for the first BB in its equivalent class. */
3566 :
3567 : static void
3568 0 : afdo_find_equiv_class (bb_set *annotated_bb)
3569 : {
3570 0 : basic_block bb;
3571 :
3572 0 : FOR_ALL_BB_FN (bb, cfun)
3573 0 : bb->aux = NULL;
3574 :
3575 0 : FOR_ALL_BB_FN (bb, cfun)
3576 : {
3577 0 : if (bb->aux != NULL)
3578 0 : continue;
3579 0 : bb->aux = bb;
3580 0 : for (basic_block bb1 : get_dominated_by (CDI_DOMINATORS, bb))
3581 0 : if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
3582 0 : && bb1->loop_father == bb->loop_father)
3583 : {
3584 0 : bb1->aux = bb;
3585 0 : if (is_bb_annotated (bb1, *annotated_bb)
3586 0 : && (!is_bb_annotated (bb, *annotated_bb)
3587 0 : || bb1->count > bb->count))
3588 : {
3589 0 : if (dump_file)
3590 : {
3591 0 : fprintf (dump_file,
3592 : " Copying count of bb %i to bb %i; count is:",
3593 : bb1->index,
3594 : bb->index);
3595 0 : bb1->count.dump (dump_file);
3596 0 : fprintf (dump_file, "\n");
3597 : }
3598 0 : update_count_by_afdo_count (&bb->count, bb1->count);
3599 0 : set_bb_annotated (bb, annotated_bb);
3600 : }
3601 0 : }
3602 :
3603 0 : for (basic_block bb1 : get_dominated_by (CDI_POST_DOMINATORS, bb))
3604 0 : if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
3605 0 : && bb1->loop_father == bb->loop_father)
3606 : {
3607 0 : bb1->aux = bb;
3608 0 : if (is_bb_annotated (bb1, *annotated_bb)
3609 0 : && (!is_bb_annotated (bb, *annotated_bb)
3610 0 : || bb1->count > bb->count))
3611 : {
3612 0 : if (dump_file)
3613 : {
3614 0 : fprintf (dump_file,
3615 : " Copying count of bb %i to bb %i; count is:",
3616 : bb1->index,
3617 : bb->index);
3618 0 : bb1->count.dump (dump_file);
3619 0 : fprintf (dump_file, "\n");
3620 : }
3621 0 : update_count_by_afdo_count (&bb->count, bb1->count);
3622 0 : set_bb_annotated (bb, annotated_bb);
3623 : }
3624 0 : }
3625 : }
3626 0 : }
3627 :
3628 : /* If a basic block's count is known, and only one of its in/out edges' count
3629 : is unknown, that edge's count can be calculated. Meanwhile, if all of the
3630 : in/out edges' counts are known, then the basic block's unknown count can also
3631 : be calculated. Also, if a block has a single predecessor or successor edge,
3632 : the block's count can be propagated to that edge.
3633 : IS_SUCC is true if out edges of basic blocks are examined, false for in edges.
3634 : Update ANNOTATED_BB accordingly.
3635 : Return TRUE if any basic block/edge count is changed. */
3636 :
3637 : static bool
3638 0 : afdo_propagate_edge (bool is_succ, bb_set *annotated_bb)
3639 : {
3640 0 : basic_block bb;
3641 0 : bool changed = false;
3642 :
3643 0 : FOR_EACH_BB_FN (bb, cfun)
3644 : {
3645 0 : edge e, unknown_edge = NULL;
3646 0 : edge_iterator ei;
3647 0 : int num_unknown_edges = 0;
3648 0 : int num_edges = 0;
3649 0 : profile_count total_known_count = profile_count::zero ().afdo ();
3650 :
         /* Count the edges on the examined side, remember the last
            unannotated one and sum the counts of the annotated ones.  */
3651 0 : FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
3652 : {
3653 0 : gcc_assert (AFDO_EINFO (e) != NULL);
3654 0 : if (! AFDO_EINFO (e)->is_annotated ())
3655 0 : num_unknown_edges++, unknown_edge = e;
3656 : else
3657 0 : total_known_count += AFDO_EINFO (e)->get_count ();
3658 0 : num_edges++;
3659 : }
3660 0 : if (dump_file)
3661 : {
3662 0 : fprintf (dump_file, "bb %i %s propagating %s edges %i, "
3663 : "unknown edges %i, known count ",
3664 : bb->index,
3665 0 : is_bb_annotated (bb, *annotated_bb) ? "(annotated)" : "",
3666 : is_succ ? "successors" : "predecessors", num_edges,
3667 : num_unknown_edges);
3668 0 : total_known_count.dump (dump_file);
3669 0 : fprintf (dump_file, " bb count ");
3670 0 : bb->count.dump (dump_file);
3671 0 : fprintf (dump_file, "\n");
3672 : }
3673 :
3674 : /* Be careful not to annotate block with no successor in special cases. */
3675 0 : if (num_unknown_edges == 0 && num_edges
3676 0 : && !is_bb_annotated (bb, *annotated_bb))
3677 : {
3678 0 : if (dump_file)
3679 : {
3680 0 : fprintf (dump_file, " Annotating bb %i with count ", bb->index);
3681 0 : total_known_count.dump (dump_file);
3682 0 : fprintf (dump_file, "\n");
3683 : }
3684 0 : update_count_by_afdo_count (&bb->count, total_known_count);
3685 0 : set_bb_annotated (bb, annotated_bb);
3686 0 : changed = true;
3687 : }
         /* An annotated block can not have a smaller count than the sum of
            its already known edges; raise the block count to that sum.  */
3688 0 : else if (is_bb_annotated (bb, *annotated_bb)
3689 : /* We do not want to consider 0 (afdo) > 0 (precise) */
3690 0 : && total_known_count.nonzero_p ()
3691 0 : && bb->count < total_known_count)
3692 : {
3693 0 : if (dump_file)
3694 : {
3695 0 : fprintf (dump_file, " Increasing bb %i count from ",
3696 : bb->index);
3697 0 : bb->count.dump (dump_file);
3698 0 : fprintf (dump_file, " to ");
3699 0 : total_known_count.dump (dump_file);
3700 0 : fprintf (dump_file, " hoping to mitigate afdo inconsistency\n");
3701 : }
3702 0 : bb->count = total_known_count;
3703 0 : changed = true;
3704 : }
         /* Exactly one unknown edge: it receives whatever part of the block
            count is not covered by the known edges (zero if nothing is left).  */
3705 0 : else if (num_unknown_edges == 1 && is_bb_annotated (bb, *annotated_bb))
3706 : {
3707 0 : if (bb->count > total_known_count)
3708 : {
3709 0 : profile_count new_count = bb->count - total_known_count;
3710 0 : AFDO_EINFO (unknown_edge)->set_count (new_count);
3711 : }
3712 : else
3713 0 : AFDO_EINFO (unknown_edge)->set_count
3714 0 : (profile_count::zero ().afdo ());
3715 0 : if (dump_file)
3716 : {
3717 0 : fprintf (dump_file, " Annotated edge %i->%i with count ",
3718 0 : unknown_edge->src->index, unknown_edge->dest->index);
3719 0 : AFDO_EINFO (unknown_edge)->get_count ().dump (dump_file);
3720 0 : fprintf (dump_file, "\n");
3721 : }
3722 0 : AFDO_EINFO (unknown_edge)->set_annotated ();
3723 0 : changed = true;
3724 : }
         /* Several unknown edges, but the known ones already account for the
            whole block count (or the block count is zero): nothing is left
            for the unknown edges, so annotate all of them with zero.  */
3725 0 : else if (num_unknown_edges > 1
3726 0 : && is_bb_annotated (bb, *annotated_bb)
3727 0 : && (total_known_count >= bb->count || !bb->count.nonzero_p ()))
3728 : {
3729 0 : FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
3730 : {
3731 0 : gcc_assert (AFDO_EINFO (e) != NULL);
3732 0 : if (! AFDO_EINFO (e)->is_annotated ())
3733 : {
3734 0 : AFDO_EINFO (e)->set_count
3735 0 : (profile_count::zero ().afdo ());
3736 0 : AFDO_EINFO (e)->set_annotated ();
         /* A newly annotated edge is a change the caller must see,
            otherwise propagation may stop too early.  */
         changed = true;
3737 0 : if (dump_file)
3738 : {
3739 0 : fprintf (dump_file, " Annotated edge %i->%i with count ",
3740 0 : e->src->index, e->dest->index);
         /* Dump the count of the edge just annotated (E), not of
            UNKNOWN_EDGE which may not have been processed yet.  */
3741 0 : AFDO_EINFO (e)->get_count ().dump (dump_file);
3742 0 : fprintf (dump_file, "\n");
3743 : }
3744 : }
3745 : }
3746 : }
         /* All edges known and there is just one of them: the edge must carry
            at least the count of the block, so raise it if it is smaller.  */
3747 0 : else if (num_unknown_edges == 0
3748 0 : && is_bb_annotated (bb, *annotated_bb)
3749 0 : && (is_succ ? single_succ_p (bb) : single_pred_p (bb)))
3750 : {
3751 0 : edge e = is_succ ? single_succ_edge (bb) : single_pred_edge (bb);
3752 0 : if (AFDO_EINFO (e)->is_annotated ()
3753 0 : && AFDO_EINFO (e)->get_count () < bb->count)
3754 : {
3755 0 : if (dump_file)
3756 : {
3757 0 : fprintf (dump_file, " Increasing edge %i->%i count from ",
3758 0 : e->src->index, e->dest->index);
3759 0 : AFDO_EINFO (e)->get_count ().dump (dump_file);
3760 0 : fprintf (dump_file, " to ");
3761 0 : bb->count.dump (dump_file);
3762 0 : fprintf (dump_file, " hoping to mitigate afdo inconsistency\n");
3763 : }
3764 0 : AFDO_EINFO (e)->set_count (bb->count);
3765 0 : changed = true;
3766 : }
3767 : }
3768 : }
3769 0 : return changed;
3770 : }
3771 :
3772 : /* Special propagation for short-circuit expressions, because GCC translates
3773 : control flow into data flow for such expressions. E.g.
3774 : BB1:
3775 : if (a && b)
3776 : BB2
3777 : else
3778 : BB3
3779 :
3780 : will be translated into:
3781 :
3782 : BB1:
3783 : if (a)
3784 : goto BB.t1
3785 : else
3786 : goto BB.t3
3787 : BB.t1:
3788 : if (b)
3789 : goto BB.t2
3790 : else
3791 : goto BB.t3
3792 : BB.t2:
3793 : goto BB.t3
3794 : BB.t3:
3795 : tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2))
3796 : if (tmp)
3797 : goto BB2
3798 : else
3799 : goto BB3
3800 :
3801 : In this case, we need to propagate through PHI to determine the edge
3802 : count of BB1->BB.t1, BB.t1->BB.t2.
         Only conditions ending blocks that are in ANNOTATED_BB are
         considered. */
3803 :
3804 : static void
3805 0 : afdo_propagate_circuit (const bb_set &annotated_bb)
3806 : {
3807 0 : basic_block bb;
3808 0 : FOR_ALL_BB_FN (bb, cfun)
3809 : {
3810 0 : gimple *def_stmt;
3811 0 : tree cmp_rhs, cmp_lhs;
3812 0 : gimple *cmp_stmt = last_nondebug_stmt (bb);
3813 0 : edge e;
3814 0 : edge_iterator ei;
3815 :
         /* Only handle blocks ending with a condition that compares an
            SSA name against the constant 0 or 1.  */
3816 0 : if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
3817 0 : continue;
3818 0 : cmp_rhs = gimple_cond_rhs (cmp_stmt);
3819 0 : cmp_lhs = gimple_cond_lhs (cmp_stmt);
3820 0 : if (!TREE_CONSTANT (cmp_rhs)
3821 0 : || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
3822 0 : continue;
3823 0 : if (TREE_CODE (cmp_lhs) != SSA_NAME)
3824 0 : continue;
3825 0 : if (!is_bb_annotated (bb, annotated_bb))
3826 0 : continue;
         /* Look through chains of plain SSA name copies to find the
            statement really defining the compared value.  */
3827 0 : def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
3828 0 : while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
3829 0 : && gimple_assign_single_p (def_stmt)
3830 0 : && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
3831 0 : def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
3832 0 : if (!def_stmt)
3833 0 : continue;
         /* The value must come from a PHI for this propagation to apply.  */
3834 0 : gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
3835 0 : if (!phi_stmt)
3836 0 : continue;
3837 0 : FOR_EACH_EDGE (e, ei, bb->succs)
3838 : {
3839 0 : unsigned i, total = 0;
         /* ONLY_ONE is read only when TOTAL == 1, in which case the loop
            below has assigned it.  */
3840 0 : edge only_one;
         /* True if edge E is taken when the PHI result is 1, false if it is
            taken when the result is 0; derived from the constant compared
            against, the comparison code and the edge's true/false flag.  */
3841 0 : bool check_value_one = (((integer_onep (cmp_rhs))
3842 0 : ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
3843 0 : ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
3844 0 : if (! AFDO_EINFO (e)->is_annotated ())
3845 0 : continue;
3846 0 : for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
3847 : {
3848 0 : tree val = gimple_phi_arg_def (phi_stmt, i);
3849 0 : edge ep = gimple_phi_arg_edge (phi_stmt, i);
3850 :
         /* Consider only constant 0/1 PHI arguments whose value selects
            edge E.  */
3851 0 : if (!TREE_CONSTANT (val)
3852 0 : || !(integer_zerop (val) || integer_onep (val)))
3853 0 : continue;
3854 0 : if (check_value_one ^ integer_onep (val))
3855 0 : continue;
3856 0 : total++;
3857 0 : only_one = ep;
         /* If E is never taken, no PHI edge feeding it with the matching
            value is taken either.  */
3858 0 : if (! (AFDO_EINFO (e)->get_count ()).nonzero_p ()
3859 0 : && ! AFDO_EINFO (ep)->is_annotated ())
3860 : {
3861 0 : AFDO_EINFO (ep)->set_count (profile_count::zero ().afdo ());
3862 0 : AFDO_EINFO (ep)->set_annotated ();
3863 : }
3864 : }
         /* With a single matching PHI argument its incoming edge gets the
            count of E.  */
3865 0 : if (total == 1 && ! AFDO_EINFO (only_one)->is_annotated ())
3866 : {
3867 0 : AFDO_EINFO (only_one)->set_count (AFDO_EINFO (e)->get_count ());
3868 0 : AFDO_EINFO (only_one)->set_annotated ();
3869 : }
3870 : }
3871 : }
3872 0 : }
3873 :
3874 : /* Propagate the basic block count and edge count on the control flow
3875 : graph. We do the propagation iteratively until it stabilizes or until
         the limit of 100 iterations is reached. ANNOTATED_BB is the set of
         annotated basic blocks and is updated along the way. */
3876 :
3877 : static void
3878 0 : afdo_propagate (bb_set *annotated_bb)
3879 : {
3880 0 : bool changed = true;
3881 0 : int i = 0;
3882 :
         /* First give every unannotated block the count of the annotated block
            recorded in its AUX field (presumably the equivalence class
            representative set up by afdo_find_equiv_class -- confirm).  */
3883 0 : basic_block bb;
3884 0 : FOR_ALL_BB_FN (bb, cfun)
3885 0 : if (!is_bb_annotated (bb, *annotated_bb)
3886 0 : && is_bb_annotated ((basic_block)bb->aux, *annotated_bb))
3887 : {
3888 0 : update_count_by_afdo_count (&bb->count, ((basic_block)bb->aux)->count);
3889 0 : set_bb_annotated (bb, annotated_bb);
3890 0 : if (dump_file)
3891 : {
3892 0 : fprintf (dump_file,
3893 : " Copying count of bb %i to bb %i; count is:",
3894 0 : ((basic_block)bb->aux)->index,
3895 : bb->index);
3896 0 : bb->count.dump (dump_file);
3897 0 : fprintf (dump_file, "\n");
3898 : }
3899 : }
3900 :
         /* Count only iterations actually executed, so that I is exact both
            when propagation stabilizes and when the limit is hit.  */
3901 0 : for (; changed && i < 100; i++)
3902 : {
3903 0 : changed = false;
3904 :
3905 0 : if (afdo_propagate_edge (true, annotated_bb))
3906 : changed = true;
3907 0 : if (afdo_propagate_edge (false, annotated_bb))
3908 0 : changed = true;
3909 0 : afdo_propagate_circuit (*annotated_bb);
3910 : }
3911 0 : if (dump_file)
3912 0 : fprintf (dump_file, "Propagation took %i iterations%s\n",
3913 : i, changed ? "; iteration limit reached" : "");
3914 0 : }
3915 :
3916 : /* qsort comparator of sreals. Orders larger values first (descending).
         determine_scale uses it to sort a vector of struct scale, whose first
         member is the sreal being compared. */
3917 : static int
3918 0 : cmp (const void *a, const void *b)
3919 : {
3920 0 : if (*(const sreal *)a < *(const sreal *)b)
3921 : return 1;
3922 0 : if (*(const sreal *)a > *(const sreal *)b)
3923 0 : return -1;
3924 : return 0;
3925 : }
3926 :
3927 : /* To scale a connected component of graph we collect desired scales of
3928 : basic blocks on the boundary and then compute a robust average. */
3929 :
3930 : struct scale
3931 : {
3932 : /* Scale desired. Must stay the first member: the qsort comparator
         cmp reads each element as an sreal. */
3933 : sreal scale;
3934 : /* Weight for averaging computed from execution count of the edge
3935 : scale originates from. */
3936 : uint64_t weight;
3937 : };
3938 :
3939 : /* Add scale ANNOTATED/ORIG to SCALES, i.e. the factor by which ORIG has
         to be multiplied to become ANNOTATED. The scale is weighted by the
         value of ANNOTATED plus one. Nothing is added if ORIG is not
         nonzero. */
3940 :
3941 : static void
3942 0 : add_scale (vec <scale> *scales, profile_count annotated, profile_count orig)
3943 : {
3944 0 : if (dump_file)
3945 : {
3946 0 : orig.dump (dump_file);
3947 0 : fprintf (dump_file, " should be ");
3948 0 : annotated.dump (dump_file);
3949 0 : fprintf (dump_file, "\n");
3950 : }
3951 0 : if (orig.nonzero_p ())
3952 : {
3953 0 : sreal scale
3954 0 : = annotated.guessed_local ()
3955 0 : .to_sreal_scale (orig);
         /* The weight is stored in an uint64_t field, so print it with the
            matching unsigned format.  */
3956 0 : if (dump_file)
3957 0 : fprintf (dump_file, " adding scale %.16f, weight %" PRIu64 "\n",
3958 0 : scale.to_double (), (uint64_t) (annotated.value () + 1));
3959 0 : scales->safe_push ({scale, annotated.value () + 1});
3960 : }
3961 0 : }
3962 :
3963 : /* Scale counts of all basic blocks in BBS by SCALE and convert them to
3964 : IPA quality. Blocks whose count is exactly profile_count::zero () or
         is uninitialized are left untouched. */
3965 :
3966 : static void
3967 0 : scale_bbs (const vec <basic_block> &bbs, sreal scale)
3968 : {
3969 0 : if (dump_file)
3970 0 : fprintf (dump_file, " Scaling by %.16f\n", scale.to_double ());
3971 0 : for (basic_block b : bbs)
3972 0 : if (!(b->count == profile_count::zero ())
3973 0 : && b->count.initialized_p ())
3974 : {
         /* Keep the original count for the zero check and the dump below.  */
3975 0 : profile_count o = b->count;
3976 0 : b->count = b->count.force_guessed () * scale;
3977 :
3978 : /* If we scaled to 0, make it auto-fdo since that is treated
3979 : less aggressively. */
3980 0 : if (!b->count.nonzero_p () && o.nonzero_p ())
3981 0 : b->count = profile_count::zero ().afdo ();
3982 0 : if (dump_file)
3983 : {
3984 0 : fprintf (dump_file, " bb %i count updated ", b->index);
3985 0 : o.dump (dump_file);
3986 0 : fprintf (dump_file, " -> ");
3987 0 : b->count.dump (dump_file);
3988 0 : fprintf (dump_file, "\n");
3989 : }
3990 : }
3991 0 : }
3992 :
3993 : /* Determine scaling factor by taking robust average of SCALES
3994 : and taking into account limits.
3995 : MAX_COUNT is maximal guessed count to be scaled while MAX_COUNT_IN_FN
3996 : is maximal count in function determined by auto-fdo.
         SCALES is sorted in place and must not be empty, otherwise the
         average below divides by a zero weight sum. */
3997 :
3998 : sreal
3999 0 : determine_scale (vec <scale> *scales, profile_count max_count,
4000 : profile_count max_count_in_fn)
4001 : {
         /* Sort by scale, largest first (see cmp).  */
4002 0 : scales->qsort (cmp);
4003 :
4004 0 : uint64_t overall_weight = 0;
4005 0 : for (scale &e : *scales)
4006 0 : overall_weight += e.weight;
4007 :
         /* Average only the entries overlapping the middle half of the overall
            weight, ignoring the outliers in the first and last quarter.  */
4008 0 : uint64_t cummulated = 0, weight_sum = 0;
4009 0 : sreal scale_sum = 0;
4010 0 : for (scale &e : *scales)
4011 : {
4012 0 : uint64_t prev = cummulated;
4013 0 : cummulated += e.weight;
4014 0 : if (cummulated >= overall_weight / 4
4015 0 : && prev <= 3 * overall_weight / 4)
4016 : {
4017 0 : scale_sum += e.scale * e.weight;
4018 0 : weight_sum += e.weight;
         /* Weights are uint64_t; use the unsigned format macro.  */
4019 0 : if (dump_file)
4020 0 : fprintf (dump_file, " accounting scale %.16f, weight %" PRIu64 "\n",
4021 : e.scale.to_double (), e.weight);
4022 : }
4023 0 : else if (dump_file)
4024 0 : fprintf (dump_file, " ignoring scale %.16f, weight %" PRIu64 "\n",
4025 : e.scale.to_double (), e.weight);
4026 : }
4027 0 : sreal scale = scale_sum / (sreal)weight_sum;
4028 :
4029 : /* Avoid scaled regions to have very large counts.
4030 : Otherwise they may dominate ipa-profile's histogram computing cutoff
4031 : of hot basic blocks. */
4032 0 : if (max_count * scale > max_count_in_fn.guessed_local ().apply_scale (128, 1))
4033 : {
4034 0 : if (dump_file)
4035 : {
4036 0 : fprintf (dump_file, "Scaling by %.16f produces max count ",
4037 : scale.to_double ());
4038 0 : (max_count * scale).dump (dump_file);
4039 0 : fprintf (dump_file, " that exceeds max count in fn ");
4040 0 : max_count_in_fn.dump (dump_file);
4041 0 : fprintf (dump_file, "; capping\n");
4042 : }
         /* Cap so that MAX_COUNT scales to MAX_COUNT_IN_FN.  */
4043 0 : scale = max_count_in_fn.guessed_local ().to_sreal_scale (max_count);
4044 : }
4045 0 : return scale;
4046 : }
4047 :
4048 : /* Scale profile of the whole function to approximately match auto-profile.
         Return true if at least one scale was found and the profile was
         scaled, false otherwise. */
4049 :
4050 : bool
4051 0 : scale_bb_profile ()
4052 : {
4053 0 : const function_instance *s
4054 : = afdo_source_profile->get_function_instance_by_decl
4055 0 : (current_function_decl);
         /* Use the same per-filename fallback as afdo_annotate_cfg: the caller
            may have found its instance only that way, in which case the plain
            lookup above returns NULL.  */
         if (s == NULL)
           for (unsigned i = 0; i < afdo_string_table->filenames ().length (); i++)
             {
               s = afdo_source_profile->get_function_instance_by_decl
                     (current_function_decl, afdo_string_table->filenames ()[i]);
               if (s)
                 break;
             }
4056 :
4057 : /* Without a function instance there is no head count to use. */
4058 0 : gcov_type head_count = s ? s->head_count () * autofdo::afdo_count_scale : 0;
4059 0 : hash_set <basic_block> zero_bbs;
4060 0 : auto_vec <basic_block, 20> bbs (n_basic_blocks_for_fn (cfun));
4061 0 : auto_vec <scale, 20> scales;
4062 0 : basic_block bb;
         /* MAX_COUNT tracks the largest static count, MAX_COUNT_IN_FN the
            largest count coming from the auto-profile.  */
4063 0 : profile_count max_count = profile_count::zero ();
4064 0 : profile_count max_count_in_fn = profile_count::zero ();
4065 0 : bbs.quick_push (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4066 0 : bbs.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
4067 0 : if (head_count > 0)
4068 : {
4069 0 : profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
4070 0 : max_count = entry_count;
4071 0 : update_count_by_afdo_count (&entry_count, head_count);
4072 0 : max_count_in_fn = entry_count;
4073 0 : add_scale (&scales, entry_count, ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
4074 : }
4075 0 : FOR_EACH_BB_FN (bb, cfun)
4076 : {
4077 0 : profile_count cnt = bb->count;
4078 0 : bbs.safe_push (bb);
4079 0 : max_count = profile_count::max_prefer_initialized (max_count, cnt);
4080 0 : if (afdo_set_bb_count (bb, zero_bbs))
4081 : {
         /* afdo_set_bb_count changed BB's count; restore the static count in
            BB and keep the new count in CNT to derive a scale from it.  */
4082 0 : std::swap (cnt, bb->count);
4083 0 : max_count_in_fn
4084 0 : = profile_count::max_prefer_initialized (max_count_in_fn, cnt);
4085 0 : add_scale (&scales, cnt, bb->count);
4086 : }
4087 : }
4088 0 : if (scales.length ())
4089 : {
4090 0 : sreal scale = determine_scale (&scales, max_count, max_count_in_fn);
4091 0 : scale_bbs (bbs, scale);
4092 0 : return true;
4093 : }
4094 : return false;
4095 0 : }
4096 :
4097 : /* In case given basic block was fully optimized out, AutoFDO
4098 : will have no data about it. In this case try to preserve static profile.
4099 : Identify connected components (in undirected form of CFG) which have
4100 : no annotations at all. Look at their boundaries and try to determine
4101 : scaling factor and scale. ANNOTATED_BB is the set of annotated blocks. */
4102 :
4103 : void
4104 0 : afdo_adjust_guessed_profile (bb_set *annotated_bb)
4105 : {
4106 : /* Basic blocks of connected component currently processed. */
4107 0 : auto_vec <basic_block, 20> bbs (n_basic_blocks_for_fn (cfun));
4108 : /* Scale factors found. */
4109 0 : auto_vec <scale, 20> scales;
         /* Worklist for the walk discovering one connected component.  */
4110 0 : auto_vec <basic_block, 20> stack (n_basic_blocks_for_fn (cfun));
4111 :
4112 0 : basic_block seed_bb;
4113 0 : unsigned int component_id = 1;
4114 :
4115 : /* Map from basic block to its component.
4116 : 0 is used for unvisited BBs,
4117 : 1 means that BB is annotated,
4118 : >=2 is an id of the component BB belongs to. */
4119 0 : auto_vec <unsigned int, 20> component;
4120 0 : component.safe_grow (last_basic_block_for_fn (cfun));
         /* Largest count among the annotated blocks; passed to
            determine_scale as its upper reference.  */
4121 0 : profile_count max_count_in_fn = profile_count::zero ();
4122 0 : FOR_ALL_BB_FN (seed_bb, cfun)
4123 0 : if (is_bb_annotated (seed_bb, *annotated_bb))
4124 : {
4125 0 : component[seed_bb->index] = 1;
4126 0 : max_count_in_fn
4127 0 : = profile_count::max_prefer_initialized (max_count_in_fn, seed_bb->count);
4128 : }
4129 : else
4130 0 : component[seed_bb->index] = 0;
4131 0 : FOR_ALL_BB_FN (seed_bb, cfun)
4132 0 : if (!component[seed_bb->index])
4133 : {
4134 0 : stack.quick_push (seed_bb);
4135 0 : component_id++;
4136 0 : bbs.truncate (0);
4137 0 : scales.truncate (0);
4138 0 : component[seed_bb->index] = component_id;
4139 0 : profile_count max_count = profile_count::zero ();
4140 :
4141 : /* Identify connected component starting in BB. */
4142 0 : if (dump_file)
4143 0 : fprintf (dump_file, "Starting connected component in bb %i\n",
4144 : seed_bb->index);
4145 0 : do
4146 : {
4147 0 : basic_block b = stack.pop ();
4148 :
4149 0 : bbs.quick_push (b);
4150 0 : max_count = profile_count::max_prefer_initialized (max_count, b->count);
4151 :
         /* Follow edges in both directions; blocks are marked when pushed
            so each one enters the worklist at most once.  */
4152 0 : for (edge e: b->preds)
4153 0 : if (!component[e->src->index])
4154 : {
4155 0 : stack.quick_push (e->src);
4156 0 : component[e->src->index] = component_id;
4157 : }
4158 0 : for (edge e: b->succs)
4159 0 : if (!component[e->dest->index])
4160 : {
4161 0 : stack.quick_push (e->dest);
4162 0 : component[e->dest->index] = component_id;
4163 : }
4164 : }
4165 0 : while (!stack.is_empty ());
4166 :
4167 : /* If all blocks in the component have 0 count, we do not need
4168 : to scale, only we must convert to IPA quality. */
4169 0 : if (!max_count.nonzero_p ())
4170 : {
4171 0 : if (dump_file)
4172 0 : fprintf (dump_file, " All counts are 0; scale = 1\n");
4173 0 : scale_bbs (bbs, 1);
4174 0 : continue;
4175 : }
4176 :
4177 : /* Now visit the component and try to figure out its desired
4178 : frequency. */
4179 0 : for (basic_block b : bbs)
4180 : {
4181 0 : if (dump_file)
4182 : {
4183 0 : fprintf (dump_file, " visiting bb %i with count ", b->index);
4184 0 : b->count.dump (dump_file);
4185 0 : fprintf (dump_file, "\n");
4186 : }
4187 0 : if (!b->count.nonzero_p ())
4188 0 : continue;
4189 : /* Sum of counts of annotated edges into B. */
4190 0 : profile_count annotated_count = profile_count::zero ();
4191 : /* Sum of counts of edges into B with source in current
4192 : component. */
4193 0 : profile_count current_component_count = profile_count::zero ();
         /* Set when B has an incoming edge that is annotated or comes from
            an annotated block.  */
4194 0 : bool boundary = false;
4195 :
4196 0 : for (edge e: b->preds)
4197 0 : if (AFDO_EINFO (e)->is_annotated ())
4198 : {
4199 0 : if (dump_file)
4200 : {
4201 0 : fprintf (dump_file, " Annotated pred edge to %i "
4202 0 : "with count ", e->src->index);
4203 0 : AFDO_EINFO (e)->get_count ().dump (dump_file);
4204 0 : fprintf (dump_file, "\n");
4205 : }
4206 0 : boundary = true;
4207 0 : annotated_count += AFDO_EINFO (e)->get_count ();
4208 : }
4209 : /* If source is annotated, combine with static
4210 : probability prediction.
4211 : TODO: We can do better in case some of edges out are
4212 : annotated and distribute only remaining count out of BB. */
4213 0 : else if (is_bb_annotated (e->src, *annotated_bb))
4214 : {
4215 0 : boundary = true;
4216 0 : if (dump_file)
4217 : {
4218 0 : fprintf (dump_file, " Annotated predecessor %i "
4219 : "with count ", e->src->index);
4220 0 : e->src->count.dump (dump_file);
4221 0 : fprintf (dump_file, " edge count using static profile ");
4222 0 : e->count ().dump (dump_file);
4223 0 : fprintf (dump_file, "\n");
4224 : }
4225 0 : annotated_count += e->count ();
4226 : }
4227 : else
4228 : {
4229 0 : current_component_count += e->count ();
4230 0 : gcc_checking_assert (component[e->src->index] == component_id);
4231 : }
         /* The part of B's static count not coming from inside the
            component should match what the annotated boundary provides.  */
4232 0 : if (boundary && current_component_count.initialized_p ())
4233 : {
4234 0 : if (dump_file)
4235 0 : fprintf (dump_file, " bb %i in count ", b->index);
4236 0 : add_scale (&scales,
4237 : annotated_count,
4238 : b->count - current_component_count);
4239 : }
4240 0 : for (edge e: b->succs)
4241 0 : if (AFDO_EINFO (e)->is_annotated ())
4242 : {
4243 0 : if (dump_file)
4244 0 : fprintf (dump_file, " edge %i->%i count ",
4245 0 : b->index, e->dest->index);
4246 0 : add_scale (&scales, AFDO_EINFO (e)->get_count (), e->count ());
4247 : }
4248 0 : else if (is_bb_annotated (e->dest, *annotated_bb))
4249 : {
         /* Start from the destination count and subtract what the other
            known predecessors contribute; the rest is what edge E should
            carry.  OUT_COUNT is accumulated but not otherwise used in
            this block.  */
4250 0 : profile_count annotated_count = e->dest->count;
4251 0 : profile_count out_count = profile_count::zero ();
4252 0 : bool ok = true;
4253 :
4254 0 : for (edge e2: e->dest->preds)
4255 0 : if (AFDO_EINFO (e2)->is_annotated ())
4256 0 : annotated_count -= AFDO_EINFO (e2)->get_count ();
4257 0 : else if (component[e2->src->index] == component_id)
4258 0 : out_count += e2->count ();
4259 0 : else if (is_bb_annotated (e2->src, *annotated_bb))
4260 0 : annotated_count -= e2->count ();
         /* A possibly executed edge from another unannotated component
            makes the remainder unreliable; give up on this edge.  */
4261 0 : else if (e2->probability.nonzero_p ())
4262 : {
4263 : ok = false;
4264 : break;
4265 : }
4266 0 : if (!ok)
4267 0 : continue;
4268 0 : if (dump_file)
4269 0 : fprintf (dump_file,
4270 : " edge %i->%i has annotated successor; count ",
4271 0 : b->index, e->dest->index);
4272 0 : add_scale (&scales, annotated_count, e->count ());
4273 : }
4274 :
4275 : }
4276 :
4277 : /* If we failed to find annotated entry or exit edge,
4278 : look for exit edges and scale profile so the dest
4279 : BB get all flow it needs. This is imprecise because
4280 : the edge is not annotated and thus BB has more than
4281 : one such predecessor. */
4282 0 : if (!scales.length ())
4283 0 : for (basic_block b : bbs)
4284 0 : if (b->count.nonzero_p ())
4285 0 : for (edge e: b->succs)
4286 0 : if (is_bb_annotated (e->dest, *annotated_bb))
4287 : {
4288 0 : profile_count annotated_count = e->dest->count;
4289 0 : for (edge e2: e->dest->preds)
4290 0 : if (AFDO_EINFO (e2)->is_annotated ())
4291 0 : annotated_count -= AFDO_EINFO (e2)->get_count ();
4292 0 : if (dump_file)
4293 0 : fprintf (dump_file,
4294 : " edge %i->%i has annotated successor;"
4295 : " upper bound count ",
4296 0 : b->index, e->dest->index);
4297 0 : add_scale (&scales, annotated_count, e->count ());
4298 : }
4299 0 : if (!scales.length ())
4300 : {
4301 0 : if (dump_file)
4302 0 : fprintf (dump_file,
4303 : " Can not determine count from the boundary; giving up\n");
4304 0 : continue;
4305 : }
4306 0 : gcc_checking_assert (scales.length ());
4307 0 : sreal scale = determine_scale (&scales, max_count, max_count_in_fn);
4308 0 : scale_bbs (bbs, scale);
4309 : }
4310 0 : }
4311 :
4312 : /* Propagate counts on control flow graph and calculate branch
4313 : probabilities. ANNOTATED_BB is the set of annotated basic blocks.
         An edge_info is attached to the AUX field of every edge for the
         duration of this function and released before returning. */
4314 :
4315 : static void
4316 0 : afdo_calculate_branch_prob (bb_set *annotated_bb)
4317 : {
4318 0 : edge e;
4319 0 : edge_iterator ei;
4320 0 : basic_block bb;
4321 :
         /* Attach edge info to all edges and seed it with edge counts.  */
4322 0 : FOR_ALL_BB_FN (bb, cfun)
4323 : {
4324 0 : gcc_assert (bb->aux == NULL);
4325 0 : FOR_EACH_EDGE (e, ei, bb->succs)
4326 : {
4327 0 : gcc_assert (e->aux == NULL);
4328 0 : e->aux = new edge_info ();
4329 0 : gcov_type c = afdo_unscaled_edge_count (e);
4330 0 : if (c == 0 && e->count () == profile_count::zero ())
4331 : {
4332 0 : AFDO_EINFO (e)->set_count (profile_count::zero ());
         /* Terminate the dump line like the other branch does.  */
4333 0 : if (dump_file)
4334 0 : fprintf (dump_file,
4335 : " Annotating edge %i->%i with count 0;"
4336 : " static profile agrees\n",
4337 0 : e->src->index, e->dest->index);
4338 : }
4339 0 : else if (c > 0)
4340 : {
4341 0 : AFDO_EINFO (e)->set_count
4342 0 : (profile_count::from_gcov_type
4343 0 : (c * autofdo::afdo_count_scale).afdo ());
4344 0 : if (dump_file)
4345 : {
4346 0 : fprintf (dump_file,
4347 : " Annotating edge %i->%i with count ",
4348 0 : e->src->index, e->dest->index);
4349 0 : AFDO_EINFO (e)->get_count ().dump (dump_file);
4350 0 : fprintf (dump_file, "\n");
4351 : }
4352 : }
4353 : }
4354 : }
4355 :
4356 0 : afdo_find_equiv_class (annotated_bb);
4357 0 : afdo_propagate (annotated_bb);
4358 :
         /* Turn the propagated edge counts of annotated blocks into branch
            probabilities.  */
4359 0 : FOR_EACH_BB_FN (bb, cfun)
4360 0 : if (is_bb_annotated (bb, *annotated_bb))
4361 : {
4362 0 : bool all_known = true;
4363 0 : profile_count total_count = profile_count::zero ().afdo ();
4364 :
4365 0 : FOR_EACH_EDGE (e, ei, bb->succs)
4366 : {
4367 0 : gcc_assert (AFDO_EINFO (e) != NULL);
4368 0 : if (! AFDO_EINFO (e)->is_annotated ())
4369 : {
4370 : /* If by static profile this edge never happens,
4371 : still propagate the rest. */
4372 0 : if (e->probability.nonzero_p ())
4373 : {
4374 : all_known = false;
4375 : break;
4376 : }
4377 : }
4378 : else
4379 0 : total_count += AFDO_EINFO (e)->get_count ();
4380 : }
         /* Probabilities can be computed only if all relevant outgoing
            edges are known and their total is nonzero.  */
4381 0 : if (!all_known || !total_count.nonzero_p ())
4382 0 : continue;
4383 0 : if (dump_file)
4384 : {
4385 0 : fprintf (dump_file, "Total count of bb %i is ", bb->index);
4386 0 : total_count.dump (dump_file);
4387 0 : fprintf (dump_file, "\n");
4388 : }
4389 :
4390 0 : FOR_EACH_EDGE (e, ei, bb->succs)
4391 0 : if (AFDO_EINFO (e)->is_annotated ())
4392 : {
4393 0 : profile_count cnt = AFDO_EINFO (e)->get_count ();
4394 : /* If probability is 1, preserve reliable static prediction
4395 : (This is, for example the case of single fallthru edge
4396 : or single fallthru plus unlikely EH edge.) */
4397 0 : if (cnt == total_count
4398 0 : && e->probability == profile_probability::always ())
4399 : ;
4400 0 : else if (cnt.nonzero_p ())
4401 0 : e->probability
4402 0 : = cnt.probability_in (total_count);
4403 : /* If probability is zero, preserve reliable static
4404 : prediction. */
4405 0 : else if (e->probability.nonzero_p ()
4406 0 : || e->probability.quality () == GUESSED)
4407 0 : e->probability = profile_probability::never ().afdo ();
4408 0 : if (dump_file)
4409 : {
4410 0 : fprintf (dump_file, " probability of edge %i->%i"
4411 : " with count ",
4412 0 : e->src->index, e->dest->index);
4413 0 : cnt.dump (dump_file);
4414 0 : fprintf (dump_file, " set to ");
4415 0 : e->probability.dump (dump_file);
4416 0 : fprintf (dump_file, "\n");
4417 : }
4418 : }
4419 : }
4420 0 : afdo_adjust_guessed_profile (annotated_bb);
         /* Release the per-edge info and clear the AUX fields again.  */
4421 0 : FOR_ALL_BB_FN (bb, cfun)
4422 : {
4423 0 : bb->aux = NULL;
4424 0 : FOR_EACH_EDGE (e, ei, bb->succs)
4425 0 : if (AFDO_EINFO (e) != NULL)
4426 : {
4427 0 : delete AFDO_EINFO (e);
4428 0 : e->aux = NULL;
4429 : }
4430 : }
4431 0 : }
4432 :
4433 : /* Annotate auto profile to the control flow graph of the current
         function. Dominance information and loop structures computed here
         are released again on every path that leaves the function after
         they were set up. */
4434 :
4435 : static void
4436 0 : afdo_annotate_cfg (void)
4437 : {
4438 0 : basic_block bb;
4439 0 : bb_set annotated_bb;
4440 0 : const function_instance *s
4441 0 : = afdo_source_profile->get_function_instance_by_decl (
4442 : current_function_decl);
4443 :
4444 : /* FIXME: This is a workaround for sourcefile tracking, if afdo_string_table
4445 : ends up with empty filename or incorrect filename for the function and
4446 : should be removed once issues with sourcefile tracking get fixed. */
4447 0 : if (s == NULL)
4448 0 : for (unsigned i = 0; i < afdo_string_table->filenames ().length (); i++)
4449 : {
4450 0 : s = afdo_source_profile->get_function_instance_by_decl (current_function_decl, afdo_string_table->filenames()[i]);
4451 0 : if (s)
4452 : break;
4453 : }
4454 :
4455 0 : if (s == NULL)
4456 : {
4457 0 : if (dump_file)
4458 0 : fprintf (dump_file, "No afdo profile for %s\n",
4459 0 : cgraph_node::get (current_function_decl)->dump_name ());
4460 : /* create_gcov only dumps symbols with some samples in them.
4461 : This means that we get nonempty zero_bbs only if some
4462 : nonzero counts in profile were not matched with statements. */
4463 0 : if (!flag_profile_partial_training
4464 0 : && !param_auto_profile_reorder_only)
4465 : {
4466 0 : FOR_ALL_BB_FN (bb, cfun)
4467 0 : if (bb->count.quality () == GUESSED_LOCAL)
4468 0 : bb->count = bb->count.global0afdo ();
4469 0 : update_max_bb_count ();
4470 : }
4471 0 : return;
4472 : }
4473 :
         /* Record the first-run time stamp of the function if one is known.  */
4474 0 : auto ts_it = timestamp_info_map.find (s->timestamp ());
4475 0 : if (ts_it != timestamp_info_map.end ())
4476 : {
4477 0 : cgraph_node *node = cgraph_node::get (current_function_decl);
4478 0 : node->tp_first_run = ts_it->second;
4479 :
4480 0 : if (dump_file)
4481 0 : fprintf (dump_file, "Setting %s->tp_first_run to %d\n",
4482 : node->asm_name (), node->tp_first_run);
4483 : }
4484 :
4485 0 : if (param_auto_profile_reorder_only)
4486 : return;
4487 :
         /* From here on every exit must undo these three initializations.  */
4488 0 : calculate_dominance_info (CDI_POST_DOMINATORS);
4489 0 : calculate_dominance_info (CDI_DOMINATORS);
4490 0 : loop_optimizer_init (0);
4491 :
4492 0 : if (dump_file)
4493 : {
4494 0 : fprintf (dump_file, "\n\nAnnotating BB profile of %s\n",
4495 0 : cgraph_node::get (current_function_decl)->dump_name ());
4496 0 : fprintf (dump_file, "\n");
4497 0 : s->dump (dump_file);
4498 0 : fprintf (dump_file, "\n");
4499 : }
4500 0 : bool profile_found = false;
4501 0 : hash_set <basic_block> zero_bbs;
4502 0 : gcov_type head_count = s->head_count () * autofdo::afdo_count_scale;
4503 :
4504 0 : if (!param_auto_profile_bbs)
4505 : {
4506 0 : if (scale_bb_profile ())
         {
           /* Do not leave loop structures and dominance info behind;
              all other exits after the initialization release them too.  */
           loop_optimizer_finalize ();
           free_dominance_info (CDI_DOMINATORS);
           free_dominance_info (CDI_POST_DOMINATORS);
4507 : return;
         }
4508 : }
4509 : else
4510 : {
4511 : /* In the first pass only store non-zero counts. */
4512 0 : profile_found = head_count > 0;
4513 0 : FOR_EACH_BB_FN (bb, cfun)
4514 : {
4515 0 : if (afdo_set_bb_count (bb, zero_bbs))
4516 : {
4517 0 : if (bb->count.quality () == AFDO)
4518 : {
4519 0 : gcc_assert (bb->count.nonzero_p ());
4520 : profile_found = true;
4521 : }
4522 0 : set_bb_annotated (bb, &annotated_bb);
4523 : }
4524 : }
4525 : }
4526 : /* Exit without clobbering static profile if there was no
4527 : non-zero count. */
4528 0 : if (!profile_found)
4529 : {
4530 : /* create_gcov only dumps symbols with some samples in them.
4531 : This means that we get nonempty zero_bbs only if some
4532 : nonzero counts in profile were not matched with statements.
4533 : ??? We can adjust create_gcov to also record info
4534 : about function with no samples. Then we can distinguish
4535 : between lost profiles which should be kept local and
4536 : real functions with 0 samples during train run. */
4537 0 : if (zero_bbs.is_empty ())
4538 : {
4539 0 : if (dump_file)
4540 0 : fprintf (dump_file, "No afdo samples found"
4541 : "; Setting global count to afdo0\n");
4542 : }
4543 : else
4544 : {
4545 0 : if (dump_file)
4546 0 : fprintf (dump_file, "Setting global count to afdo0\n");
4547 : }
4548 0 : if (!flag_profile_partial_training)
4549 : {
4550 0 : FOR_ALL_BB_FN (bb, cfun)
4551 0 : if (bb->count.quality () == GUESSED_LOCAL)
4552 0 : bb->count = bb->count.global0afdo ();
4553 0 : update_max_bb_count ();
4554 : }
4555 :
4556 0 : loop_optimizer_finalize ();
4557 0 : free_dominance_info (CDI_DOMINATORS);
4558 0 : free_dominance_info (CDI_POST_DOMINATORS);
4559 0 : return;
4560 : }
4561 : /* We try to preserve static profile for BBs with 0
4562 : afdo samples, but if even static profile agrees with 0,
4563 : consider it final so propagation works better. */
4564 0 : for (basic_block bb : zero_bbs)
4565 0 : if (!bb->count.nonzero_p ())
4566 : {
4567 0 : update_count_by_afdo_count (&bb->count, 0);
4568 0 : set_bb_annotated (bb, &annotated_bb);
4569 0 : if (dump_file)
4570 : {
4571 0 : fprintf (dump_file, " Annotating bb %i with count ", bb->index);
4572 0 : bb->count.dump (dump_file);
4573 0 : fprintf (dump_file,
4574 : " (has 0 count in both static and afdo profile)\n");
4575 : }
4576 : }
4577 :
4578 : /* Update profile. */
4579 0 : if (head_count > 0)
4580 : {
4581 0 : update_count_by_afdo_count (&ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
4582 : head_count);
4583 0 : set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun), &annotated_bb);
         /* The block following the entry block gets at least the entry
            count.  */
4584 0 : if (!is_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, annotated_bb)
4585 0 : || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
4586 0 : > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
4587 : {
4588 0 : ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
4589 0 : = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
4590 0 : set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb,
4591 : &annotated_bb);
4592 : }
         /* NOTE(review): unlike the entry case above, this tests whether the
            exit block itself is annotated while the block being updated is
            its PREV_BB -- confirm whether PREV_BB was meant here.  */
4593 0 : if (!is_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun), annotated_bb)
4594 0 : || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
4595 0 : > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
4596 : {
4597 0 : EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
4598 0 : = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
4599 0 : set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
4600 : }
4601 : }
4602 :
4603 : /* Calculate, propagate count and probability information on CFG. */
4604 0 : afdo_calculate_branch_prob (&annotated_bb);
4605 :
4606 : /* If we failed to turn some of original guessed profile to global,
4607 : set basic blocks uninitialized. */
4608 0 : FOR_ALL_BB_FN (bb, cfun)
4609 0 : if (!bb->count.ipa_p ())
4610 : {
4611 : /* We skip annotating entry profile if it is 0
4612 : in hope to be able to determine it better from the
4613 : static profile.
4614 :
4615 : Now we know we can not derive it from other info,
4616 : so set it since it is better than UNKNOWN. */
4617 0 : if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
4618 0 : bb->count = profile_count::zero ().afdo ();
4619 : else
4620 0 : bb->count = profile_count::uninitialized ();
4621 0 : if (dump_file)
4622 0 : fprintf (dump_file, " Unknown count of bb %i\n", bb->index);
4623 0 : cfun->cfg->full_profile = false;
4624 : }
4625 :
4626 0 : cgraph_node::get (current_function_decl)->count
4627 0 : = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
4628 0 : update_max_bb_count ();
4629 0 : profile_status_for_fn (cfun) = PROFILE_READ;
4630 0 : if (flag_value_profile_transformations)
4631 : {
4632 0 : gimple_value_profile_transformations ();
4633 0 : free_dominance_info (CDI_DOMINATORS);
4634 0 : free_dominance_info (CDI_POST_DOMINATORS);
4635 0 : update_ssa (TODO_update_ssa);
4636 : }
4637 :
4638 0 : loop_optimizer_finalize ();
4639 0 : free_dominance_info (CDI_DOMINATORS);
4640 0 : free_dominance_info (CDI_POST_DOMINATORS);
4641 0 : }
4642 :
4643 : /* Use AutoFDO profile to annotate the control flow graph.
4644 : Return the todo flag. */
4645 :
4646 : static unsigned int
4647 0 : auto_profile (void)
4648 : {
4649 0 : struct cgraph_node *node;
4650 :
4651 0 : if (symtab->state == FINISHED || !afdo_source_profile)
4652 : return 0;
4653 :
4654 0 : init_node_map (true);
4655 0 : profile_info = autofdo::afdo_profile_info;
4656 0 : afdo_source_profile->offline_unrealized_inlines ();
4657 :
4658 0 : FOR_EACH_FUNCTION (node)
4659 : {
4660 0 : if (!gimple_has_body_p (node->decl))
4661 0 : continue;
4662 :
4663 : /* Don't profile functions produced for builtin stuff. */
4664 0 : if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
4665 0 : continue;
4666 :
4667 0 : push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4668 :
4669 : /* Local pure-const may imply need to fixup the cfg.
4670 : This is similar to what is done in tree-profile.cc. */
4671 0 : if ((execute_fixup_cfg () & TODO_cleanup_cfg))
4672 0 : cleanup_tree_cfg ();
4673 :
4674 0 : autofdo::afdo_annotate_cfg ();
4675 0 : compute_function_frequency ();
4676 :
4677 0 : free_dominance_info (CDI_DOMINATORS);
4678 0 : free_dominance_info (CDI_POST_DOMINATORS);
4679 0 : cgraph_edge::rebuild_edges ();
4680 0 : pop_cfun ();
4681 : }
4682 :
4683 : return 0;
4684 : }
4685 : } /* namespace autofdo. */
4686 :
4687 : /* Read the profile from the profile data file. */
4688 :
4689 : void
4690 0 : read_autofdo_file (void)
4691 : {
4692 0 : if (auto_profile_file == NULL)
4693 0 : auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
4694 :
4695 0 : autofdo::afdo_profile_info = XNEW (gcov_summary);
4696 0 : autofdo::afdo_profile_info->runs = 1;
4697 0 : autofdo::afdo_profile_info->sum_max = 0;
4698 0 : autofdo::afdo_profile_info->cutoff = 1;
4699 :
4700 : /* Read the profile from the profile file. */
4701 0 : autofdo::read_profile ();
4702 0 : }
4703 :
4704 : /* Free the resources. */
4705 :
4706 : void
4707 0 : end_auto_profile (void)
4708 : {
4709 0 : delete autofdo::afdo_source_profile;
4710 0 : delete autofdo::afdo_string_table;
4711 0 : delete autofdo::afdo_summary_info;
4712 0 : profile_info = NULL;
4713 0 : }
4714 :
4715 : /* Returns TRUE if EDGE is hot enough to be inlined early. */
4716 :
4717 : bool
4718 0 : afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
4719 : {
4720 0 : gcov_type count
4721 0 : = autofdo::afdo_source_profile->get_callsite_total_count (edge);
4722 :
4723 0 : if (count > 0)
4724 : {
4725 0 : bool is_hot;
4726 0 : profile_count pcount = profile_count::from_gcov_type (count).afdo ();
4727 0 : is_hot = maybe_hot_afdo_count_p (pcount);
4728 0 : if (dump_file)
4729 : {
4730 0 : fprintf (dump_file, "Call %s -> %s has %s afdo profile count ",
4731 0 : edge->caller->dump_name (), edge->callee->dump_name (),
4732 : is_hot ? "hot" : "cold");
4733 0 : pcount.dump (dump_file);
4734 0 : fprintf (dump_file, "\n");
4735 : }
4736 0 : return is_hot;
4737 : }
4738 :
4739 : return false;
4740 : }
4741 :
4742 : /* Do indirect call promotion during early inlining to make the
4743 : IR match the profiled binary before actual annotation.
4744 :
4745 : This is needed because an indirect call might have been promoted
4746 : and inlined in the profiled binary. If we do not promote and
4747 : inline these indirect calls before annotation, the profile for
4748 : these promoted functions will be lost.
4749 :
4750 : e.g. foo() --indirect_call--> bar()
4751 : In profiled binary, the callsite is promoted and inlined, making
4752 : the profile look like:
4753 :
4754 : foo: {
4755 : loc_foo_1: count_1
4756 : bar@loc_foo_2: {
4757 : loc_bar_1: count_2
4758 : loc_bar_2: count_3
4759 : }
4760 : }
4761 :
4762 : Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
4763 : If we perform annotation on it, the profile inside bar@loc_foo2
4764 : will be wasted.
4765 :
4766 : To avoid this, we promote loc_foo_2 and inline the promoted bar
4767 : function before annotation, so the profile inside bar@loc_foo2
4768 : will be useful. */
4769 :
4770 : bool
4771 0 : afdo_vpt_for_early_inline (cgraph_node *node)
4772 : {
4773 0 : if (!node->indirect_calls)
4774 : return false;
4775 0 : bool changed = false;
4776 0 : cgraph_node *outer = node->inlined_to ? node->inlined_to : node;
4777 0 : if (autofdo::afdo_source_profile->get_function_instance_by_decl
4778 0 : (outer->decl) == NULL)
4779 : return false;
4780 0 : for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
4781 : {
4782 0 : gcov_type bb_count = 0;
4783 0 : autofdo::count_info info;
4784 0 : basic_block bb = gimple_bb (e->call_stmt);
4785 :
4786 : /* TODO: This is quadratic; cache the value. */
4787 0 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
4788 0 : !gsi_end_p (gsi); gsi_next (&gsi))
4789 : {
4790 0 : gimple *stmt = gsi_stmt (gsi);
4791 0 : if (!stmt_loc_used_by_debug_info (stmt))
4792 0 : continue;
4793 0 : autofdo::count_info info;
4794 0 : if (autofdo::afdo_source_profile->get_count_info (stmt, &info, node))
4795 0 : bb_count = MAX (bb_count, info.count);
4796 0 : }
4797 0 : autofdo::afdo_source_profile->get_count_info (e->call_stmt, &info, node);
4798 0 : info.count = bb_count;
4799 0 : if (!autofdo::afdo_source_profile->update_inlined_ind_target
4800 0 : (e->call_stmt, &info, node))
4801 0 : continue;
4802 0 : changed |= autofdo::afdo_vpt (e->call_stmt, info.targets, true, e);
4803 0 : }
4804 : return changed;
4805 : }
4806 :
4807 : /* If speculation used during early inline, remove the target
4808 : so we do not speculate the indirect edge again during afdo pass. */
4809 :
4810 : void
4811 0 : remove_afdo_speculative_target (cgraph_edge *e)
4812 : {
4813 0 : autofdo::afdo_source_profile->remove_icall_target (e);
4814 0 : }
4815 :
4816 : namespace
4817 : {
4818 :
4819 : const pass_data pass_data_ipa_auto_profile = {
4820 : SIMPLE_IPA_PASS, "afdo", /* name */
4821 : OPTGROUP_NONE, /* optinfo_flags */
4822 : TV_IPA_AUTOFDO, /* tv_id */
4823 : 0, /* properties_required */
4824 : 0, /* properties_provided */
4825 : 0, /* properties_destroyed */
4826 : 0, /* todo_flags_start */
4827 : 0, /* todo_flags_finish */
4828 : };
4829 :
4830 : class pass_ipa_auto_profile : public simple_ipa_opt_pass
4831 : {
4832 : public:
4833 287872 : pass_ipa_auto_profile (gcc::context *ctxt)
4834 575744 : : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
4835 : {
4836 : }
4837 :
4838 : /* opt_pass methods: */
4839 : bool
4840 232112 : gate (function *) final override
4841 : {
4842 232112 : return flag_auto_profile;
4843 : }
4844 : unsigned int
4845 0 : execute (function *) final override
4846 : {
4847 0 : return autofdo::auto_profile ();
4848 : }
4849 : }; // class pass_ipa_auto_profile
4850 :
4851 : } // anon namespace
4852 :
4853 : simple_ipa_opt_pass *
4854 287872 : make_pass_ipa_auto_profile (gcc::context *ctxt)
4855 : {
4856 287872 : return new pass_ipa_auto_profile (ctxt);
4857 : }
4858 :
4859 : namespace
4860 : {
4861 :
4862 : const pass_data pass_data_ipa_auto_profile_offline = {
4863 : SIMPLE_IPA_PASS, "afdo_offline", /* name */
4864 : OPTGROUP_NONE, /* optinfo_flags */
4865 : TV_IPA_AUTOFDO_OFFLINE, /* tv_id */
4866 : 0, /* properties_required */
4867 : 0, /* properties_provided */
4868 : 0, /* properties_destroyed */
4869 : 0, /* todo_flags_start */
4870 : 0, /* todo_flags_finish */
4871 : };
4872 :
4873 : class pass_ipa_auto_profile_offline : public simple_ipa_opt_pass
4874 : {
4875 : public:
4876 287872 : pass_ipa_auto_profile_offline (gcc::context *ctxt)
4877 575744 : : simple_ipa_opt_pass (pass_data_ipa_auto_profile_offline, ctxt)
4878 : {
4879 : }
4880 :
4881 : /* opt_pass methods: */
4882 : bool
4883 232112 : gate (function *) final override
4884 : {
4885 232112 : return flag_auto_profile;
4886 : }
4887 : unsigned int
4888 0 : execute (function *) final override
4889 : {
4890 0 : read_autofdo_file ();
4891 0 : if (autofdo::afdo_source_profile)
4892 0 : autofdo::afdo_source_profile->offline_external_functions ();
4893 0 : return 0;
4894 : }
4895 : }; // class pass_ipa_auto_profile
4896 :
4897 : } // anon namespace
4898 :
4899 : simple_ipa_opt_pass *
4900 287872 : make_pass_ipa_auto_profile_offline (gcc::context *ctxt)
4901 : {
4902 287872 : return new pass_ipa_auto_profile_offline (ctxt);
4903 : }
|