Branch data Line data Source code
1 : : /* IPA function body analysis.
2 : : Copyright (C) 2003-2025 Free Software Foundation, Inc.
3 : : Contributed by Jan Hubicka
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : #ifndef GCC_IPA_SUMMARY_H
22 : : #define GCC_IPA_SUMMARY_H
23 : :
24 : : #include "sreal.h"
25 : : #include "ipa-predicate.h"
26 : :
27 : :
28 : : /* Hints are reasons why IPA heuristics should prefer specializing given
29 : : function. They are represented as bitmap of the following values. */
30 : : enum ipa_hints_vals {
31 : : /* When specialization turns indirect call into a direct call,
32 : : it is good idea to do so. */
33 : : INLINE_HINT_indirect_call = 1,
34 : : /* Inlining may make loop iterations or loop stride known. It is good idea
35 : : to do so because it enables loop optimizations. */
36 : : INLINE_HINT_loop_iterations = 2,
37 : : INLINE_HINT_loop_stride = 4,
38 : : /* Inlining within same strongly connected component of callgraph is often
39 : : a loss due to increased stack frame usage and prologue setup costs. */
40 : : INLINE_HINT_same_scc = 8,
41 : : /* Inlining functions in strongly connected component is not such a great
42 : : win. */
43 : : INLINE_HINT_in_scc = 16,
44 : : /* If function is declared inline by user, it may be good idea to inline
45 : : it. Set by simple_edge_hints in ipa-inline-analysis.cc. */
46 : : INLINE_HINT_declared_inline = 32,
47 : : /* Programs are usually still organized for non-LTO compilation and thus
48 : : if functions are in different modules, inlining may not be so important.
49 : : Set by simple_edge_hints in ipa-inline-analysis.cc. */
50 : : INLINE_HINT_cross_module = 64,
51 : : /* We know that the callee is hot by profile. */
52 : : INLINE_HINT_known_hot = 128,
53 : : /* There is builtin_constant_p dependent on parameter which is usually
54 : : a strong hint to inline. */
55 : : INLINE_HINT_builtin_constant_p = 256
56 : : };
57 : :
58 : : typedef int ipa_hints;
59 : :
60 : : /* Simple description of whether a memory load or a condition refers to a load
61 : : from an aggregate and if so, how and where from in the aggregate.
62 : : Individual fields have the same meaning like fields with the same name in
63 : : struct condition. */
64 : :
65 : : struct agg_position_info
66 : : {
67 : : HOST_WIDE_INT offset;
68 : : bool agg_contents;
69 : : bool by_ref;
70 : : };
71 : :
72 : : /* Representation of function body size and time depending on the call
73 : : context. We keep simple array of record, every containing of predicate
74 : : and time/size to account. */
75 : 27423807 : class size_time_entry
76 : : {
77 : : public:
78 : : /* Predicate for code to be executed. */
79 : : ipa_predicate exec_predicate;
80 : : /* Predicate for value to be constant and optimized out in a specialized copy.
81 : : When deciding on specialization this makes it possible to see how much
82 : : the executed code paths will simplify. */
83 : : ipa_predicate nonconst_predicate;
84 : : int size;
85 : : sreal time;
86 : : };
87 : :
88 : : /* Summary about function and stack frame sizes. We keep this info
89 : : for inline clones and also for WPA streaming. For this reason this is not
90 : : part of ipa_fn_summary which exists only for offline functions. */
91 : : class ipa_size_summary
92 : : {
93 : : public:
94 : : /* Estimated stack frame consumption by the function. */
95 : : HOST_WIDE_INT estimated_self_stack_size;
96 : : /* Size of the function body. */
97 : : int self_size;
98 : : /* Estimated size of the function after inlining. */
99 : : int size;
100 : :
101 : 6654838 : ipa_size_summary ()
102 : 6654838 : : estimated_self_stack_size (0), self_size (0), size (0)
103 : : {
104 : : }
105 : : };
106 : :
107 : : /* Structure to capture how frequently some interesting events occur given a
108 : : particular predicate. The structure is used to estimate how often we
109 : : encounter loops with known iteration count or stride in various
110 : : contexts. */
111 : :
112 : 65509 : struct GTY(()) ipa_freqcounting_predicate
113 : : {
114 : : /* The described event happens with this frequency... */
115 : : sreal freq;
116 : : /* ...when this predicate evaluates to false. */
117 : : ipa_predicate * GTY((skip)) predicate;
118 : : };
119 : :
120 : : /* Function inlining information. */
121 : : class GTY(()) ipa_fn_summary
122 : : {
123 : : public:
124 : : /* Keep all field empty so summary dumping works during its computation.
125 : : This is useful for debugging. */
126 : 8983459 : ipa_fn_summary ()
127 : 8983459 : : min_size (0),
128 : 8983459 : inlinable (false), single_caller (false),
129 : 8983459 : fp_expressions (false), safe_to_inline_to_always_inline (0),
130 : 8983459 : target_info (0), estimated_stack_size (false),
131 : 8983459 : time (0), conds (NULL),
132 : 8983459 : size_time_table (), call_size_time_table (vNULL),
133 : 8983459 : loop_iterations (NULL), loop_strides (NULL),
134 : 8983459 : builtin_constant_p_parms (vNULL),
135 : 8983459 : growth (0), scc_no (0)
136 : : {
137 : 8983459 : }
138 : :
139 : : /* Copy constructor. */
140 : 2234542 : ipa_fn_summary (const ipa_fn_summary &s)
141 : 2234542 : : min_size (s.min_size),
142 : 2234542 : inlinable (s.inlinable), single_caller (s.single_caller),
143 : 2234542 : fp_expressions (s.fp_expressions),
144 : 2234542 : target_info (s.target_info),
145 : 2234542 : estimated_stack_size (s.estimated_stack_size),
146 : 2234542 : time (s.time), conds (s.conds), size_time_table (),
147 : 2234542 : call_size_time_table (vNULL),
148 : 2234542 : loop_iterations (s.loop_iterations), loop_strides (s.loop_strides),
149 : 2234542 : builtin_constant_p_parms (s.builtin_constant_p_parms),
150 : 2234542 : growth (s.growth), scc_no (s.scc_no)
151 : 2234542 : {}
152 : :
153 : : /* Default constructor. */
154 : : ~ipa_fn_summary ();
155 : :
156 : : /* Information about the function body itself. */
157 : :
158 : : /* Minimal size increase after inlining. */
159 : : int min_size;
160 : :
161 : : /* False when there something makes inlining impossible (such as va_arg). */
162 : : unsigned inlinable : 1;
163 : : /* True wen there is only one caller of the function before small function
164 : : inlining. */
165 : : unsigned int single_caller : 1;
166 : : /* True if function contains any floating point expressions. */
167 : : unsigned int fp_expressions : 1;
168 : : /* Cache for analysis of can_early_inline_edge_p. */
169 : : unsigned int safe_to_inline_to_always_inline : 2;
170 : : /* Like fp_expressions field above, but it's to hold some target specific
171 : : information, such as some target specific isa flags. Note that for
172 : : offloading target compilers, this field isn't streamed. */
173 : : unsigned int target_info;
174 : :
175 : : /* Information about function that will result after applying all the
176 : : inline decisions present in the callgraph. Generally kept up to
177 : : date only for functions that are not inline clones. */
178 : :
179 : : /* Estimated stack frame consumption by the function. */
180 : : HOST_WIDE_INT estimated_stack_size;
181 : : /* Estimated runtime of function after inlining. */
182 : : sreal GTY((skip)) time;
183 : :
184 : : /* Conditional size/time information. The summaries are being
185 : : merged during inlining. */
186 : : conditions conds;
187 : : /* Normal code is accounted in size_time_table, while calls are
188 : : accounted in call_size_time_table. This is because calls
189 : : are often adjusted by IPA optimizations and thus this summary
190 : : is generated from call summary information when needed. */
191 : : auto_vec<size_time_entry> GTY((skip)) size_time_table;
192 : : /* Unlike size_time_table that is initialized for all summaries
193 : : call_size_time_table is allocated only for functions with
194 : : many calls. Use effecient vl_ptr storage. */
195 : : vec<size_time_entry, va_heap, vl_ptr> GTY((skip)) call_size_time_table;
196 : :
197 : : /* Predicates on when some loops in the function can have known bounds. */
198 : : vec<ipa_freqcounting_predicate, va_gc> *loop_iterations;
199 : : /* Predicates on when some loops in the function can have known strides. */
200 : : vec<ipa_freqcounting_predicate, va_gc> *loop_strides;
201 : : /* Parameters tested by builtin_constant_p. */
202 : : vec<int, va_heap, vl_ptr> GTY((skip)) builtin_constant_p_parms;
203 : : /* Estimated growth for inlining all copies of the function before start
204 : : of small functions inlining.
205 : : This value will get out of date as the callers are duplicated, but
206 : : using up-to-date value in the badness metric mean a lot of extra
207 : : expenses. */
208 : : int growth;
209 : : /* Number of SCC on the beginning of inlining process. */
210 : : int scc_no;
211 : :
212 : : /* Record time and size under given predicates. */
213 : : void account_size_time (int, sreal, const ipa_predicate &,
214 : : const ipa_predicate &,
215 : : bool call = false);
216 : :
217 : : /* We keep values scaled up, so fractional sizes can be accounted. */
218 : : static const int size_scale = 2;
219 : : /* Maximal size of size_time_table before we start to be conservative. */
220 : : static const int max_size_time_table_size = 256;
221 : : };
222 : :
223 : : class GTY((user)) ipa_fn_summary_t:
224 : : public fast_function_summary <ipa_fn_summary *, va_gc>
225 : : {
226 : : public:
227 : 438675 : ipa_fn_summary_t (symbol_table *symtab):
228 : 877350 : fast_function_summary <ipa_fn_summary *, va_gc> (symtab) {}
229 : :
230 : 438675 : static ipa_fn_summary_t *create_ggc (symbol_table *symtab)
231 : : {
232 : 438675 : class ipa_fn_summary_t *summary
233 : 438675 : = new (ggc_alloc_no_dtor<ipa_fn_summary_t> ()) ipa_fn_summary_t (symtab);
234 : 438675 : summary->disable_insertion_hook ();
235 : 438675 : return summary;
236 : : }
237 : :
238 : : /* Remove ipa_fn_summary for all callees of NODE. */
239 : : void remove_callees (cgraph_node *node);
240 : :
241 : : void insert (cgraph_node *, ipa_fn_summary *) final override;
242 : 0 : void remove (cgraph_node *node, ipa_fn_summary *) final override
243 : : {
244 : 0 : remove_callees (node);
245 : 0 : }
246 : :
247 : : void duplicate (cgraph_node *src, cgraph_node *dst,
248 : : ipa_fn_summary *src_data, ipa_fn_summary *dst_data)
249 : : final override;
250 : : };
251 : :
252 : : extern GTY(()) fast_function_summary <ipa_fn_summary *, va_gc>
253 : : *ipa_fn_summaries;
254 : :
255 : : class ipa_size_summary_t:
256 : : public fast_function_summary <ipa_size_summary *, va_heap>
257 : : {
258 : : public:
259 : 438675 : ipa_size_summary_t (symbol_table *symtab):
260 : 438675 : fast_function_summary <ipa_size_summary *, va_heap> (symtab)
261 : : {
262 : 438675 : disable_insertion_hook ();
263 : 438675 : }
264 : :
265 : 2580121 : void duplicate (cgraph_node *, cgraph_node *,
266 : : ipa_size_summary *src_data,
267 : : ipa_size_summary *dst_data) final override
268 : : {
269 : 2580121 : *dst_data = *src_data;
270 : 2580121 : }
271 : : };
272 : : extern fast_function_summary <ipa_size_summary *, va_heap>
273 : : *ipa_size_summaries;
274 : :
275 : : /* Information kept about callgraph edges. */
276 : : class ipa_call_summary
277 : : {
278 : : public:
279 : : /* Keep all field empty so summary dumping works during its computation.
280 : : This is useful for debugging. */
281 : 24548875 : ipa_call_summary ()
282 : 0 : : predicate (NULL), param (vNULL), call_stmt_size (0), call_stmt_time (0),
283 : 0 : loop_depth (0), is_return_callee_uncaptured (false)
284 : : {
285 : : }
286 : :
287 : : /* Copy constructor. */
288 : 3010704 : ipa_call_summary (const ipa_call_summary &s):
289 : 3010704 : predicate (s.predicate), param (s.param), call_stmt_size (s.call_stmt_size),
290 : 3010704 : call_stmt_time (s.call_stmt_time), loop_depth (s.loop_depth),
291 : 3010704 : is_return_callee_uncaptured (s.is_return_callee_uncaptured)
292 : : {
293 : : }
294 : :
295 : : /* Default destructor. */
296 : : ~ipa_call_summary ();
297 : :
298 : : ipa_predicate *predicate;
299 : : /* Vector indexed by parameters. */
300 : : vec<inline_param_summary> param;
301 : : /* Estimated size and time of the call statement. */
302 : : int call_stmt_size;
303 : : int call_stmt_time;
304 : : /* Depth of loop nest, 0 means no nesting. */
305 : : unsigned int loop_depth;
306 : : /* Indicates whether the caller returns the value of it's callee. */
307 : : bool is_return_callee_uncaptured;
308 : : };
309 : :
310 : : class ipa_call_summary_t: public fast_call_summary <ipa_call_summary *, va_heap>
311 : : {
312 : : public:
313 : 438675 : ipa_call_summary_t (symbol_table *symtab):
314 : 438675 : fast_call_summary <ipa_call_summary *, va_heap> (symtab) {}
315 : :
316 : : /* Hook that is called by summary when an edge is duplicated. */
317 : : void duplicate (cgraph_edge *src, cgraph_edge *dst,
318 : : ipa_call_summary *src_data,
319 : : ipa_call_summary *dst_data) final override;
320 : : };
321 : :
322 : : /* Estimated execution times, code sizes and other information about the
323 : : code executing a call described by ipa_call_context. */
324 : :
325 : 16226005 : struct ipa_call_estimates
326 : : {
327 : : /* Estimated size needed to execute call in the given context. */
328 : : int size;
329 : :
330 : : /* Minimal size needed for the call that is + independent on the call context
331 : : and can be used for fast estimates. */
332 : : int min_size;
333 : :
334 : : /* Estimated time needed to execute call in the given context. */
335 : : sreal time;
336 : :
337 : : /* Estimated time needed to execute the function when not ignoring
338 : : computations known to be constant in this context. */
339 : : sreal nonspecialized_time;
340 : :
341 : : /* Further discovered reasons why to inline or specialize the give calls. */
342 : : ipa_hints hints;
343 : :
344 : : /* Frequency how often a loop with known number of iterations is encountered.
345 : : Calculated with hints. */
346 : : sreal loops_with_known_iterations;
347 : :
348 : : /* Frequency how often a loop with known strides is encountered. Calculated
349 : : with hints. */
350 : : sreal loops_with_known_strides;
351 : : };
352 : :
353 : : class ipa_cached_call_context;
354 : :
355 : : /* This object describe a context of call. That is a summary of known
356 : : information about its parameters. Main purpose of this context is
357 : : to give more realistic estimations of function runtime, size and
358 : : inline hints. */
359 : : class ipa_call_context
360 : : {
361 : : public:
362 : : ipa_call_context (cgraph_node *node,
363 : : clause_t possible_truths,
364 : : clause_t nonspec_possible_truths,
365 : : vec<inline_param_summary> inline_param_summary,
366 : : ipa_auto_call_arg_values *arg_values);
367 : 1025265 : ipa_call_context ()
368 : 1025265 : : m_node(NULL)
369 : : {
370 : : }
371 : : void estimate_size_and_time (ipa_call_estimates *estimates,
372 : : bool est_times = true, bool est_hints = true);
373 : : bool equal_to (const ipa_call_context &);
374 : 1647999 : bool exists_p ()
375 : : {
376 : 1647999 : return m_node != NULL;
377 : : }
378 : : private:
379 : : /* Called function. */
380 : : cgraph_node *m_node;
381 : : /* Clause describing what predicate conditionals can be satisfied
382 : : in this context if function is inlined/specialized. */
383 : : clause_t m_possible_truths;
384 : : /* Clause describing what predicate conditionals can be satisfied
385 : : in this context if function is kept offline. */
386 : : clause_t m_nonspec_possible_truths;
387 : : /* Inline summary maintains info about change probabilities. */
388 : : vec<inline_param_summary> m_inline_param_summary;
389 : :
390 : : /* Even after having calculated clauses, the information about argument
391 : : values is used to resolve indirect calls. */
392 : : ipa_call_arg_values m_avals;
393 : :
394 : : friend ipa_cached_call_context;
395 : : };
396 : :
397 : : /* Variant of ipa_call_context that is stored in a cache over a longer period
398 : : of time. */
399 : :
400 : 0 : class ipa_cached_call_context : public ipa_call_context
401 : : {
402 : : public:
403 : : void duplicate_from (const ipa_call_context &ctx);
404 : : void release ();
405 : : };
406 : :
407 : : extern fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
408 : :
409 : : /* In ipa-fnsummary.cc */
410 : : void ipa_debug_fn_summary (struct cgraph_node *);
411 : : void ipa_dump_fn_summaries (FILE *f);
412 : : void ipa_dump_fn_summary (FILE *f, struct cgraph_node *node);
413 : : void ipa_dump_hints (FILE *f, ipa_hints);
414 : : void ipa_free_fn_summary (void);
415 : : void ipa_free_size_summary (void);
416 : : void inline_analyze_function (struct cgraph_node *node);
417 : : void estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
418 : : ipa_auto_call_arg_values *avals,
419 : : ipa_call_estimates *estimates);
420 : : void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge);
421 : : void ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset = true);
422 : : void compute_fn_summary (struct cgraph_node *, bool);
423 : : bool refs_local_or_readonly_memory_p (tree);
424 : : bool points_to_local_or_readonly_memory_p (tree);
425 : :
426 : :
427 : : void evaluate_properties_for_edge (struct cgraph_edge *e,
428 : : bool inline_p,
429 : : clause_t *clause_ptr,
430 : : clause_t *nonspec_clause_ptr,
431 : : ipa_auto_call_arg_values *avals,
432 : : bool compute_contexts);
433 : :
434 : : void ipa_fnsummary_cc_finalize (void);
435 : : HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node);
436 : : void ipa_remove_from_growth_caches (struct cgraph_edge *edge);
437 : :
438 : : /* Return true if EDGE is a cross module call. */
439 : :
440 : : inline bool
441 : 5555615 : cross_module_call_p (struct cgraph_edge *edge)
442 : : {
443 : : /* Here we do not want to walk to alias target becuase ICF may create
444 : : cross-unit aliases. */
445 : 5555615 : if (edge->caller->unit_id == edge->callee->unit_id)
446 : : return false;
447 : : /* If the call is to a (former) comdat function or s symbol with mutiple
448 : : extern inline definitions then treat is as in-module call. */
449 : 4048 : if (edge->callee->merged_extern_inline || edge->callee->merged_comdat
450 : 4048 : || DECL_COMDAT (edge->callee->decl))
451 : 7 : return false;
452 : : return true;
453 : : }
454 : :
455 : : #endif /* GCC_IPA_FNSUMMARY_H */
|