Branch data Line data Source code
1 : : /* Inlining decision heuristics.
2 : : Copyright (C) 2003-2025 Free Software Foundation, Inc.
3 : : Contributed by Jan Hubicka
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : /* Inlining decision heuristics
22 : :
23 : : The implementation of inliner is organized as follows:
24 : :
25 : : inlining heuristics limits
26 : :
27 : : can_inline_edge_p allow to check that particular inlining is allowed
28 : : by the limits specified by user (allowed function growth, growth and so
29 : : on).
30 : :
31 : : Functions are inlined when it is obvious the result is profitable (such
32 : : as functions called once or when inlining reduce code size).
33 : : In addition to that we perform inlining of small functions and recursive
34 : : inlining.
35 : :
36 : : inlining heuristics
37 : :
38 : : The inliner itself is split into two passes:
39 : :
40 : : pass_early_inlining
41 : :
42 : : Simple local inlining pass inlining callees into current function.
43 : : This pass makes no use of whole unit analysis and thus it can do only
44 : : very simple decisions based on local properties.
45 : :
46 : : The strength of the pass is that it is run in topological order
47 : : (reverse postorder) on the callgraph. Functions are converted into SSA
48 : : form just before this pass and optimized subsequently. As a result, the
49 : : callees of the function seen by the early inliner was already optimized
50 : : and results of early inlining adds a lot of optimization opportunities
51 : : for the local optimization.
52 : :
53 : : The pass handle the obvious inlining decisions within the compilation
54 : : unit - inlining auto inline functions, inlining for size and
55 : : flattening.
56 : :
57 : : main strength of the pass is the ability to eliminate abstraction
58 : : penalty in C++ code (via combination of inlining and early
59 : : optimization) and thus improve quality of analysis done by real IPA
60 : : optimizers.
61 : :
62 : : Because of lack of whole unit knowledge, the pass cannot really make
63 : : good code size/performance tradeoffs. It however does very simple
64 : : speculative inlining allowing code size to grow by
65 : : EARLY_INLINING_INSNS when callee is leaf function. In this case the
66 : : optimizations performed later are very likely to eliminate the cost.
67 : :
68 : : pass_ipa_inline
69 : :
70 : : This is the real inliner able to handle inlining with whole program
71 : : knowledge. It performs following steps:
72 : :
73 : : 1) inlining of small functions. This is implemented by greedy
74 : : algorithm ordering all inlinable cgraph edges by their badness and
75 : : inlining them in this order as long as inline limits allows doing so.
76 : :
77 : : This heuristics is not very good on inlining recursive calls. Recursive
78 : : calls can be inlined with results similar to loop unrolling. To do so,
79 : : special purpose recursive inliner is executed on function when
80 : : recursive edge is met as viable candidate.
81 : :
82 : : 2) Unreachable functions are removed from callgraph. Inlining leads
83 : : to devirtualization and other modification of callgraph so functions
84 : : may become unreachable during the process. Also functions declared as
85 : : extern inline or virtual functions are removed, since after inlining
86 : : we no longer need the offline bodies.
87 : :
88 : : 3) Functions called once and not exported from the unit are inlined.
89 : : This should almost always lead to reduction of code size by eliminating
90 : : the need for offline copy of the function. */
91 : :
92 : : #include "config.h"
93 : : #include "system.h"
94 : : #include "coretypes.h"
95 : : #include "backend.h"
96 : : #include "target.h"
97 : : #include "rtl.h"
98 : : #include "tree.h"
99 : : #include "gimple.h"
100 : : #include "alloc-pool.h"
101 : : #include "tree-pass.h"
102 : : #include "gimple-ssa.h"
103 : : #include "cgraph.h"
104 : : #include "lto-streamer.h"
105 : : #include "trans-mem.h"
106 : : #include "calls.h"
107 : : #include "tree-inline.h"
108 : : #include "profile.h"
109 : : #include "symbol-summary.h"
110 : : #include "tree-vrp.h"
111 : : #include "sreal.h"
112 : : #include "ipa-cp.h"
113 : : #include "ipa-prop.h"
114 : : #include "ipa-fnsummary.h"
115 : : #include "ipa-inline.h"
116 : : #include "ipa-utils.h"
117 : : #include "auto-profile.h"
118 : : #include "builtins.h"
119 : : #include "fibonacci_heap.h"
120 : : #include "stringpool.h"
121 : : #include "attribs.h"
122 : : #include "asan.h"
123 : : #include "ipa-strub.h"
124 : : #include "ipa-modref-tree.h"
125 : : #include "ipa-modref.h"
126 : :
127 : : /* Inliner uses greedy algorithm to inline calls in a priority order.
128 : : Badness is used as the key in a Fibonacci heap which roughly corresponds
129 : : to negation of benefit to cost ratios.
130 : : In case multiple calls has same priority we want to stabilize the outcomes
131 : : for which we use ids. */
132 : : class inline_badness
133 : : {
134 : : public:
135 : : sreal badness;
136 : : int uid;
137 : 1226990 : inline_badness ()
138 : 999002 : : badness (sreal::min ()), uid (0)
139 : : {
140 : : }
141 : 3503505 : inline_badness (cgraph_edge *e, sreal b)
142 : 17925 : : badness (b), uid (e->get_uid ())
143 : : {
144 : : }
145 : 906109 : bool operator<= (const inline_badness &other)
146 : : {
147 : 906109 : if (badness != other.badness)
148 : 906109 : return badness <= other.badness;
149 : 0 : return uid <= other.uid;
150 : : }
151 : 999002 : bool operator== (const inline_badness &other)
152 : : {
153 : 1846524 : return badness == other.badness && uid == other.uid;
154 : : }
155 : 0 : bool operator!= (const inline_badness &other)
156 : : {
157 : 999002 : return badness != other.badness || uid != other.uid;
158 : : }
159 : 27010829 : bool operator< (const inline_badness &other)
160 : : {
161 : 27010829 : if (badness != other.badness)
162 : 22699225 : return badness < other.badness;
163 : 4311604 : return uid < other.uid;
164 : : }
165 : 12366280 : bool operator> (const inline_badness &other)
166 : : {
167 : 12366280 : if (badness != other.badness)
168 : 10292905 : return badness > other.badness;
169 : 2073375 : return uid > other.uid;
170 : : }
171 : : };
172 : :
173 : : typedef fibonacci_heap <inline_badness, cgraph_edge> edge_heap_t;
174 : : typedef fibonacci_node <inline_badness, cgraph_edge> edge_heap_node_t;
175 : :
176 : : /* Statistics we collect about inlining algorithm. */
177 : : static int overall_size;
178 : : static profile_count max_count;
179 : : static profile_count spec_rem;
180 : :
181 : : /* Return false when inlining edge E would lead to violating
182 : : limits on function unit growth or stack usage growth.
183 : :
184 : : The relative function body growth limit is present generally
185 : : to avoid problems with non-linear behavior of the compiler.
186 : : To allow inlining huge functions into tiny wrapper, the limit
187 : : is always based on the bigger of the two functions considered.
188 : :
189 : : For stack growth limits we always base the growth in stack usage
190 : : of the callers. We want to prevent applications from segfaulting
191 : : on stack overflow when functions with huge stack frames gets
192 : : inlined. */
193 : :
194 : : static bool
195 : 6403671 : caller_growth_limits (struct cgraph_edge *e)
196 : : {
197 : 6403671 : struct cgraph_node *to = e->caller;
198 : 6403671 : struct cgraph_node *what = e->callee->ultimate_alias_target ();
199 : 6403671 : int newsize;
200 : 6403671 : int limit = 0;
201 : 6403671 : HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
202 : 6403671 : ipa_size_summary *outer_info = ipa_size_summaries->get (to);
203 : :
204 : : /* Look for function e->caller is inlined to. While doing
205 : : so work out the largest function body on the way. As
206 : : described above, we want to base our function growth
207 : : limits based on that. Not on the self size of the
208 : : outer function, not on the self size of inline code
209 : : we immediately inline to. This is the most relaxed
210 : : interpretation of the rule "do not grow large functions
211 : : too much in order to prevent compiler from exploding". */
212 : 8833783 : while (true)
213 : : {
214 : 7618727 : ipa_size_summary *size_info = ipa_size_summaries->get (to);
215 : 7618727 : if (limit < size_info->self_size)
216 : : limit = size_info->self_size;
217 : 7618727 : if (stack_size_limit < size_info->estimated_self_stack_size)
218 : : stack_size_limit = size_info->estimated_self_stack_size;
219 : 7618727 : if (to->inlined_to)
220 : 1215056 : to = to->callers->caller;
221 : : else
222 : : break;
223 : 1215056 : }
224 : :
225 : 6403671 : ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
226 : 6403671 : ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
227 : :
228 : 6403671 : if (limit < what_size_info->self_size)
229 : : limit = what_size_info->self_size;
230 : :
231 : 6403671 : limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
232 : :
233 : : /* Check the size after inlining against the function limits. But allow
234 : : the function to shrink if it went over the limits by forced inlining. */
235 : 6403671 : newsize = estimate_size_after_inlining (to, e);
236 : 6403671 : if (newsize >= ipa_size_summaries->get (what)->size
237 : 6220981 : && newsize > opt_for_fn (to->decl, param_large_function_insns)
238 : 6659304 : && newsize > limit)
239 : : {
240 : 3103 : e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
241 : 3103 : return false;
242 : : }
243 : :
244 : 6400568 : if (!what_info->estimated_stack_size)
245 : : return true;
246 : :
247 : : /* FIXME: Stack size limit often prevents inlining in Fortran programs
248 : : due to large i/o datastructures used by the Fortran front-end.
249 : : We ought to ignore this limit when we know that the edge is executed
250 : : on every invocation of the caller (i.e. its call statement dominates
251 : : exit block). We do not track this information, yet. */
252 : 2155584 : stack_size_limit += ((gcov_type)stack_size_limit
253 : 1077792 : * opt_for_fn (to->decl, param_stack_frame_growth)
254 : 1077792 : / 100);
255 : :
256 : 1077792 : inlined_stack = (ipa_get_stack_frame_offset (to)
257 : 1077792 : + outer_info->estimated_self_stack_size
258 : 1077792 : + what_info->estimated_stack_size);
259 : : /* Check new stack consumption with stack consumption at the place
260 : : stack is used. */
261 : 1077792 : if (inlined_stack > stack_size_limit
262 : : /* If function already has large stack usage from sibling
263 : : inline call, we can inline, too.
264 : : This bit overoptimistically assume that we are good at stack
265 : : packing. */
266 : 326153 : && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
267 : 1390116 : && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
268 : : {
269 : 64865 : e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
270 : 64865 : return false;
271 : : }
272 : : return true;
273 : : }
274 : :
275 : : /* Dump info about why inlining has failed. */
276 : :
277 : : static void
278 : 4915560 : report_inline_failed_reason (struct cgraph_edge *e)
279 : : {
280 : 4915560 : if (dump_enabled_p ())
281 : : {
282 : 2387 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
283 : : " not inlinable: %C -> %C, %s\n",
284 : : e->caller, e->callee,
285 : : cgraph_inline_failed_string (e->inline_failed));
286 : 2387 : if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
287 : 2387 : || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
288 : 2 : && e->caller->lto_file_data
289 : 2387 : && e->callee->ultimate_alias_target ()->lto_file_data)
290 : : {
291 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
292 : : " LTO objects: %s, %s\n",
293 : 0 : e->caller->lto_file_data->file_name,
294 : 0 : e->callee->ultimate_alias_target ()->lto_file_data->file_name);
295 : : }
296 : 2387 : if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
297 : 2 : if (dump_file)
298 : 0 : cl_target_option_print_diff
299 : 0 : (dump_file, 2, target_opts_for_fn (e->caller->decl),
300 : 0 : target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
301 : 2387 : if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
302 : 0 : if (dump_file)
303 : 0 : cl_optimization_print_diff
304 : 0 : (dump_file, 2, opts_for_fn (e->caller->decl),
305 : 0 : opts_for_fn (e->callee->ultimate_alias_target ()->decl));
306 : : }
307 : 4915560 : }
308 : :
309 : : /* Decide whether sanitizer-related attributes allow inlining. */
310 : :
311 : : static bool
312 : 9294863 : sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
313 : : {
314 : 9294863 : if (!caller || !callee)
315 : : return true;
316 : :
317 : : /* Follow clang and allow inlining for always_inline functions. */
318 : 9294863 : if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
319 : : return true;
320 : :
321 : 8730704 : const sanitize_code codes[] =
322 : : {
323 : : SANITIZE_ADDRESS,
324 : : SANITIZE_THREAD,
325 : : SANITIZE_UNDEFINED,
326 : : SANITIZE_UNDEFINED_NONDEFAULT,
327 : : SANITIZE_POINTER_COMPARE,
328 : : SANITIZE_POINTER_SUBTRACT
329 : : };
330 : :
331 : 61113888 : for (unsigned i = 0; i < ARRAY_SIZE (codes); i++)
332 : 104766764 : if (sanitize_flags_p (codes[i], caller)
333 : 52383382 : != sanitize_flags_p (codes[i], callee))
334 : : return false;
335 : :
336 : 8730506 : if (sanitize_coverage_p (caller) != sanitize_coverage_p (callee))
337 : : return false;
338 : :
339 : : return true;
340 : : }
341 : :
342 : : /* Used for flags where it is safe to inline when caller's value is
343 : : grater than callee's. */
344 : : #define check_maybe_up(flag) \
345 : : (opts_for_fn (caller->decl)->x_##flag \
346 : : != opts_for_fn (callee->decl)->x_##flag \
347 : : && (!always_inline \
348 : : || opts_for_fn (caller->decl)->x_##flag \
349 : : < opts_for_fn (callee->decl)->x_##flag))
350 : : /* Used for flags where it is safe to inline when caller's value is
351 : : smaller than callee's. */
352 : : #define check_maybe_down(flag) \
353 : : (opts_for_fn (caller->decl)->x_##flag \
354 : : != opts_for_fn (callee->decl)->x_##flag \
355 : : && (!always_inline \
356 : : || opts_for_fn (caller->decl)->x_##flag \
357 : : > opts_for_fn (callee->decl)->x_##flag))
358 : : /* Used for flags where exact match is needed for correctness. */
359 : : #define check_match(flag) \
360 : : (opts_for_fn (caller->decl)->x_##flag \
361 : : != opts_for_fn (callee->decl)->x_##flag)
362 : :
363 : : /* Decide if we can inline the edge and possibly update
364 : : inline_failed reason.
365 : : We check whether inlining is possible at all and whether
366 : : caller growth limits allow doing so.
367 : :
368 : : if REPORT is true, output reason to the dump file. */
369 : :
370 : : static bool
371 : 13389043 : can_inline_edge_p (struct cgraph_edge *e, bool report,
372 : : bool early = false)
373 : : {
374 : 13389043 : gcc_checking_assert (e->inline_failed);
375 : :
376 : 13389043 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
377 : : {
378 : 3615352 : if (report)
379 : 3584574 : report_inline_failed_reason (e);
380 : 3615352 : return false;
381 : : }
382 : :
383 : 9773691 : bool inlinable = true;
384 : 9773691 : enum availability avail;
385 : 8546108 : cgraph_node *caller = (e->caller->inlined_to
386 : 9773691 : ? e->caller->inlined_to : e->caller);
387 : 9773691 : cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
388 : :
389 : 9773691 : if (!callee->definition)
390 : : {
391 : 952 : e->inline_failed = CIF_BODY_NOT_AVAILABLE;
392 : 952 : inlinable = false;
393 : : }
394 : 9773691 : if (!early && (!opt_for_fn (callee->decl, optimize)
395 : 5548659 : || !opt_for_fn (caller->decl, optimize)))
396 : : {
397 : 305 : e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
398 : 305 : inlinable = false;
399 : : }
400 : 9773386 : else if (callee->calls_comdat_local)
401 : : {
402 : 19328 : e->inline_failed = CIF_USES_COMDAT_LOCAL;
403 : 19328 : inlinable = false;
404 : : }
405 : 9754058 : else if (avail <= AVAIL_INTERPOSABLE)
406 : : {
407 : 128477 : e->inline_failed = CIF_OVERWRITABLE;
408 : 128477 : inlinable = false;
409 : : }
410 : : /* All edges with call_stmt_cannot_inline_p should have inline_failed
411 : : initialized to one of FINAL_ERROR reasons. */
412 : 9625581 : else if (e->call_stmt_cannot_inline_p)
413 : 0 : gcc_unreachable ();
414 : : /* Don't inline if the functions have different EH personalities. */
415 : 9625581 : else if (DECL_FUNCTION_PERSONALITY (caller->decl)
416 : 2761816 : && DECL_FUNCTION_PERSONALITY (callee->decl)
417 : 9625581 : && (DECL_FUNCTION_PERSONALITY (caller->decl)
418 : 213863 : != DECL_FUNCTION_PERSONALITY (callee->decl)))
419 : : {
420 : 0 : e->inline_failed = CIF_EH_PERSONALITY;
421 : 0 : inlinable = false;
422 : : }
423 : : /* TM pure functions should not be inlined into non-TM_pure
424 : : functions. */
425 : 9625581 : else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
426 : : {
427 : 30 : e->inline_failed = CIF_UNSPECIFIED;
428 : 30 : inlinable = false;
429 : : }
430 : : /* Check compatibility of target optimization options. */
431 : 9625551 : else if (!targetm.target_option.can_inline_p (caller->decl,
432 : : callee->decl))
433 : : {
434 : 449 : e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
435 : 449 : inlinable = false;
436 : : }
437 : 9625102 : else if (ipa_fn_summaries->get (callee) == NULL
438 : 9625100 : || !ipa_fn_summaries->get (callee)->inlinable)
439 : : {
440 : 330239 : e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
441 : 330239 : inlinable = false;
442 : : }
443 : : /* Don't inline a function with mismatched sanitization attributes. */
444 : 9294863 : else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
445 : : {
446 : 198 : e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
447 : 198 : inlinable = false;
448 : : }
449 : :
450 : 9773691 : if (inlinable && !strub_inlinable_to_p (callee, caller))
451 : : {
452 : 1101 : e->inline_failed = CIF_UNSPECIFIED;
453 : 1101 : inlinable = false;
454 : : }
455 : 9773691 : if (!inlinable && report)
456 : 474093 : report_inline_failed_reason (e);
457 : : return inlinable;
458 : : }
459 : :
460 : : /* Return inlining_insns_single limit for function N. If HINT or HINT2 is true
461 : : scale up the bound. */
462 : :
463 : : static int
464 : 8143814 : inline_insns_single (cgraph_node *n, bool hint, bool hint2)
465 : : {
466 : 8143814 : if (hint && hint2)
467 : : {
468 : 2746594 : int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
469 : 2746594 : spd = spd * spd;
470 : 2746594 : if (spd > 1000000)
471 : : spd = 1000000;
472 : 2746594 : return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
473 : : }
474 : 5397220 : if (hint || hint2)
475 : 450042 : return opt_for_fn (n->decl, param_max_inline_insns_single)
476 : 450042 : * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
477 : 4947178 : return opt_for_fn (n->decl, param_max_inline_insns_single);
478 : : }
479 : :
480 : : /* Return inlining_insns_auto limit for function N. If HINT or HINT2 is true
481 : : scale up the bound. */
482 : :
483 : : static int
484 : 5807923 : inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
485 : : {
486 : 5807923 : int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
487 : 5807923 : if (hint && hint2)
488 : : {
489 : 2129255 : int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
490 : 2129255 : spd = spd * spd;
491 : 2129255 : if (spd > 1000000)
492 : : spd = 1000000;
493 : 2129255 : return max_inline_insns_auto * spd / 100;
494 : : }
495 : 3678668 : if (hint || hint2)
496 : 1615319 : return max_inline_insns_auto
497 : 1615319 : * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
498 : : return max_inline_insns_auto;
499 : : }
500 : :
501 : : enum can_inline_edge_by_limits_flags
502 : : {
503 : : /* True if we are early inlining. */
504 : : CAN_INLINE_EARLY = 1,
505 : : /* Ignore size limits. */
506 : : CAN_INLINE_DISREGARD_LIMITS = 2,
507 : : /* Force size limits (ignore always_inline). This is used for
508 : : recrusive inlining where always_inline may lead to inline bombs
509 : : and technically it is non-sential anyway. */
510 : : CAN_INLINE_FORCE_LIMITS = 4,
511 : : /* Report decision to dump file. */
512 : : CAN_INLINE_REPORT = 8,
513 : : };
514 : :
515 : : /* Decide if we can inline the edge and possibly update
516 : : inline_failed reason.
517 : : We check whether inlining is possible at all and whether
518 : : caller growth limits allow doing so. */
519 : :
520 : : static bool
521 : 6971141 : can_inline_edge_by_limits_p (struct cgraph_edge *e, int flags)
522 : : {
523 : 6971141 : gcc_checking_assert (e->inline_failed);
524 : :
525 : 6971141 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
526 : : {
527 : 210 : if (flags & CAN_INLINE_REPORT)
528 : 210 : report_inline_failed_reason (e);
529 : 210 : return false;
530 : : }
531 : :
532 : 6970931 : bool inlinable = true;
533 : 6970931 : enum availability avail;
534 : 6330811 : cgraph_node *caller = (e->caller->inlined_to
535 : 6970931 : ? e->caller->inlined_to : e->caller);
536 : 6970931 : cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
537 : 6970931 : tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
538 : 6970931 : tree callee_tree
539 : 6970931 : = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
540 : : /* Check if caller growth allows the inlining. */
541 : 6970931 : if (!(flags & CAN_INLINE_DISREGARD_LIMITS)
542 : 6969224 : && ((flags & CAN_INLINE_FORCE_LIMITS)
543 : 6936654 : || (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
544 : 6371239 : && !lookup_attribute ("flatten",
545 : 6371239 : DECL_ATTRIBUTES (caller->decl))))
546 : 13374602 : && !caller_growth_limits (e))
547 : : inlinable = false;
548 : 6902963 : else if (callee->externally_visible
549 : 4864239 : && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
550 : 11329637 : && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
551 : : {
552 : 2 : e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
553 : 2 : inlinable = false;
554 : : }
555 : : /* Don't inline a function with a higher optimization level than the
556 : : caller. FIXME: this is really just tip of iceberg of handling
557 : : optimization attribute. */
558 : 6902961 : else if (caller_tree != callee_tree)
559 : : {
560 : 9624 : bool always_inline =
561 : 9624 : (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
562 : 12027 : && lookup_attribute ("always_inline",
563 : 2403 : DECL_ATTRIBUTES (callee->decl)));
564 : 9624 : ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
565 : 9624 : ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
566 : :
567 : : /* Until GCC 4.9 we did not check the semantics-altering flags
568 : : below and inlined across optimization boundaries.
569 : : Enabling checks below breaks several packages by refusing
570 : : to inline library always_inline functions. See PR65873.
571 : : Disable the check for early inlining for now until better solution
572 : : is found. */
573 : 9624 : if (always_inline && (flags & CAN_INLINE_EARLY))
574 : : ;
575 : : /* There are some options that change IL semantics which means
576 : : we cannot inline in these cases for correctness reason.
577 : : Not even for always_inline declared functions. */
578 : 7221 : else if (check_match (flag_wrapv)
579 : 7221 : || check_match (flag_trapv)
580 : 7221 : || check_match (flag_pcc_struct_return)
581 : 7221 : || check_maybe_down (optimize_debug)
582 : : /* When caller or callee does FP math, be sure FP codegen flags
583 : : compatible. */
584 : 7215 : || ((caller_info->fp_expressions && callee_info->fp_expressions)
585 : 1273 : && (check_maybe_up (flag_rounding_math)
586 : 1273 : || check_maybe_up (flag_trapping_math)
587 : 1271 : || check_maybe_down (flag_unsafe_math_optimizations)
588 : 1271 : || check_maybe_down (flag_finite_math_only)
589 : 1270 : || check_maybe_up (flag_signaling_nans)
590 : 1270 : || check_maybe_up (flag_complex_method)
591 : 1269 : || check_maybe_up (flag_signed_zeros)
592 : 1269 : || check_maybe_down (flag_associative_math)
593 : 1253 : || check_maybe_down (flag_reciprocal_math)
594 : 1253 : || check_maybe_down (flag_fp_int_builtin_inexact)
595 : : /* Strictly speaking only when the callee contains function
596 : : calls that may end up setting errno. */
597 : 1253 : || check_maybe_up (flag_errno_math)))
598 : : /* We do not want to make code compiled with exceptions to be
599 : : brought into a non-EH function unless we know that the callee
600 : : does not throw.
601 : : This is tracked by DECL_FUNCTION_PERSONALITY. */
602 : 7195 : || (check_maybe_up (flag_non_call_exceptions)
603 : 0 : && DECL_FUNCTION_PERSONALITY (callee->decl))
604 : 7195 : || (check_maybe_up (flag_exceptions)
605 : 16 : && DECL_FUNCTION_PERSONALITY (callee->decl))
606 : : /* When devirtualization is disabled for callee, it is not safe
607 : : to inline it as we possibly mangled the type info.
608 : : Allow early inlining of always inlines. */
609 : 14416 : || (!(flags & CAN_INLINE_EARLY) && check_maybe_down (flag_devirtualize)))
610 : : {
611 : 34 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
612 : 34 : inlinable = false;
613 : : }
614 : : /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
615 : 7187 : else if (always_inline)
616 : : ;
617 : : /* When user added an attribute to the callee honor it. */
618 : 7187 : else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
619 : 7187 : && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
620 : : {
621 : 2456 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
622 : 2456 : inlinable = false;
623 : : }
624 : : /* If explicit optimize attribute are not used, the mismatch is caused
625 : : by different command line options used to build different units.
626 : : Do not care about COMDAT functions - those are intended to be
627 : : optimized with the optimization flags of module they are used in.
628 : : Also do not care about mixing up size/speed optimization when
629 : : DECL_DISREGARD_INLINE_LIMITS is set. */
630 : 4731 : else if ((callee->merged_comdat
631 : 0 : && !lookup_attribute ("optimize",
632 : 0 : DECL_ATTRIBUTES (caller->decl)))
633 : 4731 : || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
634 : : ;
635 : : /* If mismatch is caused by merging two LTO units with different
636 : : optimization flags we want to be bit nicer. However never inline
637 : : if one of functions is not optimized at all. */
638 : 4731 : else if (!opt_for_fn (callee->decl, optimize)
639 : 4731 : || !opt_for_fn (caller->decl, optimize))
640 : : {
641 : 0 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
642 : 0 : inlinable = false;
643 : : }
644 : : /* If callee is optimized for size and caller is not, allow inlining if
645 : : code shrinks or we are in param_max_inline_insns_single limit and
646 : : callee is inline (and thus likely an unified comdat).
647 : : This will allow caller to run faster. */
648 : 4731 : else if (opt_for_fn (callee->decl, optimize_size)
649 : 4731 : > opt_for_fn (caller->decl, optimize_size))
650 : : {
651 : 118 : int growth = estimate_edge_growth (e);
652 : 118 : if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
653 : 118 : && (!DECL_DECLARED_INLINE_P (callee->decl)
654 : 65 : && growth >= MAX (inline_insns_single (caller, false, false),
655 : : inline_insns_auto (caller, false, false))))
656 : : {
657 : 0 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
658 : 0 : inlinable = false;
659 : : }
660 : : }
661 : : /* If callee is more aggressively optimized for performance than caller,
662 : : we generally want to inline only cheap (runtime wise) functions. */
663 : 4613 : else if (opt_for_fn (callee->decl, optimize_size)
664 : : < opt_for_fn (caller->decl, optimize_size)
665 : 4613 : || (opt_for_fn (callee->decl, optimize)
666 : : > opt_for_fn (caller->decl, optimize)))
667 : : {
668 : 12675 : if (estimate_edge_time (e)
669 : 4225 : >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
670 : : {
671 : 1514 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
672 : 1514 : inlinable = false;
673 : : }
674 : : }
675 : :
676 : : }
677 : :
678 : 71974 : if (!inlinable && (flags & CAN_INLINE_REPORT))
679 : 64540 : report_inline_failed_reason (e);
680 : : return inlinable;
681 : : }
682 : :
683 : :
684 : : /* Return true if the edge E is inlinable during early inlining. */
685 : :
686 : : static bool
687 : 4225031 : can_early_inline_edge_p (struct cgraph_edge *e)
688 : : {
689 : 4223478 : cgraph_node *caller = (e->caller->inlined_to
690 : 4225031 : ? e->caller->inlined_to : e->caller);
691 : 4225031 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
692 : : /* Early inliner might get called at WPA stage when IPA pass adds new
693 : : function. In this case we cannot really do any of early inlining
694 : : because function bodies are missing. */
695 : 4225031 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
696 : : return false;
697 : 4224786 : if (!gimple_has_body_p (callee->decl))
698 : : {
699 : 0 : e->inline_failed = CIF_BODY_NOT_AVAILABLE;
700 : 0 : return false;
701 : : }
702 : 8449572 : gcc_assert (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
703 : : && gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)));
704 : 4224786 : if (coverage_instrumentation_p ()
705 : 4226395 : && ((lookup_attribute ("no_profile_instrument_function",
706 : 1609 : DECL_ATTRIBUTES (caller->decl)) == NULL_TREE)
707 : 1609 : != (lookup_attribute ("no_profile_instrument_function",
708 : 3218 : DECL_ATTRIBUTES (callee->decl)) == NULL_TREE)))
709 : : return false;
710 : :
711 : 4224784 : if (!can_inline_edge_p (e, true, true)
712 : 4224784 : || !can_inline_edge_by_limits_p (e, CAN_INLINE_EARLY | CAN_INLINE_REPORT))
713 : 89196 : return false;
714 : : /* When inlining regular functions into always-inline functions
715 : : during early inlining watch for possible inline cycles. */
716 : 4135588 : if (DECL_DISREGARD_INLINE_LIMITS (caller->decl)
717 : 229924 : && lookup_attribute ("always_inline", DECL_ATTRIBUTES (caller->decl))
718 : 4365503 : && (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
719 : 142350 : || !lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee->decl))))
720 : : {
721 : : /* If there are indirect calls, inlining may produce direct call.
722 : : TODO: We may lift this restriction if we avoid errors on formely
723 : : indirect calls to always_inline functions. Taking address
724 : : of always_inline function is generally bad idea and should
725 : : have been declared as undefined, but sadly we allow this. */
726 : 87566 : if (caller->indirect_calls || e->callee->indirect_calls)
727 : : return false;
728 : 86082 : ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
729 : 86082 : if (callee_info->safe_to_inline_to_always_inline)
730 : 22276 : return callee_info->safe_to_inline_to_always_inline - 1;
731 : 132615 : for (cgraph_edge *e2 = callee->callees; e2; e2 = e2->next_callee)
732 : : {
733 : 68826 : struct cgraph_node *callee2 = e2->callee->ultimate_alias_target ();
734 : : /* As early inliner runs in RPO order, we will see uninlined
735 : : always_inline calls only in the case of cyclic graphs. */
736 : 68826 : if (DECL_DISREGARD_INLINE_LIMITS (callee2->decl)
737 : 68826 : || lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee2->decl)))
738 : : {
739 : 0 : callee_info->safe_to_inline_to_always_inline = 1;
740 : 0 : return false;
741 : : }
742 : : /* With LTO watch for case where function is later replaced
743 : : by always_inline definition.
744 : : TODO: We may either stop treating noninlined cross-module always
745 : : inlines as errors, or we can extend decl merging to produce
746 : : syntacic alias and honor always inline only in units it has
747 : : been declared as such. */
748 : 68826 : if (flag_lto && callee2->externally_visible)
749 : : {
750 : 17 : callee_info->safe_to_inline_to_always_inline = 1;
751 : 17 : return false;
752 : : }
753 : : }
754 : 63789 : callee_info->safe_to_inline_to_always_inline = 2;
755 : : }
756 : : return true;
757 : : }
758 : :
759 : :
760 : : /* Return number of calls in N. Ignore cheap builtins. */
761 : :
762 : : static int
763 : 829484 : num_calls (struct cgraph_node *n)
764 : : {
765 : 829484 : struct cgraph_edge *e;
766 : 829484 : int num = 0;
767 : :
768 : 1629115 : for (e = n->callees; e; e = e->next_callee)
769 : 799631 : if (!is_inexpensive_builtin (e->callee->decl))
770 : 726633 : num++;
771 : 829484 : return num;
772 : : }
773 : :
774 : :
775 : : /* Return true if we are interested in inlining small function. */
776 : :
777 : : static bool
778 : 3566123 : want_early_inline_function_p (struct cgraph_edge *e)
779 : : {
780 : 3566123 : bool want_inline = true;
781 : 3566123 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
782 : :
783 : 3566123 : if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
784 : : ;
785 : 3566116 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
786 : 3566116 : && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
787 : : {
788 : 62 : e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
789 : 62 : report_inline_failed_reason (e);
790 : 62 : want_inline = false;
791 : : }
792 : : else
793 : : {
794 : : /* First take care of very large functions. */
795 : 3566054 : int min_growth = estimate_min_edge_growth (e), growth = 0;
796 : 3566054 : int n;
797 : 3566054 : int early_inlining_insns = param_early_inlining_insns;
798 : :
799 : 3566054 : if (min_growth > early_inlining_insns)
800 : : {
801 : 433996 : if (dump_enabled_p ())
802 : 40 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
803 : : " will not early inline: %C->%C, "
804 : : "call is cold and code would grow "
805 : : "at least by %i\n",
806 : : e->caller, callee,
807 : : min_growth);
808 : : want_inline = false;
809 : : }
810 : : else
811 : 3132058 : growth = estimate_edge_growth (e);
812 : :
813 : :
814 : 3132058 : if (!want_inline || growth <= param_max_inline_insns_size)
815 : : ;
816 : 1220600 : else if (!e->maybe_hot_p ())
817 : : {
818 : 20337 : if (dump_enabled_p ())
819 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
820 : : " will not early inline: %C->%C, "
821 : : "call is cold and code would grow by %i\n",
822 : : e->caller, callee,
823 : : growth);
824 : : want_inline = false;
825 : : }
826 : 1200263 : else if (growth > early_inlining_insns)
827 : : {
828 : 370779 : if (dump_enabled_p ())
829 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
830 : : " will not early inline: %C->%C, "
831 : : "growth %i exceeds --param early-inlining-insns\n",
832 : : e->caller, callee, growth);
833 : : want_inline = false;
834 : : }
835 : 829484 : else if ((n = num_calls (callee)) != 0
836 : 829484 : && growth * (n + 1) > early_inlining_insns)
837 : : {
838 : 236703 : if (dump_enabled_p ())
839 : 11 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
840 : : " will not early inline: %C->%C, "
841 : : "growth %i exceeds --param early-inlining-insns "
842 : : "divided by number of calls\n",
843 : : e->caller, callee, growth);
844 : : want_inline = false;
845 : : }
846 : : }
847 : 3566123 : return want_inline;
848 : : }
849 : :
850 : : /* Compute time of the edge->caller + edge->callee execution when inlining
851 : : does not happen. */
852 : :
853 : : inline sreal
854 : 443575 : compute_uninlined_call_time (struct cgraph_edge *edge,
855 : : sreal uninlined_call_time,
856 : : sreal freq)
857 : : {
858 : 299767 : cgraph_node *caller = (edge->caller->inlined_to
859 : 443575 : ? edge->caller->inlined_to
860 : : : edge->caller);
861 : :
862 : 443575 : if (freq > 0)
863 : 431185 : uninlined_call_time *= freq;
864 : : else
865 : 12390 : uninlined_call_time = uninlined_call_time >> 11;
866 : :
867 : 443575 : sreal caller_time = ipa_fn_summaries->get (caller)->time;
868 : 443575 : return uninlined_call_time + caller_time;
869 : : }
870 : :
871 : : /* Same as compute_uinlined_call_time but compute time when inlining
872 : : does happen. */
873 : :
874 : : inline sreal
875 : 443575 : compute_inlined_call_time (struct cgraph_edge *edge,
876 : : sreal time,
877 : : sreal freq)
878 : : {
879 : 299767 : cgraph_node *caller = (edge->caller->inlined_to
880 : 443575 : ? edge->caller->inlined_to
881 : : : edge->caller);
882 : 443575 : sreal caller_time = ipa_fn_summaries->get (caller)->time;
883 : :
884 : 443575 : if (freq > 0)
885 : 431185 : time *= freq;
886 : : else
887 : 12390 : time = time >> 11;
888 : :
889 : : /* This calculation should match one in ipa-inline-analysis.cc
890 : : (estimate_edge_size_and_time). */
891 : 443575 : time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
892 : 443575 : time += caller_time;
893 : 443575 : if (time <= 0)
894 : 0 : time = ((sreal) 1) >> 8;
895 : 443575 : gcc_checking_assert (time >= 0);
896 : 443575 : return time;
897 : : }
898 : :
899 : : /* Determine time saved by inlining EDGE of frequency FREQ
900 : : where callee's runtime w/o inlining is UNINLINED_TYPE
901 : : and with inlined is INLINED_TYPE. */
902 : :
903 : : inline sreal
904 : 9235232 : inlining_speedup (struct cgraph_edge *edge,
905 : : sreal freq,
906 : : sreal uninlined_time,
907 : : sreal inlined_time)
908 : : {
909 : 9235232 : sreal speedup = uninlined_time - inlined_time;
910 : : /* Handling of call_time should match one in ipa-inline-fnsummary.c
911 : : (estimate_edge_size_and_time). */
912 : 9235232 : sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
913 : :
914 : 9235232 : if (freq > 0)
915 : : {
916 : 9201138 : speedup = (speedup + call_time);
917 : 11421855 : if (freq != 1)
918 : 6980421 : speedup = speedup * freq;
919 : : }
920 : 34094 : else if (freq == 0)
921 : 34094 : speedup = speedup >> 11;
922 : 9235232 : gcc_checking_assert (speedup >= 0);
923 : 9235232 : return speedup;
924 : : }
925 : :
926 : : /* Return expected speedup of the callee function alone
927 : : (i.e. not estimate of call overhead and also no scalling
928 : : by call frequency. */
929 : :
930 : : static sreal
931 : 3066986 : callee_speedup (struct cgraph_edge *e)
932 : : {
933 : 3066986 : sreal unspec_time;
934 : 3066986 : sreal spec_time = estimate_edge_time (e, &unspec_time);
935 : 3066986 : return unspec_time - spec_time;
936 : : }
937 : :
938 : : /* Return true if the speedup for inlining E is bigger than
939 : : param_inline_min_speedup. */
940 : :
941 : : static bool
942 : 443575 : big_speedup_p (struct cgraph_edge *e)
943 : : {
944 : 443575 : sreal unspec_time;
945 : 443575 : sreal spec_time = estimate_edge_time (e, &unspec_time);
946 : 443575 : sreal freq = e->sreal_frequency ();
947 : 443575 : sreal time = compute_uninlined_call_time (e, unspec_time, freq);
948 : 443575 : sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
949 : 299767 : cgraph_node *caller = (e->caller->inlined_to
950 : 443575 : ? e->caller->inlined_to
951 : : : e->caller);
952 : 443575 : int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
953 : :
954 : 443575 : if ((time - inlined_time) * 100 > time * limit)
955 : : return true;
956 : : return false;
957 : : }
958 : :
959 : : /* Return true if we are interested in inlining small function.
960 : : When REPORT is true, report reason to dump file. */
961 : :
962 : : static bool
963 : 4926725 : want_inline_small_function_p (struct cgraph_edge *e, bool report)
964 : : {
965 : 4926725 : bool want_inline = true;
966 : 4926725 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
967 : 3763595 : cgraph_node *to = (e->caller->inlined_to
968 : 4926725 : ? e->caller->inlined_to : e->caller);
969 : :
970 : : /* Allow this function to be called before can_inline_edge_p,
971 : : since it's usually cheaper. */
972 : 4926725 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
973 : : want_inline = false;
974 : 4926725 : else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
975 : : return true;
976 : 4920909 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
977 : 4920909 : && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
978 : : {
979 : 45084 : e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
980 : 45084 : want_inline = false;
981 : : }
982 : :
983 : : /* Early return before lookup of summaries. */
984 : 45084 : if (!want_inline)
985 : : {
986 : 45084 : if (report)
987 : 40806 : report_inline_failed_reason (e);
988 : 45084 : return false;
989 : : }
990 : :
991 : 4875825 : ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
992 : 4875825 : ipa_call_summary *call_info = ipa_call_summaries->get (e);
993 : :
994 : : /* Do fast and conservative check if the function can be good
995 : : inline candidate. */
996 : 4875825 : if ((!DECL_DECLARED_INLINE_P (callee->decl)
997 : 2129486 : && (!e->count.ipa ().initialized_p ()
998 : 39363 : || !e->maybe_hot_p (callee_info->time)))
999 : 7005080 : && callee_info->min_size - call_info->call_stmt_size
1000 : 2129255 : > inline_insns_auto (e->caller, true, true))
1001 : : {
1002 : 1013 : e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
1003 : 1013 : want_inline = false;
1004 : : }
1005 : 4874812 : else if ((DECL_DECLARED_INLINE_P (callee->decl)
1006 : 2128473 : || e->count.ipa ().nonzero_p ())
1007 : 7621406 : && callee_info->min_size - call_info->call_stmt_size
1008 : 2746594 : > inline_insns_single (e->caller, true, true))
1009 : : {
1010 : 0 : e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
1011 : 0 : ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
1012 : : : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
1013 : 0 : want_inline = false;
1014 : : }
1015 : : else
1016 : : {
1017 : 4874812 : int growth = estimate_edge_growth (e);
1018 : 4874812 : ipa_hints hints = estimate_edge_hints (e);
1019 : : /* We have two independent groups of hints. If one matches in each
1020 : : of groups the limits are inreased. If both groups matches, limit
1021 : : is increased even more. */
1022 : 4874812 : bool apply_hints = (hints & (INLINE_HINT_indirect_call
1023 : : | INLINE_HINT_known_hot
1024 : : | INLINE_HINT_loop_iterations
1025 : : | INLINE_HINT_loop_stride));
1026 : 4874812 : bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);
1027 : :
1028 : 4874812 : if (growth <= opt_for_fn (to->decl,
1029 : : param_max_inline_insns_size))
1030 : : ;
1031 : : /* Apply param_max_inline_insns_single limit. Do not do so when
1032 : : hints suggests that inlining given function is very profitable.
1033 : : Avoid computation of big_speedup_p when not necessary to change
1034 : : outcome of decision. */
1035 : 4754253 : else if (DECL_DECLARED_INLINE_P (callee->decl)
1036 : 2695399 : && growth >= inline_insns_single (e->caller, apply_hints,
1037 : : apply_hints2)
1038 : 5184343 : && (apply_hints || apply_hints2
1039 : 425572 : || growth >= inline_insns_single (e->caller, true,
1040 : : apply_hints2)
1041 : 198043 : || !big_speedup_p (e)))
1042 : : {
1043 : 428767 : e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
1044 : 428767 : want_inline = false;
1045 : : }
1046 : 4325486 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
1047 : 2058854 : && !opt_for_fn (e->caller->decl, flag_inline_functions)
1048 : 4330846 : && growth >= opt_for_fn (to->decl,
1049 : : param_max_inline_insns_small))
1050 : : {
1051 : : /* growth_positive_p is expensive, always test it last. */
1052 : 5360 : if (growth >= inline_insns_single (e->caller, false, false)
1053 : 5360 : || growth_positive_p (callee, e, growth))
1054 : : {
1055 : 4973 : e->inline_failed = CIF_NOT_DECLARED_INLINED;
1056 : 4973 : want_inline = false;
1057 : : }
1058 : : }
1059 : : /* Apply param_max_inline_insns_auto limit for functions not declared
1060 : : inline. Bypass the limit when speedup seems big. */
1061 : 4320126 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
1062 : 2053494 : && growth >= inline_insns_auto (e->caller, apply_hints,
1063 : : apply_hints2)
1064 : 5590992 : && (apply_hints || apply_hints2
1065 : 1254242 : || growth >= inline_insns_auto (e->caller, true,
1066 : : apply_hints2)
1067 : 245368 : || !big_speedup_p (e)))
1068 : : {
1069 : : /* growth_positive_p is expensive, always test it last. */
1070 : 1253140 : if (growth >= inline_insns_single (e->caller, false, false)
1071 : 1253140 : || growth_positive_p (callee, e, growth))
1072 : : {
1073 : 1158870 : e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
1074 : 1158870 : want_inline = false;
1075 : : }
1076 : : }
1077 : : /* If call is cold, do not inline when function body would grow. */
1078 : 3066986 : else if (!e->maybe_hot_p (callee_speedup (e))
1079 : 3066986 : && (growth >= inline_insns_single (e->caller, false, false)
1080 : 774848 : || growth_positive_p (callee, e, growth)))
1081 : : {
1082 : 704182 : e->inline_failed = CIF_UNLIKELY_CALL;
1083 : 704182 : want_inline = false;
1084 : : }
1085 : : }
1086 : 4875825 : if (!want_inline && report)
1087 : 586454 : report_inline_failed_reason (e);
1088 : : return want_inline;
1089 : : }
1090 : :
1091 : : /* EDGE is self recursive edge.
1092 : : We handle two cases - when function A is inlining into itself
1093 : : or when function A is being inlined into another inliner copy of function
1094 : : A within function B.
1095 : :
1096 : : In first case OUTER_NODE points to the toplevel copy of A, while
1097 : : in the second case OUTER_NODE points to the outermost copy of A in B.
1098 : :
1099 : : In both cases we want to be extra selective since
1100 : : inlining the call will just introduce new recursive calls to appear. */
1101 : :
1102 : : static bool
1103 : 18058 : want_inline_self_recursive_call_p (struct cgraph_edge *edge,
1104 : : struct cgraph_node *outer_node,
1105 : : bool peeling,
1106 : : int depth)
1107 : : {
1108 : 18058 : char const *reason = NULL;
1109 : 18058 : bool want_inline = true;
1110 : 18058 : sreal caller_freq = 1;
1111 : 18058 : int max_depth = opt_for_fn (outer_node->decl,
1112 : : param_max_inline_recursive_depth_auto);
1113 : :
1114 : 18058 : if (DECL_DECLARED_INLINE_P (edge->caller->decl))
1115 : 2198 : max_depth = opt_for_fn (outer_node->decl,
1116 : : param_max_inline_recursive_depth);
1117 : :
1118 : 18058 : if (!edge->maybe_hot_p ())
1119 : : {
1120 : : reason = "recursive call is cold";
1121 : : want_inline = false;
1122 : : }
1123 : 17721 : else if (depth > max_depth)
1124 : : {
1125 : : reason = "--param max-inline-recursive-depth exceeded.";
1126 : : want_inline = false;
1127 : : }
1128 : 15559 : else if (outer_node->inlined_to
1129 : 18819 : && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
1130 : : {
1131 : 0 : reason = "caller frequency is 0";
1132 : 0 : want_inline = false;
1133 : : }
1134 : :
1135 : 0 : if (!want_inline)
1136 : : ;
1137 : : /* Inlining of self recursive function into copy of itself within other
1138 : : function is transformation similar to loop peeling.
1139 : :
1140 : : Peeling is profitable if we can inline enough copies to make probability
1141 : : of actual call to the self recursive function very small. Be sure that
1142 : : the probability of recursion is small.
1143 : :
1144 : : We ensure that the frequency of recursing is at most 1 - (1/max_depth).
1145 : : This way the expected number of recursion is at most max_depth. */
1146 : 15559 : else if (peeling)
1147 : : {
1148 : 3260 : sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1149 : 3260 : int i;
1150 : 7222 : for (i = 1; i < depth; i++)
1151 : 3962 : max_prob = max_prob * max_prob;
1152 : 3260 : if (edge->sreal_frequency () >= max_prob * caller_freq)
1153 : : {
1154 : 1537 : reason = "frequency of recursive call is too large";
1155 : 1537 : want_inline = false;
1156 : : }
1157 : : }
1158 : : /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
1159 : : recursion depth is large. We reduce function call overhead and increase
1160 : : chances that things fit in hardware return predictor.
1161 : :
1162 : : Recursive inlining might however increase cost of stack frame setup
1163 : : actually slowing down functions whose recursion tree is wide rather than
1164 : : deep.
1165 : :
1166 : : Deciding reliably on when to do recursive inlining without profile feedback
1167 : : is tricky. For now we disable recursive inlining when probability of self
1168 : : recursion is low.
1169 : :
1170 : : Recursive inlining of self recursive call within loop also results in
1171 : : large loop depths that generally optimize badly. We may want to throttle
1172 : : down inlining in those cases. In particular this seems to happen in one
1173 : : of libstdc++ rb tree methods. */
1174 : : else
1175 : : {
1176 : 12299 : if (edge->sreal_frequency () * 100
1177 : 12299 : <= caller_freq
1178 : 24598 : * opt_for_fn (outer_node->decl,
1179 : : param_min_inline_recursive_probability))
1180 : : {
1181 : 544 : reason = "frequency of recursive call is too small";
1182 : 544 : want_inline = false;
1183 : : }
1184 : : }
1185 : 18058 : if (!can_inline_edge_by_limits_p (edge, CAN_INLINE_FORCE_LIMITS | CAN_INLINE_REPORT))
1186 : : {
1187 : : reason = "inline limits exceeded for always_inline function";
1188 : : want_inline = false;
1189 : : }
1190 : 18058 : if (!want_inline && dump_enabled_p ())
1191 : 7 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1192 : : " not inlining recursively: %s\n", reason);
1193 : 18058 : return want_inline;
1194 : : }
1195 : :
1196 : : /* Return true when NODE has uninlinable caller;
1197 : : set HAS_HOT_CALL if it has hot call.
1198 : : Worker for cgraph_for_node_and_aliases. */
1199 : :
1200 : : static bool
1201 : 70979 : check_callers (struct cgraph_node *node, void *has_hot_call)
1202 : : {
1203 : 70979 : struct cgraph_edge *e;
1204 : 122514 : for (e = node->callers; e; e = e->next_caller)
1205 : : {
1206 : 77757 : if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1207 : 77757 : || !opt_for_fn (e->caller->decl, optimize))
1208 : : return true;
1209 : 77757 : if (!can_inline_edge_p (e, true))
1210 : : return true;
1211 : 77730 : if (e->recursive_p ())
1212 : : return true;
1213 : 77730 : if (!can_inline_edge_by_limits_p (e, CAN_INLINE_REPORT))
1214 : : return true;
1215 : : /* Inlining large functions to large loop depth is often harmful because
1216 : : of register pressure it implies. */
1217 : 51585 : if ((int)ipa_call_summaries->get (e)->loop_depth
1218 : 51585 : > param_inline_functions_called_once_loop_depth)
1219 : : return true;
1220 : : /* Do not produce gigantic functions. */
1221 : 94865 : if (estimate_size_after_inlining (e->caller->inlined_to ?
1222 : : e->caller->inlined_to : e->caller, e)
1223 : 51585 : > param_inline_functions_called_once_insns)
1224 : : return true;
1225 : 51535 : if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1226 : 10116 : *(bool *)has_hot_call = true;
1227 : : }
1228 : : return false;
1229 : : }
1230 : :
1231 : : /* If NODE has a caller, return true. */
1232 : :
1233 : : static bool
1234 : 2121099 : has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1235 : : {
1236 : 2121099 : if (node->callers)
1237 : 665664 : return true;
1238 : : return false;
1239 : : }
1240 : :
1241 : : /* Decide if inlining NODE would reduce unit size by eliminating
1242 : : the offline copy of function.
1243 : : When COLD is true the cold calls are considered, too. */
1244 : :
1245 : : static bool
1246 : 4727952 : want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1247 : : {
1248 : 4727952 : bool has_hot_call = false;
1249 : :
1250 : : /* Aliases gets inlined along with the function they alias. */
1251 : 4727952 : if (node->alias)
1252 : : return false;
1253 : : /* Already inlined? */
1254 : 4653845 : if (node->inlined_to)
1255 : : return false;
1256 : : /* Does it have callers? */
1257 : 2073260 : if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1258 : : return false;
1259 : : /* Inlining into all callers would increase size? */
1260 : 665664 : if (growth_positive_p (node, NULL, INT_MIN) > 0)
1261 : : return false;
1262 : : /* All inlines must be possible. */
1263 : 66467 : if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1264 : : true))
1265 : : return false;
1266 : 40245 : if (!cold && !has_hot_call)
1267 : : return false;
1268 : : return true;
1269 : : }
1270 : :
1271 : : /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1272 : : in estimate_edge_badness. */
1273 : :
1274 : : static bool
1275 : 613647 : wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1276 : : {
1277 : 613647 : return size < (DECL_DECLARED_INLINE_P (where->decl)
1278 : 613647 : ? inline_insns_single (where, false, false)
1279 : 370867 : : inline_insns_auto (where, false, false));
1280 : : }
1281 : :
1282 : : /* A cost model driving the inlining heuristics in a way so the edges with
1283 : : smallest badness are inlined first. After each inlining is performed
1284 : : the costs of all caller edges of nodes affected are recomputed so the
1285 : : metrics may accurately depend on values such as number of inlinable callers
1286 : : of the function or function body size. */
1287 : :
1288 : : static sreal
1289 : 9742091 : edge_badness (struct cgraph_edge *edge, bool dump)
1290 : : {
1291 : 9742091 : sreal badness;
1292 : 9742091 : int growth;
1293 : 9742091 : sreal edge_time, unspec_edge_time;
1294 : 9742091 : struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1295 : 9742091 : class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1296 : 9742091 : ipa_hints hints;
1297 : 7825228 : cgraph_node *caller = (edge->caller->inlined_to
1298 : 9742091 : ? edge->caller->inlined_to
1299 : : : edge->caller);
1300 : :
1301 : 9742091 : growth = estimate_edge_growth (edge);
1302 : 9742091 : edge_time = estimate_edge_time (edge, &unspec_edge_time);
1303 : 9742091 : hints = estimate_edge_hints (edge);
1304 : 9742091 : gcc_checking_assert (edge_time >= 0);
1305 : : /* Check that inlined time is better, but tolerate some roundoff issues.
1306 : : FIXME: When callee profile drops to 0 we account calls more. This
1307 : : should be fixed by never doing that. */
1308 : 9742091 : gcc_checking_assert ((edge_time * 100
1309 : : - callee_info->time * 101).to_int () <= 0
1310 : : || callee->count.ipa ().initialized_p ());
1311 : 9742091 : gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1312 : :
1313 : 9742091 : if (dump)
1314 : : {
1315 : 164 : fprintf (dump_file, " Badness calculation for %s -> %s\n",
1316 : 164 : edge->caller->dump_name (),
1317 : 164 : edge->callee->dump_name ());
1318 : 164 : fprintf (dump_file, " size growth %i, time %f unspec %f ",
1319 : : growth,
1320 : : edge_time.to_double (),
1321 : : unspec_edge_time.to_double ());
1322 : 164 : ipa_dump_hints (dump_file, hints);
1323 : 164 : if (big_speedup_p (edge))
1324 : 133 : fprintf (dump_file, " big_speedup");
1325 : 164 : fprintf (dump_file, "\n");
1326 : : }
1327 : :
1328 : : /* Always prefer inlining saving code size. */
1329 : 9742091 : if (growth <= 0)
1330 : : {
1331 : 480597 : badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1332 : 480597 : if (dump)
1333 : 87 : fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1334 : : growth);
1335 : : }
1336 : : /* Inlining into EXTERNAL functions is not going to change anything unless
1337 : : they are themselves inlined. */
1338 : 9261494 : else if (DECL_EXTERNAL (caller->decl))
1339 : : {
1340 : 25026 : if (dump)
1341 : 0 : fprintf (dump_file, " max: function is external\n");
1342 : 25026 : return sreal::max ();
1343 : : }
1344 : : /* When profile is available. Compute badness as:
1345 : :
1346 : : time_saved * caller_count
1347 : : goodness = -------------------------------------------------
1348 : : growth_of_caller * overall_growth * combined_size
1349 : :
1350 : : badness = - goodness
1351 : :
1352 : : Again use negative value to make calls with profile appear hotter
1353 : : then calls without.
1354 : : */
1355 : 9236468 : else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1356 : 9236468 : || caller->count.ipa ().nonzero_p ())
1357 : : {
1358 : 9235155 : sreal numerator, denominator;
1359 : 9235155 : int overall_growth;
1360 : 9235155 : sreal freq = edge->sreal_frequency ();
1361 : :
1362 : 9235155 : numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1363 : 9235155 : if (numerator <= 0)
1364 : 7014 : numerator = ((sreal) 1 >> 8);
1365 : 9235155 : if (caller->count.ipa ().nonzero_p ())
1366 : 182 : numerator *= caller->count.ipa ().to_gcov_type ();
1367 : 9234973 : else if (caller->count.ipa ().initialized_p ())
1368 : 798 : numerator = numerator >> 11;
1369 : 9235155 : denominator = growth;
1370 : :
1371 : 9235155 : overall_growth = callee_info->growth;
1372 : :
1373 : : /* Look for inliner wrappers of the form:
1374 : :
1375 : : inline_caller ()
1376 : : {
1377 : : do_fast_job...
1378 : : if (need_more_work)
1379 : : noninline_callee ();
1380 : : }
1381 : : Without penalizing this case, we usually inline noninline_callee
1382 : : into the inline_caller because overall_growth is small preventing
1383 : : further inlining of inline_caller.
1384 : :
1385 : : Penalize only callgraph edges to functions with small overall
1386 : : growth ...
1387 : : */
1388 : 9235155 : if (growth > overall_growth
1389 : : /* ... and having only one caller which is not inlined ... */
1390 : 1988327 : && callee_info->single_caller
1391 : 1143384 : && !edge->caller->inlined_to
1392 : : /* ... and edges executed only conditionally ... */
1393 : 817036 : && freq < 1
1394 : : /* ... consider case where callee is not inline but caller is ... */
1395 : 9624217 : && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1396 : 105199 : && DECL_DECLARED_INLINE_P (caller->decl))
1397 : : /* ... or when early optimizers decided to split and edge
1398 : : frequency still indicates splitting is a win ... */
1399 : 378977 : || (callee->split_part && !caller->split_part
1400 : 87073 : && freq * 100
1401 : 9313426 : < opt_for_fn (caller->decl,
1402 : : param_partial_inlining_entry_probability)
1403 : : /* ... and do not overwrite user specified hints. */
1404 : 86737 : && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1405 : 64604 : || DECL_DECLARED_INLINE_P (caller->decl)))))
1406 : : {
1407 : 95539 : ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1408 : 95539 : int caller_growth = caller_info->growth;
1409 : :
1410 : : /* Only apply the penalty when caller looks like inline candidate,
1411 : : and it is not called once. */
1412 : 53701 : if (!caller_info->single_caller && overall_growth < caller_growth
1413 : 51308 : && caller_info->inlinable
1414 : 146793 : && wrapper_heuristics_may_apply
1415 : 51254 : (caller, ipa_size_summaries->get (caller)->size))
1416 : : {
1417 : 41651 : if (dump)
1418 : 1 : fprintf (dump_file,
1419 : : " Wrapper penalty. Increasing growth %i to %i\n",
1420 : : overall_growth, caller_growth);
1421 : : overall_growth = caller_growth;
1422 : : }
1423 : : }
1424 : 9235155 : if (overall_growth > 0)
1425 : : {
1426 : : /* Strongly prefer functions with few callers that can be inlined
1427 : : fully. The square root here leads to smaller binaries at average.
1428 : : Watch however for extreme cases and return to linear function
1429 : : when growth is large. */
1430 : 7903672 : if (overall_growth < 256)
1431 : 4095319 : overall_growth *= overall_growth;
1432 : : else
1433 : 3808353 : overall_growth += 256 * 256 - 256;
1434 : 7903672 : denominator *= overall_growth;
1435 : : }
1436 : 9235155 : denominator *= ipa_size_summaries->get (caller)->size + growth;
1437 : :
1438 : 9235155 : badness = - numerator / denominator;
1439 : :
1440 : 9235155 : if (dump)
1441 : : {
1442 : 308 : fprintf (dump_file,
1443 : : " %f: guessed profile. frequency %f, count %" PRId64
1444 : : " caller count %" PRId64
1445 : : " time saved %f"
1446 : : " overall growth %i (current) %i (original)"
1447 : : " %i (compensated)\n",
1448 : : badness.to_double (),
1449 : : freq.to_double (),
1450 : 77 : edge->count.ipa ().initialized_p ()
1451 : 0 : ? edge->count.ipa ().to_gcov_type () : -1,
1452 : 77 : caller->count.ipa ().initialized_p ()
1453 : 0 : ? caller->count.ipa ().to_gcov_type () : -1,
1454 : 154 : inlining_speedup (edge, freq, unspec_edge_time,
1455 : : edge_time).to_double (),
1456 : : estimate_growth (callee),
1457 : : callee_info->growth, overall_growth);
1458 : : }
1459 : : }
1460 : : /* When function local profile is not available or it does not give
1461 : : useful information (i.e. frequency is zero), base the cost on
1462 : : loop nest and overall size growth, so we optimize for overall number
1463 : : of functions fully inlined in program. */
1464 : : else
1465 : : {
1466 : 1313 : int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1467 : 1313 : badness = growth;
1468 : :
1469 : : /* Decrease badness if call is nested. */
1470 : 1313 : if (badness > 0)
1471 : 1313 : badness = badness >> nest;
1472 : : else
1473 : 0 : badness = badness << nest;
1474 : 1313 : if (dump)
1475 : 0 : fprintf (dump_file, " %f: no profile. nest %i\n",
1476 : : badness.to_double (), nest);
1477 : : }
1478 : 9717065 : gcc_checking_assert (badness != 0);
1479 : :
1480 : 9717065 : if (edge->recursive_p ())
1481 : 15665 : badness = badness.shift (badness > 0 ? 4 : -4);
1482 : 9717065 : if ((hints & (INLINE_HINT_indirect_call
1483 : : | INLINE_HINT_loop_iterations
1484 : : | INLINE_HINT_loop_stride))
1485 : 8659368 : || callee_info->growth <= 0)
1486 : 4873616 : badness = badness.shift (badness > 0 ? -2 : 2);
1487 : 9717065 : if (hints & INLINE_HINT_builtin_constant_p)
1488 : 26316 : badness = badness.shift (badness > 0 ? -4 : 4);
1489 : 9717065 : if (hints & (INLINE_HINT_same_scc))
1490 : 72090 : badness = badness.shift (badness > 0 ? 3 : -3);
1491 : 9681019 : else if (hints & (INLINE_HINT_in_scc))
1492 : 79424 : badness = badness.shift (badness > 0 ? 2 : -2);
1493 : 9641307 : else if (hints & (INLINE_HINT_cross_module))
1494 : 3546 : badness = badness.shift (badness > 0 ? 1 : -1);
1495 : 9717065 : if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1496 : 17440 : badness = badness.shift (badness > 0 ? -4 : 4);
1497 : 9708345 : else if ((hints & INLINE_HINT_declared_inline))
1498 : 14332856 : badness = badness.shift (badness > 0 ? -3 : 3);
1499 : 9717065 : if (dump)
1500 : 164 : fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1501 : 9717065 : return badness;
1502 : : }
1503 : :
1504 : : /* Recompute badness of EDGE and update its key in HEAP if needed. */
1505 : : static inline void
1506 : 4935352 : update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1507 : : {
1508 : 4935352 : sreal badness = edge_badness (edge, false);
1509 : 4935352 : if (edge->aux)
1510 : : {
1511 : 3901361 : edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1512 : 3901361 : gcc_checking_assert (n->get_data () == edge);
1513 : :
1514 : : /* fibonacci_heap::replace_key does busy updating of the
1515 : : heap that is unnecessarily expensive.
1516 : : We do lazy increases: after extracting minimum if the key
1517 : : turns out to be out of date, it is re-inserted into heap
1518 : : with correct value. */
1519 : 3901361 : if (badness < n->get_key ().badness)
1520 : : {
1521 : 906109 : if (dump_file && (dump_flags & TDF_DETAILS))
1522 : : {
1523 : 78 : fprintf (dump_file,
1524 : : " decreasing badness %s -> %s, %f to %f\n",
1525 : 39 : edge->caller->dump_name (),
1526 : 39 : edge->callee->dump_name (),
1527 : 78 : n->get_key ().badness.to_double (),
1528 : : badness.to_double ());
1529 : : }
1530 : 906109 : inline_badness b (edge, badness);
1531 : 906109 : heap->decrease_key (n, b);
1532 : : }
1533 : : }
1534 : : else
1535 : : {
1536 : 1033991 : if (dump_file && (dump_flags & TDF_DETAILS))
1537 : : {
1538 : 266 : fprintf (dump_file,
1539 : : " enqueuing call %s -> %s, badness %f\n",
1540 : 133 : edge->caller->dump_name (),
1541 : 133 : edge->callee->dump_name (),
1542 : : badness.to_double ());
1543 : : }
1544 : 1033991 : inline_badness b (edge, badness);
1545 : 1033991 : edge->aux = heap->insert (b, edge);
1546 : : }
1547 : 4935352 : }
1548 : :
1549 : :
1550 : : /* NODE was inlined.
1551 : : All caller edges needs to be reset because
1552 : : size estimates change. Similarly callees needs reset
1553 : : because better context may be known. */
1554 : :
1555 : : static void
1556 : 912068 : reset_edge_caches (struct cgraph_node *node)
1557 : : {
1558 : 912068 : struct cgraph_edge *edge;
1559 : 912068 : struct cgraph_edge *e = node->callees;
1560 : 912068 : struct cgraph_node *where = node;
1561 : 912068 : struct ipa_ref *ref;
1562 : :
1563 : 912068 : if (where->inlined_to)
1564 : 858785 : where = where->inlined_to;
1565 : :
1566 : 912068 : reset_node_cache (where);
1567 : :
1568 : 912068 : if (edge_growth_cache != NULL)
1569 : 3035369 : for (edge = where->callers; edge; edge = edge->next_caller)
1570 : 2123350 : if (edge->inline_failed)
1571 : 2123350 : edge_growth_cache->remove (edge);
1572 : :
1573 : 963533 : FOR_EACH_ALIAS (where, ref)
1574 : 102930 : reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1575 : :
1576 : 912068 : if (!e)
1577 : : return;
1578 : :
1579 : 2526059 : while (true)
1580 : 2526059 : if (!e->inline_failed && e->callee->callees)
1581 : : e = e->callee->callees;
1582 : : else
1583 : : {
1584 : 2052726 : if (edge_growth_cache != NULL && e->inline_failed)
1585 : 1941054 : edge_growth_cache->remove (e);
1586 : 2052726 : if (e->next_callee)
1587 : : e = e->next_callee;
1588 : : else
1589 : : {
1590 : 1235611 : do
1591 : : {
1592 : 1235611 : if (e->caller == node)
1593 : : return;
1594 : 473333 : e = e->caller->callers;
1595 : : }
1596 : 473333 : while (!e->next_callee);
1597 : : e = e->next_callee;
1598 : : }
1599 : : }
1600 : : }
1601 : :
1602 : : /* Recompute HEAP nodes for each of caller of NODE.
1603 : : UPDATED_NODES track nodes we already visited, to avoid redundant work.
1604 : : When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1605 : : it is inlinable. Otherwise check all edges. */
1606 : :
1607 : : static void
1608 : 911710 : update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1609 : : bitmap updated_nodes,
1610 : : struct cgraph_edge *check_inlinablity_for)
1611 : : {
1612 : 911710 : struct cgraph_edge *edge;
1613 : 911710 : struct ipa_ref *ref;
1614 : :
1615 : 911710 : if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1616 : 899745 : || node->inlined_to)
1617 : 11965 : return;
1618 : 899745 : if (!bitmap_set_bit (updated_nodes, node->get_summary_id ()))
1619 : : return;
1620 : :
1621 : 950895 : FOR_EACH_ALIAS (node, ref)
1622 : : {
1623 : 51150 : struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1624 : 51150 : update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1625 : : }
1626 : :
1627 : 2912811 : for (edge = node->callers; edge; edge = edge->next_caller)
1628 : 2013066 : if (edge->inline_failed)
1629 : : {
1630 : 2013066 : if (!check_inlinablity_for
1631 : 2013066 : || check_inlinablity_for == edge)
1632 : : {
1633 : 2013066 : if (can_inline_edge_p (edge, false)
1634 : 1980187 : && want_inline_small_function_p (edge, false)
1635 : 2540371 : && can_inline_edge_by_limits_p (edge, 0))
1636 : 520536 : update_edge_key (heap, edge);
1637 : 1492530 : else if (edge->aux)
1638 : : {
1639 : 82139 : report_inline_failed_reason (edge);
1640 : 82139 : heap->delete_node ((edge_heap_node_t *) edge->aux);
1641 : 82139 : edge->aux = NULL;
1642 : : }
1643 : : }
1644 : 0 : else if (edge->aux)
1645 : 0 : update_edge_key (heap, edge);
1646 : : }
1647 : : }
1648 : :
1649 : : /* Recompute HEAP nodes for each uninlined call in NODE
1650 : : If UPDATE_SINCE is non-NULL check if edges called within that function
1651 : : are inlinable (typically UPDATE_SINCE is the inline clone we introduced
1652 : : where all edges have new context).
1653 : :
1654 : : This is used when we know that edge badnesses are going only to increase
1655 : : (we introduced new call site) and thus all we need is to insert newly
1656 : : created edges into heap. */
1657 : :
1658 : : static void
1659 : 860617 : update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1660 : : struct cgraph_node *update_since,
1661 : : bitmap updated_nodes)
1662 : : {
1663 : 860617 : struct cgraph_edge *e = node->callees;
1664 : 860617 : bool check_inlinability = update_since == node;
1665 : :
1666 : 860617 : if (!e)
1667 : : return;
1668 : 24684322 : while (true)
1669 : 24684322 : if (!e->inline_failed && e->callee->callees)
1670 : : {
1671 : 4061213 : if (e->callee == update_since)
1672 : 394591 : check_inlinability = true;
1673 : : e = e->callee->callees;
1674 : : }
1675 : : else
1676 : : {
1677 : 20623109 : enum availability avail;
1678 : 20623109 : struct cgraph_node *callee;
1679 : 20623109 : if (!check_inlinability)
1680 : : {
1681 : 18574220 : if (e->aux
1682 : 21804599 : && !bitmap_bit_p (updated_nodes,
1683 : 3230379 : e->callee->ultimate_alias_target
1684 : 3230379 : (&avail, e->caller)->get_summary_id ()))
1685 : 3230378 : update_edge_key (heap, e);
1686 : : }
1687 : : /* We do not reset callee growth cache here. Since we added a new call,
1688 : : growth should have just increased and consequently badness metric
1689 : : don't need updating. */
1690 : 2048889 : else if (e->inline_failed
1691 : 1939681 : && (callee = e->callee->ultimate_alias_target (&avail,
1692 : 1939681 : e->caller))
1693 : 1939681 : && avail >= AVAIL_AVAILABLE
1694 : 556767 : && ipa_fn_summaries->get (callee) != NULL
1695 : 556762 : && ipa_fn_summaries->get (callee)->inlinable
1696 : 2592001 : && !bitmap_bit_p (updated_nodes, callee->get_summary_id ()))
1697 : : {
1698 : 543112 : if (can_inline_edge_p (e, false)
1699 : 539185 : && want_inline_small_function_p (e, false)
1700 : 819550 : && can_inline_edge_by_limits_p (e, 0))
1701 : : {
1702 : 275773 : gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1703 : 275773 : gcc_checking_assert (check_inlinability || e->aux);
1704 : 275773 : update_edge_key (heap, e);
1705 : : }
1706 : 267339 : else if (e->aux)
1707 : : {
1708 : 7410 : report_inline_failed_reason (e);
1709 : 7410 : heap->delete_node ((edge_heap_node_t *) e->aux);
1710 : 7410 : e->aux = NULL;
1711 : : }
1712 : : }
1713 : : /* In case we redirected to unreachable node we only need to remove the
1714 : : fibheap entry. */
1715 : 1505777 : else if (e->aux)
1716 : : {
1717 : 3333 : heap->delete_node ((edge_heap_node_t *) e->aux);
1718 : 3333 : e->aux = NULL;
1719 : : }
1720 : 20623109 : if (e->next_callee)
1721 : : e = e->next_callee;
1722 : : else
1723 : : {
1724 : 4883915 : do
1725 : : {
1726 : 4883915 : if (e->caller == node)
1727 : 822702 : return;
1728 : 4061213 : if (e->caller == update_since)
1729 : 394591 : check_inlinability = false;
1730 : 4061213 : e = e->caller->callers;
1731 : : }
1732 : 4061213 : while (!e->next_callee);
1733 : : e = e->next_callee;
1734 : : }
1735 : : }
1736 : : }
1737 : :
1738 : : /* Enqueue all recursive calls from NODE into priority queue depending on
1739 : : how likely we want to recursively inline the call. */
1740 : :
1741 : : static void
1742 : 21174 : lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1743 : : edge_heap_t *heap)
1744 : : {
1745 : 21174 : struct cgraph_edge *e;
1746 : 21174 : enum availability avail;
1747 : :
1748 : 58897 : for (e = where->callees; e; e = e->next_callee)
1749 : 37723 : if (e->callee == node
1750 : 37723 : || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1751 : 1095 : && avail > AVAIL_INTERPOSABLE))
1752 : : {
1753 : 16072 : inline_badness b (e, -e->sreal_frequency ());
1754 : 16072 : heap->insert (b, e);
1755 : : }
1756 : 58897 : for (e = where->callees; e; e = e->next_callee)
1757 : 37723 : if (!e->inline_failed)
1758 : 7566 : lookup_recursive_calls (node, e->callee, heap);
1759 : 21174 : }
1760 : :
1761 : : /* Decide on recursive inlining: in the case function has recursive calls,
1762 : : inline until body size reaches given argument. If any new indirect edges
1763 : : are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1764 : : is NULL. */
1765 : :
1766 : : static bool
1767 : 1853 : recursive_inlining (struct cgraph_edge *edge,
1768 : : vec<cgraph_edge *> *new_edges)
1769 : : {
1770 : 1345 : cgraph_node *to = (edge->caller->inlined_to
1771 : 1853 : ? edge->caller->inlined_to : edge->caller);
1772 : 1853 : int limit = opt_for_fn (to->decl,
1773 : : param_max_inline_insns_recursive_auto);
1774 : 1853 : inline_badness b (edge, sreal::min ());
1775 : 1853 : edge_heap_t heap (b);
1776 : 1853 : struct cgraph_node *node;
1777 : 1853 : struct cgraph_edge *e;
1778 : 1853 : struct cgraph_node *master_clone = NULL, *next;
1779 : 1853 : int depth = 0;
1780 : 1853 : int n = 0;
1781 : :
1782 : 1853 : node = edge->caller;
1783 : 1853 : if (node->inlined_to)
1784 : 508 : node = node->inlined_to;
1785 : :
1786 : 1853 : if (DECL_DECLARED_INLINE_P (node->decl))
1787 : 356 : limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1788 : :
1789 : : /* Make sure that function is small enough to be considered for inlining. */
1790 : 1853 : if (estimate_size_after_inlining (node, edge) >= limit)
1791 : : return false;
1792 : 1853 : lookup_recursive_calls (node, node, &heap);
1793 : 1853 : if (heap.empty ())
1794 : : return false;
1795 : :
1796 : 1853 : if (dump_file)
1797 : 4 : fprintf (dump_file,
1798 : : " Performing recursive inlining on %s\n", node->dump_name ());
1799 : :
1800 : : /* Do the inlining and update list of recursive call during process. */
1801 : 16344 : while (!heap.empty ())
1802 : : {
1803 : 14512 : struct cgraph_edge *curr = heap.extract_min ();
1804 : 14512 : struct cgraph_node *cnode, *dest = curr->callee;
1805 : :
1806 : 14512 : if (!can_inline_edge_p (curr, true)
1807 : 14512 : || !can_inline_edge_by_limits_p (curr, CAN_INLINE_REPORT | CAN_INLINE_FORCE_LIMITS))
1808 : 0 : continue;
1809 : :
1810 : : /* MASTER_CLONE is produced in the case we already started modified
1811 : : the function. Be sure to redirect edge to the original body before
1812 : : estimating growths otherwise we will be seeing growths after inlining
1813 : : the already modified body. */
1814 : 14512 : if (master_clone)
1815 : : {
1816 : 12655 : curr->redirect_callee (master_clone);
1817 : 12655 : if (edge_growth_cache != NULL)
1818 : 12655 : edge_growth_cache->remove (curr);
1819 : : }
1820 : :
1821 : 14512 : if (estimate_size_after_inlining (node, curr) > limit)
1822 : : {
1823 : 21 : curr->redirect_callee (dest);
1824 : 21 : if (edge_growth_cache != NULL)
1825 : 21 : edge_growth_cache->remove (curr);
1826 : : break;
1827 : : }
1828 : :
1829 : 14491 : depth = 1;
1830 : 14491 : for (cnode = curr->caller;
1831 : 76309 : cnode->inlined_to; cnode = cnode->callers->caller)
1832 : 123636 : if (node->decl
1833 : 61818 : == curr->callee->ultimate_alias_target ()->decl)
1834 : 61818 : depth++;
1835 : :
1836 : 14491 : if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1837 : : {
1838 : 2736 : curr->redirect_callee (dest);
1839 : 2736 : if (edge_growth_cache != NULL)
1840 : 2736 : edge_growth_cache->remove (curr);
1841 : 2736 : continue;
1842 : : }
1843 : :
1844 : 11755 : if (dump_file)
1845 : : {
1846 : 14 : fprintf (dump_file,
1847 : : " Inlining call of depth %i", depth);
1848 : 28 : if (node->count.nonzero_p () && curr->count.initialized_p ())
1849 : : {
1850 : 2 : fprintf (dump_file, " called approx. %.2f times per call",
1851 : 2 : (double)curr->count.to_gcov_type ()
1852 : 2 : / node->count.to_gcov_type ());
1853 : : }
1854 : 14 : fprintf (dump_file, "\n");
1855 : : }
1856 : 11755 : if (!master_clone)
1857 : : {
1858 : : /* We need original clone to copy around. */
1859 : 1545 : master_clone = node->create_clone (node->decl, node->count,
1860 : 1545 : false, vNULL, true, NULL, NULL, NULL);
1861 : 4390 : for (e = master_clone->callees; e; e = e->next_callee)
1862 : 2845 : if (!e->inline_failed)
1863 : 459 : clone_inlined_nodes (e, true, false, NULL);
1864 : 1545 : curr->redirect_callee (master_clone);
1865 : 1545 : if (edge_growth_cache != NULL)
1866 : 1545 : edge_growth_cache->remove (curr);
1867 : : }
1868 : :
1869 : 11755 : inline_call (curr, false, new_edges, &overall_size, true);
1870 : 11755 : reset_node_cache (node);
1871 : 11755 : lookup_recursive_calls (node, curr->callee, &heap);
1872 : 11755 : n++;
1873 : : }
1874 : :
1875 : 1853 : if (!heap.empty () && dump_file)
1876 : 0 : fprintf (dump_file, " Recursive inlining growth limit met.\n");
1877 : :
1878 : 1853 : if (!master_clone)
1879 : : return false;
1880 : :
1881 : 1545 : if (dump_enabled_p ())
1882 : 4 : dump_printf_loc (MSG_NOTE, edge->call_stmt,
1883 : : "\n Inlined %i times, "
1884 : : "body grown from size %i to %i, time %f to %f\n", n,
1885 : 4 : ipa_size_summaries->get (master_clone)->size,
1886 : 4 : ipa_size_summaries->get (node)->size,
1887 : 4 : ipa_fn_summaries->get (master_clone)->time.to_double (),
1888 : 4 : ipa_fn_summaries->get (node)->time.to_double ());
1889 : :
1890 : : /* Remove master clone we used for inlining. We rely that clones inlined
1891 : : into master clone gets queued just before master clone so we don't
1892 : : need recursion. */
1893 : 20480 : for (node = symtab->first_function (); node != master_clone;
1894 : : node = next)
1895 : : {
1896 : 17390 : next = symtab->next_function (node);
1897 : 17390 : if (node->inlined_to == master_clone)
1898 : 923 : node->remove ();
1899 : : }
1900 : 1545 : master_clone->remove ();
1901 : 1545 : return true;
1902 : 1853 : }
1903 : :
1904 : :
1905 : : /* Given whole compilation unit estimate of INSNS, compute how large we can
1906 : : allow the unit to grow. */
1907 : :
1908 : : static int64_t
1909 : 939987 : compute_max_insns (cgraph_node *node, int insns)
1910 : : {
1911 : 939987 : int max_insns = insns;
1912 : 939987 : if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1913 : : max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1914 : :
1915 : 939987 : return ((int64_t) max_insns
1916 : 939987 : * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1917 : : }
1918 : :
1919 : :
1920 : : /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1921 : :
1922 : : static void
1923 : 860305 : add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> &new_edges)
1924 : : {
1925 : 1723094 : while (new_edges.length () > 0)
1926 : : {
1927 : 2484 : struct cgraph_edge *edge = new_edges.pop ();
1928 : :
1929 : 2484 : gcc_assert (!edge->aux);
1930 : 2484 : gcc_assert (edge->callee);
1931 : 2484 : if (edge->inline_failed
1932 : 2484 : && can_inline_edge_p (edge, true)
1933 : 1024 : && want_inline_small_function_p (edge, true)
1934 : 3210 : && can_inline_edge_by_limits_p (edge, CAN_INLINE_REPORT))
1935 : : {
1936 : 726 : inline_badness b (edge, edge_badness (edge, false));
1937 : 726 : edge->aux = heap->insert (b, edge);
1938 : : }
1939 : : }
1940 : 860305 : }
1941 : :
1942 : : /* Remove EDGE from the fibheap. */
1943 : :
1944 : : static void
1945 : 5361 : heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1946 : : {
1947 : 5361 : if (e->aux)
1948 : : {
1949 : 11 : ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1950 : 11 : e->aux = NULL;
1951 : : }
1952 : 5361 : }
1953 : :
1954 : : /* Return true if speculation of edge E seems useful.
1955 : : If ANTICIPATE_INLINING is true, be conservative and hope that E
1956 : : may get inlined. */
1957 : :
1958 : : bool
1959 : 43273 : speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1960 : : {
1961 : : /* If we have already decided to inline the edge, it seems useful.
1962 : : Also if ipa-cp or other pass worked hard enough to produce a clone,
1963 : : we already decided this is a good idea. */
1964 : 43273 : if (!e->inline_failed
1965 : 6881 : || e->callee->clone_of)
1966 : : return true;
1967 : :
1968 : 6366 : enum availability avail;
1969 : 6366 : struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1970 : : e->callee);
1971 : :
1972 : 6366 : gcc_assert (e->speculative && !e->indirect_unknown_callee);
1973 : :
1974 : : /* Even if calll statement is not hot, we can still have useful speculation
1975 : : in cases where a lot of time is spent is callee.
1976 : : Do not check maybe_hot_p. */
1977 : 6713 : if (!e->count.nonzero_p ())
1978 : : return false;
1979 : :
1980 : : /* See if IP optimizations found something potentially useful about the
1981 : : function. Do this only if the call seems hot since this is about
1982 : : optimizing the code surrounding call site rahter than improving
1983 : : callee. */
1984 : 6330 : if (avail >= AVAIL_AVAILABLE && e->maybe_hot_p ())
1985 : : {
1986 : 6246 : int ecf_flags = flags_from_decl_or_type (target->decl);
1987 : 6246 : if (ecf_flags & ECF_CONST)
1988 : : {
1989 : 207 : if (!(e->speculative_call_indirect_edge ()->indirect_info
1990 : 207 : ->ecf_flags & ECF_CONST))
1991 : : return true;
1992 : : }
1993 : 6039 : else if (ecf_flags & ECF_PURE)
1994 : : {
1995 : 1583 : if (!(e->speculative_call_indirect_edge ()->indirect_info
1996 : 1583 : ->ecf_flags & ECF_PURE))
1997 : : return true;
1998 : : }
1999 : 4456 : else if (get_modref_function_summary (target))
2000 : : return true;
2001 : : }
2002 : : /* If we did not managed to inline the function nor redirect
2003 : : to an ipa-cp clone (that are seen by having local flag set),
2004 : : it is probably pointless to inline it unless hardware is missing
2005 : : indirect call predictor.
2006 : :
2007 : : At this point we know we will not dispatch into faster version of
2008 : : callee, so if call itself is not hot, we definitely can give up
2009 : : speculating. */
2010 : 2054 : if (!anticipate_inlining && (!target->local || !e->maybe_hot_p ()))
2011 : 341 : return false;
2012 : : /* For overwritable targets there is not much to do. */
2013 : 1713 : if (!can_inline_edge_p (e, false)
2014 : 1713 : || !can_inline_edge_by_limits_p (e, CAN_INLINE_DISREGARD_LIMITS))
2015 : 6 : return false;
2016 : : /* OK, speculation seems interesting. */
2017 : : return true;
2018 : : }
2019 : :
2020 : : /* We know that EDGE is not going to be inlined.
2021 : : See if we can remove speculation. */
2022 : :
2023 : : static void
2024 : 81412 : resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
2025 : : {
2026 : 81412 : if (edge->speculative && !speculation_useful_p (edge, false))
2027 : : {
2028 : 25 : struct cgraph_node *node = edge->caller;
2029 : 15 : struct cgraph_node *where = node->inlined_to
2030 : 25 : ? node->inlined_to : node;
2031 : 25 : auto_bitmap updated_nodes;
2032 : :
2033 : 25 : if (edge->count.ipa ().initialized_p ())
2034 : 0 : spec_rem += edge->count.ipa ();
2035 : 25 : cgraph_edge::resolve_speculation (edge);
2036 : 25 : reset_edge_caches (where);
2037 : 25 : ipa_update_overall_fn_summary (where);
2038 : 25 : update_caller_keys (edge_heap, where,
2039 : : updated_nodes, NULL);
2040 : 25 : update_callee_keys (edge_heap, where, NULL,
2041 : : updated_nodes);
2042 : 25 : }
2043 : 81412 : }
2044 : :
2045 : : /* Return true if NODE should be accounted for overall size estimate.
2046 : : Skip all nodes optimized for size so we can measure the growth of hot
2047 : : part of program no matter of the padding. */
2048 : :
2049 : : bool
2050 : 3675457 : inline_account_function_p (struct cgraph_node *node)
2051 : : {
2052 : 3675457 : return (!DECL_EXTERNAL (node->decl)
2053 : 3503876 : && !opt_for_fn (node->decl, optimize_size)
2054 : 7090344 : && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
2055 : : }
2056 : :
2057 : : /* Count number of callers of NODE and store it into DATA (that
2058 : : points to int. Worker for cgraph_for_node_and_aliases. */
2059 : :
2060 : : static bool
2061 : 1447218 : sum_callers (struct cgraph_node *node, void *data)
2062 : : {
2063 : 1447218 : struct cgraph_edge *e;
2064 : 1447218 : int *num_calls = (int *)data;
2065 : :
2066 : 3436383 : for (e = node->callers; e; e = e->next_caller)
2067 : 1989165 : (*num_calls)++;
2068 : 1447218 : return false;
2069 : : }
2070 : :
2071 : : /* We only propagate across edges with non-interposable callee. */
2072 : :
2073 : : inline bool
2074 : 6906706 : ignore_edge_p (struct cgraph_edge *e)
2075 : : {
2076 : 6906706 : enum availability avail;
2077 : 6906706 : e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
2078 : 6906706 : return (avail <= AVAIL_INTERPOSABLE);
2079 : : }
2080 : :
2081 : : /* We use greedy algorithm for inlining of small functions:
2082 : : All inline candidates are put into prioritized heap ordered in
2083 : : increasing badness.
2084 : :
2085 : : The inlining of small functions is bounded by unit growth parameters. */
2086 : :
2087 : : static void
2088 : 227988 : inline_small_functions (void)
2089 : : {
2090 : 227988 : struct cgraph_node *node;
2091 : 227988 : struct cgraph_edge *edge;
2092 : 227988 : inline_badness b;
2093 : 227988 : edge_heap_t edge_heap (b);
2094 : 227988 : auto_bitmap updated_nodes;
2095 : 227988 : int min_size;
2096 : 227988 : auto_vec<cgraph_edge *> new_indirect_edges;
2097 : 227988 : int initial_size = 0;
2098 : 227988 : struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
2099 : 227988 : struct cgraph_edge_hook_list *edge_removal_hook_holder;
2100 : 227988 : new_indirect_edges.create (8);
2101 : :
2102 : 227988 : edge_removal_hook_holder
2103 : 227988 : = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
2104 : :
2105 : : /* Compute overall unit size and other global parameters used by badness
2106 : : metrics. */
2107 : :
2108 : 227988 : max_count = profile_count::uninitialized ();
2109 : 227988 : ipa_reduced_postorder (order, true, ignore_edge_p);
2110 : 227988 : free (order);
2111 : :
2112 : 2095024 : FOR_EACH_DEFINED_FUNCTION (node)
2113 : 1867036 : if (!node->inlined_to)
2114 : : {
2115 : 1769212 : if (!node->alias && node->analyzed
2116 : 1769212 : && (node->has_gimple_body_p () || node->thunk)
2117 : 3636217 : && opt_for_fn (node->decl, optimize))
2118 : : {
2119 : 1347127 : class ipa_fn_summary *info = ipa_fn_summaries->get (node);
2120 : 1347127 : struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
2121 : :
2122 : : /* Do not account external functions, they will be optimized out
2123 : : if not inlined. Also only count the non-cold portion of program. */
2124 : 1347127 : if (inline_account_function_p (node))
2125 : 1247087 : initial_size += ipa_size_summaries->get (node)->size;
2126 : 1347127 : info->growth = estimate_growth (node);
2127 : :
2128 : 1347127 : int num_calls = 0;
2129 : 1347127 : node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2130 : : true);
2131 : 1347127 : if (num_calls == 1)
2132 : 449463 : info->single_caller = true;
2133 : 1347127 : if (dfs && dfs->next_cycle)
2134 : : {
2135 : 5992 : struct cgraph_node *n2;
2136 : 5992 : int id = dfs->scc_no + 1;
2137 : 13319 : for (n2 = node; n2;
2138 : 7327 : n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
2139 : 11985 : if (opt_for_fn (n2->decl, optimize))
2140 : : {
2141 : 11977 : ipa_fn_summary *info2 = ipa_fn_summaries->get
2142 : 11977 : (n2->inlined_to ? n2->inlined_to : n2);
2143 : 11977 : if (info2->scc_no)
2144 : : break;
2145 : 7319 : info2->scc_no = id;
2146 : : }
2147 : : }
2148 : : }
2149 : :
2150 : 4229215 : for (edge = node->callers; edge; edge = edge->next_caller)
2151 : 2362210 : max_count = max_count.max (edge->count.ipa ());
2152 : : }
2153 : 227988 : ipa_free_postorder_info ();
2154 : 227988 : initialize_growth_caches ();
2155 : :
2156 : 227988 : if (dump_file)
2157 : 178 : fprintf (dump_file,
2158 : : "\nDeciding on inlining of small functions. Starting with size %i.\n",
2159 : : initial_size);
2160 : :
2161 : 227988 : overall_size = initial_size;
2162 : 227988 : min_size = overall_size;
2163 : :
2164 : : /* Populate the heap with all edges we might inline. */
2165 : :
2166 : 2095024 : FOR_EACH_DEFINED_FUNCTION (node)
2167 : : {
2168 : 1867036 : bool update = false;
2169 : 1867036 : struct cgraph_edge *next = NULL;
2170 : 1867036 : bool has_speculative = false;
2171 : :
2172 : 1867036 : if (!opt_for_fn (node->decl, optimize)
2173 : : /* With -Og we do not want to perform IPA inlining of small
2174 : : functions since there are no scalar cleanups after it
2175 : : that would realize the anticipated win. All abstraction
2176 : : is removed during early inlining. */
2177 : 1867036 : || opt_for_fn (node->decl, optimize_debug))
2178 : 449692 : continue;
2179 : :
2180 : 1417344 : if (dump_file)
2181 : 806 : fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2182 : :
2183 : 6955225 : for (edge = node->callees; edge; edge = edge->next_callee)
2184 : : {
2185 : 5537881 : if (edge->inline_failed
2186 : 5537850 : && !edge->aux
2187 : 5537808 : && can_inline_edge_p (edge, true)
2188 : 1541614 : && want_inline_small_function_p (edge, true)
2189 : 916885 : && can_inline_edge_by_limits_p (edge, CAN_INLINE_REPORT)
2190 : 6446546 : && edge->inline_failed)
2191 : : {
2192 : 908665 : gcc_assert (!edge->aux);
2193 : 908665 : update_edge_key (&edge_heap, edge);
2194 : : }
2195 : 5537881 : if (edge->speculative)
2196 : 5043 : has_speculative = true;
2197 : : }
2198 : 1417344 : if (has_speculative)
2199 : 22049 : for (edge = node->callees; edge; edge = next)
2200 : : {
2201 : 17950 : next = edge->next_callee;
2202 : 17950 : if (edge->speculative
2203 : 17950 : && !speculation_useful_p (edge, edge->aux != NULL))
2204 : : {
2205 : 273 : cgraph_edge::resolve_speculation (edge);
2206 : 273 : update = true;
2207 : : }
2208 : : }
2209 : 4099 : if (update)
2210 : : {
2211 : 0 : struct cgraph_node *where = node->inlined_to
2212 : 205 : ? node->inlined_to : node;
2213 : 205 : ipa_update_overall_fn_summary (where);
2214 : 205 : reset_edge_caches (where);
2215 : 205 : update_caller_keys (&edge_heap, where,
2216 : : updated_nodes, NULL);
2217 : 205 : update_callee_keys (&edge_heap, where, NULL,
2218 : : updated_nodes);
2219 : 205 : bitmap_clear (updated_nodes);
2220 : : }
2221 : : }
2222 : :
2223 : 227988 : gcc_assert (in_lto_p
2224 : : || !(max_count > 0)
2225 : : || flag_auto_profile
2226 : : || (profile_info && flag_branch_probabilities));
2227 : :
2228 : 2714566 : while (!edge_heap.empty ())
2229 : : {
2230 : 2486578 : int old_size = overall_size;
2231 : 2486578 : struct cgraph_node *where, *callee;
2232 : 2486578 : sreal badness = edge_heap.min_key ().badness;
2233 : 2486578 : sreal current_badness;
2234 : 2486578 : int growth;
2235 : :
2236 : 2486578 : edge = edge_heap.extract_min ();
2237 : 2486578 : gcc_assert (edge->aux);
2238 : 2486578 : edge->aux = NULL;
2239 : 2486578 : if (!edge->inline_failed || !edge->callee->analyzed)
2240 : 1626248 : continue;
2241 : :
2242 : : /* Be sure that caches are maintained consistent.
2243 : : This check is affected by scaling roundoff errors when compiling for
2244 : : IPA this we skip it in that case. */
2245 : 4805846 : if (flag_checking && !edge->callee->count.ipa_p ()
2246 : 4805849 : && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2247 : : {
2248 : 2319353 : sreal cached_badness = edge_badness (edge, false);
2249 : :
2250 : 2319353 : int old_size_est = estimate_edge_size (edge);
2251 : 2319353 : sreal old_time_est = estimate_edge_time (edge);
2252 : 2319353 : int old_hints_est = estimate_edge_hints (edge);
2253 : :
2254 : 2319353 : if (edge_growth_cache != NULL)
2255 : 2319353 : edge_growth_cache->remove (edge);
2256 : 4163852 : reset_node_cache (edge->caller->inlined_to
2257 : : ? edge->caller->inlined_to
2258 : : : edge->caller);
2259 : 2319353 : gcc_assert (old_size_est == estimate_edge_size (edge));
2260 : 2319353 : gcc_assert (old_time_est == estimate_edge_time (edge));
2261 : : /* FIXME:
2262 : :
2263 : : gcc_assert (old_hints_est == estimate_edge_hints (edge));
2264 : :
2265 : : fails with profile feedback because some hints depends on
2266 : : maybe_hot_edge_p predicate and because callee gets inlined to other
2267 : : calls, the edge may become cold.
2268 : : This ought to be fixed by computing relative probabilities
2269 : : for given invocation but that will be better done once whole
2270 : : code is converted to sreals. Disable for now and revert to "wrong"
2271 : : value so enable/disable checking paths agree. */
2272 : 2319353 : edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2273 : :
2274 : : /* When updating the edge costs, we only decrease badness in the keys.
2275 : : Increases of badness are handled lazily; when we see key with out
2276 : : of date value on it, we re-insert it now. */
2277 : 2319353 : current_badness = edge_badness (edge, false);
2278 : 2319353 : gcc_assert (cached_badness == current_badness);
2279 : 2319353 : gcc_assert (current_badness >= badness);
2280 : : }
2281 : : else
2282 : 167143 : current_badness = edge_badness (edge, false);
2283 : 2486496 : if (current_badness != badness)
2284 : : {
2285 : 1700908 : if (edge_heap.min () && current_badness > edge_heap.min_key ().badness)
2286 : : {
2287 : 1544754 : inline_badness b (edge, current_badness);
2288 : 1544754 : edge->aux = edge_heap.insert (b, edge);
2289 : 1544754 : continue;
2290 : 1544754 : }
2291 : : else
2292 : 156154 : badness = current_badness;
2293 : : }
2294 : :
2295 : 941742 : if (!can_inline_edge_p (edge, true)
2296 : 941742 : || !can_inline_edge_by_limits_p (edge, CAN_INLINE_REPORT))
2297 : : {
2298 : 1755 : resolve_noninline_speculation (&edge_heap, edge);
2299 : 1755 : continue;
2300 : : }
2301 : :
2302 : 939987 : callee = edge->callee->ultimate_alias_target ();
2303 : 939987 : growth = estimate_edge_growth (edge);
2304 : 939987 : if (dump_file)
2305 : : {
2306 : 456 : fprintf (dump_file,
2307 : : "\nConsidering %s with %i size\n",
2308 : : callee->dump_name (),
2309 : 456 : ipa_size_summaries->get (callee)->size);
2310 : 912 : fprintf (dump_file,
2311 : : " to be inlined into %s in %s:%i\n"
2312 : : " Estimated badness is %f, frequency %.2f.\n",
2313 : 456 : edge->caller->dump_name (),
2314 : 456 : edge->call_stmt
2315 : 434 : && (LOCATION_LOCUS (gimple_location ((const gimple *)
2316 : : edge->call_stmt))
2317 : : > BUILTINS_LOCATION)
2318 : 425 : ? gimple_filename ((const gimple *) edge->call_stmt)
2319 : : : "unknown",
2320 : 456 : edge->call_stmt
2321 : 434 : ? gimple_lineno ((const gimple *) edge->call_stmt)
2322 : : : -1,
2323 : : badness.to_double (),
2324 : 456 : edge->sreal_frequency ().to_double ());
2325 : 456 : if (edge->count.ipa ().initialized_p ())
2326 : : {
2327 : 0 : fprintf (dump_file, " Called ");
2328 : 0 : edge->count.ipa ().dump (dump_file);
2329 : 0 : fprintf (dump_file, " times\n");
2330 : : }
2331 : 456 : if (dump_flags & TDF_DETAILS)
2332 : 164 : edge_badness (edge, true);
2333 : : }
2334 : :
2335 : 939987 : where = edge->caller;
2336 : :
2337 : 939987 : if (overall_size + growth > compute_max_insns (where, min_size)
2338 : 939987 : && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2339 : : {
2340 : 75272 : edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2341 : 75272 : report_inline_failed_reason (edge);
2342 : 75272 : resolve_noninline_speculation (&edge_heap, edge);
2343 : 75272 : continue;
2344 : : }
2345 : :
2346 : 864715 : if (!want_inline_small_function_p (edge, true))
2347 : : {
2348 : 2233 : resolve_noninline_speculation (&edge_heap, edge);
2349 : 2233 : continue;
2350 : : }
2351 : :
2352 : 862482 : profile_count old_count = callee->count;
2353 : :
2354 : : /* Heuristics for inlining small functions work poorly for
2355 : : recursive calls where we do effects similar to loop unrolling.
2356 : : When inlining such edge seems profitable, leave decision on
2357 : : specific inliner. */
2358 : 862482 : if (edge->recursive_p ())
2359 : : {
2360 : 1853 : if (where->inlined_to)
2361 : 508 : where = where->inlined_to;
2362 : :
2363 : : /* Disable always_inline on self recursive functions.
2364 : : This prevents some inlining bombs such as one in PR113291
2365 : : from exploding.
2366 : : It is not enough to stop inlining in self recursive always_inlines
2367 : : since they may grow large enough so always inlining them even
2368 : : with recursin depth 0 is too much.
2369 : :
2370 : : All sane uses of always_inline should be handled during
2371 : : early optimizations. */
2372 : 1853 : DECL_DISREGARD_INLINE_LIMITS (where->decl) = false;
2373 : :
2374 : 1853 : if (!recursive_inlining (edge,
2375 : 1853 : opt_for_fn (edge->caller->decl,
2376 : : flag_indirect_inlining)
2377 : : ? &new_indirect_edges : NULL))
2378 : : {
2379 : 308 : edge->inline_failed = CIF_RECURSIVE_INLINING;
2380 : 308 : resolve_noninline_speculation (&edge_heap, edge);
2381 : 308 : continue;
2382 : : }
2383 : 1545 : reset_edge_caches (where);
2384 : : /* Recursive inliner inlines all recursive calls of the function
2385 : : at once. Consequently we need to update all callee keys. */
2386 : 1545 : if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2387 : 1520 : add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2388 : 1545 : update_callee_keys (&edge_heap, where, where, updated_nodes);
2389 : 1545 : bitmap_clear (updated_nodes);
2390 : : }
2391 : : else
2392 : : {
2393 : 860629 : struct cgraph_node *outer_node = NULL;
2394 : 860629 : int depth = 0;
2395 : :
2396 : : /* Consider the case where self recursive function A is inlined
2397 : : into B. This is desired optimization in some cases, since it
2398 : : leads to effect similar of loop peeling and we might completely
2399 : : optimize out the recursive call. However we must be extra
2400 : : selective. */
2401 : :
2402 : 860629 : where = edge->caller;
2403 : 1247667 : while (where->inlined_to)
2404 : : {
2405 : 387038 : if (where->decl == callee->decl)
2406 : 7876 : outer_node = where, depth++;
2407 : 387038 : where = where->callers->caller;
2408 : : }
2409 : 862473 : if (outer_node
2410 : 860629 : && !want_inline_self_recursive_call_p (edge, outer_node,
2411 : : true, depth))
2412 : : {
2413 : 1844 : edge->inline_failed
2414 : 1844 : = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2415 : 1844 : ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2416 : 1844 : resolve_noninline_speculation (&edge_heap, edge);
2417 : 1844 : continue;
2418 : : }
2419 : 858785 : else if (depth && dump_file)
2420 : 6 : fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2421 : :
2422 : 858785 : gcc_checking_assert (!callee->inlined_to);
2423 : :
2424 : 858785 : int old_size = ipa_size_summaries->get (where)->size;
2425 : 858785 : sreal old_time = ipa_fn_summaries->get (where)->time;
2426 : :
2427 : 858785 : inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2428 : 858785 : reset_edge_caches (edge->callee);
2429 : 858785 : add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2430 : :
2431 : : /* If caller's size and time increased we do not need to update
2432 : : all edges because badness is not going to decrease. */
2433 : 858785 : if (old_size <= ipa_size_summaries->get (where)->size
2434 : 810142 : && old_time <= ipa_fn_summaries->get (where)->time
2435 : : /* Wrapper penalty may be non-monotonous in this respect.
2436 : : Fortunately it only affects small functions. */
2437 : 1421178 : && !wrapper_heuristics_may_apply (where, old_size))
2438 : 403754 : update_callee_keys (&edge_heap, edge->callee, edge->callee,
2439 : : updated_nodes);
2440 : : else
2441 : 455031 : update_callee_keys (&edge_heap, where,
2442 : : edge->callee,
2443 : : updated_nodes);
2444 : : }
2445 : 860330 : where = edge->caller;
2446 : 860330 : if (where->inlined_to)
2447 : 206759 : where = where->inlined_to;
2448 : :
2449 : : /* Our profitability metric can depend on local properties
2450 : : such as number of inlinable calls and size of the function body.
2451 : : After inlining these properties might change for the function we
2452 : : inlined into (since it's body size changed) and for the functions
2453 : : called by function we inlined (since number of it inlinable callers
2454 : : might change). */
2455 : 860330 : update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2456 : : /* Offline copy count has possibly changed, recompute if profile is
2457 : : available. */
2458 : 860330 : struct cgraph_node *n
2459 : 860330 : = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2460 : 574103 : if (n != edge->callee && n->analyzed && !(n->count == old_count)
2461 : 860387 : && n->count.ipa_p ())
2462 : 57 : update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2463 : 860330 : bitmap_clear (updated_nodes);
2464 : :
2465 : 860330 : if (dump_enabled_p ())
2466 : : {
2467 : 465 : ipa_fn_summary *s = ipa_fn_summaries->get (where);
2468 : :
2469 : : /* dump_printf can't handle %+i. */
2470 : 465 : char buf_net_change[100];
2471 : 465 : snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2472 : : overall_size - old_size);
2473 : :
2474 : 930 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2475 : : " Inlined %C into %C which now has time %f and "
2476 : : "size %i, net change of %s%s.\n",
2477 : : edge->callee, edge->caller,
2478 : : s->time.to_double (),
2479 : 465 : ipa_size_summaries->get (edge->caller)->size,
2480 : : buf_net_change,
2481 : 465 : cross_module_call_p (edge)
2482 : : ? " (cross module)" : "");
2483 : : }
2484 : 860330 : if (min_size > overall_size)
2485 : : {
2486 : 214042 : min_size = overall_size;
2487 : :
2488 : 214042 : if (dump_file)
2489 : 332 : fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2490 : : }
2491 : : }
2492 : :
2493 : 227988 : free_growth_caches ();
2494 : 227988 : if (dump_enabled_p ())
2495 : 438 : dump_printf (MSG_NOTE,
2496 : : "Unit growth for small function inlining: %i->%i (%i%%)\n",
2497 : : initial_size, overall_size,
2498 : 195 : initial_size ? overall_size * 100 / (initial_size) - 100 : 0);
2499 : 227988 : symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2500 : 227988 : }
2501 : :
2502 : : /* Flatten NODE. Performed both during early inlining and
2503 : : at IPA inlining time. */
2504 : :
2505 : : static void
2506 : 703 : flatten_function (struct cgraph_node *node, bool early, bool update)
2507 : : {
2508 : 703 : struct cgraph_edge *e;
2509 : :
2510 : : /* We shouldn't be called recursively when we are being processed. */
2511 : 703 : gcc_assert (node->aux == NULL);
2512 : :
2513 : 703 : node->aux = (void *) node;
2514 : :
2515 : 1624 : for (e = node->callees; e; e = e->next_callee)
2516 : : {
2517 : 921 : struct cgraph_node *orig_callee;
2518 : 921 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2519 : :
2520 : : /* We've hit cycle? It is time to give up. */
2521 : 921 : if (callee->aux)
2522 : : {
2523 : 15 : if (dump_enabled_p ())
2524 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2525 : : "Not inlining %C into %C to avoid cycle.\n",
2526 : : callee, e->caller);
2527 : 15 : if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2528 : 15 : e->inline_failed = CIF_RECURSIVE_INLINING;
2529 : 15 : continue;
2530 : : }
2531 : :
2532 : : /* When the edge is already inlined, we just need to recurse into
2533 : : it in order to fully flatten the leaves. */
2534 : 906 : if (!e->inline_failed)
2535 : : {
2536 : 350 : flatten_function (callee, early, false);
2537 : 350 : continue;
2538 : : }
2539 : :
2540 : : /* Flatten attribute needs to be processed during late inlining. For
2541 : : extra code quality we however do flattening during early optimization,
2542 : : too. */
2543 : 311 : if (!early
2544 : 556 : ? !can_inline_edge_p (e, true)
2545 : 245 : && !can_inline_edge_by_limits_p (e, CAN_INLINE_REPORT)
2546 : 311 : : !can_early_inline_edge_p (e))
2547 : 419 : continue;
2548 : :
2549 : 137 : if (e->recursive_p ())
2550 : : {
2551 : 0 : if (dump_enabled_p ())
2552 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2553 : : "Not inlining: recursive call.\n");
2554 : 0 : continue;
2555 : : }
2556 : :
2557 : 137 : if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2558 : 274 : != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2559 : : {
2560 : 4 : if (dump_enabled_p ())
2561 : 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2562 : : "Not inlining: SSA form does not match.\n");
2563 : 4 : continue;
2564 : : }
2565 : :
2566 : : /* Inline the edge and flatten the inline clone. Avoid
2567 : : recursing through the original node if the node was cloned. */
2568 : 133 : if (dump_enabled_p ())
2569 : 3 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2570 : : " Inlining %C into %C.\n",
2571 : : callee, e->caller);
2572 : 133 : orig_callee = callee;
2573 : 133 : inline_call (e, true, NULL, NULL, false);
2574 : 133 : if (e->callee != orig_callee)
2575 : 94 : orig_callee->aux = (void *) node;
2576 : 133 : flatten_function (e->callee, early, false);
2577 : 133 : if (e->callee != orig_callee)
2578 : 94 : orig_callee->aux = NULL;
2579 : : }
2580 : :
2581 : 703 : node->aux = NULL;
2582 : 703 : cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2583 : 703 : if (update && opt_for_fn (where->decl, optimize))
2584 : 209 : ipa_update_overall_fn_summary (where);
2585 : 703 : }
2586 : :
2587 : : /* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases.
2588 : : DATA points to number of calls originally found so we avoid infinite
2589 : : recursion. */
2590 : :
2591 : : static bool
2592 : 28301 : inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2593 : : hash_set<cgraph_node *> *callers)
2594 : : {
2595 : 28301 : int *num_calls = (int *)data;
2596 : 28301 : bool callee_removed = false;
2597 : :
2598 : 58953 : while (node->callers && !node->inlined_to)
2599 : : {
2600 : 31820 : struct cgraph_node *caller = node->callers->caller;
2601 : :
2602 : 31820 : if (!can_inline_edge_p (node->callers, true)
2603 : 31820 : || !can_inline_edge_by_limits_p (node->callers, CAN_INLINE_REPORT)
2604 : 63640 : || node->callers->recursive_p ())
2605 : : {
2606 : 0 : if (dump_file)
2607 : 0 : fprintf (dump_file, "Uninlinable call found; giving up.\n");
2608 : 0 : *num_calls = 0;
2609 : 0 : return false;
2610 : : }
2611 : :
2612 : 31820 : if (dump_file)
2613 : : {
2614 : 2 : cgraph_node *ultimate = node->ultimate_alias_target ();
2615 : 2 : fprintf (dump_file,
2616 : : "\nInlining %s size %i.\n",
2617 : : ultimate->dump_name (),
2618 : 2 : ipa_size_summaries->get (ultimate)->size);
2619 : 2 : fprintf (dump_file,
2620 : : " Called once from %s %i insns.\n",
2621 : : node->callers->caller->dump_name (),
2622 : 2 : ipa_size_summaries->get (node->callers->caller)->size);
2623 : : }
2624 : :
2625 : : /* Remember which callers we inlined to, delaying updating the
2626 : : overall summary. */
2627 : 31820 : callers->add (node->callers->caller);
2628 : 31820 : inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2629 : 31820 : if (dump_file)
2630 : 2 : fprintf (dump_file,
2631 : : " Inlined into %s which now has %i size\n",
2632 : : caller->dump_name (),
2633 : 2 : ipa_size_summaries->get (caller)->size);
2634 : 31820 : if (!(*num_calls)--)
2635 : : {
2636 : 0 : if (dump_file)
2637 : 0 : fprintf (dump_file, "New calls found; giving up.\n");
2638 : 0 : return callee_removed;
2639 : : }
2640 : 31820 : if (callee_removed)
2641 : : return true;
2642 : : }
2643 : : return false;
2644 : : }
2645 : :
2646 : : /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2647 : : update. */
2648 : :
2649 : : static bool
2650 : 28301 : inline_to_all_callers (struct cgraph_node *node, void *data)
2651 : : {
2652 : 28301 : hash_set<cgraph_node *> callers;
2653 : 28301 : bool res = inline_to_all_callers_1 (node, data, &callers);
2654 : : /* Perform the delayed update of the overall summary of all callers
2655 : : processed. This avoids quadratic behavior in the cases where
2656 : : we have a lot of calls to the same function. */
2657 : 55177 : for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2658 : 82053 : i != callers.end (); ++i)
2659 : 26876 : ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2660 : 28301 : return res;
2661 : 28301 : }
2662 : :
2663 : : /* Output overall time estimate. */
2664 : : static void
2665 : 356 : dump_overall_stats (void)
2666 : : {
2667 : 356 : sreal sum_weighted = 0, sum = 0;
2668 : 356 : struct cgraph_node *node;
2669 : :
2670 : 2234 : FOR_EACH_DEFINED_FUNCTION (node)
2671 : 1878 : if (!node->inlined_to
2672 : 1376 : && !node->alias)
2673 : : {
2674 : 1274 : ipa_fn_summary *s = ipa_fn_summaries->get (node);
2675 : 1274 : if (s != NULL)
2676 : : {
2677 : 1170 : sum += s->time;
2678 : 1170 : if (node->count.ipa ().initialized_p ())
2679 : 14 : sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2680 : : }
2681 : : }
2682 : 356 : fprintf (dump_file, "Overall time estimate: "
2683 : : "%f weighted by profile: "
2684 : : "%f\n", sum.to_double (), sum_weighted.to_double ());
2685 : 356 : }
2686 : :
2687 : : /* Output some useful stats about inlining. */
2688 : :
2689 : : static void
2690 : 178 : dump_inline_stats (void)
2691 : : {
2692 : 178 : int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2693 : 178 : int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2694 : 178 : int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2695 : 178 : int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2696 : 178 : int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2697 : 178 : int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2698 : 178 : int64_t reason[CIF_N_REASONS][2];
2699 : 5518 : sreal reason_freq[CIF_N_REASONS];
2700 : 178 : int i;
2701 : 178 : struct cgraph_node *node;
2702 : :
2703 : 178 : memset (reason, 0, sizeof (reason));
2704 : 5518 : for (i=0; i < CIF_N_REASONS; i++)
2705 : 5340 : reason_freq[i] = 0;
2706 : 1185 : FOR_EACH_DEFINED_FUNCTION (node)
2707 : : {
2708 : 1007 : struct cgraph_edge *e;
2709 : 5839 : for (e = node->callees; e; e = e->next_callee)
2710 : : {
2711 : 4832 : if (e->inline_failed)
2712 : : {
2713 : 4333 : if (e->count.ipa ().initialized_p ())
2714 : 2611 : reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2715 : 4333 : reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2716 : 4333 : reason[(int) e->inline_failed][1] ++;
2717 : 4333 : if (DECL_VIRTUAL_P (e->callee->decl)
2718 : 4333 : && e->count.ipa ().initialized_p ())
2719 : : {
2720 : 0 : if (e->indirect_inlining_edge)
2721 : 0 : noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2722 : : else
2723 : 0 : noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2724 : : }
2725 : 4333 : else if (e->count.ipa ().initialized_p ())
2726 : : {
2727 : 2611 : if (e->indirect_inlining_edge)
2728 : 0 : noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2729 : : else
2730 : 2611 : noninlined_cnt += e->count.ipa ().to_gcov_type ();
2731 : : }
2732 : : }
2733 : 499 : else if (e->count.ipa ().initialized_p ())
2734 : : {
2735 : 0 : if (e->speculative)
2736 : : {
2737 : 0 : if (DECL_VIRTUAL_P (e->callee->decl))
2738 : 0 : inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2739 : : else
2740 : 0 : inlined_speculative += e->count.ipa ().to_gcov_type ();
2741 : : }
2742 : 0 : else if (DECL_VIRTUAL_P (e->callee->decl))
2743 : : {
2744 : 0 : if (e->indirect_inlining_edge)
2745 : 0 : inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2746 : : else
2747 : 0 : inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2748 : : }
2749 : : else
2750 : : {
2751 : 0 : if (e->indirect_inlining_edge)
2752 : 0 : inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2753 : : else
2754 : 0 : inlined_cnt += e->count.ipa ().to_gcov_type ();
2755 : : }
2756 : : }
2757 : : }
2758 : 1100 : for (e = node->indirect_calls; e; e = e->next_callee)
2759 : 93 : if (e->indirect_info->polymorphic
2760 : 93 : & e->count.ipa ().initialized_p ())
2761 : 0 : indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2762 : 93 : else if (e->count.ipa ().initialized_p ())
2763 : 0 : indirect_cnt += e->count.ipa ().to_gcov_type ();
2764 : : }
2765 : 178 : if (max_count.initialized_p ())
2766 : : {
2767 : 0 : fprintf (dump_file,
2768 : : "Inlined %" PRId64 " + speculative "
2769 : : "%" PRId64 " + speculative polymorphic "
2770 : : "%" PRId64 " + previously indirect "
2771 : : "%" PRId64 " + virtual "
2772 : : "%" PRId64 " + virtual and previously indirect "
2773 : : "%" PRId64 "\n" "Not inlined "
2774 : : "%" PRId64 " + previously indirect "
2775 : : "%" PRId64 " + virtual "
2776 : : "%" PRId64 " + virtual and previously indirect "
2777 : : "%" PRId64 " + still indirect "
2778 : : "%" PRId64 " + still indirect polymorphic "
2779 : : "%" PRId64 "\n", inlined_cnt,
2780 : : inlined_speculative, inlined_speculative_ply,
2781 : : inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2782 : : noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2783 : : noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2784 : 0 : fprintf (dump_file, "Removed speculations ");
2785 : 0 : spec_rem.dump (dump_file);
2786 : 0 : fprintf (dump_file, "\n");
2787 : : }
2788 : 178 : dump_overall_stats ();
2789 : 178 : fprintf (dump_file, "\nWhy inlining failed?\n");
2790 : 5696 : for (i = 0; i < CIF_N_REASONS; i++)
2791 : 5340 : if (reason[i][1])
2792 : 149 : fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2793 : : cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2794 : : (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2795 : 178 : }
2796 : :
2797 : : /* Called when node is removed. */
2798 : :
2799 : : static void
2800 : 0 : flatten_remove_node_hook (struct cgraph_node *node, void *data)
2801 : : {
2802 : 0 : if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2803 : : return;
2804 : :
2805 : 0 : hash_set<struct cgraph_node *> *removed
2806 : : = (hash_set<struct cgraph_node *> *) data;
2807 : 0 : removed->add (node);
2808 : : }
2809 : :
2810 : : /* Decide on the inlining. We do so in the topological order to avoid
2811 : : expenses on updating data structures. */
2812 : :
2813 : : static unsigned int
2814 : 227988 : ipa_inline (void)
2815 : : {
2816 : 227988 : struct cgraph_node *node;
2817 : 227988 : int nnodes;
2818 : 227988 : struct cgraph_node **order;
2819 : 227988 : int i, j;
2820 : 227988 : int cold;
2821 : 227988 : bool remove_functions = false;
2822 : :
2823 : 227988 : order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2824 : :
2825 : 227988 : if (dump_file)
2826 : 178 : ipa_dump_fn_summaries (dump_file);
2827 : :
2828 : 227988 : nnodes = ipa_reverse_postorder (order);
2829 : 227988 : spec_rem = profile_count::zero ();
2830 : :
2831 : 7679364 : FOR_EACH_FUNCTION (node)
2832 : : {
2833 : 3611694 : node->aux = 0;
2834 : :
2835 : : /* Recompute the default reasons for inlining because they may have
2836 : : changed during merging. */
2837 : 3611694 : if (in_lto_p)
2838 : : {
2839 : 430964 : for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2840 : : {
2841 : 327849 : gcc_assert (e->inline_failed);
2842 : 327849 : initialize_inline_failed (e);
2843 : : }
2844 : 104366 : for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2845 : 1251 : initialize_inline_failed (e);
2846 : : }
2847 : : }
2848 : :
2849 : 227988 : if (dump_file)
2850 : 178 : fprintf (dump_file, "\nFlattening functions:\n");
2851 : :
2852 : : /* First shrink order array, so that it only contains nodes with
2853 : : flatten attribute. */
2854 : 3839682 : for (i = nnodes - 1, j = i; i >= 0; i--)
2855 : : {
2856 : 3611694 : node = order[i];
2857 : 3611694 : if (node->definition
2858 : : /* Do not try to flatten aliases. These may happen for example when
2859 : : creating local aliases. */
2860 : 1867018 : && !node->alias
2861 : 5380919 : && lookup_attribute ("flatten",
2862 : 1769225 : DECL_ATTRIBUTES (node->decl)) != NULL)
2863 : 85 : order[j--] = order[i];
2864 : : }
2865 : :
2866 : : /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2867 : : nodes with flatten attribute. If there is more than one such
2868 : : node, we need to register a node removal hook, as flatten_function
2869 : : could remove other nodes with flatten attribute. See PR82801. */
2870 : 227988 : struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2871 : 227988 : hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2872 : 227988 : if (j < nnodes - 2)
2873 : : {
2874 : 15 : flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2875 : 15 : node_removal_hook_holder
2876 : 15 : = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2877 : : flatten_removed_nodes);
2878 : : }
2879 : :
2880 : : /* In the first pass handle functions to be flattened. Do this with
2881 : : a priority so none of our later choices will make this impossible. */
2882 : 228073 : for (i = nnodes - 1; i > j; i--)
2883 : : {
2884 : 85 : node = order[i];
2885 : 85 : if (flatten_removed_nodes
2886 : 85 : && flatten_removed_nodes->contains (node))
2887 : 0 : continue;
2888 : :
2889 : : /* Handle nodes to be flattened.
2890 : : Ideally when processing callees we stop inlining at the
2891 : : entry of cycles, possibly cloning that entry point and
2892 : : try to flatten itself turning it into a self-recursive
2893 : : function. */
2894 : 85 : if (dump_file)
2895 : 4 : fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2896 : 85 : flatten_function (node, false, true);
2897 : : }
2898 : :
2899 : 227988 : if (j < nnodes - 2)
2900 : : {
2901 : 15 : symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2902 : 30 : delete flatten_removed_nodes;
2903 : : }
2904 : 227988 : free (order);
2905 : :
2906 : 227988 : if (dump_file)
2907 : 178 : dump_overall_stats ();
2908 : :
2909 : 227988 : inline_small_functions ();
2910 : :
2911 : 227988 : gcc_assert (symtab->state == IPA_SSA);
2912 : 227988 : symtab->state = IPA_SSA_AFTER_INLINING;
2913 : : /* Do first after-inlining removal. We want to remove all "stale" extern
2914 : : inline functions and virtual functions so we really know what is called
2915 : : once. */
2916 : 227988 : symtab->remove_unreachable_nodes (dump_file);
2917 : :
2918 : : /* Inline functions with a property that after inlining into all callers the
2919 : : code size will shrink because the out-of-line copy is eliminated.
2920 : : We do this regardless on the callee size as long as function growth limits
2921 : : are met. */
2922 : 227988 : if (dump_file)
2923 : 178 : fprintf (dump_file,
2924 : : "\nDeciding on functions to be inlined into all callers and "
2925 : : "removing useless speculations:\n");
2926 : :
2927 : : /* Inlining one function called once has good chance of preventing
2928 : : inlining other function into the same callee. Ideally we should
2929 : : work in priority order, but probably inlining hot functions first
2930 : : is good cut without the extra pain of maintaining the queue.
2931 : :
2932 : : ??? this is not really fitting the bill perfectly: inlining function
2933 : : into callee often leads to better optimization of callee due to
2934 : : increased context for optimization.
2935 : : For example if main() function calls a function that outputs help
2936 : : and then function that does the main optimization, we should inline
2937 : : the second with priority even if both calls are cold by themselves.
2938 : :
2939 : : We probably want to implement new predicate replacing our use of
2940 : : maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2941 : : to be hot. */
2942 : 683964 : for (cold = 0; cold <= 1; cold ++)
2943 : : {
2944 : 6083392 : FOR_EACH_DEFINED_FUNCTION (node)
2945 : : {
2946 : 5627416 : struct cgraph_edge *edge, *next;
2947 : 5627416 : bool update=false;
2948 : :
2949 : 5627416 : if (!opt_for_fn (node->decl, optimize)
2950 : 5627416 : || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2951 : 899464 : continue;
2952 : :
2953 : 19013925 : for (edge = node->callees; edge; edge = next)
2954 : : {
2955 : 14285973 : next = edge->next_callee;
2956 : 14285973 : if (edge->speculative && !speculation_useful_p (edge, false))
2957 : : {
2958 : 43 : if (edge->count.ipa ().initialized_p ())
2959 : 0 : spec_rem += edge->count.ipa ();
2960 : 43 : cgraph_edge::resolve_speculation (edge);
2961 : 43 : update = true;
2962 : 43 : remove_functions = true;
2963 : : }
2964 : : }
2965 : 4727952 : if (update)
2966 : : {
2967 : 2 : struct cgraph_node *where = node->inlined_to
2968 : 43 : ? node->inlined_to : node;
2969 : 43 : reset_edge_caches (where);
2970 : 43 : ipa_update_overall_fn_summary (where);
2971 : : }
2972 : 4727952 : if (want_inline_function_to_all_callers_p (node, cold))
2973 : : {
2974 : 24829 : int num_calls = 0;
2975 : 24829 : node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2976 : : true);
2977 : 24829 : while (node->call_for_symbol_and_aliases
2978 : 25997 : (inline_to_all_callers, &num_calls, true))
2979 : : ;
2980 : 24829 : remove_functions = true;
2981 : : }
2982 : : }
2983 : : }
2984 : :
2985 : 227988 : if (dump_enabled_p ())
2986 : 243 : dump_printf (MSG_NOTE,
2987 : : "\nInlined %i calls, eliminated %i functions\n\n",
2988 : : ncalls_inlined, nfunctions_inlined);
2989 : 227988 : if (dump_file)
2990 : 178 : dump_inline_stats ();
2991 : :
2992 : 227988 : if (dump_file)
2993 : 178 : ipa_dump_fn_summaries (dump_file);
2994 : 227988 : return remove_functions ? TODO_remove_functions : 0;
2995 : : }
2996 : :
2997 : : /* Inline always-inline function calls in NODE
2998 : : (which itself is possibly inline). */
2999 : :
3000 : : static bool
3001 : 3438389 : inline_always_inline_functions (struct cgraph_node *node)
3002 : : {
3003 : 3438389 : struct cgraph_edge *e;
3004 : 3438389 : bool inlined = false;
3005 : :
3006 : 13238121 : for (e = node->callees; e; e = e->next_callee)
3007 : : {
3008 : 9799732 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
3009 : 9799732 : gcc_checking_assert (!callee->aux || callee->aux == (void *)(size_t)1);
3010 : 9799732 : if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
3011 : : /* Watch for self-recursive cycles. */
3012 : 9799732 : || callee->aux)
3013 : 9239777 : continue;
3014 : :
3015 : 559955 : if (e->recursive_p ())
3016 : : {
3017 : 6 : if (dump_enabled_p ())
3018 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
3019 : : " Not inlining recursive call to %C.\n",
3020 : : e->callee);
3021 : 6 : e->inline_failed = CIF_RECURSIVE_INLINING;
3022 : 6 : continue;
3023 : : }
3024 : 559949 : if (callee->definition
3025 : 559914 : && !ipa_fn_summaries->get (callee))
3026 : 221 : compute_fn_summary (callee, true);
3027 : :
3028 : 559949 : if (!can_early_inline_edge_p (e))
3029 : : {
3030 : : /* Set inlined to true if the callee is marked "always_inline" but
3031 : : is not inlinable. This will allow flagging an error later in
3032 : : expand_call_inline in tree-inline.cc. */
3033 : 362 : if (lookup_attribute ("always_inline",
3034 : 362 : DECL_ATTRIBUTES (callee->decl)) != NULL)
3035 : 28 : inlined = true;
3036 : 362 : continue;
3037 : : }
3038 : :
3039 : 559587 : if (dump_enabled_p ())
3040 : 18 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
3041 : : " Inlining %C into %C (always_inline).\n",
3042 : : e->callee, e->caller);
3043 : 559587 : inline_call (e, true, NULL, NULL, false);
3044 : 559587 : callee->aux = (void *)(size_t)1;
3045 : : /* Inline recursively to handle the case where always_inline function was
3046 : : not optimized yet since it is a part of a cycle in callgraph. */
3047 : 559587 : inline_always_inline_functions (e->callee);
3048 : 559587 : callee->aux = NULL;
3049 : 559587 : inlined = true;
3050 : : }
3051 : 3438389 : return inlined;
3052 : : }
3053 : :
3054 : : /* Decide on the inlining. We do so in the topological order to avoid
3055 : : expenses on updating data structures. */
3056 : :
3057 : : static bool
3058 : 2421631 : early_inline_small_functions (struct cgraph_node *node)
3059 : : {
3060 : 2421631 : struct cgraph_edge *e;
3061 : 2421631 : bool inlined = false;
3062 : :
3063 : 10248827 : for (e = node->callees; e; e = e->next_callee)
3064 : : {
3065 : 7827196 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
3066 : :
3067 : : /* We can encounter not-yet-analyzed function during
3068 : : early inlining on callgraphs with strongly
3069 : : connected components. */
3070 : 7827196 : ipa_fn_summary *s = ipa_fn_summaries->get (callee);
3071 : 7827196 : if (s == NULL || !s->inlinable || !e->inline_failed)
3072 : 4117169 : continue;
3073 : :
3074 : : /* Do not consider functions not declared inline. */
3075 : 3710027 : if (!DECL_DECLARED_INLINE_P (callee->decl)
3076 : 860037 : && !opt_for_fn (node->decl, flag_inline_small_functions)
3077 : 3755348 : && !opt_for_fn (node->decl, flag_inline_functions))
3078 : 45256 : continue;
3079 : :
3080 : 3664771 : if (dump_enabled_p ())
3081 : 155 : dump_printf_loc (MSG_NOTE, e->call_stmt,
3082 : : "Considering inline candidate %C.\n",
3083 : : callee);
3084 : :
3085 : 3664771 : if (!can_early_inline_edge_p (e))
3086 : 90389 : continue;
3087 : :
3088 : 3574382 : if (e->recursive_p ())
3089 : : {
3090 : 8259 : if (dump_enabled_p ())
3091 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
3092 : : " Not inlining: recursive call.\n");
3093 : 8259 : continue;
3094 : : }
3095 : :
3096 : 3566123 : if (!want_early_inline_function_p (e))
3097 : 1061877 : continue;
3098 : :
3099 : 2504246 : if (dump_enabled_p ())
3100 : 102 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
3101 : : " Inlining %C into %C.\n",
3102 : : callee, e->caller);
3103 : 2504246 : inline_call (e, true, NULL, NULL, false);
3104 : 2504246 : inlined = true;
3105 : : }
3106 : :
3107 : 2421631 : if (inlined)
3108 : 853125 : ipa_update_overall_fn_summary (node);
3109 : :
3110 : 2421631 : return inlined;
3111 : : }
3112 : :
3113 : : /* With auto-fdo inline all functions that was inlined in the train run
3114 : : and inlining seems useful. That is there are enough samples in the callee
3115 : : function.
3116 : :
3117 : : Unlike early inlining, we inline recursively. Profile data is also used
3118 : : to produce speculative calls which we then inline. In the case some
3119 : : speculatin was introduced, set SPECULATIVE_CALLS. */
3120 : :
3121 : : static bool
3122 : 2423140 : inline_functions_by_afdo (struct cgraph_node *node, bool *speculative_calls)
3123 : : {
3124 : 2423140 : if (!flag_auto_profile || !flag_auto_profile_inlining)
3125 : : return false;
3126 : 0 : struct cgraph_edge *e;
3127 : 0 : bool inlined = false;
3128 : :
3129 : 0 : *speculative_calls |= afdo_vpt_for_early_inline (node);
3130 : :
3131 : 0 : cgraph_edge *next;
3132 : 0 : for (e = node->callees; e; e = next)
3133 : : {
3134 : 0 : next = e->next_callee;
3135 : :
3136 : 0 : if (!e->inline_failed)
3137 : : {
3138 : 0 : inlined |= inline_functions_by_afdo (e->callee, speculative_calls);
3139 : 0 : continue;
3140 : : }
3141 : 0 : if (!afdo_callsite_hot_enough_for_early_inline (e))
3142 : : {
3143 : : /* If we do not want to inline, remove the speculation. */
3144 : 0 : if (e->speculative)
3145 : 0 : cgraph_edge::resolve_speculation (e);
3146 : 0 : continue;
3147 : : }
3148 : :
3149 : 0 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
3150 : 0 : if (callee->definition
3151 : 0 : && !ipa_fn_summaries->get (callee))
3152 : 0 : compute_fn_summary (callee, true);
3153 : :
3154 : 0 : if (!can_early_inline_edge_p (e))
3155 : : {
3156 : 0 : if (dump_enabled_p ())
3157 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
3158 : : "Not inlining %C -> %C using auto-profile, %s.",
3159 : : e->caller, e->callee,
3160 : : cgraph_inline_failed_string (e->inline_failed));
3161 : : /* If we do not want to inline, remove the speculation. */
3162 : 0 : if (e->speculative)
3163 : 0 : cgraph_edge::resolve_speculation (e);
3164 : 0 : continue;
3165 : : }
3166 : : /* We can handle recursive inlining by first producing
3167 : : inline clone. */
3168 : 0 : if (e->recursive_p ())
3169 : : {
3170 : 0 : if (dump_enabled_p ())
3171 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
3172 : : "Not inlining %C recursively"
3173 : : " using auto-profile.\n",
3174 : : e->callee);
3175 : : /* If we do not want to inline, remove the speculation. */
3176 : 0 : if (e->speculative)
3177 : 0 : cgraph_edge::resolve_speculation (e);
3178 : 0 : continue;
3179 : : }
3180 : :
3181 : 0 : if (dump_enabled_p ())
3182 : : {
3183 : 0 : if (e->caller->inlined_to)
3184 : 0 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
3185 : : "Inlining using auto-profile %C into %C "
3186 : : "which is transitively inlined to %C.\n",
3187 : : callee, e->caller, e->caller->inlined_to);
3188 : : else
3189 : 0 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
3190 : : "Inlining using auto-profile %C into %C.\n",
3191 : : callee, e->caller);
3192 : : }
3193 : 0 : if (e->speculative)
3194 : 0 : remove_afdo_speculative_target (e);
3195 : 0 : inline_call (e, true, NULL, NULL, false);
3196 : 0 : inlined |= inline_functions_by_afdo (e->callee, speculative_calls);
3197 : 0 : inlined = true;
3198 : : }
3199 : :
3200 : 0 : if (inlined && !node->inlined_to)
3201 : 0 : ipa_update_overall_fn_summary (node);
3202 : :
3203 : : return inlined;
3204 : : }
3205 : :
3206 : : unsigned int
3207 : 2878816 : early_inliner (function *fun)
3208 : : {
3209 : 2878816 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
3210 : 2878816 : struct cgraph_edge *edge;
3211 : 2878816 : unsigned int todo = 0;
3212 : 2878816 : int iterations = 0;
3213 : 2878816 : bool inlined = false;
3214 : :
3215 : 2878816 : if (seen_error ())
3216 : : return 0;
3217 : :
3218 : : /* Do nothing if datastructures for ipa-inliner are already computed. This
3219 : : happens when some pass decides to construct new function and
3220 : : cgraph_add_new_function calls lowering passes and early optimization on
3221 : : it. This may confuse ourself when early inliner decide to inline call to
3222 : : function clone, because function clones don't have parameter list in
3223 : : ipa-prop matching their signature. */
3224 : 2878810 : if (ipa_node_params_sum)
3225 : : return 0;
3226 : :
3227 : 2878802 : if (flag_checking)
3228 : 2878772 : node->verify ();
3229 : 2878802 : node->remove_all_references ();
3230 : :
3231 : : /* Even when not optimizing or not inlining inline always-inline
3232 : : functions. */
3233 : 2878802 : inlined = inline_always_inline_functions (node);
3234 : :
3235 : 2878802 : if (!optimize
3236 : 2447692 : || flag_no_inline
3237 : 2423181 : || !flag_early_inlining)
3238 : : ;
3239 : 2421672 : else if (lookup_attribute ("flatten",
3240 : 2421672 : DECL_ATTRIBUTES (node->decl)) != NULL)
3241 : : {
3242 : : /* When the function is marked to be flattened, recursively inline
3243 : : all calls in it. */
3244 : 135 : if (dump_enabled_p ())
3245 : 0 : dump_printf (MSG_OPTIMIZED_LOCATIONS,
3246 : : "Flattening %C\n", node);
3247 : 135 : flatten_function (node, true, true);
3248 : 135 : inlined = true;
3249 : : }
3250 : : else
3251 : : {
3252 : : /* If some always_inline functions was inlined, apply the changes.
3253 : : This way we will not account always inline into growth limits and
3254 : : moreover we will inline calls from always inlines that we skipped
3255 : : previously because of conditional in can_early_inline_edge_p
3256 : : which prevents some inlining to always_inline. */
3257 : 2421537 : if (inlined)
3258 : : {
3259 : 296222 : timevar_push (TV_INTEGRATION);
3260 : 296222 : todo |= optimize_inline_calls (current_function_decl);
3261 : : /* optimize_inline_calls call above might have introduced new
3262 : : statements that don't have inline parameters computed. */
3263 : 1420567 : for (edge = node->callees; edge; edge = edge->next_callee)
3264 : : {
3265 : : /* We can enounter not-yet-analyzed function during
3266 : : early inlining on callgraphs with strongly
3267 : : connected components. */
3268 : 1124345 : ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3269 : 1124345 : es->call_stmt_size
3270 : 1124345 : = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3271 : 1124345 : es->call_stmt_time
3272 : 1124345 : = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3273 : : }
3274 : 296222 : ipa_update_overall_fn_summary (node);
3275 : 296222 : inlined = false;
3276 : 296222 : timevar_pop (TV_INTEGRATION);
3277 : : }
3278 : : /* We iterate incremental inlining to get trivial cases of indirect
3279 : : inlining. */
3280 : 3274662 : while (iterations < opt_for_fn (node->decl,
3281 : : param_early_inliner_max_iterations))
3282 : : {
3283 : 2421631 : bool inlined = early_inline_small_functions (node);
3284 : 2421631 : bool speculative_calls = false;
3285 : 2421631 : inlined |= inline_functions_by_afdo (node, &speculative_calls);
3286 : 2421631 : if (!inlined)
3287 : : break;
3288 : 853125 : timevar_push (TV_INTEGRATION);
3289 : 853125 : if (speculative_calls)
3290 : : {
3291 : 0 : cgraph_edge *next;
3292 : 0 : for (cgraph_edge *e = node->callees; e; e = next)
3293 : : {
3294 : 0 : next = e->next_callee;
3295 : 0 : cgraph_edge::redirect_call_stmt_to_callee (e);
3296 : : }
3297 : : }
3298 : 853125 : todo |= optimize_inline_calls (current_function_decl);
3299 : :
3300 : : /* Technically we ought to recompute inline parameters so the new
3301 : : iteration of early inliner works as expected. We however have
3302 : : values approximately right and thus we only need to update edge
3303 : : info that might be cleared out for newly discovered edges. */
3304 : 3423245 : for (edge = node->callees; edge; edge = edge->next_callee)
3305 : : {
3306 : : /* We have no summary for new bound store calls yet. */
3307 : 2570120 : ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3308 : 2570120 : es->call_stmt_size
3309 : 2570120 : = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3310 : 2570120 : es->call_stmt_time
3311 : 2570120 : = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3312 : : }
3313 : 853125 : if (iterations < opt_for_fn (node->decl,
3314 : 853125 : param_early_inliner_max_iterations) - 1)
3315 : 94 : ipa_update_overall_fn_summary (node);
3316 : 853125 : timevar_pop (TV_INTEGRATION);
3317 : 853125 : iterations++;
3318 : 853125 : inlined = false;
3319 : : }
3320 : 2421537 : if (dump_file)
3321 : 203 : fprintf (dump_file, "Iterations: %i\n", iterations);
3322 : : }
3323 : :
3324 : : /* do AFDO inlining in case it was not done as part of early inlining. */
3325 : 2878802 : if (optimize
3326 : 2447692 : && !flag_no_inline
3327 : 2423181 : && !flag_early_inlining
3328 : 1509 : && flag_auto_profile_inlining)
3329 : : {
3330 : 1509 : bool speculative_calls = false;
3331 : 1509 : inlined |= inline_functions_by_afdo (node, &speculative_calls);
3332 : 1509 : if (speculative_calls)
3333 : : {
3334 : 0 : cgraph_edge *next;
3335 : 0 : for (cgraph_edge *e = node->callees; e; e = next)
3336 : : {
3337 : 0 : next = e->next_callee;
3338 : 0 : cgraph_edge::redirect_call_stmt_to_callee (e);
3339 : : }
3340 : : }
3341 : : }
3342 : :
3343 : 2878802 : if (inlined)
3344 : : {
3345 : 21960 : timevar_push (TV_INTEGRATION);
3346 : 21960 : todo |= optimize_inline_calls (current_function_decl);
3347 : 21960 : timevar_pop (TV_INTEGRATION);
3348 : : }
3349 : :
3350 : 2878802 : fun->always_inline_functions_inlined = true;
3351 : :
3352 : 2878802 : return todo;
3353 : : }
3354 : :
3355 : : /* Do inlining of small functions. Doing so early helps profiling and other
3356 : : passes to be somewhat more effective and avoids some code duplication in
3357 : : later real inlining pass for testcases with very many function calls. */
3358 : :
3359 : : namespace {
3360 : :
3361 : : const pass_data pass_data_early_inline =
3362 : : {
3363 : : GIMPLE_PASS, /* type */
3364 : : "einline", /* name */
3365 : : OPTGROUP_INLINE, /* optinfo_flags */
3366 : : TV_EARLY_INLINING, /* tv_id */
3367 : : PROP_ssa, /* properties_required */
3368 : : 0, /* properties_provided */
3369 : : 0, /* properties_destroyed */
3370 : : 0, /* todo_flags_start */
3371 : : 0, /* todo_flags_finish */
3372 : : };
3373 : :
3374 : : class pass_early_inline : public gimple_opt_pass
3375 : : {
3376 : : public:
3377 : 284673 : pass_early_inline (gcc::context *ctxt)
3378 : 569346 : : gimple_opt_pass (pass_data_early_inline, ctxt)
3379 : : {}
3380 : :
3381 : : /* opt_pass methods: */
3382 : : unsigned int execute (function *) final override;
3383 : :
3384 : : }; // class pass_early_inline
3385 : :
3386 : : unsigned int
3387 : 2878816 : pass_early_inline::execute (function *fun)
3388 : : {
3389 : 2878816 : return early_inliner (fun);
3390 : : }
3391 : :
3392 : : } // anon namespace
3393 : :
3394 : : gimple_opt_pass *
3395 : 284673 : make_pass_early_inline (gcc::context *ctxt)
3396 : : {
3397 : 284673 : return new pass_early_inline (ctxt);
3398 : : }
3399 : :
3400 : : namespace {
3401 : :
3402 : : const pass_data pass_data_ipa_inline =
3403 : : {
3404 : : IPA_PASS, /* type */
3405 : : "inline", /* name */
3406 : : OPTGROUP_INLINE, /* optinfo_flags */
3407 : : TV_IPA_INLINING, /* tv_id */
3408 : : 0, /* properties_required */
3409 : : 0, /* properties_provided */
3410 : : 0, /* properties_destroyed */
3411 : : 0, /* todo_flags_start */
3412 : : ( TODO_dump_symtab ), /* todo_flags_finish */
3413 : : };
3414 : :
3415 : : class pass_ipa_inline : public ipa_opt_pass_d
3416 : : {
3417 : : public:
3418 : 284673 : pass_ipa_inline (gcc::context *ctxt)
3419 : : : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3420 : : NULL, /* generate_summary */
3421 : : NULL, /* write_summary */
3422 : : NULL, /* read_summary */
3423 : : NULL, /* write_optimization_summary */
3424 : : NULL, /* read_optimization_summary */
3425 : : NULL, /* stmt_fixup */
3426 : : 0, /* function_transform_todo_flags_start */
3427 : : inline_transform, /* function_transform */
3428 : 284673 : NULL) /* variable_transform */
3429 : 284673 : {}
3430 : :
3431 : : /* opt_pass methods: */
3432 : 227988 : unsigned int execute (function *) final override { return ipa_inline (); }
3433 : :
3434 : : }; // class pass_ipa_inline
3435 : :
3436 : : } // anon namespace
3437 : :
3438 : : ipa_opt_pass_d *
3439 : 284673 : make_pass_ipa_inline (gcc::context *ctxt)
3440 : : {
3441 : 284673 : return new pass_ipa_inline (ctxt);
3442 : : }
|