Branch data Line data Source code
1 : : /* Inlining decision heuristics.
2 : : Copyright (C) 2003-2025 Free Software Foundation, Inc.
3 : : Contributed by Jan Hubicka
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : /* Inlining decision heuristics
22 : :
23 : : The implementation of inliner is organized as follows:
24 : :
25 : : inlining heuristics limits
26 : :
27 : : can_inline_edge_p allows checking that a particular inlining is allowed
28 : : by the limits specified by the user (allowed function growth, unit
29 : : growth and so on).
30 : :
31 : : Functions are inlined when it is obvious the result is profitable (such
32 : : as functions called once or when inlining reduces code size).
33 : : In addition to that we perform inlining of small functions and recursive
34 : : inlining.
35 : :
36 : : inlining heuristics
37 : :
38 : : The inliner itself is split into two passes:
39 : :
40 : : pass_early_inlining
41 : :
42 : : A simple local inlining pass inlining callees into the current function.
43 : : This pass makes no use of whole unit analysis and thus it can make only
44 : : very simple decisions based on local properties.
45 : :
46 : : The strength of the pass is that it is run in topological order
47 : : (reverse postorder) on the callgraph. Functions are converted into SSA
48 : : form just before this pass and optimized subsequently. As a result, the
49 : : callees of the function seen by the early inliner were already optimized,
50 : : and the results of early inlining add a lot of optimization opportunities
51 : : for local optimization.
52 : :
53 : : The pass handles the obvious inlining decisions within the compilation
54 : : unit - inlining auto inline functions, inlining for size and
55 : : flattening.
56 : :
57 : : The main strength of the pass is its ability to eliminate the abstraction
58 : : penalty in C++ code (via a combination of inlining and early
59 : : optimization) and thus improve the quality of analysis done by the real
60 : : IPA optimizers.
61 : :
62 : : Because of the lack of whole unit knowledge, the pass cannot really make
63 : : good code size/performance tradeoffs. It however does very simple
64 : : speculative inlining allowing code size to grow by
65 : : EARLY_INLINING_INSNS when the callee is a leaf function. In this case the
66 : : optimizations performed later are very likely to eliminate the cost.
67 : :
68 : : pass_ipa_inline
69 : :
70 : : This is the real inliner able to handle inlining with whole program
71 : : knowledge. It performs the following steps:
72 : :
73 : : 1) inlining of small functions. This is implemented by a greedy
74 : : algorithm ordering all inlinable cgraph edges by their badness and
75 : : inlining them in this order as long as the inline limits allow doing so.
76 : :
77 : : This heuristic is not very good at inlining recursive calls. Recursive
78 : : calls can be inlined with results similar to loop unrolling. To do so,
79 : : a special purpose recursive inliner is executed on the function when a
80 : : recursive edge is met as a viable candidate.
81 : :
82 : : 2) Unreachable functions are removed from the callgraph. Inlining leads
83 : : to devirtualization and other modifications of the callgraph, so functions
84 : : may become unreachable during the process. Also functions declared as
85 : : extern inline or virtual functions are removed, since after inlining
86 : : we no longer need the offline bodies.
87 : :
88 : : 3) Functions called once and not exported from the unit are inlined.
89 : : This should almost always lead to a reduction of code size by eliminating
90 : : the need for an offline copy of the function. */
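As a rough, self-contained illustration of step 1 (the badness-ordered greedy loop), the sketch below models the idea with a plain priority queue. All types and helpers in it (call_edge, limits_allow_p, inline_and_requeue) are made-up stand-ins rather than GCC's cgraph API; the real pass keys a Fibonacci heap by inline_badness and recomputes the keys of edges whose caller changed after every inlining.

#include <queue>
#include <vector>

struct call_edge
{
  int uid;		/* stable tie breaker */
  double badness;	/* roughly -(benefit / cost); smaller is better */
};

struct badness_greater
{
  bool operator() (const call_edge &a, const call_edge &b) const
  {
    if (a.badness != b.badness)
      return a.badness > b.badness;	/* min-heap on badness */
    return a.uid > b.uid;		/* deterministic order for ties */
  }
};

/* Made-up stand-ins for can_inline_edge_by_limits_p, inline_call and
   update_caller_keys.  */
static bool limits_allow_p (const call_edge &) { return true; }
static std::vector<call_edge> inline_and_requeue (const call_edge &) { return {}; }

static void
greedy_inline_small_functions (std::priority_queue<call_edge,
			       std::vector<call_edge>, badness_greater> &heap)
{
  while (!heap.empty ())
    {
      call_edge e = heap.top ();
      heap.pop ();
      if (!limits_allow_p (e))
	continue;
      /* Inlining changes the caller's size and time estimates, so affected
	 edges re-enter the queue with recomputed badness.  */
      for (const call_edge &updated : inline_and_requeue (e))
	heap.push (updated);
    }
}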
91 : :
92 : : #include "config.h"
93 : : #include "system.h"
94 : : #include "coretypes.h"
95 : : #include "backend.h"
96 : : #include "target.h"
97 : : #include "rtl.h"
98 : : #include "tree.h"
99 : : #include "gimple.h"
100 : : #include "alloc-pool.h"
101 : : #include "tree-pass.h"
102 : : #include "gimple-ssa.h"
103 : : #include "cgraph.h"
104 : : #include "lto-streamer.h"
105 : : #include "trans-mem.h"
106 : : #include "calls.h"
107 : : #include "tree-inline.h"
108 : : #include "profile.h"
109 : : #include "symbol-summary.h"
110 : : #include "tree-vrp.h"
111 : : #include "sreal.h"
112 : : #include "ipa-cp.h"
113 : : #include "ipa-prop.h"
114 : : #include "ipa-fnsummary.h"
115 : : #include "ipa-inline.h"
116 : : #include "ipa-utils.h"
117 : : #include "auto-profile.h"
118 : : #include "builtins.h"
119 : : #include "fibonacci_heap.h"
120 : : #include "stringpool.h"
121 : : #include "attribs.h"
122 : : #include "asan.h"
123 : : #include "ipa-strub.h"
124 : : #include "ipa-modref-tree.h"
125 : : #include "ipa-modref.h"
126 : :
127 : : /* The inliner uses a greedy algorithm to inline calls in priority order.
128 : : Badness is used as the key in a Fibonacci heap and roughly corresponds
129 : : to the negation of the benefit-to-cost ratio.
130 : : In case multiple calls have the same priority we want to stabilize the
131 : : outcomes, for which we use the edge uids. */
132 : : class inline_badness
133 : : {
134 : : public:
135 : : sreal badness;
136 : : int uid;
137 : 994213 : inline_badness ()
138 : 767529 : : badness (sreal::min ()), uid (0)
139 : : {
140 : : }
141 : 2891073 : inline_badness (cgraph_edge *e, sreal b)
142 : 17089 : : badness (b), uid (e->get_uid ())
143 : : {
144 : : }
145 : 688662 : bool operator<= (const inline_badness &other)
146 : : {
147 : 688662 : if (badness != other.badness)
148 : 688662 : return badness <= other.badness;
149 : 0 : return uid <= other.uid;
150 : : }
151 : 767529 : bool operator== (const inline_badness &other)
152 : : {
153 : 1401513 : return badness == other.badness && uid == other.uid;
154 : : }
155 : 0 : bool operator!= (const inline_badness &other)
156 : : {
157 : 767529 : return badness != other.badness || uid != other.uid;
158 : : }
159 : 22166118 : bool operator< (const inline_badness &other)
160 : : {
161 : 22166118 : if (badness != other.badness)
162 : 18638349 : return badness < other.badness;
163 : 3527769 : return uid < other.uid;
164 : : }
165 : 10078282 : bool operator> (const inline_badness &other)
166 : : {
167 : 10078282 : if (badness != other.badness)
168 : 8374317 : return badness > other.badness;
169 : 1703965 : return uid > other.uid;
170 : : }
171 : : };
172 : :
173 : : typedef fibonacci_heap <inline_badness, cgraph_edge> edge_heap_t;
174 : : typedef fibonacci_node <inline_badness, cgraph_edge> edge_heap_node_t;
175 : :
176 : : /* Statistics we collect about inlining algorithm. */
177 : : static int overall_size;
178 : : static profile_count max_count;
179 : : static profile_count spec_rem;
180 : :
181 : : /* Return false when inlining edge E would lead to violating
182 : : limits on function unit growth or stack usage growth.
183 : :
184 : : The relative function body growth limit is present generally
185 : : to avoid problems with non-linear behavior of the compiler.
186 : : To allow inlining huge functions into a tiny wrapper, the limit
187 : : is always based on the bigger of the two functions considered.
188 : :
189 : : For stack growth limits we always base the growth on the stack usage
190 : : of the callers. We want to prevent applications from segfaulting
191 : : on stack overflow when functions with huge stack frames get
192 : : inlined. */
193 : :
194 : : static bool
195 : 5586667 : caller_growth_limits (struct cgraph_edge *e)
196 : : {
197 : 5586667 : struct cgraph_node *to = e->caller;
198 : 5586667 : struct cgraph_node *what = e->callee->ultimate_alias_target ();
199 : 5586667 : int newsize;
200 : 5586667 : int limit = 0;
201 : 5586667 : HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
202 : 5586667 : ipa_size_summary *outer_info = ipa_size_summaries->get (to);
203 : :
204 : : /* Look for the function e->caller is inlined into. While doing
205 : : so, work out the largest function body on the way. As
206 : : described above, we want to base our function growth
207 : : limits on that: not on the self size of the
208 : : outer function, nor on the self size of the inline code
209 : : we immediately inline into. This is the most relaxed
210 : : interpretation of the rule "do not grow large functions
211 : : too much in order to prevent the compiler from exploding". */
212 : 7856493 : while (true)
213 : : {
214 : 6721580 : ipa_size_summary *size_info = ipa_size_summaries->get (to);
215 : 6721580 : if (limit < size_info->self_size)
216 : : limit = size_info->self_size;
217 : 6721580 : if (stack_size_limit < size_info->estimated_self_stack_size)
218 : : stack_size_limit = size_info->estimated_self_stack_size;
219 : 6721580 : if (to->inlined_to)
220 : 1134913 : to = to->callers->caller;
221 : : else
222 : : break;
223 : 1134913 : }
224 : :
225 : 5586667 : ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
226 : 5586667 : ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
227 : :
228 : 5586667 : if (limit < what_size_info->self_size)
229 : : limit = what_size_info->self_size;
230 : :
231 : 5586667 : limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
232 : :
233 : : /* Check the size after inlining against the function limits. But allow
234 : : the function to shrink if it went over the limits by forced inlining. */
235 : 5586667 : newsize = estimate_size_after_inlining (to, e);
236 : 5586667 : if (newsize >= ipa_size_summaries->get (what)->size
237 : 5437708 : && newsize > opt_for_fn (to->decl, param_large_function_insns)
238 : 5822185 : && newsize > limit)
239 : : {
240 : 2534 : e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
241 : 2534 : return false;
242 : : }
243 : :
244 : 5584133 : if (!what_info->estimated_stack_size)
245 : : return true;
246 : :
247 : : /* FIXME: Stack size limit often prevents inlining in Fortran programs
248 : : due to large i/o datastructures used by the Fortran front-end.
249 : : We ought to ignore this limit when we know that the edge is executed
250 : : on every invocation of the caller (i.e. its call statement dominates
251 : : exit block). We do not track this information, yet. */
252 : 1829616 : stack_size_limit += ((gcov_type)stack_size_limit
253 : 914808 : * opt_for_fn (to->decl, param_stack_frame_growth)
254 : 914808 : / 100);
255 : :
256 : 914808 : inlined_stack = (ipa_get_stack_frame_offset (to)
257 : 914808 : + outer_info->estimated_self_stack_size
258 : 914808 : + what_info->estimated_stack_size);
259 : : /* Check new stack consumption with stack consumption at the place
260 : : stack is used. */
261 : 914808 : if (inlined_stack > stack_size_limit
262 : : /* If the function already has large stack usage from a sibling
263 : : inline call, we can inline, too.
264 : : This bit overoptimistically assumes that we are good at stack
265 : : packing. */
266 : 276331 : && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
267 : 1180321 : && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
268 : : {
269 : 55548 : e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
270 : 55548 : return false;
271 : : }
272 : : return true;
273 : : }
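A small worked example of the growth bound computed by caller_growth_limits; the sizes and the 100% growth parameter below are made-up values, not GCC defaults.

#include <cassert>

static int
growth_bound (int largest_body_on_path, int callee_self_size,
	      int large_function_growth_percent)
{
  int limit = largest_body_on_path;
  if (limit < callee_self_size)
    limit = callee_self_size;
  /* Same relaxation as above: the bound may grow by the given percentage.  */
  return limit + limit * large_function_growth_percent / 100;
}

int
main ()
{
  /* Largest body on the inlined_to chain is 400, the callee is 600: the
     bound is based on the bigger of the two, then relaxed by 100%.  */
  assert (growth_bound (400, 600, 100) == 1200);
  return 0;
}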
274 : :
275 : : /* Dump info about why inlining has failed. */
276 : :
277 : : static void
278 : 4759737 : report_inline_failed_reason (struct cgraph_edge *e)
279 : : {
280 : 4759737 : if (dump_enabled_p ())
281 : : {
282 : 2389 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
283 : : " not inlinable: %C -> %C, %s\n",
284 : : e->caller, e->callee,
285 : : cgraph_inline_failed_string (e->inline_failed));
286 : 2389 : if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
287 : 2389 : || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
288 : 2 : && e->caller->lto_file_data
289 : 2389 : && e->callee->ultimate_alias_target ()->lto_file_data)
290 : : {
291 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
292 : : " LTO objects: %s, %s\n",
293 : 0 : e->caller->lto_file_data->file_name,
294 : 0 : e->callee->ultimate_alias_target ()->lto_file_data->file_name);
295 : : }
296 : 2389 : if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
297 : 2 : if (dump_file)
298 : 0 : cl_target_option_print_diff
299 : 0 : (dump_file, 2, target_opts_for_fn (e->caller->decl),
300 : 0 : target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
301 : 2389 : if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
302 : 0 : if (dump_file)
303 : 0 : cl_optimization_print_diff
304 : 0 : (dump_file, 2, opts_for_fn (e->caller->decl),
305 : 0 : opts_for_fn (e->callee->ultimate_alias_target ()->decl));
306 : : }
307 : 4759737 : }
308 : :
309 : : /* Decide whether sanitizer-related attributes allow inlining. */
310 : :
311 : : static bool
312 : 8137465 : sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
313 : : {
314 : 8137465 : if (!caller || !callee)
315 : : return true;
316 : :
317 : : /* Follow clang and allow inlining for always_inline functions. */
318 : 8137465 : if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
319 : : return true;
320 : :
321 : 7661746 : const sanitize_code codes[] =
322 : : {
323 : : SANITIZE_ADDRESS,
324 : : SANITIZE_THREAD,
325 : : SANITIZE_UNDEFINED,
326 : : SANITIZE_UNDEFINED_NONDEFAULT,
327 : : SANITIZE_POINTER_COMPARE,
328 : : SANITIZE_POINTER_SUBTRACT
329 : : };
330 : :
331 : 53631182 : for (unsigned i = 0; i < ARRAY_SIZE (codes); i++)
332 : 91939268 : if (sanitize_flags_p (codes[i], caller)
333 : 45969634 : != sanitize_flags_p (codes[i], callee))
334 : : return false;
335 : :
336 : 7661548 : if (sanitize_coverage_p (caller) != sanitize_coverage_p (callee))
337 : : return false;
338 : :
339 : : return true;
340 : : }
341 : :
342 : : /* Used for flags where it is safe to inline when caller's value is
343 : : greater than callee's. */
344 : : #define check_maybe_up(flag) \
345 : : (opts_for_fn (caller->decl)->x_##flag \
346 : : != opts_for_fn (callee->decl)->x_##flag \
347 : : && (!always_inline \
348 : : || opts_for_fn (caller->decl)->x_##flag \
349 : : < opts_for_fn (callee->decl)->x_##flag))
350 : : /* Used for flags where it is safe to inline when caller's value is
351 : : smaller than callee's. */
352 : : #define check_maybe_down(flag) \
353 : : (opts_for_fn (caller->decl)->x_##flag \
354 : : != opts_for_fn (callee->decl)->x_##flag \
355 : : && (!always_inline \
356 : : || opts_for_fn (caller->decl)->x_##flag \
357 : : > opts_for_fn (callee->decl)->x_##flag))
358 : : /* Used for flags where exact match is needed for correctness. */
359 : : #define check_match(flag) \
360 : : (opts_for_fn (caller->decl)->x_##flag \
361 : : != opts_for_fn (callee->decl)->x_##flag)
362 : :
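A self-contained model of the check_maybe_up semantics (plain ints stand in for the per-function option fields; this is not GCC code). The returned value means "this flag mismatch blocks inlining": a differing value blocks unless the callee is always_inline and the caller's value is on the safe (greater) side.

#include <cassert>

static bool
maybe_up_blocks (int caller_val, int callee_val, bool always_inline)
{
  return caller_val != callee_val
	 && (!always_inline || caller_val < callee_val);
}

int
main ()
{
  assert (!maybe_up_blocks (1, 1, false));  /* equal values never block */
  assert (!maybe_up_blocks (2, 1, true));   /* caller > callee ok for always_inline */
  assert (maybe_up_blocks (2, 1, false));   /* but still a mismatch otherwise */
  return 0;
}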
363 : : /* Decide if we can inline the edge and possibly update
364 : : inline_failed reason.
365 : : We check whether inlining is possible at all and whether
366 : : caller growth limits allow doing so.
367 : :
368 : : If REPORT is true, output the reason to the dump file. */
369 : :
370 : : static bool
371 : 12130123 : can_inline_edge_p (struct cgraph_edge *e, bool report,
372 : : bool early = false)
373 : : {
374 : 12130123 : gcc_checking_assert (e->inline_failed);
375 : :
376 : 12130123 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
377 : : {
378 : 3519086 : if (report)
379 : 3489187 : report_inline_failed_reason (e);
380 : 3519086 : return false;
381 : : }
382 : :
383 : 8611037 : bool inlinable = true;
384 : 8611037 : enum availability avail;
385 : 17222074 : cgraph_node *caller = (e->caller->inlined_to
386 : 8611037 : ? e->caller->inlined_to : e->caller);
387 : 8611037 : cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
388 : :
389 : 8611037 : if (!callee->definition)
390 : : {
391 : 838 : e->inline_failed = CIF_BODY_NOT_AVAILABLE;
392 : 838 : inlinable = false;
393 : : }
394 : 8611037 : if (!early && (!opt_for_fn (callee->decl, optimize)
395 : 4911284 : || !opt_for_fn (caller->decl, optimize)))
396 : : {
397 : 305 : e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
398 : 305 : inlinable = false;
399 : : }
400 : 8610732 : else if (callee->calls_comdat_local)
401 : : {
402 : 17990 : e->inline_failed = CIF_USES_COMDAT_LOCAL;
403 : 17990 : inlinable = false;
404 : : }
405 : 8592742 : else if (avail <= AVAIL_INTERPOSABLE)
406 : : {
407 : 126883 : e->inline_failed = CIF_OVERWRITABLE;
408 : 126883 : inlinable = false;
409 : : }
410 : : /* All edges with call_stmt_cannot_inline_p should have inline_failed
411 : : initialized to one of FINAL_ERROR reasons. */
412 : 8465859 : else if (e->call_stmt_cannot_inline_p)
413 : 0 : gcc_unreachable ();
414 : : /* Don't inline if the functions have different EH personalities. */
415 : 8465859 : else if (DECL_FUNCTION_PERSONALITY (caller->decl)
416 : 2383854 : && DECL_FUNCTION_PERSONALITY (callee->decl)
417 : 8465859 : && (DECL_FUNCTION_PERSONALITY (caller->decl)
418 : 192711 : != DECL_FUNCTION_PERSONALITY (callee->decl)))
419 : : {
420 : 0 : e->inline_failed = CIF_EH_PERSONALITY;
421 : 0 : inlinable = false;
422 : : }
423 : : /* TM pure functions should not be inlined into non-TM_pure
424 : : functions. */
425 : 8465859 : else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
426 : : {
427 : 30 : e->inline_failed = CIF_UNSPECIFIED;
428 : 30 : inlinable = false;
429 : : }
430 : : /* Check compatibility of target optimization options. */
431 : 8465829 : else if (!targetm.target_option.can_inline_p (caller->decl,
432 : : callee->decl))
433 : : {
434 : 448 : e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
435 : 448 : inlinable = false;
436 : : }
437 : 8465381 : else if (ipa_fn_summaries->get (callee) == NULL
438 : 8465379 : || !ipa_fn_summaries->get (callee)->inlinable)
439 : : {
440 : 327916 : e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
441 : 327916 : inlinable = false;
442 : : }
443 : : /* Don't inline a function with mismatched sanitization attributes. */
444 : 8137465 : else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
445 : : {
446 : 198 : e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
447 : 198 : inlinable = false;
448 : : }
449 : :
450 : 8611037 : if (inlinable && !strub_inlinable_to_p (callee, caller))
451 : : {
452 : 1098 : e->inline_failed = CIF_UNSPECIFIED;
453 : 1098 : inlinable = false;
454 : : }
455 : 8611037 : if (!inlinable && report)
456 : 469471 : report_inline_failed_reason (e);
457 : : return inlinable;
458 : : }
459 : :
460 : : /* Return inlining_insns_single limit for function N. If HINT or HINT2 is true
461 : : scale up the bound. */
462 : :
463 : : static int
464 : 6939912 : inline_insns_single (cgraph_node *n, bool hint, bool hint2)
465 : : {
466 : 6939912 : if (hint && hint2)
467 : : {
468 : 2295264 : int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
469 : 2295264 : spd = spd * spd;
470 : 2295264 : if (spd > 1000000)
471 : : spd = 1000000;
472 : 2295264 : return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
473 : : }
474 : 4644648 : if (hint || hint2)
475 : 315908 : return opt_for_fn (n->decl, param_max_inline_insns_single)
476 : 315908 : * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
477 : 4328740 : return opt_for_fn (n->decl, param_max_inline_insns_single);
478 : : }
479 : :
480 : : /* Return inlining_insns_auto limit for function N. If HINT or HINT2 is true
481 : : scale up the bound. */
482 : :
483 : : static int
484 : 5357004 : inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
485 : : {
486 : 5357004 : int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
487 : 5357004 : if (hint && hint2)
488 : : {
489 : 1974885 : int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
490 : 1974885 : spd = spd * spd;
491 : 1974885 : if (spd > 1000000)
492 : : spd = 1000000;
493 : 1974885 : return max_inline_insns_auto * spd / 100;
494 : : }
495 : 3382119 : if (hint || hint2)
496 : 1496973 : return max_inline_insns_auto
497 : 1496973 : * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
498 : : return max_inline_insns_auto;
499 : : }
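Numerically, with a made-up base limit of 30 and a hint percentage of 200 (not the release defaults), one matching hint group raises the bound to 30 * 200 / 100 = 60; when both groups match, the percentage is squared (and capped) before being applied, so the bound grows much faster.

#include <cassert>

int
main ()
{
  int base_limit = 30, hint_percent = 200;	/* assumed example values */
  assert (base_limit * hint_percent / 100 == 60);	/* one hint group matched */
  return 0;
}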
500 : :
501 : : enum can_inline_edge_by_limits_flags
502 : : {
503 : : /* True if we are early inlining. */
504 : : CAN_INLINE_EARLY = 1,
505 : : /* Ignore size limits. */
506 : : CAN_INLINE_DISREGARD_LIMITS = 2,
507 : : /* Force size limits (ignore always_inline). This is used for
508 : : recursive inlining where always_inline may lead to inline bombs
509 : : and technically it is nonsensical anyway. */
510 : : CAN_INLINE_FORCE_LIMITS = 4,
511 : : /* Report decision to dump file. */
512 : : CAN_INLINE_REPORT = 8,
513 : : };
514 : :
515 : : /* Decide if we can inline the edge and possibly update
516 : : inline_failed reason.
517 : : We check whether inlining is possible at all and whether
518 : : caller growth limits allow doing so. */
519 : :
520 : : static bool
521 : 6065585 : can_inline_edge_by_limits_p (struct cgraph_edge *e, int flags)
522 : : {
523 : 6065585 : gcc_checking_assert (e->inline_failed);
524 : :
525 : 6065585 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
526 : : {
527 : 210 : if (flags & CAN_INLINE_REPORT)
528 : 210 : report_inline_failed_reason (e);
529 : 210 : return false;
530 : : }
531 : :
532 : 6065375 : bool inlinable = true;
533 : 6065375 : enum availability avail;
534 : 12130750 : cgraph_node *caller = (e->caller->inlined_to
535 : 6065375 : ? e->caller->inlined_to : e->caller);
536 : 6065375 : cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
537 : 6065375 : tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
538 : 6065375 : tree callee_tree
539 : 6065375 : = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
540 : : /* Check if caller growth allows the inlining. */
541 : 6065375 : if (!(flags & CAN_INLINE_DISREGARD_LIMITS)
542 : 6063756 : && ((flags & CAN_INLINE_FORCE_LIMITS)
543 : 6032995 : || (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
544 : 5556044 : && !lookup_attribute ("flatten",
545 : 5556044 : DECL_ATTRIBUTES (caller->decl))))
546 : 11652042 : && !caller_growth_limits (e))
547 : : inlinable = false;
548 : 6007293 : else if (callee->externally_visible
549 : 4076495 : && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
550 : 9729628 : && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
551 : : {
552 : 2 : e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
553 : 2 : inlinable = false;
554 : : }
555 : : /* Don't inline a function with a higher optimization level than the
556 : : caller. FIXME: this is really just the tip of the iceberg of handling
557 : : the optimization attribute. */
558 : 6007291 : else if (caller_tree != callee_tree)
559 : : {
560 : 17716 : bool always_inline =
561 : 17716 : (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
562 : 28573 : && lookup_attribute ("always_inline",
563 : 10857 : DECL_ATTRIBUTES (callee->decl)));
564 : 17716 : ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
565 : 17716 : ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
566 : :
567 : : /* Until GCC 4.9 we did not check the semantics-altering flags
568 : : below and inlined across optimization boundaries.
569 : : Enabling checks below breaks several packages by refusing
570 : : to inline library always_inline functions. See PR65873.
571 : : Disable the check for early inlining for now until a better solution
572 : : is found. */
573 : 17716 : if (always_inline && (flags & CAN_INLINE_EARLY))
574 : : ;
575 : : /* There are some options that change IL semantics which means
576 : : we cannot inline in these cases for correctness reason.
577 : : Not even for always_inline declared functions. */
578 : 6859 : else if (check_match (flag_wrapv)
579 : 6859 : || check_match (flag_trapv)
580 : 6859 : || check_match (flag_pcc_struct_return)
581 : 6859 : || check_maybe_down (optimize_debug)
582 : : /* When the caller or callee does FP math, be sure the FP codegen flags
583 : : are compatible. */
584 : 6853 : || ((caller_info->fp_expressions && callee_info->fp_expressions)
585 : 1251 : && (check_maybe_up (flag_rounding_math)
586 : 1251 : || check_maybe_up (flag_trapping_math)
587 : 1249 : || check_maybe_down (flag_unsafe_math_optimizations)
588 : 1249 : || check_maybe_down (flag_finite_math_only)
589 : 1248 : || check_maybe_up (flag_signaling_nans)
590 : 1248 : || check_maybe_down (flag_cx_limited_range)
591 : 1248 : || check_maybe_up (flag_signed_zeros)
592 : 1248 : || check_maybe_down (flag_associative_math)
593 : 1232 : || check_maybe_down (flag_reciprocal_math)
594 : 1232 : || check_maybe_down (flag_fp_int_builtin_inexact)
595 : : /* Strictly speaking only when the callee contains function
596 : : calls that may end up setting errno. */
597 : 1232 : || check_maybe_up (flag_errno_math)))
598 : : /* We do not want code compiled with exceptions to be
599 : : brought into a non-EH function unless we know that the callee
600 : : does not throw.
601 : : This is tracked by DECL_FUNCTION_PERSONALITY. */
602 : 6834 : || (check_maybe_up (flag_non_call_exceptions)
603 : 0 : && DECL_FUNCTION_PERSONALITY (callee->decl))
604 : 6834 : || (check_maybe_up (flag_exceptions)
605 : 16 : && DECL_FUNCTION_PERSONALITY (callee->decl))
606 : : /* When devirtualization is disabled for the callee, it is not safe
607 : : to inline it as we may have mangled the type info.
608 : : Allow early inlining of always inlines. */
609 : 13693 : || (!(flags & CAN_INLINE_EARLY) && check_maybe_down (flag_devirtualize)))
610 : : {
611 : 33 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
612 : 33 : inlinable = false;
613 : : }
614 : : /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
615 : 6826 : else if (always_inline)
616 : : ;
617 : : /* When user added an attribute to the callee honor it. */
618 : 6826 : else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
619 : 6826 : && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
620 : : {
621 : 2162 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
622 : 2162 : inlinable = false;
623 : : }
624 : : /* If explicit optimize attributes are not used, the mismatch is caused
625 : : by different command line options used to build different units.
626 : : Do not care about COMDAT functions - those are intended to be
627 : : optimized with the optimization flags of the module they are used in.
628 : : Also do not care about mixing up size/speed optimization when
629 : : DECL_DISREGARD_INLINE_LIMITS is set. */
630 : 4664 : else if ((callee->merged_comdat
631 : 0 : && !lookup_attribute ("optimize",
632 : 0 : DECL_ATTRIBUTES (caller->decl)))
633 : 4664 : || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
634 : : ;
635 : : /* If the mismatch is caused by merging two LTO units with different
636 : : optimization flags we want to be a bit nicer. However never inline
637 : : if one of the functions is not optimized at all. */
638 : 4664 : else if (!opt_for_fn (callee->decl, optimize)
639 : 4664 : || !opt_for_fn (caller->decl, optimize))
640 : : {
641 : 0 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
642 : 0 : inlinable = false;
643 : : }
644 : : /* If the callee is optimized for size and the caller is not, allow inlining
645 : : if code shrinks or we are within the param_max_inline_insns_single limit
646 : : and the callee is inline (and thus likely a unified comdat).
647 : : This will allow the caller to run faster. */
648 : 4664 : else if (opt_for_fn (callee->decl, optimize_size)
649 : 4664 : > opt_for_fn (caller->decl, optimize_size))
650 : : {
651 : 120 : int growth = estimate_edge_growth (e);
652 : 120 : if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
653 : 120 : && (!DECL_DECLARED_INLINE_P (callee->decl)
654 : 67 : && growth >= MAX (inline_insns_single (caller, false, false),
655 : : inline_insns_auto (caller, false, false))))
656 : : {
657 : 0 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
658 : 0 : inlinable = false;
659 : : }
660 : : }
661 : : /* If callee is more aggressively optimized for performance than caller,
662 : : we generally want to inline only cheap (runtime wise) functions. */
663 : 4544 : else if (opt_for_fn (callee->decl, optimize_size)
664 : : < opt_for_fn (caller->decl, optimize_size)
665 : 4544 : || (opt_for_fn (callee->decl, optimize)
666 : : > opt_for_fn (caller->decl, optimize)))
667 : : {
668 : 12576 : if (estimate_edge_time (e)
669 : 4192 : >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
670 : : {
671 : 1518 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
672 : 1518 : inlinable = false;
673 : : }
674 : : }
675 : :
676 : : }
677 : :
678 : 61797 : if (!inlinable && (flags & CAN_INLINE_REPORT))
679 : 58379 : report_inline_failed_reason (e);
680 : : return inlinable;
681 : : }
682 : :
683 : :
684 : : /* Return true if the edge E is inlinable during early inlining. */
685 : :
686 : : static bool
687 : 3699746 : can_early_inline_edge_p (struct cgraph_edge *e)
688 : : {
689 : 7399492 : cgraph_node *caller = (e->caller->inlined_to
690 : 3699746 : ? e->caller->inlined_to : e->caller);
691 : 3699746 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
692 : : /* The early inliner might get called at WPA stage when an IPA pass adds a
693 : : new function. In this case we cannot really do any early inlining
694 : : because function bodies are missing. */
695 : 3699746 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
696 : : return false;
697 : 3699507 : if (!gimple_has_body_p (callee->decl))
698 : : {
699 : 0 : e->inline_failed = CIF_BODY_NOT_AVAILABLE;
700 : 0 : return false;
701 : : }
702 : 7399014 : gcc_assert (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
703 : : && gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)));
704 : 3697924 : if ((profile_arc_flag || condition_coverage_flag)
705 : 3701094 : && ((lookup_attribute ("no_profile_instrument_function",
706 : 1587 : DECL_ATTRIBUTES (caller->decl)) == NULL_TREE)
707 : 1587 : != (lookup_attribute ("no_profile_instrument_function",
708 : 3174 : DECL_ATTRIBUTES (callee->decl)) == NULL_TREE)))
709 : : return false;
710 : :
711 : 3699505 : if (!can_inline_edge_p (e, true, true)
712 : 3699505 : || !can_inline_edge_by_limits_p (e, CAN_INLINE_EARLY | CAN_INLINE_REPORT))
713 : 85236 : return false;
714 : : /* When inlining regular functions into always-inline functions
715 : : during early inlining watch for possible inline cycles. */
716 : 3614269 : if (DECL_DISREGARD_INLINE_LIMITS (caller->decl)
717 : 202093 : && lookup_attribute ("always_inline", DECL_ATTRIBUTES (caller->decl))
718 : 3816353 : && (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
719 : 126927 : || !lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee->decl))))
720 : : {
721 : : /* If there are indirect calls, inlining may produce a direct call.
722 : : TODO: We may lift this restriction if we avoid errors on formerly
723 : : indirect calls to always_inline functions. Taking the address
724 : : of an always_inline function is generally a bad idea and should
725 : : have been declared as undefined, but sadly we allow this. */
726 : 75158 : if (caller->indirect_calls || e->callee->indirect_calls)
727 : : return false;
728 : 74075 : ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
729 : 74075 : if (callee_info->safe_to_inline_to_always_inline)
730 : 19289 : return callee_info->safe_to_inline_to_always_inline - 1;
731 : 115185 : for (cgraph_edge *e2 = callee->callees; e2; e2 = e2->next_callee)
732 : : {
733 : 60416 : struct cgraph_node *callee2 = e2->callee->ultimate_alias_target ();
734 : : /* As early inliner runs in RPO order, we will see uninlined
735 : : always_inline calls only in the case of cyclic graphs. */
736 : 60416 : if (DECL_DISREGARD_INLINE_LIMITS (callee2->decl)
737 : 60416 : || lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee2->decl)))
738 : : {
739 : 0 : callee_info->safe_to_inline_to_always_inline = 1;
740 : 0 : return false;
741 : : }
742 : : /* With LTO watch for the case where a function is later replaced
743 : : by an always_inline definition.
744 : : TODO: We may either stop treating noninlined cross-module always
745 : : inlines as errors, or we can extend decl merging to produce a
746 : : syntactic alias and honor always inline only in units where it has
747 : : been declared as such. */
748 : 60416 : if (flag_lto && callee2->externally_visible)
749 : : {
750 : 17 : callee_info->safe_to_inline_to_always_inline = 1;
751 : 17 : return false;
752 : : }
753 : : }
754 : 54769 : callee_info->safe_to_inline_to_always_inline = 2;
755 : : }
756 : : return true;
757 : : }
758 : :
759 : :
760 : : /* Return number of calls in N. Ignore cheap builtins. */
761 : :
762 : : static int
763 : 754587 : num_calls (struct cgraph_node *n)
764 : : {
765 : 754587 : struct cgraph_edge *e;
766 : 754587 : int num = 0;
767 : :
768 : 1479244 : for (e = n->callees; e; e = e->next_callee)
769 : 724657 : if (!is_inexpensive_builtin (e->callee->decl))
770 : 664078 : num++;
771 : 754587 : return num;
772 : : }
773 : :
774 : :
775 : : /* Return true if we are interested in early inlining the small function. */
776 : :
777 : : static bool
778 : 3133503 : want_early_inline_function_p (struct cgraph_edge *e)
779 : : {
780 : 3133503 : bool want_inline = true;
781 : 3133503 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
782 : :
783 : 3133503 : if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
784 : : ;
785 : : /* For AutoFDO, we need to make sure that before profile summary, all
786 : : hot paths' IR look exactly the same as in the profiled binary. As a result,
787 : : in the einliner, we will disregard the size limit and inline those callsites
788 : : that are:
789 : : * inlined in the profiled binary, and
790 : : * the cloned callee has enough samples to be considered "hot". */
791 : 3133496 : else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
792 : : ;
793 : 3133496 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
794 : 3133496 : && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
795 : : {
796 : 153 : e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
797 : 153 : report_inline_failed_reason (e);
798 : 153 : want_inline = false;
799 : : }
800 : : else
801 : : {
802 : : /* First take care of very large functions. */
803 : 3133343 : int min_growth = estimate_min_edge_growth (e), growth = 0;
804 : 3133343 : int n;
805 : 3133343 : int early_inlining_insns = param_early_inlining_insns;
806 : :
807 : 3133343 : if (min_growth > early_inlining_insns)
808 : : {
809 : 394665 : if (dump_enabled_p ())
810 : 40 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
811 : : " will not early inline: %C->%C, "
812 : : "call is cold and code would grow "
813 : : "at least by %i\n",
814 : : e->caller, callee,
815 : : min_growth);
816 : : want_inline = false;
817 : : }
818 : : else
819 : 2738678 : growth = estimate_edge_growth (e);
820 : :
821 : :
822 : 2738678 : if (!want_inline || growth <= param_max_inline_insns_size)
823 : : ;
824 : 1103586 : else if (!e->maybe_hot_p ())
825 : : {
826 : 18214 : if (dump_enabled_p ())
827 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
828 : : " will not early inline: %C->%C, "
829 : : "call is cold and code would grow by %i\n",
830 : : e->caller, callee,
831 : : growth);
832 : : want_inline = false;
833 : : }
834 : 1085372 : else if (growth > early_inlining_insns)
835 : : {
836 : 330785 : if (dump_enabled_p ())
837 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
838 : : " will not early inline: %C->%C, "
839 : : "growth %i exceeds --param early-inlining-insns\n",
840 : : e->caller, callee, growth);
841 : : want_inline = false;
842 : : }
843 : 754587 : else if ((n = num_calls (callee)) != 0
844 : 754587 : && growth * (n + 1) > early_inlining_insns)
845 : : {
846 : 219183 : if (dump_enabled_p ())
847 : 11 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
848 : : " will not early inline: %C->%C, "
849 : : "growth %i exceeds --param early-inlining-insns "
850 : : "divided by number of calls\n",
851 : : e->caller, callee, growth);
852 : : want_inline = false;
853 : : }
854 : : }
855 : 3133503 : return want_inline;
856 : : }
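A quick numeric check of the last early-inlining test above (growth * (n + 1) must stay within --param early-inlining-insns); the limit of 14 is an assumed example value, not necessarily the current default.

#include <cassert>

static bool
early_growth_ok (int growth, int calls_in_callee, int early_insns)
{
  /* A callee that itself still makes calls must be proportionally cheaper.  */
  return calls_in_callee == 0
	 || growth * (calls_in_callee + 1) <= early_insns;
}

int
main ()
{
  assert (early_growth_ok (4, 2, 14));	/* 4 * 3 = 12 <= 14 */
  assert (!early_growth_ok (4, 3, 14));	/* 4 * 4 = 16 > 14 */
  return 0;
}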
857 : :
858 : : /* Compute time of the edge->caller + edge->callee execution when inlining
859 : : does not happen. */
860 : :
861 : : inline sreal
862 : 383166 : compute_uninlined_call_time (struct cgraph_edge *edge,
863 : : sreal uninlined_call_time,
864 : : sreal freq)
865 : : {
866 : 766332 : cgraph_node *caller = (edge->caller->inlined_to
867 : 383166 : ? edge->caller->inlined_to
868 : : : edge->caller);
869 : :
870 : 383166 : if (freq > 0)
871 : 373668 : uninlined_call_time *= freq;
872 : : else
873 : 9498 : uninlined_call_time = uninlined_call_time >> 11;
874 : :
875 : 383166 : sreal caller_time = ipa_fn_summaries->get (caller)->time;
876 : 383166 : return uninlined_call_time + caller_time;
877 : : }
878 : :
879 : : /* Same as compute_uninlined_call_time but compute the time when inlining
880 : : does happen. */
881 : :
882 : : inline sreal
883 : 383166 : compute_inlined_call_time (struct cgraph_edge *edge,
884 : : sreal time,
885 : : sreal freq)
886 : : {
887 : 766332 : cgraph_node *caller = (edge->caller->inlined_to
888 : 383166 : ? edge->caller->inlined_to
889 : : : edge->caller);
890 : 383166 : sreal caller_time = ipa_fn_summaries->get (caller)->time;
891 : :
892 : 383166 : if (freq > 0)
893 : 373668 : time *= freq;
894 : : else
895 : 9498 : time = time >> 11;
896 : :
897 : : /* This calculation should match one in ipa-inline-analysis.cc
898 : : (estimate_edge_size_and_time). */
899 : 383166 : time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
900 : 383166 : time += caller_time;
901 : 383166 : if (time <= 0)
902 : 0 : time = ((sreal) 1) >> 8;
903 : 383166 : gcc_checking_assert (time >= 0);
904 : 383166 : return time;
905 : : }
906 : :
907 : : /* Determine time saved by inlining EDGE of frequency FREQ
908 : : where the callee's runtime w/o inlining is UNINLINED_TIME
909 : : and with inlining is INLINED_TIME. */
910 : :
911 : : inline sreal
912 : 7659674 : inlining_speedup (struct cgraph_edge *edge,
913 : : sreal freq,
914 : : sreal uninlined_time,
915 : : sreal inlined_time)
916 : : {
917 : 7659674 : sreal speedup = uninlined_time - inlined_time;
918 : : /* Handling of call_time should match one in ipa-inline-fnsummary.c
919 : : (estimate_edge_size_and_time). */
920 : 7659674 : sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
921 : :
922 : 7659674 : if (freq > 0)
923 : : {
924 : 7629234 : speedup = (speedup + call_time);
925 : 9550477 : if (freq != 1)
926 : 5707991 : speedup = speedup * freq;
927 : : }
928 : 30440 : else if (freq == 0)
929 : 30440 : speedup = speedup >> 11;
930 : 7659674 : gcc_checking_assert (speedup >= 0);
931 : 7659674 : return speedup;
932 : : }
933 : :
934 : : /* Return true if the speedup for inlining E is bigger than
935 : : param_inline_min_speedup. */
936 : :
937 : : static bool
938 : 383166 : big_speedup_p (struct cgraph_edge *e)
939 : : {
940 : 383166 : sreal unspec_time;
941 : 383166 : sreal spec_time = estimate_edge_time (e, &unspec_time);
942 : 383166 : sreal freq = e->sreal_frequency ();
943 : 383166 : sreal time = compute_uninlined_call_time (e, unspec_time, freq);
944 : 383166 : sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
945 : 766332 : cgraph_node *caller = (e->caller->inlined_to
946 : 383166 : ? e->caller->inlined_to
947 : : : e->caller);
948 : 383166 : int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
949 : :
950 : 383166 : if ((time - inlined_time) * 100 > time * limit)
951 : : return true;
952 : : return false;
953 : : }
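In other words, big_speedup_p asks whether inlining removes more than param_inline_min_speedup percent of the estimated uninlined runtime. The numbers and the 15% threshold below are assumed for illustration only.

#include <cassert>

static bool
big_speedup (double uninlined_time, double inlined_time, int limit_percent)
{
  /* Mirrors the test above: saved time must exceed LIMIT percent of the
     uninlined time.  */
  return (uninlined_time - inlined_time) * 100 > uninlined_time * limit_percent;
}

int
main ()
{
  assert (big_speedup (200.0, 160.0, 15));	/* 20% saved > 15% */
  assert (!big_speedup (200.0, 190.0, 15));	/*  5% saved < 15% */
  return 0;
}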
954 : :
955 : : /* Return true if we are interested in inlining the small function.
956 : : When REPORT is true, report the reason to the dump file. */
957 : :
958 : : static bool
959 : 4320226 : want_inline_small_function_p (struct cgraph_edge *e, bool report)
960 : : {
961 : 4320226 : bool want_inline = true;
962 : 4320226 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
963 : 8640452 : cgraph_node *to = (e->caller->inlined_to
964 : 4320226 : ? e->caller->inlined_to : e->caller);
965 : :
966 : : /* Allow this function to be called before can_inline_edge_p,
967 : : since it's usually cheaper. */
968 : 4320226 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
969 : : want_inline = false;
970 : 4320226 : else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
971 : : ;
972 : 4315142 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
973 : 4315142 : && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
974 : : {
975 : 45130 : e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
976 : 45130 : want_inline = false;
977 : : }
978 : : /* Do a fast and conservative check whether the function can be a good
979 : : inline candidate. */
980 : 4270012 : else if ((!DECL_DECLARED_INLINE_P (callee->decl)
981 : 1974989 : && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
982 : 6244897 : && ipa_fn_summaries->get (callee)->min_size
983 : 1974885 : - ipa_call_summaries->get (e)->call_stmt_size
984 : 1974885 : > inline_insns_auto (e->caller, true, true))
985 : : {
986 : 948 : e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
987 : 948 : want_inline = false;
988 : : }
989 : 4269064 : else if ((DECL_DECLARED_INLINE_P (callee->decl)
990 : 1974041 : || e->count.ipa ().nonzero_p ())
991 : 6564328 : && ipa_fn_summaries->get (callee)->min_size
992 : 2295264 : - ipa_call_summaries->get (e)->call_stmt_size
993 : 2295264 : > inline_insns_single (e->caller, true, true))
994 : : {
995 : 0 : e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
996 : 0 : ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
997 : : : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
998 : 0 : want_inline = false;
999 : : }
1000 : : else
1001 : : {
1002 : 4269064 : int growth = estimate_edge_growth (e);
1003 : 4269064 : ipa_hints hints = estimate_edge_hints (e);
1004 : : /* We have two independent groups of hints. If one matches in each
1005 : : of the groups the limits are increased. If both groups match, the limit
1006 : : is increased even more. */
1007 : 4269064 : bool apply_hints = (hints & (INLINE_HINT_indirect_call
1008 : : | INLINE_HINT_known_hot
1009 : : | INLINE_HINT_loop_iterations
1010 : : | INLINE_HINT_loop_stride));
1011 : 4269064 : bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);
1012 : :
1013 : 4269064 : if (growth <= opt_for_fn (to->decl,
1014 : : param_max_inline_insns_size))
1015 : : ;
1016 : : /* Apply param_max_inline_insns_single limit. Do not do so when
1017 : : hints suggest that inlining the given function is very profitable.
1018 : : Avoid computation of big_speedup_p when it is not necessary to change
1019 : : the outcome of the decision. */
1020 : 4152495 : else if (DECL_DECLARED_INLINE_P (callee->decl)
1021 : 2246848 : && growth >= inline_insns_single (e->caller, apply_hints,
1022 : : apply_hints2)
1023 : 4454256 : && (apply_hints || apply_hints2
1024 : 296630 : || growth >= inline_insns_single (e->caller, true,
1025 : : apply_hints2)
1026 : 151546 : || !big_speedup_p (e)))
1027 : : {
1028 : 301500 : e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
1029 : 301500 : want_inline = false;
1030 : : }
1031 : 3850995 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
1032 : 1905647 : && !opt_for_fn (e->caller->decl, flag_inline_functions)
1033 : 3856305 : && growth >= opt_for_fn (to->decl,
1034 : : param_max_inline_insns_small))
1035 : : {
1036 : : /* growth_positive_p is expensive, always test it last. */
1037 : 5310 : if (growth >= inline_insns_single (e->caller, false, false)
1038 : 5310 : || growth_positive_p (callee, e, growth))
1039 : : {
1040 : 4959 : e->inline_failed = CIF_NOT_DECLARED_INLINED;
1041 : 4959 : want_inline = false;
1042 : : }
1043 : : }
1044 : : /* Apply param_max_inline_insns_auto limit for functions not declared
1045 : : inline. Bypass the limit when speedup seems big. */
1046 : 3845685 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
1047 : 1900337 : && growth >= inline_insns_auto (e->caller, apply_hints,
1048 : : apply_hints2)
1049 : 5003973 : && (apply_hints || apply_hints2
1050 : 1142031 : || growth >= inline_insns_auto (e->caller, true,
1051 : : apply_hints2)
1052 : 231456 : || !big_speedup_p (e)))
1053 : : {
1054 : : /* growth_positive_p is expensive, always test it last. */
1055 : 1141726 : if (growth >= inline_insns_single (e->caller, false, false)
1056 : 1141726 : || growth_positive_p (callee, e, growth))
1057 : : {
1058 : 1055451 : e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
1059 : 1055451 : want_inline = false;
1060 : : }
1061 : : }
1062 : : /* If the call is cold, do not inline when the function body would grow. */
1063 : 2703959 : else if (!e->maybe_hot_p ()
1064 : 2703959 : && (growth >= inline_insns_single (e->caller, false, false)
1065 : 746418 : || growth_positive_p (callee, e, growth)))
1066 : : {
1067 : 681730 : e->inline_failed = CIF_UNLIKELY_CALL;
1068 : 681730 : want_inline = false;
1069 : : }
1070 : : }
1071 : 4320226 : if (!want_inline && report)
1072 : 617293 : report_inline_failed_reason (e);
1073 : 4320226 : return want_inline;
1074 : : }
1075 : :
1076 : : /* EDGE is a self recursive edge.
1077 : : We handle two cases - when function A is inlined into itself
1078 : : or when function A is being inlined into another inliner copy of function
1079 : : A within function B.
1080 : :
1081 : : In the first case OUTER_NODE points to the toplevel copy of A, while
1082 : : in the second case OUTER_NODE points to the outermost copy of A in B.
1083 : :
1084 : : In both cases we want to be extra selective since
1085 : : inlining the call will just cause new recursive calls to appear. */
1086 : :
1087 : : static bool
1088 : 16993 : want_inline_self_recursive_call_p (struct cgraph_edge *edge,
1089 : : struct cgraph_node *outer_node,
1090 : : bool peeling,
1091 : : int depth)
1092 : : {
1093 : 16993 : char const *reason = NULL;
1094 : 16993 : bool want_inline = true;
1095 : 16993 : sreal caller_freq = 1;
1096 : 16993 : int max_depth = opt_for_fn (outer_node->decl,
1097 : : param_max_inline_recursive_depth_auto);
1098 : :
1099 : 16993 : if (DECL_DECLARED_INLINE_P (edge->caller->decl))
1100 : 2224 : max_depth = opt_for_fn (outer_node->decl,
1101 : : param_max_inline_recursive_depth);
1102 : :
1103 : 16993 : if (!edge->maybe_hot_p ())
1104 : : {
1105 : : reason = "recursive call is cold";
1106 : : want_inline = false;
1107 : : }
1108 : 16931 : else if (depth > max_depth)
1109 : : {
1110 : : reason = "--param max-inline-recursive-depth exceeded.";
1111 : : want_inline = false;
1112 : : }
1113 : 14859 : else if (outer_node->inlined_to
1114 : 18070 : && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
1115 : : {
1116 : 0 : reason = "caller frequency is 0";
1117 : 0 : want_inline = false;
1118 : : }
1119 : :
1120 : 0 : if (!want_inline)
1121 : : ;
1122 : : /* Inlining of a self recursive function into a copy of itself within another
1123 : : function is a transformation similar to loop peeling.
1124 : :
1125 : : Peeling is profitable if we can inline enough copies to make the probability
1126 : : of an actual call to the self recursive function very small. Be sure that
1127 : : the probability of recursion is small.
1128 : :
1129 : : We ensure that the frequency of recursing is at most 1 - (1/max_depth).
1130 : : This way the expected number of recursions is at most max_depth. */
1131 : 14859 : else if (peeling)
1132 : : {
1133 : 3211 : sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1134 : 3211 : int i;
1135 : 7184 : for (i = 1; i < depth; i++)
1136 : 3973 : max_prob = max_prob * max_prob;
1137 : 3211 : if (edge->sreal_frequency () >= max_prob * caller_freq)
1138 : : {
1139 : 1498 : reason = "frequency of recursive call is too large";
1140 : 1498 : want_inline = false;
1141 : : }
1142 : : }
1143 : : /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if
1144 : : the recursion depth is large. We reduce function call overhead and increase
1145 : : the chances that things fit in the hardware return predictor.
1146 : :
1147 : : Recursive inlining might however increase the cost of stack frame setup,
1148 : : actually slowing down functions whose recursion tree is wide rather than
1149 : : deep.
1150 : :
1151 : : Deciding reliably on when to do recursive inlining without profile feedback
1152 : : is tricky. For now we disable recursive inlining when the probability of
1153 : : self recursion is low.
1154 : :
1155 : : Recursive inlining of a self recursive call within a loop also results in
1156 : : large loop depths that generally optimize badly. We may want to throttle
1157 : : down inlining in those cases. In particular this seems to happen in one
1158 : : of the libstdc++ rb tree methods. */
1159 : : else
1160 : : {
1161 : 11648 : if (edge->sreal_frequency () * 100
1162 : 11648 : <= caller_freq
1163 : 23296 : * opt_for_fn (outer_node->decl,
1164 : : param_min_inline_recursive_probability))
1165 : : {
1166 : 602 : reason = "frequency of recursive call is too small";
1167 : 602 : want_inline = false;
1168 : : }
1169 : : }
1170 : 16993 : if (!can_inline_edge_by_limits_p (edge, CAN_INLINE_FORCE_LIMITS | CAN_INLINE_REPORT))
1171 : : {
1172 : : reason = "inline limits exceeded for always_inline function";
1173 : : want_inline = false;
1174 : : }
1175 : 16993 : if (!want_inline && dump_enabled_p ())
1176 : 7 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1177 : : " not inlining recursively: %s\n", reason);
1178 : 16993 : return want_inline;
1179 : : }
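The peeling bound above tightens very quickly with depth: at depth d the recursive edge frequency must stay below (1 - 1/max_depth) squared (d - 1) times, i.e. (1 - 1/max_depth)^(2^(d-1)), of the caller frequency. A small sketch with an assumed max_depth of 8:

#include <cstdio>

int
main ()
{
  double max_prob = 1.0 - 1.0 / 8;	/* assumed max_depth of 8 */
  for (int depth = 1; depth <= 4; depth++)
    {
      std::printf ("depth %d: recursive frequency must stay below %.3f"
		   " of the caller frequency\n", depth, max_prob);
      max_prob *= max_prob;		/* squared once per extra depth, as above */
    }
  return 0;
}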
1180 : :
1181 : : /* Return true when NODE has an uninlinable caller;
1182 : : set HAS_HOT_CALL if it has a hot call.
1183 : : Worker for cgraph_for_node_and_aliases. */
1184 : :
1185 : : static bool
1186 : 71337 : check_callers (struct cgraph_node *node, void *has_hot_call)
1187 : : {
1188 : 71337 : struct cgraph_edge *e;
1189 : 123973 : for (e = node->callers; e; e = e->next_caller)
1190 : : {
1191 : 78744 : if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1192 : 78744 : || !opt_for_fn (e->caller->decl, optimize))
1193 : : return true;
1194 : 78744 : if (!can_inline_edge_p (e, true))
1195 : : return true;
1196 : 78717 : if (e->recursive_p ())
1197 : : return true;
1198 : 78717 : if (!can_inline_edge_by_limits_p (e, CAN_INLINE_REPORT))
1199 : : return true;
1200 : : /* Inlining large functions into large loop depths is often harmful because
1201 : : of the register pressure it implies. */
1202 : 52682 : if ((int)ipa_call_summaries->get (e)->loop_depth
1203 : 52682 : > param_inline_functions_called_once_loop_depth)
1204 : : return true;
1205 : : /* Do not produce gigantic functions. */
1206 : 97123 : if (estimate_size_after_inlining (e->caller->inlined_to ?
1207 : : e->caller->inlined_to : e->caller, e)
1208 : 52682 : > param_inline_functions_called_once_insns)
1209 : : return true;
1210 : 52636 : if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1211 : 10020 : *(bool *)has_hot_call = true;
1212 : : }
1213 : : return false;
1214 : : }
1215 : :
1216 : : /* If NODE has a caller, return true. */
1217 : :
1218 : : static bool
1219 : 2074051 : has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1220 : : {
1221 : 2074051 : if (node->callers)
1222 : 643071 : return true;
1223 : : return false;
1224 : : }
1225 : :
1226 : : /* Decide if inlining NODE would reduce unit size by eliminating
1227 : : the offline copy of function.
1228 : : When COLD is true the cold calls are considered, too. */
1229 : :
1230 : : static bool
1231 : 4326908 : want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1232 : : {
1233 : 4326908 : bool has_hot_call = false;
1234 : :
1235 : : /* Aliases get inlined along with the function they alias. */
1236 : 4326908 : if (node->alias)
1237 : : return false;
1238 : : /* Already inlined? */
1239 : 4253881 : if (node->inlined_to)
1240 : : return false;
1241 : : /* Does it have callers? */
1242 : 2027702 : if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1243 : : return false;
1244 : : /* Inlining into all callers would increase size? */
1245 : 643071 : if (growth_positive_p (node, NULL, INT_MIN) > 0)
1246 : : return false;
1247 : : /* All inlines must be possible. */
1248 : 67164 : if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1249 : : true))
1250 : : return false;
1251 : 41056 : if (!cold && !has_hot_call)
1252 : : return false;
1253 : : return true;
1254 : : }
1255 : :
1256 : : /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1257 : : in estimate_edge_badness. */
1258 : :
1259 : : static bool
1260 : 547232 : wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1261 : : {
1262 : 547232 : return size < (DECL_DECLARED_INLINE_P (where->decl)
1263 : 547232 : ? inline_insns_single (where, false, false)
1264 : 339684 : : inline_insns_auto (where, false, false));
1265 : : }
1266 : :
1267 : : /* A cost model driving the inlining heuristics in a way such that the edges
1268 : : with the smallest badness are inlined first. After each inlining is
1269 : : performed, the costs of all caller edges of the affected nodes are
1270 : : recomputed so the metrics may accurately depend on values such as the
1271 : : number of inlinable callers of the function or the function body size. */
1272 : :
1273 : : static sreal
1274 : 8165316 : edge_badness (struct cgraph_edge *edge, bool dump)
1275 : : {
1276 : 8165316 : sreal badness;
1277 : 8165316 : int growth;
1278 : 8165316 : sreal edge_time, unspec_edge_time;
1279 : 8165316 : struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1280 : 8165316 : class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1281 : 8165316 : ipa_hints hints;
1282 : 16330632 : cgraph_node *caller = (edge->caller->inlined_to
1283 : 8165316 : ? edge->caller->inlined_to
1284 : : : edge->caller);
1285 : :
1286 : 8165316 : growth = estimate_edge_growth (edge);
1287 : 8165316 : edge_time = estimate_edge_time (edge, &unspec_edge_time);
1288 : 8165316 : hints = estimate_edge_hints (edge);
1289 : 8165316 : gcc_checking_assert (edge_time >= 0);
1290 : : /* Check that inlined time is better, but tolerate some roundoff issues.
1291 : : FIXME: When callee profile drops to 0 we account calls more. This
1292 : : should be fixed by never doing that. */
1293 : 8165316 : gcc_checking_assert ((edge_time * 100
1294 : : - callee_info->time * 101).to_int () <= 0
1295 : : || callee->count.ipa ().initialized_p ());
1296 : 8165316 : gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1297 : :
1298 : 8165316 : if (dump)
1299 : : {
1300 : 164 : fprintf (dump_file, " Badness calculation for %s -> %s\n",
1301 : 164 : edge->caller->dump_name (),
1302 : 164 : edge->callee->dump_name ());
1303 : 164 : fprintf (dump_file, " size growth %i, time %f unspec %f ",
1304 : : growth,
1305 : : edge_time.to_double (),
1306 : : unspec_edge_time.to_double ());
1307 : 164 : ipa_dump_hints (dump_file, hints);
1308 : 164 : if (big_speedup_p (edge))
1309 : 133 : fprintf (dump_file, " big_speedup");
1310 : 164 : fprintf (dump_file, "\n");
1311 : : }
1312 : :
1313 : : /* Always prefer inlining saving code size. */
1314 : 8165316 : if (growth <= 0)
1315 : : {
1316 : 473761 : badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1317 : 473761 : if (dump)
1318 : 87 : fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1319 : : growth);
1320 : : }
1321 : : /* Inlining into EXTERNAL functions is not going to change anything unless
1322 : : they are themselves inlined. */
1323 : 7691555 : else if (DECL_EXTERNAL (caller->decl))
1324 : : {
1325 : 30646 : if (dump)
1326 : 0 : fprintf (dump_file, " max: function is external\n");
1327 : 30646 : return sreal::max ();
1328 : : }
1329 : : /* When profile is available, compute badness as:
1330 : :
1331 : : time_saved * caller_count
1332 : : goodness = -------------------------------------------------
1333 : : growth_of_caller * overall_growth * combined_size
1334 : :
1335 : : badness = - goodness
1336 : :
1337 : : Again use a negative value to make calls with profile appear hotter
1338 : : than calls without.
1339 : : */
1340 : 7660909 : else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1341 : 7660909 : || caller->count.ipa ().nonzero_p ())
1342 : : {
1343 : 7659597 : sreal numerator, denominator;
1344 : 7659597 : int overall_growth;
1345 : 7659597 : sreal freq = edge->sreal_frequency ();
1346 : :
1347 : 7659597 : numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1348 : 7659597 : if (numerator <= 0)
1349 : 6025 : numerator = ((sreal) 1 >> 8);
1350 : 7659597 : if (caller->count.ipa ().nonzero_p ())
1351 : 103 : numerator *= caller->count.ipa ().to_gcov_type ();
1352 : 7659494 : else if (caller->count.ipa ().initialized_p ())
1353 : 693 : numerator = numerator >> 11;
1354 : 7659597 : denominator = growth;
1355 : :
1356 : 7659597 : overall_growth = callee_info->growth;
1357 : :
1358 : : /* Look for inliner wrappers of the form:
1359 : :
1360 : : inline_caller ()
1361 : : {
1362 : : do_fast_job...
1363 : : if (need_more_work)
1364 : : noninline_callee ();
1365 : : }
1366 : : Without penalizing this case, we usually inline noninline_callee
1367 : : into the inline_caller because overall_growth is small preventing
1368 : : further inlining of inline_caller.
1369 : :
1370 : : Penalize only callgraph edges to functions with small overall
1371 : : growth ...
1372 : : */
1373 : 7659597 : if (growth > overall_growth
1374 : : /* ... and having only one caller which is not inlined ... */
1375 : 1723343 : && callee_info->single_caller
1376 : 1057869 : && !edge->caller->inlined_to
1377 : : /* ... and edges executed only conditionally ... */
1378 : 691349 : && freq < 1
1379 : : /* ... consider case where callee is not inline but caller is ... */
1380 : 7983656 : && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1381 : 98627 : && DECL_DECLARED_INLINE_P (caller->decl))
1382 : : /* ... or when early optimizers decided to split and edge
1383 : : frequency still indicates splitting is a win ... */
1384 : 315389 : || (callee->split_part && !caller->split_part
1385 : 73548 : && freq * 100
1386 : 7725459 : < opt_for_fn (caller->decl,
1387 : : param_partial_inlining_entry_probability)
1388 : : /* ... and do not overwrite user specified hints. */
1389 : 73219 : && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1390 : 50450 : || DECL_DECLARED_INLINE_P (caller->decl)))))
1391 : : {
1392 : 80905 : ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1393 : 80905 : int caller_growth = caller_info->growth;
1394 : :
1395 : : /* Only apply the penalty when the caller looks like an inline candidate
1396 : : and is not called just once. */
1397 : 46020 : if (!caller_info->single_caller && overall_growth < caller_growth
1398 : 43897 : && caller_info->inlinable
1399 : 124748 : && wrapper_heuristics_may_apply
1400 : 43843 : (caller, ipa_size_summaries->get (caller)->size))
1401 : : {
1402 : 34749 : if (dump)
1403 : 1 : fprintf (dump_file,
1404 : : " Wrapper penalty. Increasing growth %i to %i\n",
1405 : : overall_growth, caller_growth);
1406 : : overall_growth = caller_growth;
1407 : : }
1408 : : }
1409 : 7659597 : if (overall_growth > 0)
1410 : : {
1411 : : /* Strongly prefer functions with few callers that can be inlined
1412 : : fully. The square root here leads to smaller binaries on average.
1413 : : Watch however for extreme cases and revert to a linear function
1414 : : when growth is large. */
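: : /* For illustration: an overall growth of 16 becomes 16 * 16 = 256,
: : while a growth of 300 becomes 300 + 65280 = 65580; the two branches
: : agree at the boundary, since 256 + 256 * 256 - 256 = 256 * 256. */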
1415 : 6434699 : if (overall_growth < 256)
1416 : 3358372 : overall_growth *= overall_growth;
1417 : : else
1418 : 3076327 : overall_growth += 256 * 256 - 256;
1419 : 6434699 : denominator *= overall_growth;
1420 : : }
1421 : 7659597 : denominator *= ipa_size_summaries->get (caller)->size + growth;
1422 : :
1423 : 7659597 : badness = - numerator / denominator;
1424 : :
1425 : 7659597 : if (dump)
1426 : : {
1427 : 308 : fprintf (dump_file,
1428 : : " %f: guessed profile. frequency %f, count %" PRId64
1429 : : " caller count %" PRId64
1430 : : " time saved %f"
1431 : : " overall growth %i (current) %i (original)"
1432 : : " %i (compensated)\n",
1433 : : badness.to_double (),
1434 : : freq.to_double (),
1435 : 77 : edge->count.ipa ().initialized_p ()
1436 : 0 : ? edge->count.ipa ().to_gcov_type () : -1,
1437 : 77 : caller->count.ipa ().initialized_p ()
1438 : 0 : ? caller->count.ipa ().to_gcov_type () : -1,
1439 : 154 : inlining_speedup (edge, freq, unspec_edge_time,
1440 : : edge_time).to_double (),
1441 : : estimate_growth (callee),
1442 : : callee_info->growth, overall_growth);
1443 : : }
1444 : : }
1445 : : /* When the function-local profile is not available or does not give
1446 : : useful information (i.e. frequency is zero), base the cost on
1447 : : loop nest and overall size growth, so we optimize for the overall number
1448 : : of functions fully inlined in the program. */
1449 : : else
1450 : : {
1451 : 1312 : int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1452 : 1312 : badness = growth;
1453 : :
1454 : : /* Decrease badness if call is nested. */
1455 : 1312 : if (badness > 0)
1456 : 1312 : badness = badness >> nest;
1457 : : else
1458 : 0 : badness = badness << nest;
1459 : 1312 : if (dump)
1460 : 0 : fprintf (dump_file, " %f: no profile. nest %i\n",
1461 : : badness.to_double (), nest);
1462 : : }
1463 : 8134670 : gcc_checking_assert (badness != 0);
1464 : :
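: : /* The adjustments below scale badness by powers of two via sreal::shift;
: : since smaller (more negative) badness is inlined first, a recursive
: : edge for example becomes 16 times less attractive, while hints such as
: : indirect_call or the loop hints make an edge 4 times more attractive.
: : The shift direction is flipped for negative values so the effect is
: : the same for either sign. */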
1465 : 8134670 : if (edge->recursive_p ())
1466 : 14019 : badness = badness.shift (badness > 0 ? 4 : -4);
1467 : 8134670 : if ((hints & (INLINE_HINT_indirect_call
1468 : : | INLINE_HINT_loop_iterations
1469 : : | INLINE_HINT_loop_stride))
1470 : 7306793 : || callee_info->growth <= 0)
1471 : 4182262 : badness = badness.shift (badness > 0 ? -2 : 2);
1472 : 8134670 : if (hints & INLINE_HINT_builtin_constant_p)
1473 : 21554 : badness = badness.shift (badness > 0 ? -4 : 4);
1474 : 8134670 : if (hints & (INLINE_HINT_same_scc))
1475 : 74006 : badness = badness.shift (badness > 0 ? 3 : -3);
1476 : 8097666 : else if (hints & (INLINE_HINT_in_scc))
1477 : 81390 : badness = badness.shift (badness > 0 ? 2 : -2);
1478 : 8056971 : else if (hints & (INLINE_HINT_cross_module))
1479 : 3386 : badness = badness.shift (badness > 0 ? 1 : -1);
1480 : 8134670 : if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1481 : 15244 : badness = badness.shift (badness > 0 ? -4 : 4);
1482 : 8127048 : else if ((hints & INLINE_HINT_declared_inline))
1483 : 12017587 : badness = badness.shift (badness > 0 ? -3 : 3);
1484 : 8134670 : if (dump)
1485 : 164 : fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1486 : 8134670 : return badness;
1487 : : }
1488 : :
1489 : : /* Recompute badness of EDGE and update its key in HEAP if needed. */
1490 : : static inline void
1491 : 4105046 : update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1492 : : {
1493 : 4105046 : sreal badness = edge_badness (edge, false);
1494 : 4105046 : if (edge->aux)
1495 : : {
1496 : 3211604 : edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1497 : 3211604 : gcc_checking_assert (n->get_data () == edge);
1498 : :
1499 : : /* fibonacci_heap::replace_key does busy updating of the
1500 : : heap, which is unnecessarily expensive.
1501 : : We do lazy increases: after extracting the minimum, if the key
1502 : : turns out to be out of date, it is re-inserted into the heap
1503 : : with the correct value. */
1504 : 3211604 : if (badness < n->get_key ().badness)
1505 : : {
1506 : 688662 : if (dump_file && (dump_flags & TDF_DETAILS))
1507 : : {
1508 : 78 : fprintf (dump_file,
1509 : : " decreasing badness %s -> %s, %f to %f\n",
1510 : 39 : edge->caller->dump_name (),
1511 : 39 : edge->callee->dump_name (),
1512 : 78 : n->get_key ().badness.to_double (),
1513 : : badness.to_double ());
1514 : : }
1515 : 688662 : inline_badness b (edge, badness);
1516 : 688662 : heap->decrease_key (n, b);
1517 : : }
1518 : : }
1519 : : else
1520 : : {
1521 : 893442 : if (dump_file && (dump_flags & TDF_DETAILS))
1522 : : {
1523 : 266 : fprintf (dump_file,
1524 : : " enqueuing call %s -> %s, badness %f\n",
1525 : 133 : edge->caller->dump_name (),
1526 : 133 : edge->callee->dump_name (),
1527 : : badness.to_double ());
1528 : : }
1529 : 893442 : inline_badness b (edge, badness);
1530 : 893442 : edge->aux = heap->insert (b, edge);
1531 : : }
1532 : 4105046 : }
1533 : :
1534 : :
1535 : : /* NODE was inlined.
1536 : : All caller edges need to be reset because
1537 : : size estimates change. Similarly callees need to be reset
1538 : : because a better context may be known. */
1539 : :
1540 : : static void
1541 : 814115 : reset_edge_caches (struct cgraph_node *node)
1542 : : {
1543 : 814115 : struct cgraph_edge *edge;
1544 : 814115 : struct cgraph_edge *e = node->callees;
1545 : 814115 : struct cgraph_node *where = node;
1546 : 814115 : struct ipa_ref *ref;
1547 : :
1548 : 814115 : if (where->inlined_to)
1549 : 759348 : where = where->inlined_to;
1550 : :
1551 : 814115 : reset_node_cache (where);
1552 : :
1553 : 814115 : if (edge_growth_cache != NULL)
1554 : 2658190 : for (edge = where->callers; edge; edge = edge->next_caller)
1555 : 1844121 : if (edge->inline_failed)
1556 : 1844121 : edge_growth_cache->remove (edge);
1557 : :
1558 : 867174 : FOR_EACH_ALIAS (where, ref)
1559 : 106118 : reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1560 : :
1561 : 814115 : if (!e)
1562 : : return;
1563 : :
1564 : 2187518 : while (true)
1565 : 2187518 : if (!e->inline_failed && e->callee->callees)
1566 : : e = e->callee->callees;
1567 : : else
1568 : : {
1569 : 1791684 : if (edge_growth_cache != NULL && e->inline_failed)
1570 : 1707435 : edge_growth_cache->remove (e);
1571 : 1791684 : if (e->next_callee)
1572 : : e = e->next_callee;
1573 : : else
1574 : : {
1575 : 1070401 : do
1576 : : {
1577 : 1070401 : if (e->caller == node)
1578 : : return;
1579 : 395834 : e = e->caller->callers;
1580 : : }
1581 : 395834 : while (!e->next_callee);
1582 : : e = e->next_callee;
1583 : : }
1584 : : }
1585 : : }
1586 : :
1587 : : /* Recompute HEAP nodes for each caller of NODE.
1588 : : UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1589 : : When CHECK_INLINABLITY_FOR is set, re-check only that edge for
1590 : : inlinability. Otherwise check all edges. */
1591 : :
1592 : : static void
1593 : 813764 : update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1594 : : bitmap updated_nodes,
1595 : : struct cgraph_edge *check_inlinablity_for)
1596 : : {
1597 : 813764 : struct cgraph_edge *edge;
1598 : 813764 : struct ipa_ref *ref;
1599 : :
1600 : 813764 : if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1601 : 803051 : || node->inlined_to)
1602 : 10713 : return;
1603 : 803051 : if (!bitmap_set_bit (updated_nodes, node->get_summary_id ()))
1604 : : return;
1605 : :
1606 : 855799 : FOR_EACH_ALIAS (node, ref)
1607 : : {
1608 : 52748 : struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1609 : 52748 : update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1610 : : }
1611 : :
1612 : 2539819 : for (edge = node->callers; edge; edge = edge->next_caller)
1613 : 1736768 : if (edge->inline_failed)
1614 : : {
1615 : 1736768 : if (!check_inlinablity_for
1616 : 1736768 : || check_inlinablity_for == edge)
1617 : : {
1618 : 1736768 : if (can_inline_edge_p (edge, false)
1619 : 1704886 : && want_inline_small_function_p (edge, false)
1620 : 2166357 : && can_inline_edge_by_limits_p (edge, 0))
1621 : 426734 : update_edge_key (heap, edge);
1622 : 1310034 : else if (edge->aux)
1623 : : {
1624 : 70908 : report_inline_failed_reason (edge);
1625 : 70908 : heap->delete_node ((edge_heap_node_t *) edge->aux);
1626 : 70908 : edge->aux = NULL;
1627 : : }
1628 : : }
1629 : 0 : else if (edge->aux)
1630 : 0 : update_edge_key (heap, edge);
1631 : : }
1632 : : }
1633 : :
1634 : : /* Recompute HEAP nodes for each uninlined call in NODE.
1635 : : If UPDATE_SINCE is non-NULL, check whether edges called within that function
1636 : : are inlinable (typically UPDATE_SINCE is the inline clone we introduced
1637 : : where all edges have a new context).
1638 : :
1639 : : This is used when we know that edge badnesses are only going to increase
1640 : : (we introduced a new call site) and thus all we need is to insert newly
1641 : : created edges into the heap. */
1642 : :
1643 : : static void
1644 : 761053 : update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1645 : : struct cgraph_node *update_since,
1646 : : bitmap updated_nodes)
1647 : : {
1648 : 761053 : struct cgraph_edge *e = node->callees;
1649 : 761053 : bool check_inlinability = update_since == node;
1650 : :
1651 : 761053 : if (!e)
1652 : : return;
1653 : 22596183 : while (true)
1654 : 22596183 : if (!e->inline_failed && e->callee->callees)
1655 : : {
1656 : 3568374 : if (e->callee == update_since)
1657 : 340359 : check_inlinability = true;
1658 : : e = e->callee->callees;
1659 : : }
1660 : : else
1661 : : {
1662 : 19027809 : enum availability avail;
1663 : 19027809 : struct cgraph_node *callee;
1664 : 19027809 : if (!check_inlinability)
1665 : : {
1666 : 17238564 : if (e->aux
1667 : 19886058 : && !bitmap_bit_p (updated_nodes,
1668 : 2647494 : e->callee->ultimate_alias_target
1669 : 2647494 : (&avail, e->caller)->get_summary_id ()))
1670 : 2647494 : update_edge_key (heap, e);
1671 : : }
1672 : : /* We do not reset callee growth cache here. Since we added a new call,
1673 : : growth should have just increased and consequently the badness metric
1674 : : doesn't need updating. */
1675 : 1789245 : else if (e->inline_failed
1676 : 1706170 : && (callee = e->callee->ultimate_alias_target (&avail,
1677 : 1706170 : e->caller))
1678 : 1706170 : && avail >= AVAIL_AVAILABLE
1679 : 471458 : && ipa_fn_summaries->get (callee) != NULL
1680 : 471456 : && ipa_fn_summaries->get (callee)->inlinable
1681 : 2247657 : && !bitmap_bit_p (updated_nodes, callee->get_summary_id ()))
1682 : : {
1683 : 458412 : if (can_inline_edge_p (e, false)
1684 : 455004 : && want_inline_small_function_p (e, false)
1685 : 716288 : && can_inline_edge_by_limits_p (e, 0))
1686 : : {
1687 : 257313 : gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1688 : 257313 : gcc_checking_assert (check_inlinability || e->aux);
1689 : 257313 : update_edge_key (heap, e);
1690 : : }
1691 : 201099 : else if (e->aux)
1692 : : {
1693 : 4947 : report_inline_failed_reason (e);
1694 : 4947 : heap->delete_node ((edge_heap_node_t *) e->aux);
1695 : 4947 : e->aux = NULL;
1696 : : }
1697 : : }
1698 : : /* In case we redirected to an unreachable node, we only need to remove the
1699 : : fibheap entry. */
1700 : 1330833 : else if (e->aux)
1701 : : {
1702 : 3002 : heap->delete_node ((edge_heap_node_t *) e->aux);
1703 : 3002 : e->aux = NULL;
1704 : : }
1705 : 19027809 : if (e->next_callee)
1706 : : e = e->next_callee;
1707 : : else
1708 : : {
1709 : 4296522 : do
1710 : : {
1711 : 4296522 : if (e->caller == node)
1712 : 728148 : return;
1713 : 3568374 : if (e->caller == update_since)
1714 : 340359 : check_inlinability = false;
1715 : 3568374 : e = e->caller->callers;
1716 : : }
1717 : 3568374 : while (!e->next_callee);
1718 : : e = e->next_callee;
1719 : : }
1720 : : }
1721 : : }
1722 : :
1723 : : /* Enqueue all recursive calls from NODE into the priority queue, depending
1724 : : on how likely we want to recursively inline the call. */
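: : /* The heap key used below is the negated call frequency, so the hottest
: : recursive call sites are extracted first: for example a site with
: : frequency 3.0 gets key -3.0 and is processed before a site with
: : frequency 0.5 (key -0.5). */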
1725 : :
1726 : : static void
1727 : 20451 : lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1728 : : edge_heap_t *heap)
1729 : : {
1730 : 20451 : struct cgraph_edge *e;
1731 : 20451 : enum availability avail;
1732 : :
1733 : 56320 : for (e = where->callees; e; e = e->next_callee)
1734 : 35869 : if (e->callee == node
1735 : 35869 : || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1736 : 1123 : && avail > AVAIL_INTERPOSABLE))
1737 : : {
1738 : 15321 : inline_badness b (e, -e->sreal_frequency ());
1739 : 15321 : heap->insert (b, e);
1740 : : }
1741 : 56320 : for (e = where->callees; e; e = e->next_callee)
1742 : 35869 : if (!e->inline_failed)
1743 : 7637 : lookup_recursive_calls (node, e->callee, heap);
1744 : 20451 : }
1745 : :
1746 : : /* Decide on recursive inlining: in case the function has recursive calls,
1747 : : inline until the body size reaches the given limit. If any new indirect
1748 : : edges are discovered in the process, add them to *NEW_EDGES, unless
1749 : : NEW_EDGES is NULL. */
1750 : :
1751 : : static bool
1752 : 1768 : recursive_inlining (struct cgraph_edge *edge,
1753 : : vec<cgraph_edge *> *new_edges)
1754 : : {
1755 : 3536 : cgraph_node *to = (edge->caller->inlined_to
1756 : 1768 : ? edge->caller->inlined_to : edge->caller);
1757 : 1768 : int limit = opt_for_fn (to->decl,
1758 : : param_max_inline_insns_recursive_auto);
1759 : 1768 : inline_badness b (edge, sreal::min ());
1760 : 1768 : edge_heap_t heap (b);
1761 : 1768 : struct cgraph_node *node;
1762 : 1768 : struct cgraph_edge *e;
1763 : 1768 : struct cgraph_node *master_clone = NULL, *next;
1764 : 1768 : int depth = 0;
1765 : 1768 : int n = 0;
1766 : :
1767 : 1768 : node = edge->caller;
1768 : 1768 : if (node->inlined_to)
1769 : 517 : node = node->inlined_to;
1770 : :
1771 : 1768 : if (DECL_DECLARED_INLINE_P (node->decl))
1772 : 361 : limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1773 : :
1774 : : /* Make sure that function is small enough to be considered for inlining. */
1775 : 1768 : if (estimate_size_after_inlining (node, edge) >= limit)
1776 : : return false;
1777 : 1768 : lookup_recursive_calls (node, node, &heap);
1778 : 1768 : if (heap.empty ())
1779 : : return false;
1780 : :
1781 : 1768 : if (dump_file)
1782 : 4 : fprintf (dump_file,
1783 : : " Performing recursive inlining on %s\n", node->dump_name ());
1784 : :
1785 : : /* Do the inlining and update the list of recursive calls during the process. */
1786 : 15514 : while (!heap.empty ())
1787 : : {
1788 : 13768 : struct cgraph_edge *curr = heap.extract_min ();
1789 : 13768 : struct cgraph_node *cnode, *dest = curr->callee;
1790 : :
1791 : 13768 : if (!can_inline_edge_p (curr, true)
1792 : 13768 : || !can_inline_edge_by_limits_p (curr, CAN_INLINE_REPORT | CAN_INLINE_FORCE_LIMITS))
1793 : 0 : continue;
1794 : :
1795 : : /* MASTER_CLONE is produced in the case we already started modifying
1796 : : the function. Be sure to redirect the edge to the original body before
1797 : : estimating growth; otherwise we would be seeing growth after inlining
1798 : : the already modified body. */
1799 : 13768 : if (master_clone)
1800 : : {
1801 : 11996 : curr->redirect_callee (master_clone);
1802 : 11996 : if (edge_growth_cache != NULL)
1803 : 11996 : edge_growth_cache->remove (curr);
1804 : : }
1805 : :
1806 : 13768 : if (estimate_size_after_inlining (node, curr) > limit)
1807 : : {
1808 : 22 : curr->redirect_callee (dest);
1809 : 22 : if (edge_growth_cache != NULL)
1810 : 22 : edge_growth_cache->remove (curr);
1811 : : break;
1812 : : }
1813 : :
1814 : 13746 : depth = 1;
1815 : 13746 : for (cnode = curr->caller;
1816 : 72476 : cnode->inlined_to; cnode = cnode->callers->caller)
1817 : 117460 : if (node->decl
1818 : 58730 : == curr->callee->ultimate_alias_target ()->decl)
1819 : 58730 : depth++;
1820 : :
1821 : 13746 : if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1822 : : {
1823 : 2700 : curr->redirect_callee (dest);
1824 : 2700 : if (edge_growth_cache != NULL)
1825 : 2700 : edge_growth_cache->remove (curr);
1826 : 2700 : continue;
1827 : : }
1828 : :
1829 : 11046 : if (dump_file)
1830 : : {
1831 : 14 : fprintf (dump_file,
1832 : : " Inlining call of depth %i", depth);
1833 : 28 : if (node->count.nonzero_p () && curr->count.initialized_p ())
1834 : : {
1835 : 2 : fprintf (dump_file, " called approx. %.2f times per call",
1836 : 2 : (double)curr->count.to_gcov_type ()
1837 : 2 : / node->count.to_gcov_type ());
1838 : : }
1839 : 14 : fprintf (dump_file, "\n");
1840 : : }
1841 : 11046 : if (!master_clone)
1842 : : {
1843 : : /* We need original clone to copy around. */
1844 : 1454 : master_clone = node->create_clone (node->decl, node->count,
1845 : : false, vNULL, true, NULL, NULL);
1846 : 4135 : for (e = master_clone->callees; e; e = e->next_callee)
1847 : 2681 : if (!e->inline_failed)
1848 : 469 : clone_inlined_nodes (e, true, false, NULL);
1849 : 1454 : curr->redirect_callee (master_clone);
1850 : 1454 : if (edge_growth_cache != NULL)
1851 : 1454 : edge_growth_cache->remove (curr);
1852 : : }
1853 : :
1854 : 11046 : inline_call (curr, false, new_edges, &overall_size, true);
1855 : 11046 : reset_node_cache (node);
1856 : 11046 : lookup_recursive_calls (node, curr->callee, &heap);
1857 : 11046 : n++;
1858 : : }
1859 : :
1860 : 1768 : if (!heap.empty () && dump_file)
1861 : 0 : fprintf (dump_file, " Recursive inlining growth limit met.\n");
1862 : :
1863 : 1768 : if (!master_clone)
1864 : : return false;
1865 : :
1866 : 1454 : if (dump_enabled_p ())
1867 : 4 : dump_printf_loc (MSG_NOTE, edge->call_stmt,
1868 : : "\n Inlined %i times, "
1869 : : "body grown from size %i to %i, time %f to %f\n", n,
1870 : 4 : ipa_size_summaries->get (master_clone)->size,
1871 : 4 : ipa_size_summaries->get (node)->size,
1872 : 4 : ipa_fn_summaries->get (master_clone)->time.to_double (),
1873 : 4 : ipa_fn_summaries->get (node)->time.to_double ());
1874 : :
1875 : : /* Remove the master clone we used for inlining. We rely on the fact that
1876 : : clones inlined into the master clone get queued just before it, so we
1877 : : don't need recursion. */
1878 : 19583 : for (node = symtab->first_function (); node != master_clone;
1879 : : node = next)
1880 : : {
1881 : 16675 : next = symtab->next_function (node);
1882 : 16675 : if (node->inlined_to == master_clone)
1883 : 937 : node->remove ();
1884 : : }
1885 : 1454 : master_clone->remove ();
1886 : 1454 : return true;
1887 : 1768 : }
1888 : :
1889 : :
1890 : : /* Given the whole-compilation-unit size estimate INSNS, compute how large
1891 : : we can allow the unit to grow. */
1892 : :
1893 : : static int64_t
1894 : 813805 : compute_max_insns (cgraph_node *node, int insns)
1895 : : {
1896 : 813805 : int max_insns = insns;
1897 : 813805 : if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1898 : : max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1899 : :
1900 : 813805 : return ((int64_t) max_insns
1901 : 813805 : * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1902 : : }
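: : /* A worked example with assumed parameter values (the real defaults live
: : in params.opt): with param_large_unit_insns = 10000 and
: : param_inline_unit_growth = 40, a unit currently at 8000 insns is first
: : raised to the 10000 floor and may then grow to 10000 * 140 / 100 = 14000
: : insns, while a unit already at 50000 insns may grow to 70000. */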
1903 : :
1904 : :
1905 : : /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1906 : :
1907 : : static void
1908 : 760777 : add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> &new_edges)
1909 : : {
1910 : 1523269 : while (new_edges.length () > 0)
1911 : : {
1912 : 1715 : struct cgraph_edge *edge = new_edges.pop ();
1913 : :
1914 : 1715 : gcc_assert (!edge->aux);
1915 : 1715 : gcc_assert (edge->callee);
1916 : 1715 : if (edge->inline_failed
1917 : 1715 : && can_inline_edge_p (edge, true)
1918 : 834 : && want_inline_small_function_p (edge, true)
1919 : 2265 : && can_inline_edge_by_limits_p (edge, CAN_INLINE_REPORT))
1920 : : {
1921 : 550 : inline_badness b (edge, edge_badness (edge, false));
1922 : 550 : edge->aux = heap->insert (b, edge);
1923 : : }
1924 : : }
1925 : 760777 : }
1926 : :
1927 : : /* Remove EDGE from the fibheap. */
1928 : :
1929 : : static void
1930 : 4979 : heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1931 : : {
1932 : 4979 : if (e->aux)
1933 : : {
1934 : 10 : ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1935 : 10 : e->aux = NULL;
1936 : : }
1937 : 4979 : }
1938 : :
1939 : : /* Return true if speculation of edge E seems useful.
1940 : : If ANTICIPATE_INLINING is true, be conservative and hope that E
1941 : : may get inlined. */
1942 : :
1943 : : bool
1944 : 40372 : speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1945 : : {
1946 : : /* If we have already decided to inline the edge, it seems useful.
1947 : : Also if ipa-cp or other pass worked hard enough to produce a clone,
1948 : : we already decided this is a good idea. */
1949 : 40372 : if (!e->inline_failed
1950 : 6596 : || e->callee->clone_of)
1951 : : return true;
1952 : :
1953 : 6080 : enum availability avail;
1954 : 6080 : struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1955 : : e->callee);
1956 : :
1957 : 6080 : gcc_assert (e->speculative && !e->indirect_unknown_callee);
1958 : :
1959 : : /* Even if the call statement is not hot, we can still have useful speculation
1960 : : in cases where a lot of time is spent in the callee.
1961 : : Do not check maybe_hot_p. */
1962 : 6406 : if (!e->count.nonzero_p ())
1963 : : return false;
1964 : :
1965 : : /* See if IPA optimizations found something potentially useful about the
1966 : : function. Do this only if the call seems hot, since this is about
1967 : : optimizing the code surrounding the call site rather than improving
1968 : : the callee. */
1969 : 6044 : if (avail >= AVAIL_AVAILABLE && e->maybe_hot_p ())
1970 : : {
1971 : 5962 : int ecf_flags = flags_from_decl_or_type (target->decl);
1972 : 5962 : if (ecf_flags & ECF_CONST)
1973 : : {
1974 : 183 : if (!(e->speculative_call_indirect_edge ()->indirect_info
1975 : 183 : ->ecf_flags & ECF_CONST))
1976 : : return true;
1977 : : }
1978 : 5779 : else if (ecf_flags & ECF_PURE)
1979 : : {
1980 : 1482 : if (!(e->speculative_call_indirect_edge ()->indirect_info
1981 : 1482 : ->ecf_flags & ECF_PURE))
1982 : : return true;
1983 : : }
1984 : 4297 : else if (get_modref_function_summary (target))
1985 : : return true;
1986 : : }
1987 : : /* If we did not manage to inline the function nor redirect
1988 : : to an ipa-cp clone (recognized by the local flag being set),
1989 : : it is probably pointless to inline it unless the hardware is missing
1990 : : an indirect call predictor.
1991 : :
1992 : : At this point we know we will not dispatch into a faster version of the
1993 : : callee, so if the call itself is not hot, we can definitely give up
1994 : : speculating. */
1995 : 1945 : if (!anticipate_inlining && (!target->local || !e->maybe_hot_p ()))
1996 : 320 : return false;
1997 : : /* For overwritable targets there is not much to do. */
1998 : 1625 : if (!can_inline_edge_p (e, false)
1999 : 1625 : || !can_inline_edge_by_limits_p (e, CAN_INLINE_DISREGARD_LIMITS))
2000 : 6 : return false;
2001 : : /* OK, speculation seems interesting. */
2002 : : return true;
2003 : : }
2004 : :
2005 : : /* We know that EDGE is not going to be inlined.
2006 : : See if we can remove speculation. */
2007 : :
2008 : : static void
2009 : 54236 : resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
2010 : : {
2011 : 54236 : if (edge->speculative && !speculation_useful_p (edge, false))
2012 : : {
2013 : 18 : struct cgraph_node *node = edge->caller;
2014 : 36 : struct cgraph_node *where = node->inlined_to
2015 : 18 : ? node->inlined_to : node;
2016 : 18 : auto_bitmap updated_nodes;
2017 : :
2018 : 18 : if (edge->count.ipa ().initialized_p ())
2019 : 0 : spec_rem += edge->count.ipa ();
2020 : 18 : cgraph_edge::resolve_speculation (edge);
2021 : 18 : reset_edge_caches (where);
2022 : 18 : ipa_update_overall_fn_summary (where);
2023 : 18 : update_caller_keys (edge_heap, where,
2024 : : updated_nodes, NULL);
2025 : 18 : update_callee_keys (edge_heap, where, NULL,
2026 : : updated_nodes);
2027 : 18 : }
2028 : 54236 : }
2029 : :
2030 : : /* Return true if NODE should be accounted in the overall size estimate.
2031 : : Skip all nodes optimized for size so we can measure the growth of the hot
2032 : : part of the program regardless of the padding. */
2033 : :
2034 : : bool
2035 : 3340975 : inline_account_function_p (struct cgraph_node *node)
2036 : : {
2037 : 3340975 : return (!DECL_EXTERNAL (node->decl)
2038 : 3167383 : && !opt_for_fn (node->decl, optimize_size)
2039 : 6420456 : && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
2040 : : }
2041 : :
2042 : : /* Count the number of callers of NODE and store it into DATA (which
2043 : : points to an int). Worker for cgraph_for_node_and_aliases. */
2044 : :
2045 : : static bool
2046 : 1387537 : sum_callers (struct cgraph_node *node, void *data)
2047 : : {
2048 : 1387537 : struct cgraph_edge *e;
2049 : 1387537 : int *num_calls = (int *)data;
2050 : :
2051 : 3226286 : for (e = node->callers; e; e = e->next_caller)
2052 : 1838749 : (*num_calls)++;
2053 : 1387537 : return false;
2054 : : }
2055 : :
2056 : : /* We only propagate across edges with non-interposable callee. */
2057 : :
2058 : : inline bool
2059 : 6671662 : ignore_edge_p (struct cgraph_edge *e)
2060 : : {
2061 : 6671662 : enum availability avail;
2062 : 6671662 : e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
2063 : 6671662 : return (avail <= AVAIL_INTERPOSABLE);
2064 : : }
2065 : :
2066 : : /* We use greedy algorithm for inlining of small functions:
2067 : : All inline candidates are put into prioritized heap ordered in
2068 : : increasing badness.
2069 : :
2070 : : The inlining of small functions is bounded by unit growth parameters. */
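: : /* A simplified sketch of inline_small_functions below (speculation
: : handling, cache resets and key updates omitted):
: :
: : seed the heap with every edge passing can_inline_edge_p,
: : want_inline_small_function_p and the inline limits;
: : while the heap is not empty:
: : pop the edge with the smallest badness; if its cached key is stale
: : and worse than the new minimum, re-insert it and continue;
: : skip the edge if the unit growth limit (compute_max_insns) would
: : be exceeded;
: : otherwise inline it (recursive edges go through recursive_inlining)
: : and recompute the keys of the affected caller and callee edges. */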
2071 : :
2072 : : static void
2073 : 226684 : inline_small_functions (void)
2074 : : {
2075 : 226684 : struct cgraph_node *node;
2076 : 226684 : struct cgraph_edge *edge;
2077 : 226684 : inline_badness b;
2078 : 226684 : edge_heap_t edge_heap (b);
2079 : 226684 : auto_bitmap updated_nodes;
2080 : 226684 : int min_size;
2081 : 226684 : auto_vec<cgraph_edge *> new_indirect_edges;
2082 : 226684 : int initial_size = 0;
2083 : 226684 : struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
2084 : 226684 : struct cgraph_edge_hook_list *edge_removal_hook_holder;
2085 : 226684 : new_indirect_edges.create (8);
2086 : :
2087 : 226684 : edge_removal_hook_holder
2088 : 226684 : = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
2089 : :
2090 : : /* Compute overall unit size and other global parameters used by badness
2091 : : metrics. */
2092 : :
2093 : 226684 : max_count = profile_count::uninitialized ();
2094 : 226684 : ipa_reduced_postorder (order, true, ignore_edge_p);
2095 : 226684 : free (order);
2096 : :
2097 : 2040172 : FOR_EACH_DEFINED_FUNCTION (node)
2098 : 1813488 : if (!node->inlined_to)
2099 : : {
2100 : 1813457 : if (!node->alias && node->analyzed
2101 : 1716431 : && (node->has_gimple_body_p () || node->thunk)
2102 : 3529888 : && opt_for_fn (node->decl, optimize))
2103 : : {
2104 : 1288718 : class ipa_fn_summary *info = ipa_fn_summaries->get (node);
2105 : 1288718 : struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
2106 : :
2107 : : /* Do not account external functions; they will be optimized out
2108 : : if not inlined. Also only count the non-cold portion of the program. */
2109 : 1288718 : if (inline_account_function_p (node))
2110 : 1187204 : initial_size += ipa_size_summaries->get (node)->size;
2111 : 1288718 : info->growth = estimate_growth (node);
2112 : :
2113 : 1288718 : int num_calls = 0;
2114 : 1288718 : node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2115 : : true);
2116 : 1288718 : if (num_calls == 1)
2117 : 419127 : info->single_caller = true;
2118 : 1288718 : if (dfs && dfs->next_cycle)
2119 : : {
2120 : 6078 : struct cgraph_node *n2;
2121 : 6078 : int id = dfs->scc_no + 1;
2122 : 13494 : for (n2 = node; n2;
2123 : 7416 : n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
2124 : 12157 : if (opt_for_fn (n2->decl, optimize))
2125 : : {
2126 : 12149 : ipa_fn_summary *info2 = ipa_fn_summaries->get
2127 : 12149 : (n2->inlined_to ? n2->inlined_to : n2);
2128 : 12149 : if (info2->scc_no)
2129 : : break;
2130 : 7408 : info2->scc_no = id;
2131 : : }
2132 : : }
2133 : : }
2134 : :
2135 : 4027056 : for (edge = node->callers; edge; edge = edge->next_caller)
2136 : 2213599 : max_count = max_count.max (edge->count.ipa ());
2137 : : }
2138 : 226684 : ipa_free_postorder_info ();
2139 : 226684 : initialize_growth_caches ();
2140 : :
2141 : 226684 : if (dump_file)
2142 : 178 : fprintf (dump_file,
2143 : : "\nDeciding on inlining of small functions. Starting with size %i.\n",
2144 : : initial_size);
2145 : :
2146 : 226684 : overall_size = initial_size;
2147 : 226684 : min_size = overall_size;
2148 : :
2149 : : /* Populate the heap with all edges we might inline. */
2150 : :
2151 : 2040172 : FOR_EACH_DEFINED_FUNCTION (node)
2152 : : {
2153 : 1813488 : bool update = false;
2154 : 1813488 : struct cgraph_edge *next = NULL;
2155 : 1813488 : bool has_speculative = false;
2156 : :
2157 : 1813488 : if (!opt_for_fn (node->decl, optimize)
2158 : : /* With -Og we do not want to perform IPA inlining of small
2159 : : functions since there are no scalar cleanups after it
2160 : : that would realize the anticipated win. All abstraction
2161 : : is removed during early inlining. */
2162 : 1813488 : || opt_for_fn (node->decl, optimize_debug))
2163 : 455992 : continue;
2164 : :
2165 : 1357496 : if (dump_file)
2166 : 806 : fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2167 : :
2168 : 6649525 : for (edge = node->callees; edge; edge = edge->next_callee)
2169 : : {
2170 : 5292029 : if (edge->inline_failed
2171 : 5291998 : && !edge->aux
2172 : 5291963 : && can_inline_edge_p (edge, true)
2173 : 1394886 : && want_inline_small_function_p (edge, true)
2174 : 779843 : && can_inline_edge_by_limits_p (edge, CAN_INLINE_REPORT)
2175 : 6065534 : && edge->inline_failed)
2176 : : {
2177 : 773505 : gcc_assert (!edge->aux);
2178 : 773505 : update_edge_key (&edge_heap, edge);
2179 : : }
2180 : 5292029 : if (edge->speculative)
2181 : 4796 : has_speculative = true;
2182 : : }
2183 : 1357496 : if (has_speculative)
2184 : 20459 : for (edge = node->callees; edge; edge = next)
2185 : : {
2186 : 16566 : next = edge->next_callee;
2187 : 16566 : if (edge->speculative
2188 : 16566 : && !speculation_useful_p (edge, edge->aux != NULL))
2189 : : {
2190 : 262 : cgraph_edge::resolve_speculation (edge);
2191 : 262 : update = true;
2192 : : }
2193 : : }
2194 : 3893 : if (update)
2195 : : {
2196 : 392 : struct cgraph_node *where = node->inlined_to
2197 : 196 : ? node->inlined_to : node;
2198 : 196 : ipa_update_overall_fn_summary (where);
2199 : 196 : reset_edge_caches (where);
2200 : 196 : update_caller_keys (&edge_heap, where,
2201 : : updated_nodes, NULL);
2202 : 196 : update_callee_keys (&edge_heap, where, NULL,
2203 : : updated_nodes);
2204 : 196 : bitmap_clear (updated_nodes);
2205 : : }
2206 : : }
2207 : :
2208 : 226684 : gcc_assert (in_lto_p
2209 : : || !(max_count > 0)
2210 : : || (profile_info && flag_branch_probabilities));
2211 : :
2212 : 2333139 : while (!edge_heap.empty ())
2213 : : {
2214 : 2106455 : int old_size = overall_size;
2215 : 2106455 : struct cgraph_node *where, *callee;
2216 : 2106455 : sreal badness = edge_heap.min_key ().badness;
2217 : 2106455 : sreal current_badness;
2218 : 2106455 : int growth;
2219 : :
2220 : 2106455 : edge = edge_heap.extract_min ();
2221 : 2106455 : gcc_assert (edge->aux);
2222 : 2106455 : edge->aux = NULL;
2223 : 2106455 : if (!edge->inline_failed || !edge->callee->analyzed)
2224 : 1345653 : continue;
2225 : :
2226 : : /* Be sure that caches are kept consistent.
2227 : : This check is affected by scaling roundoff errors when compiling for
2228 : : IPA, thus we skip it in that case. */
2229 : 4059553 : if (flag_checking && !edge->callee->count.ipa_p ()
2230 : 4059556 : && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2231 : : {
2232 : 1953188 : sreal cached_badness = edge_badness (edge, false);
2233 : :
2234 : 1953188 : int old_size_est = estimate_edge_size (edge);
2235 : 1953188 : sreal old_time_est = estimate_edge_time (edge);
2236 : 1953188 : int old_hints_est = estimate_edge_hints (edge);
2237 : :
2238 : 1953188 : if (edge_growth_cache != NULL)
2239 : 1953188 : edge_growth_cache->remove (edge);
2240 : 3468347 : reset_node_cache (edge->caller->inlined_to
2241 : : ? edge->caller->inlined_to
2242 : : : edge->caller);
2243 : 1953188 : gcc_assert (old_size_est == estimate_edge_size (edge));
2244 : 1953188 : gcc_assert (old_time_est == estimate_edge_time (edge));
2245 : : /* FIXME:
2246 : :
2247 : : gcc_assert (old_hints_est == estimate_edge_hints (edge));
2248 : :
2249 : : fails with profile feedback because some hints depend on the
2250 : : maybe_hot_edge_p predicate and because, as the callee gets inlined into
2251 : : other calls, the edge may become cold.
2252 : : This ought to be fixed by computing relative probabilities
2253 : : for the given invocation, but that is better done once the whole
2254 : : code is converted to sreals. Disable for now and revert to the "wrong"
2255 : : value so the enable/disable checking paths agree. */
2256 : 1953188 : edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2257 : :
2258 : : /* When updating the edge costs, we only decrease badness in the keys.
2259 : : Increases of badness are handled lazily; when we see a key with an
2260 : : out-of-date value on it, we re-insert it now. */
2261 : 1953188 : current_badness = edge_badness (edge, false);
2262 : 1953188 : gcc_assert (cached_badness == current_badness);
2263 : 1953188 : gcc_assert (current_badness >= badness);
2264 : : }
2265 : : else
2266 : 153180 : current_badness = edge_badness (edge, false);
2267 : 2106368 : if (current_badness != badness)
2268 : : {
2269 : 1426863 : if (edge_heap.min () && current_badness > edge_heap.min_key ().badness)
2270 : : {
2271 : 1291330 : inline_badness b (edge, current_badness);
2272 : 1291330 : edge->aux = edge_heap.insert (b, edge);
2273 : 1291330 : continue;
2274 : 1291330 : }
2275 : : else
2276 : 135533 : badness = current_badness;
2277 : : }
2278 : :
2279 : 815038 : if (!can_inline_edge_p (edge, true)
2280 : 815038 : || !can_inline_edge_by_limits_p (edge, CAN_INLINE_REPORT))
2281 : : {
2282 : 1233 : resolve_noninline_speculation (&edge_heap, edge);
2283 : 1233 : continue;
2284 : : }
2285 : :
2286 : 813805 : callee = edge->callee->ultimate_alias_target ();
2287 : 813805 : growth = estimate_edge_growth (edge);
2288 : 813805 : if (dump_file)
2289 : : {
2290 : 454 : fprintf (dump_file,
2291 : : "\nConsidering %s with %i size\n",
2292 : : callee->dump_name (),
2293 : 454 : ipa_size_summaries->get (callee)->size);
2294 : 908 : fprintf (dump_file,
2295 : : " to be inlined into %s in %s:%i\n"
2296 : : " Estimated badness is %f, frequency %.2f.\n",
2297 : 454 : edge->caller->dump_name (),
2298 : 454 : edge->call_stmt
2299 : 432 : && (LOCATION_LOCUS (gimple_location ((const gimple *)
2300 : : edge->call_stmt))
2301 : : > BUILTINS_LOCATION)
2302 : 423 : ? gimple_filename ((const gimple *) edge->call_stmt)
2303 : : : "unknown",
2304 : 454 : edge->call_stmt
2305 : 432 : ? gimple_lineno ((const gimple *) edge->call_stmt)
2306 : : : -1,
2307 : : badness.to_double (),
2308 : 454 : edge->sreal_frequency ().to_double ());
2309 : 454 : if (edge->count.ipa ().initialized_p ())
2310 : : {
2311 : 0 : fprintf (dump_file, " Called ");
2312 : 0 : edge->count.ipa ().dump (dump_file);
2313 : 0 : fprintf (dump_file, " times\n");
2314 : : }
2315 : 454 : if (dump_flags & TDF_DETAILS)
2316 : 164 : edge_badness (edge, true);
2317 : : }
2318 : :
2319 : 813805 : where = edge->caller;
2320 : :
2321 : 813805 : if (overall_size + growth > compute_max_insns (where, min_size)
2322 : 813805 : && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2323 : : {
2324 : 49189 : edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2325 : 49189 : report_inline_failed_reason (edge);
2326 : 49189 : resolve_noninline_speculation (&edge_heap, edge);
2327 : 49189 : continue;
2328 : : }
2329 : :
2330 : 764616 : if (!want_inline_small_function_p (edge, true))
2331 : : {
2332 : 1966 : resolve_noninline_speculation (&edge_heap, edge);
2333 : 1966 : continue;
2334 : : }
2335 : :
2336 : 762650 : profile_count old_count = callee->count;
2337 : :
2338 : : /* Heuristics for inlining small functions work poorly for
2339 : : recursive calls where we achieve effects similar to loop unrolling.
2340 : : When inlining such an edge seems profitable, leave the decision to the
2341 : : dedicated recursive inliner. */
2342 : 762650 : if (edge->recursive_p ())
2343 : : {
2344 : 1768 : if (where->inlined_to)
2345 : 517 : where = where->inlined_to;
2346 : :
2347 : : /* Disable always_inline on self-recursive functions.
2348 : : This prevents some inlining bombs, such as the one in PR113291,
2349 : : from exploding.
2350 : : It is not enough to stop inlining in self-recursive always_inlines,
2351 : : since they may grow large enough that always inlining them even
2352 : : with recursion depth 0 is too much.
2353 : :
2354 : : All sane uses of always_inline should be handled during
2355 : : early optimizations. */
2356 : 1768 : DECL_DISREGARD_INLINE_LIMITS (where->decl) = false;
2357 : :
2358 : 1768 : if (!recursive_inlining (edge,
2359 : 1768 : opt_for_fn (edge->caller->decl,
2360 : : flag_indirect_inlining)
2361 : : ? &new_indirect_edges : NULL))
2362 : : {
2363 : 314 : edge->inline_failed = CIF_RECURSIVE_INLINING;
2364 : 314 : resolve_noninline_speculation (&edge_heap, edge);
2365 : 314 : continue;
2366 : : }
2367 : 1454 : reset_edge_caches (where);
2368 : : /* Recursive inliner inlines all recursive calls of the function
2369 : : at once. Consequently we need to update all callee keys. */
2370 : 1454 : if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2371 : 1429 : add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2372 : 1454 : update_callee_keys (&edge_heap, where, where, updated_nodes);
2373 : 1454 : bitmap_clear (updated_nodes);
2374 : : }
2375 : : else
2376 : : {
2377 : 760882 : struct cgraph_node *outer_node = NULL;
2378 : 760882 : int depth = 0;
2379 : :
2380 : : /* Consider the case where a self-recursive function A is inlined
2381 : : into B. This is a desired optimization in some cases, since it
2382 : : leads to an effect similar to loop peeling and we might completely
2383 : : optimize out the recursive call. However we must be extra
2384 : : selective. */
2385 : :
2386 : 760882 : where = edge->caller;
2387 : 1128375 : while (where->inlined_to)
2388 : : {
2389 : 367493 : if (where->decl == callee->decl)
2390 : 7260 : outer_node = where, depth++;
2391 : 367493 : where = where->callers->caller;
2392 : : }
2393 : 762416 : if (outer_node
2394 : 760882 : && !want_inline_self_recursive_call_p (edge, outer_node,
2395 : : true, depth))
2396 : : {
2397 : 1534 : edge->inline_failed
2398 : 1534 : = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2399 : 1534 : ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2400 : 1534 : resolve_noninline_speculation (&edge_heap, edge);
2401 : 1534 : continue;
2402 : : }
2403 : 759348 : else if (depth && dump_file)
2404 : 6 : fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2405 : :
2406 : 759348 : gcc_checking_assert (!callee->inlined_to);
2407 : :
2408 : 759348 : int old_size = ipa_size_summaries->get (where)->size;
2409 : 759348 : sreal old_time = ipa_fn_summaries->get (where)->time;
2410 : :
2411 : 759348 : inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2412 : 759348 : reset_edge_caches (edge->callee);
2413 : 759348 : add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2414 : :
2415 : : /* If caller's size and time increased we do not need to update
2416 : : all edges because badness is not going to decrease. */
2417 : 759348 : if (old_size <= ipa_size_summaries->get (where)->size
2418 : 712327 : && old_time <= ipa_fn_summaries->get (where)->time
2419 : : /* Wrapper penalty may be non-monotonic in this respect.
2420 : : Fortunately it only affects small functions. */
2421 : 1262737 : && !wrapper_heuristics_may_apply (where, old_size))
2422 : 365391 : update_callee_keys (&edge_heap, edge->callee, edge->callee,
2423 : : updated_nodes);
2424 : : else
2425 : 393957 : update_callee_keys (&edge_heap, where,
2426 : : edge->callee,
2427 : : updated_nodes);
2428 : : }
2429 : 760802 : where = edge->caller;
2430 : 760802 : if (where->inlined_to)
2431 : 193416 : where = where->inlined_to;
2432 : :
2433 : : /* Our profitability metric can depend on local properties
2434 : : such as the number of inlinable calls and the size of the function body.
2435 : : After inlining these properties might change for the function we
2436 : : inlined into (since its body size changed) and for the functions
2437 : : called by the function we inlined (since the number of their inlinable
2438 : : callers might change). */
2439 : 760802 : update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2440 : : /* Offline copy count has possibly changed, recompute if profile is
2441 : : available. */
2442 : 760802 : struct cgraph_node *n
2443 : 760802 : = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2444 : 509318 : if (n != edge->callee && n->analyzed && !(n->count == old_count)
2445 : 760839 : && n->count.ipa_p ())
2446 : 37 : update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2447 : 760802 : bitmap_clear (updated_nodes);
2448 : :
2449 : 760802 : if (dump_enabled_p ())
2450 : : {
2451 : 463 : ipa_fn_summary *s = ipa_fn_summaries->get (where);
2452 : :
2453 : : /* dump_printf can't handle %+i. */
2454 : 463 : char buf_net_change[100];
2455 : 463 : snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2456 : : overall_size - old_size);
2457 : :
2458 : 926 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2459 : : " Inlined %C into %C which now has time %f and "
2460 : : "size %i, net change of %s%s.\n",
2461 : : edge->callee, edge->caller,
2462 : : s->time.to_double (),
2463 : 463 : ipa_size_summaries->get (edge->caller)->size,
2464 : : buf_net_change,
2465 : 463 : cross_module_call_p (edge)
2466 : : ? " (cross module)" : "");
2467 : : }
2468 : 760802 : if (min_size > overall_size)
2469 : : {
2470 : 187983 : min_size = overall_size;
2471 : :
2472 : 187983 : if (dump_file)
2473 : 332 : fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2474 : : }
2475 : : }
2476 : :
2477 : 226684 : free_growth_caches ();
2478 : 226684 : if (dump_enabled_p ())
2479 : 434 : dump_printf (MSG_NOTE,
2480 : : "Unit growth for small function inlining: %i->%i (%i%%)\n",
2481 : : initial_size, overall_size,
2482 : 193 : initial_size ? overall_size * 100 / (initial_size) - 100 : 0);
2483 : 226684 : symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2484 : 226684 : }
2485 : :
2486 : : /* Flatten NODE. Performed both during early inlining and
2487 : : at IPA inlining time. */
2488 : :
2489 : : static void
2490 : 703 : flatten_function (struct cgraph_node *node, bool early, bool update)
2491 : : {
2492 : 703 : struct cgraph_edge *e;
2493 : :
2494 : : /* We shouldn't be called recursively when we are being processed. */
2495 : 703 : gcc_assert (node->aux == NULL);
2496 : :
2497 : 703 : node->aux = (void *) node;
2498 : :
2499 : 1624 : for (e = node->callees; e; e = e->next_callee)
2500 : : {
2501 : 921 : struct cgraph_node *orig_callee;
2502 : 921 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2503 : :
2504 : : /* We've hit cycle? It is time to give up. */
2505 : : /* We've hit a cycle? It is time to give up. */
2506 : : {
2507 : 15 : if (dump_enabled_p ())
2508 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2509 : : "Not inlining %C into %C to avoid cycle.\n",
2510 : : callee, e->caller);
2511 : 15 : if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2512 : 15 : e->inline_failed = CIF_RECURSIVE_INLINING;
2513 : 15 : continue;
2514 : : }
2515 : :
2516 : : /* When the edge is already inlined, we just need to recurse into
2517 : : it in order to fully flatten the leaves. */
2518 : 906 : if (!e->inline_failed)
2519 : : {
2520 : 350 : flatten_function (callee, early, false);
2521 : 350 : continue;
2522 : : }
2523 : :
2524 : : /* The flatten attribute needs to be processed during late inlining. For
2525 : : extra code quality, however, we do flattening during early optimization,
2526 : : too. */
2527 : 311 : if (!early
2528 : 556 : ? !can_inline_edge_p (e, true)
2529 : 245 : && !can_inline_edge_by_limits_p (e, CAN_INLINE_REPORT)
2530 : 311 : : !can_early_inline_edge_p (e))
2531 : 419 : continue;
2532 : :
2533 : 137 : if (e->recursive_p ())
2534 : : {
2535 : 0 : if (dump_enabled_p ())
2536 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2537 : : "Not inlining: recursive call.\n");
2538 : 0 : continue;
2539 : : }
2540 : :
2541 : 137 : if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2542 : 274 : != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2543 : : {
2544 : 4 : if (dump_enabled_p ())
2545 : 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2546 : : "Not inlining: SSA form does not match.\n");
2547 : 4 : continue;
2548 : : }
2549 : :
2550 : : /* Inline the edge and flatten the inline clone. Avoid
2551 : : recursing through the original node if the node was cloned. */
2552 : 133 : if (dump_enabled_p ())
2553 : 3 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2554 : : " Inlining %C into %C.\n",
2555 : : callee, e->caller);
2556 : 133 : orig_callee = callee;
2557 : 133 : inline_call (e, true, NULL, NULL, false);
2558 : 133 : if (e->callee != orig_callee)
2559 : 94 : orig_callee->aux = (void *) node;
2560 : 133 : flatten_function (e->callee, early, false);
2561 : 133 : if (e->callee != orig_callee)
2562 : 94 : orig_callee->aux = NULL;
2563 : : }
2564 : :
2565 : 703 : node->aux = NULL;
2566 : 703 : cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2567 : 703 : if (update && opt_for_fn (where->decl, optimize))
2568 : 209 : ipa_update_overall_fn_summary (where);
2569 : 703 : }
2570 : :
2571 : : /* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases.
2572 : : DATA points to the number of calls originally found, so we avoid infinite
2573 : : recursion. */
2574 : :
2575 : : static bool
2576 : 28277 : inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2577 : : hash_set<cgraph_node *> *callers)
2578 : : {
2579 : 28277 : int *num_calls = (int *)data;
2580 : 28277 : bool callee_removed = false;
2581 : :
2582 : 59648 : while (node->callers && !node->inlined_to)
2583 : : {
2584 : 32340 : struct cgraph_node *caller = node->callers->caller;
2585 : :
2586 : 32340 : if (!can_inline_edge_p (node->callers, true)
2587 : 32340 : || !can_inline_edge_by_limits_p (node->callers, CAN_INLINE_REPORT)
2588 : 64680 : || node->callers->recursive_p ())
2589 : : {
2590 : 0 : if (dump_file)
2591 : 0 : fprintf (dump_file, "Uninlinable call found; giving up.\n");
2592 : 0 : *num_calls = 0;
2593 : 0 : return false;
2594 : : }
2595 : :
2596 : 32340 : if (dump_file)
2597 : : {
2598 : 4 : cgraph_node *ultimate = node->ultimate_alias_target ();
2599 : 4 : fprintf (dump_file,
2600 : : "\nInlining %s size %i.\n",
2601 : : ultimate->dump_name (),
2602 : 4 : ipa_size_summaries->get (ultimate)->size);
2603 : 4 : fprintf (dump_file,
2604 : : " Called once from %s %i insns.\n",
2605 : : node->callers->caller->dump_name (),
2606 : 4 : ipa_size_summaries->get (node->callers->caller)->size);
2607 : : }
2608 : :
2609 : : /* Remember which callers we inlined to, delaying updating the
2610 : : overall summary. */
2611 : 32340 : callers->add (node->callers->caller);
2612 : 32340 : inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2613 : 32340 : if (dump_file)
2614 : 4 : fprintf (dump_file,
2615 : : " Inlined into %s which now has %i size\n",
2616 : : caller->dump_name (),
2617 : 4 : ipa_size_summaries->get (caller)->size);
2618 : 32340 : if (!(*num_calls)--)
2619 : : {
2620 : 0 : if (dump_file)
2621 : 0 : fprintf (dump_file, "New calls found; giving up.\n");
2622 : 0 : return callee_removed;
2623 : : }
2624 : 32340 : if (callee_removed)
2625 : : return true;
2626 : : }
2627 : : return false;
2628 : : }
2629 : :
2630 : : /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2631 : : update. */
2632 : :
2633 : : static bool
2634 : 28277 : inline_to_all_callers (struct cgraph_node *node, void *data)
2635 : : {
2636 : 28277 : hash_set<cgraph_node *> callers;
2637 : 28277 : bool res = inline_to_all_callers_1 (node, data, &callers);
2638 : : /* Perform the delayed update of the overall summary of all callers
2639 : : processed. This avoids quadratic behavior in the cases where
2640 : : we have a lot of calls to the same function. */
2641 : 55455 : for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2642 : 82633 : i != callers.end (); ++i)
2643 : 27178 : ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2644 : 28277 : return res;
2645 : 28277 : }
2646 : :
2647 : : /* Output overall time estimate. */
2648 : : static void
2649 : 356 : dump_overall_stats (void)
2650 : : {
2651 : 356 : sreal sum_weighted = 0, sum = 0;
2652 : 356 : struct cgraph_node *node;
2653 : :
2654 : 2234 : FOR_EACH_DEFINED_FUNCTION (node)
2655 : 1878 : if (!node->inlined_to
2656 : 1376 : && !node->alias)
2657 : : {
2658 : 1274 : ipa_fn_summary *s = ipa_fn_summaries->get (node);
2659 : 1274 : if (s != NULL)
2660 : : {
2661 : 1170 : sum += s->time;
2662 : 1170 : if (node->count.ipa ().initialized_p ())
2663 : 14 : sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2664 : : }
2665 : : }
2666 : 356 : fprintf (dump_file, "Overall time estimate: "
2667 : : "%f weighted by profile: "
2668 : : "%f\n", sum.to_double (), sum_weighted.to_double ());
2669 : 356 : }
2670 : :
2671 : : /* Output some useful stats about inlining. */
2672 : :
2673 : : static void
2674 : 178 : dump_inline_stats (void)
2675 : : {
2676 : 178 : int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2677 : 178 : int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2678 : 178 : int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2679 : 178 : int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2680 : 178 : int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2681 : 178 : int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2682 : 178 : int64_t reason[CIF_N_REASONS][2];
2683 : 5518 : sreal reason_freq[CIF_N_REASONS];
2684 : 178 : int i;
2685 : 178 : struct cgraph_node *node;
2686 : :
2687 : 178 : memset (reason, 0, sizeof (reason));
2688 : 5518 : for (i=0; i < CIF_N_REASONS; i++)
2689 : 5340 : reason_freq[i] = 0;
2690 : 1185 : FOR_EACH_DEFINED_FUNCTION (node)
2691 : : {
2692 : 1007 : struct cgraph_edge *e;
2693 : 5839 : for (e = node->callees; e; e = e->next_callee)
2694 : : {
2695 : 4832 : if (e->inline_failed)
2696 : : {
2697 : 4333 : if (e->count.ipa ().initialized_p ())
2698 : 2611 : reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2699 : 4333 : reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2700 : 4333 : reason[(int) e->inline_failed][1] ++;
2701 : 4333 : if (DECL_VIRTUAL_P (e->callee->decl)
2702 : 4333 : && e->count.ipa ().initialized_p ())
2703 : : {
2704 : 0 : if (e->indirect_inlining_edge)
2705 : 0 : noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2706 : : else
2707 : 0 : noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2708 : : }
2709 : 4333 : else if (e->count.ipa ().initialized_p ())
2710 : : {
2711 : 2611 : if (e->indirect_inlining_edge)
2712 : 0 : noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2713 : : else
2714 : 2611 : noninlined_cnt += e->count.ipa ().to_gcov_type ();
2715 : : }
2716 : : }
2717 : 499 : else if (e->count.ipa ().initialized_p ())
2718 : : {
2719 : 0 : if (e->speculative)
2720 : : {
2721 : 0 : if (DECL_VIRTUAL_P (e->callee->decl))
2722 : 0 : inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2723 : : else
2724 : 0 : inlined_speculative += e->count.ipa ().to_gcov_type ();
2725 : : }
2726 : 0 : else if (DECL_VIRTUAL_P (e->callee->decl))
2727 : : {
2728 : 0 : if (e->indirect_inlining_edge)
2729 : 0 : inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2730 : : else
2731 : 0 : inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2732 : : }
2733 : : else
2734 : : {
2735 : 0 : if (e->indirect_inlining_edge)
2736 : 0 : inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2737 : : else
2738 : 0 : inlined_cnt += e->count.ipa ().to_gcov_type ();
2739 : : }
2740 : : }
2741 : : }
2742 : 1100 : for (e = node->indirect_calls; e; e = e->next_callee)
2743 : 93 : if (e->indirect_info->polymorphic
2744 : 93 : & e->count.ipa ().initialized_p ())
2745 : 0 : indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2746 : 93 : else if (e->count.ipa ().initialized_p ())
2747 : 0 : indirect_cnt += e->count.ipa ().to_gcov_type ();
2748 : : }
2749 : 178 : if (max_count.initialized_p ())
2750 : : {
2751 : 0 : fprintf (dump_file,
2752 : : "Inlined %" PRId64 " + speculative "
2753 : : "%" PRId64 " + speculative polymorphic "
2754 : : "%" PRId64 " + previously indirect "
2755 : : "%" PRId64 " + virtual "
2756 : : "%" PRId64 " + virtual and previously indirect "
2757 : : "%" PRId64 "\n" "Not inlined "
2758 : : "%" PRId64 " + previously indirect "
2759 : : "%" PRId64 " + virtual "
2760 : : "%" PRId64 " + virtual and previously indirect "
2761 : : "%" PRId64 " + still indirect "
2762 : : "%" PRId64 " + still indirect polymorphic "
2763 : : "%" PRId64 "\n", inlined_cnt,
2764 : : inlined_speculative, inlined_speculative_ply,
2765 : : inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2766 : : noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2767 : : noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2768 : 0 : fprintf (dump_file, "Removed speculations ");
2769 : 0 : spec_rem.dump (dump_file);
2770 : 0 : fprintf (dump_file, "\n");
2771 : : }
2772 : 178 : dump_overall_stats ();
2773 : 178 : fprintf (dump_file, "\nWhy inlining failed?\n");
2774 : 5696 : for (i = 0; i < CIF_N_REASONS; i++)
2775 : 5340 : if (reason[i][1])
2776 : 149 : fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2777 : : cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2778 : : (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2779 : 178 : }
2780 : :
2781 : : /* Called when a node is removed. */
2782 : :
2783 : : static void
2784 : 0 : flatten_remove_node_hook (struct cgraph_node *node, void *data)
2785 : : {
2786 : 0 : if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2787 : : return;
2788 : :
2789 : 0 : hash_set<struct cgraph_node *> *removed
2790 : : = (hash_set<struct cgraph_node *> *) data;
2791 : 0 : removed->add (node);
2792 : : }
2793 : :
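/* Editor's note: an illustrative sketch, not part of ipa-inline.cc, of the
   user-level "flatten" attribute that this machinery looks up via
   lookup_attribute below.  When this translation unit is compiled with
   optimization, flatten_function recursively inlines the whole call tree of
   render_frame into its body (where inline limits allow).  All function
   names here are hypothetical.  */

static int blend (int a, int b) { return (a + b) / 2; }
static int shade (int x)        { return blend (x, x * 2); }

__attribute__ ((flatten))
int render_frame (int x)
{
  /* Both shade and the nested call to blend are candidates for recursive
     inlining because of the flatten attribute on render_frame.  */
  return shade (x) + blend (x, 1);
}
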
2794 : : /* Decide on the inlining. We do so in topological order to avoid the
2795 : : expense of updating data structures. */
2796 : :
2797 : : static unsigned int
2798 : 226684 : ipa_inline (void)
2799 : : {
2800 : 226684 : struct cgraph_node *node;
2801 : 226684 : int nnodes;
2802 : 226684 : struct cgraph_node **order;
2803 : 226684 : int i, j;
2804 : 226684 : int cold;
2805 : 226684 : bool remove_functions = false;
2806 : :
2807 : 226684 : order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2808 : :
2809 : 226684 : if (dump_file)
2810 : 178 : ipa_dump_fn_summaries (dump_file);
2811 : :
2812 : 226684 : nnodes = ipa_reverse_postorder (order);
2813 : 226684 : spec_rem = profile_count::zero ();
2814 : :
2815 : 7527674 : FOR_EACH_FUNCTION (node)
2816 : : {
2817 : 3537153 : node->aux = 0;
2818 : :
2819 : : /* Recompute the default reasons for inlining because they may have
2820 : : changed during merging. */
2821 : 3537153 : if (in_lto_p)
2822 : : {
2823 : 429614 : for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2824 : : {
2825 : 326969 : gcc_assert (e->inline_failed);
2826 : 326969 : initialize_inline_failed (e);
2827 : : }
2828 : 103853 : for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2829 : 1208 : initialize_inline_failed (e);
2830 : : }
2831 : : }
2832 : :
2833 : 226684 : if (dump_file)
2834 : 178 : fprintf (dump_file, "\nFlattening functions:\n");
2835 : :
2836 : : /* First shrink the order array so that it only contains nodes with the
2837 : : flatten attribute. */
2838 : 3763837 : for (i = nnodes - 1, j = i; i >= 0; i--)
2839 : : {
2840 : 3537153 : node = order[i];
2841 : 3537153 : if (node->definition
2842 : : /* Do not try to flatten aliases. These may happen for example when
2843 : : creating local aliases. */
2844 : 3537153 : && !node->alias
2845 : 5253597 : && lookup_attribute ("flatten",
2846 : 1716444 : DECL_ATTRIBUTES (node->decl)) != NULL)
2847 : 85 : order[j--] = order[i];
2848 : : }
2849 : :
2850 : : /* After the above loop, order[j + 1] ... order[nnodes - 1] contain the
2851 : : nodes with the flatten attribute. If there is more than one such
2852 : : node, we need to register a node removal hook, as flatten_function
2853 : : could remove other nodes with the flatten attribute. See PR82801. */
2854 : 226684 : struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2855 : 226684 : hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2856 : 226684 : if (j < nnodes - 2)
2857 : : {
2858 : 15 : flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2859 : 15 : node_removal_hook_holder
2860 : 15 : = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2861 : : flatten_removed_nodes);
2862 : : }
2863 : :
2864 : : /* In the first pass, handle functions to be flattened. Do this with
2865 : : priority so that none of our later choices will make this impossible. */
2866 : 226769 : for (i = nnodes - 1; i > j; i--)
2867 : : {
2868 : 85 : node = order[i];
2869 : 85 : if (flatten_removed_nodes
2870 : 85 : && flatten_removed_nodes->contains (node))
2871 : 0 : continue;
2872 : :
2873 : : /* Handle nodes to be flattened.
2874 : : Ideally, when processing callees we would stop inlining at the
2875 : : entry of cycles, possibly cloning that entry point and
2876 : : trying to flatten it, turning it into a self-recursive
2877 : : function. */
2878 : 85 : if (dump_file)
2879 : 4 : fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2880 : 85 : flatten_function (node, false, true);
2881 : : }
2882 : :
2883 : 226684 : if (j < nnodes - 2)
2884 : : {
2885 : 15 : symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2886 : 30 : delete flatten_removed_nodes;
2887 : : }
2888 : 226684 : free (order);
2889 : :
2890 : 226684 : if (dump_file)
2891 : 178 : dump_overall_stats ();
2892 : :
2893 : 226684 : inline_small_functions ();
2894 : :
2895 : 226684 : gcc_assert (symtab->state == IPA_SSA);
2896 : 226684 : symtab->state = IPA_SSA_AFTER_INLINING;
2897 : : /* Do the first after-inlining removal. We want to remove all "stale" extern
2898 : : inline functions and virtual functions so that we really know what is
2899 : : called once. */
2900 : 226684 : symtab->remove_unreachable_nodes (dump_file);
2901 : :
2902 : : /* Inline functions that have the property that, after inlining into all
2903 : : callers, the code size will shrink because the out-of-line copy is
2904 : : eliminated. We do this regardless of the callee size as long as function
2905 : : growth limits are met. */
2906 : 226684 : if (dump_file)
2907 : 178 : fprintf (dump_file,
2908 : : "\nDeciding on functions to be inlined into all callers and "
2909 : : "removing useless speculations:\n");
2910 : :
2911 : : /* Inlining one function called once has a good chance of preventing
2912 : : the inlining of another function into the same callee. Ideally we should
2913 : : work in priority order, but inlining hot functions first is probably
2914 : : a good cut without the extra pain of maintaining a queue.
2915 : :
2916 : : ??? This does not really fit the bill perfectly: inlining a function
2917 : : into a callee often leads to better optimization of the callee due to
2918 : : the increased context for optimization.
2919 : : For example, if main() calls a function that outputs help
2920 : : and then a function that does the main optimization, we should inline
2921 : : the second with priority even if both calls are cold by themselves.
2922 : :
2923 : : We probably want to implement a new predicate replacing our use of
2924 : : maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2925 : : to be hot. */
2926 : 680052 : for (cold = 0; cold <= 1; cold ++)
2927 : : {
2928 : 5692346 : FOR_EACH_DEFINED_FUNCTION (node)
2929 : : {
2930 : 5238978 : struct cgraph_edge *edge, *next;
2931 : 5238978 : bool update = false;
2932 : :
2933 : 5238978 : if (!opt_for_fn (node->decl, optimize)
2934 : 5238978 : || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2935 : 912070 : continue;
2936 : :
2937 : 17650628 : for (edge = node->callees; edge; edge = next)
2938 : : {
2939 : 13323720 : next = edge->next_callee;
2940 : 13323720 : if (edge->speculative && !speculation_useful_p (edge, false))
2941 : : {
2942 : 40 : if (edge->count.ipa ().initialized_p ())
2943 : 0 : spec_rem += edge->count.ipa ();
2944 : 40 : cgraph_edge::resolve_speculation (edge);
2945 : 40 : update = true;
2946 : 40 : remove_functions = true;
2947 : : }
2948 : : }
2949 : 4326908 : if (update)
2950 : : {
2951 : 80 : struct cgraph_node *where = node->inlined_to
2952 : 40 : ? node->inlined_to : node;
2953 : 40 : reset_edge_caches (where);
2954 : 40 : ipa_update_overall_fn_summary (where);
2955 : : }
2956 : 4326908 : if (want_inline_function_to_all_callers_p (node, cold))
2957 : : {
2958 : 25189 : int num_calls = 0;
2959 : 25189 : node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2960 : : true);
2961 : 25189 : while (node->call_for_symbol_and_aliases
2962 : 26158 : (inline_to_all_callers, &num_calls, true))
2963 : : ;
2964 : 25189 : remove_functions = true;
2965 : : }
2966 : : }
2967 : : }
2968 : :
2969 : 226684 : if (dump_enabled_p ())
2970 : 241 : dump_printf (MSG_NOTE,
2971 : : "\nInlined %i calls, eliminated %i functions\n\n",
2972 : : ncalls_inlined, nfunctions_inlined);
2973 : 226684 : if (dump_file)
2974 : 178 : dump_inline_stats ();
2975 : :
2976 : 226684 : if (dump_file)
2977 : 178 : ipa_dump_fn_summaries (dump_file);
2978 : 226684 : return remove_functions ? TODO_remove_functions : 0;
2979 : : }
2980 : :
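/* Editor's note: a hypothetical sketch, not part of ipa-inline.cc,
   illustrating the "inline into all callers" decision made by ipa_inline
   above.  parse_header is static and has a single caller, so after inlining
   it into read_file the out-of-line copy can be removed and code size
   shrinks regardless of the callee's own size (subject to the growth
   limits).  This assumes -finline-functions-called-once is in effect, as it
   is at typical optimization levels; all names are made up.  */

static int parse_header (const char *buf)
{
  int sum = 0;
  for (int i = 0; buf[i]; i++)
    sum += buf[i];
  return sum;
}

int read_file (const char *buf)
{
  /* The only call to parse_header in this translation unit.  */
  return parse_header (buf);
}
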
2981 : : /* Inline always-inline function calls in NODE
2982 : : (which itself is possibly inline). */
2983 : :
2984 : : static bool
2985 : 3163746 : inline_always_inline_functions (struct cgraph_node *node)
2986 : : {
2987 : 3163746 : struct cgraph_edge *e;
2988 : 3163746 : bool inlined = false;
2989 : :
2990 : 12363993 : for (e = node->callees; e; e = e->next_callee)
2991 : : {
2992 : 9200247 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2993 : 9200247 : gcc_checking_assert (!callee->aux || callee->aux == (void *)(size_t)1);
2994 : 9200247 : if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
2995 : : /* Watch for self-recursive cycles. */
2996 : 9200247 : || callee->aux)
2997 : 8728035 : continue;
2998 : :
2999 : 472212 : if (e->recursive_p ())
3000 : : {
3001 : 6 : if (dump_enabled_p ())
3002 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
3003 : : " Not inlining recursive call to %C.\n",
3004 : : e->callee);
3005 : 6 : e->inline_failed = CIF_RECURSIVE_INLINING;
3006 : 6 : continue;
3007 : : }
3008 : 472206 : if (callee->definition
3009 : 472177 : && !ipa_fn_summaries->get (callee))
3010 : 221 : compute_fn_summary (callee, true);
3011 : :
3012 : 472206 : if (!can_early_inline_edge_p (e))
3013 : : {
3014 : : /* Set inlined to true if the callee is marked "always_inline" but
3015 : : is not inlinable. This will allow flagging an error later in
3016 : : expand_call_inline in tree-inline.cc. */
3017 : 351 : if (lookup_attribute ("always_inline",
3018 : 351 : DECL_ATTRIBUTES (callee->decl)) != NULL)
3019 : 27 : inlined = true;
3020 : 351 : continue;
3021 : : }
3022 : :
3023 : 471855 : if (dump_enabled_p ())
3024 : 11 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
3025 : : " Inlining %C into %C (always_inline).\n",
3026 : : e->callee, e->caller);
3027 : 471855 : inline_call (e, true, NULL, NULL, false);
3028 : 471855 : callee->aux = (void *)(size_t)1;
3029 : : /* Inline recursively to handle the case where the always_inline function was
3030 : : not optimized yet because it is part of a cycle in the callgraph. */
3031 : 471855 : inline_always_inline_functions (e->callee);
3032 : 471855 : callee->aux = NULL;
3033 : 471855 : inlined = true;
3034 : : }
3035 : 3163746 : return inlined;
3036 : : }
3037 : :
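/* Editor's note: a hypothetical sketch, not part of ipa-inline.cc, of user
   code handled by inline_always_inline_functions above.  clamp is inlined
   into its caller even at -O0 because DECL_DISREGARD_INLINE_LIMITS is set
   for always_inline functions.  (A self-recursive always_inline call would
   instead be rejected with CIF_RECURSIVE_INLINING and diagnosed later in
   expand_call_inline.)  All names are made up for illustration.  */

static inline __attribute__ ((always_inline))
int clamp (int v, int lo, int hi)
{
  return v < lo ? lo : (v > hi ? hi : v);
}

int saturate (int v)
{
  /* This call is inlined regardless of optimization level or inline
     limits.  */
  return clamp (v, 0, 255);
}
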
3038 : : /* Decide on the inlining. We do so in topological order to avoid the
3039 : : expense of updating data structures. */
3040 : :
3041 : : static bool
3042 : 2231145 : early_inline_small_functions (struct cgraph_node *node)
3043 : : {
3044 : 2231145 : struct cgraph_edge *e;
3045 : 2231145 : bool inlined = false;
3046 : :
3047 : 9538743 : for (e = node->callees; e; e = e->next_callee)
3048 : : {
3049 : 7307598 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
3050 : :
3051 : : /* We can encounter a not-yet-analyzed function during
3052 : : early inlining on callgraphs with strongly
3053 : : connected components. */
3054 : 7307598 : ipa_fn_summary *s = ipa_fn_summaries->get (callee);
3055 : 7307598 : if (s == NULL || !s->inlinable || !e->inline_failed)
3056 : 4035176 : continue;
3057 : :
3058 : : /* Do not consider functions not declared inline unless -finline-small-functions or -finline-functions is in effect. */
3059 : 3272422 : if (!DECL_DECLARED_INLINE_P (callee->decl)
3060 : 839327 : && !opt_for_fn (node->decl, flag_inline_small_functions)
3061 : 3317781 : && !opt_for_fn (node->decl, flag_inline_functions))
3062 : 45193 : continue;
3063 : :
3064 : 3227229 : if (dump_enabled_p ())
3065 : 155 : dump_printf_loc (MSG_NOTE, e->call_stmt,
3066 : : "Considering inline candidate %C.\n",
3067 : : callee);
3068 : :
3069 : 3227229 : if (!can_early_inline_edge_p (e))
3070 : 86033 : continue;
3071 : :
3072 : 3141196 : if (e->recursive_p ())
3073 : : {
3074 : 7693 : if (dump_enabled_p ())
3075 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
3076 : : " Not inlining: recursive call.\n");
3077 : 7693 : continue;
3078 : : }
3079 : :
3080 : 3133503 : if (!want_early_inline_function_p (e))
3081 : 963000 : continue;
3082 : :
3083 : 2170503 : if (dump_enabled_p ())
3084 : 102 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
3085 : : " Inlining %C into %C.\n",
3086 : : callee, e->caller);
3087 : 2170503 : inline_call (e, true, NULL, NULL, false);
3088 : 2170503 : inlined = true;
3089 : : }
3090 : :
3091 : 2231145 : if (inlined)
3092 : 749597 : ipa_update_overall_fn_summary (node);
3093 : :
3094 : 2231145 : return inlined;
3095 : : }
3096 : :
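/* Editor's note: a hypothetical sketch, not part of ipa-inline.cc,
   illustrating the kind of call early_inline_small_functions above is meant
   to remove.  point::x () and point::y () are tiny, implicitly inline
   members, so want_early_inline_function_p normally accepts them and the
   accessor calls disappear before the IPA passes run.  All names are made
   up for illustration.  */

struct point
{
  int x_, y_;
  int x () const { return x_; }   /* Implicitly inline member.  */
  int y () const { return y_; }
};

int manhattan (const point &p)
{
  /* After early inlining this is effectively p.x_ + p.y_.  */
  return p.x () + p.y ();
}
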
3097 : : unsigned int
3098 : 2691905 : early_inliner (function *fun)
3099 : : {
3100 : 2691905 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
3101 : 2691905 : struct cgraph_edge *edge;
3102 : 2691905 : unsigned int todo = 0;
3103 : 2691905 : int iterations = 0;
3104 : 2691905 : bool inlined = false;
3105 : :
3106 : 2691905 : if (seen_error ())
3107 : : return 0;
3108 : :
3109 : : /* Do nothing if data structures for the IPA inliner are already computed.
3110 : : This happens when some pass decides to construct a new function and
3111 : : cgraph_add_new_function calls lowering passes and early optimization on
3112 : : it. This may confuse us when the early inliner decides to inline a call
3113 : : to a function clone, because function clones don't have parameter lists
3114 : : in ipa-prop matching their signatures. */
3115 : 2691899 : if (ipa_node_params_sum)
3116 : : return 0;
3117 : :
3118 : 2691891 : if (flag_checking)
3119 : 2691861 : node->verify ();
3120 : 2691891 : node->remove_all_references ();
3121 : :
3122 : : /* Even when not optimizing or not inlining, inline always-inline
3123 : : functions. */
3124 : 2691891 : inlined = inline_always_inline_functions (node);
3125 : :
3126 : 2691891 : if (!optimize
3127 : 2254354 : || flag_no_inline
3128 : 2232674 : || !flag_early_inlining)
3129 : : ;
3130 : 2231186 : else if (lookup_attribute ("flatten",
3131 : 2231186 : DECL_ATTRIBUTES (node->decl)) != NULL)
3132 : : {
3133 : : /* When the function is marked to be flattened, recursively inline
3134 : : all calls in it. */
3135 : 135 : if (dump_enabled_p ())
3136 : 0 : dump_printf (MSG_OPTIMIZED_LOCATIONS,
3137 : : "Flattening %C\n", node);
3138 : 135 : flatten_function (node, true, true);
3139 : 135 : inlined = true;
3140 : : }
3141 : : else
3142 : : {
3143 : : /* If some always_inline functions were inlined, apply the changes.
3144 : : This way we will not account always-inline functions in the growth limits
3145 : : and, moreover, we will inline calls from always-inline functions that we
3146 : : skipped previously because of the conditional in can_early_inline_edge_p
3147 : : which prevents some inlining into always_inline functions. */
3148 : 2231051 : if (inlined)
3149 : : {
3150 : 245077 : timevar_push (TV_INTEGRATION);
3151 : 245077 : todo |= optimize_inline_calls (current_function_decl);
3152 : : /* The optimize_inline_calls call above might have introduced new
3153 : : statements that don't have inline parameters computed. */
3154 : 1179404 : for (edge = node->callees; edge; edge = edge->next_callee)
3155 : : {
3156 : : /* We can encounter a not-yet-analyzed function during
3157 : : early inlining on callgraphs with strongly
3158 : : connected components. */
3159 : 934327 : ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3160 : 934327 : es->call_stmt_size
3161 : 934327 : = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3162 : 934327 : es->call_stmt_time
3163 : 934327 : = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3164 : : }
3165 : 245077 : ipa_update_overall_fn_summary (node);
3166 : 245077 : inlined = false;
3167 : 245077 : timevar_pop (TV_INTEGRATION);
3168 : : }
3169 : : /* We iterate incremental inlining to get trivial cases of indirect
3170 : : inlining. */
3171 : 5961296 : while (iterations < opt_for_fn (node->decl,
3172 : : param_early_inliner_max_iterations)
3173 : 2980648 : && early_inline_small_functions (node))
3174 : : {
3175 : 749597 : timevar_push (TV_INTEGRATION);
3176 : 749597 : todo |= optimize_inline_calls (current_function_decl);
3177 : :
3178 : : /* Technically we ought to recompute inline parameters so the new
3179 : : iteration of the early inliner works as expected. However, the values
3180 : : we have are approximately right, and thus we only need to update edge
3181 : : info that might be cleared out for newly discovered edges. */
3182 : 3064090 : for (edge = node->callees; edge; edge = edge->next_callee)
3183 : : {
3184 : : /* We have no summary for new bound store calls yet. */
3185 : 2314493 : ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3186 : 2314493 : es->call_stmt_size
3187 : 2314493 : = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3188 : 2314493 : es->call_stmt_time
3189 : 2314493 : = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3190 : : }
3191 : 749597 : if (iterations < opt_for_fn (node->decl,
3192 : 749597 : param_early_inliner_max_iterations) - 1)
3193 : 94 : ipa_update_overall_fn_summary (node);
3194 : 749597 : timevar_pop (TV_INTEGRATION);
3195 : 749597 : iterations++;
3196 : 749597 : inlined = false;
3197 : : }
3198 : 2231051 : if (dump_file)
3199 : 191 : fprintf (dump_file, "Iterations: %i\n", iterations);
3200 : : }
3201 : :
3202 : 2691891 : if (inlined)
3203 : : {
3204 : 22571 : timevar_push (TV_INTEGRATION);
3205 : 22571 : todo |= optimize_inline_calls (current_function_decl);
3206 : 22571 : timevar_pop (TV_INTEGRATION);
3207 : : }
3208 : :
3209 : 2691891 : fun->always_inline_functions_inlined = true;
3210 : :
3211 : 2691891 : return todo;
3212 : : }
3213 : :
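/* Editor's note: a hypothetical sketch, not part of ipa-inline.cc, of why
   early_inliner iterates (up to the early-inliner-max-iterations parameter).
   On the first iteration apply is inlined into twice_five; the following
   optimize_inline_calls turns the call through 'fn' into a direct call to
   double_it, which a later iteration (or the IPA inliner) can then inline in
   turn.  All names are made up for illustration.  */

static inline int double_it (int x)
{
  return 2 * x;
}

static inline int apply (int (*fn) (int), int x)
{
  return fn (x);   /* Indirect until apply is inlined with fn known.  */
}

int twice_five (void)
{
  return apply (double_it, 5);
}
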
3214 : : /* Do inlining of small functions. Doing so early helps profiling and other
3215 : : passes to be somewhat more effective and avoids some code duplication in
3216 : : the later real inlining pass for testcases with very many function calls. */
3217 : :
3218 : : namespace {
3219 : :
3220 : : const pass_data pass_data_early_inline =
3221 : : {
3222 : : GIMPLE_PASS, /* type */
3223 : : "einline", /* name */
3224 : : OPTGROUP_INLINE, /* optinfo_flags */
3225 : : TV_EARLY_INLINING, /* tv_id */
3226 : : PROP_ssa, /* properties_required */
3227 : : 0, /* properties_provided */
3228 : : 0, /* properties_destroyed */
3229 : : 0, /* todo_flags_start */
3230 : : 0, /* todo_flags_finish */
3231 : : };
3232 : :
3233 : : class pass_early_inline : public gimple_opt_pass
3234 : : {
3235 : : public:
3236 : 282866 : pass_early_inline (gcc::context *ctxt)
3237 : 565732 : : gimple_opt_pass (pass_data_early_inline, ctxt)
3238 : : {}
3239 : :
3240 : : /* opt_pass methods: */
3241 : : unsigned int execute (function *) final override;
3242 : :
3243 : : }; // class pass_early_inline
3244 : :
3245 : : unsigned int
3246 : 2691905 : pass_early_inline::execute (function *fun)
3247 : : {
3248 : 2691905 : return early_inliner (fun);
3249 : : }
3250 : :
3251 : : } // anon namespace
3252 : :
3253 : : gimple_opt_pass *
3254 : 282866 : make_pass_early_inline (gcc::context *ctxt)
3255 : : {
3256 : 282866 : return new pass_early_inline (ctxt);
3257 : : }
3258 : :
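/* Editor's note: a minimal, hypothetical GCC plugin fragment, not part of
   ipa-inline.cc, showing how an external pass could be scheduled relative to
   the early inliner by referencing the "einline" pass name registered above.
   Only the registration pattern is the point; the dummy pass and every
   my_-prefixed name are made up, and this is a sketch of the plugin API
   rather than a definitive implementation.  */

#include "gcc-plugin.h"
#include "plugin-version.h"
#include "context.h"
#include "tree-pass.h"

int plugin_is_GPL_compatible;

namespace {

const pass_data my_pass_data =
{
  GIMPLE_PASS, "my_after_einline", OPTGROUP_NONE, TV_NONE,
  PROP_ssa, 0, 0, 0, 0,
};

class my_pass : public gimple_opt_pass
{
public:
  my_pass (gcc::context *ctxt) : gimple_opt_pass (my_pass_data, ctxt) {}
  /* Placeholder body; a real pass would do its work here.  */
  unsigned int execute (function *) final override { return 0; }
};

} // anon namespace

int
plugin_init (struct plugin_name_args *plugin_info,
             struct plugin_gcc_version *version)
{
  if (!plugin_default_version_check (version, &gcc_version))
    return 1;

  struct register_pass_info pass_info;
  pass_info.pass = new my_pass (g);
  pass_info.reference_pass_name = "einline";
  pass_info.ref_pass_instance_number = 1;
  pass_info.pos_op = PASS_POS_INSERT_AFTER;

  register_callback (plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP,
                     NULL, &pass_info);
  return 0;
}
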
3259 : : namespace {
3260 : :
3261 : : const pass_data pass_data_ipa_inline =
3262 : : {
3263 : : IPA_PASS, /* type */
3264 : : "inline", /* name */
3265 : : OPTGROUP_INLINE, /* optinfo_flags */
3266 : : TV_IPA_INLINING, /* tv_id */
3267 : : 0, /* properties_required */
3268 : : 0, /* properties_provided */
3269 : : 0, /* properties_destroyed */
3270 : : 0, /* todo_flags_start */
3271 : : ( TODO_dump_symtab ), /* todo_flags_finish */
3272 : : };
3273 : :
3274 : : class pass_ipa_inline : public ipa_opt_pass_d
3275 : : {
3276 : : public:
3277 : 282866 : pass_ipa_inline (gcc::context *ctxt)
3278 : : : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3279 : : NULL, /* generate_summary */
3280 : : NULL, /* write_summary */
3281 : : NULL, /* read_summary */
3282 : : NULL, /* write_optimization_summary */
3283 : : NULL, /* read_optimization_summary */
3284 : : NULL, /* stmt_fixup */
3285 : : 0, /* function_transform_todo_flags_start */
3286 : : inline_transform, /* function_transform */
3287 : 282866 : NULL) /* variable_transform */
3288 : 282866 : {}
3289 : :
3290 : : /* opt_pass methods: */
3291 : 226684 : unsigned int execute (function *) final override { return ipa_inline (); }
3292 : :
3293 : : }; // class pass_ipa_inline
3294 : :
3295 : : } // anon namespace
3296 : :
3297 : : ipa_opt_pass_d *
3298 : 282866 : make_pass_ipa_inline (gcc::context *ctxt)
3299 : : {
3300 : 282866 : return new pass_ipa_inline (ctxt);
3301 : : }
|