Branch data Line data Source code
1 : : /* Inlining decision heuristics.
2 : : Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 : : Contributed by Jan Hubicka
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : /* Inlining decision heuristics
22 : :
23 : : The implementation of inliner is organized as follows:
24 : :
25 : : inlining heuristics limits
26 : :
27 : : can_inline_edge_p allows checking that a particular inlining is allowed
28 : : by the limits specified by user (allowed function growth, growth and so
29 : : on).
30 : :
31 : : Functions are inlined when it is obvious the result is profitable (such
32 : : as functions called once or when inlining reduce code size).
33 : : In addition to that we perform inlining of small functions and recursive
34 : : inlining.
35 : :
36 : : inlining heuristics
37 : :
38 : : The inliner itself is split into two passes:
39 : :
40 : : pass_early_inlining
41 : :
42 : : Simple local inlining pass inlining callees into current function.
43 : : This pass makes no use of whole unit analysis and thus it can do only
44 : : very simple decisions based on local properties.
45 : :
46 : : The strength of the pass is that it is run in topological order
47 : : (reverse postorder) on the callgraph. Functions are converted into SSA
48 : : form just before this pass and optimized subsequently. As a result, the
49 : : callees of the function seen by the early inliner were already optimized
50 : : and the results of early inlining add a lot of optimization opportunities
51 : : for the local optimization.
52 : :
53 : : The pass handles the obvious inlining decisions within the compilation
54 : : unit - inlining auto inline functions, inlining for size and
55 : : flattening.
56 : :
57 : : main strength of the pass is the ability to eliminate abstraction
58 : : penalty in C++ code (via combination of inlining and early
59 : : optimization) and thus improve quality of analysis done by real IPA
60 : : optimizers.
61 : :
62 : : Because of lack of whole unit knowledge, the pass cannot really make
63 : : good code size/performance tradeoffs. It however does very simple
64 : : speculative inlining allowing code size to grow by
65 : : EARLY_INLINING_INSNS when callee is leaf function. In this case the
66 : : optimizations performed later are very likely to eliminate the cost.
67 : :
68 : : pass_ipa_inline
69 : :
70 : : This is the real inliner able to handle inlining with whole program
71 : : knowledge. It performs following steps:
72 : :
73 : : 1) inlining of small functions. This is implemented by greedy
74 : : algorithm ordering all inlinable cgraph edges by their badness and
75 : : inlining them in this order as long as inline limits allows doing so.
76 : :
77 : : This heuristic is not very good at inlining recursive calls. Recursive
78 : : calls can be inlined with results similar to loop unrolling. To do so,
79 : : special purpose recursive inliner is executed on function when
80 : : recursive edge is met as viable candidate.
81 : :
82 : : 2) Unreachable functions are removed from callgraph. Inlining leads
83 : : to devirtualization and other modification of callgraph so functions
84 : : may become unreachable during the process. Also functions declared as
85 : : extern inline or virtual functions are removed, since after inlining
86 : : we no longer need the offline bodies.
87 : :
88 : : 3) Functions called once and not exported from the unit are inlined.
89 : : This should almost always lead to reduction of code size by eliminating
90 : : the need for offline copy of the function. */
91 : :
92 : : #include "config.h"
93 : : #include "system.h"
94 : : #include "coretypes.h"
95 : : #include "backend.h"
96 : : #include "target.h"
97 : : #include "rtl.h"
98 : : #include "tree.h"
99 : : #include "gimple.h"
100 : : #include "alloc-pool.h"
101 : : #include "tree-pass.h"
102 : : #include "gimple-ssa.h"
103 : : #include "cgraph.h"
104 : : #include "lto-streamer.h"
105 : : #include "trans-mem.h"
106 : : #include "calls.h"
107 : : #include "tree-inline.h"
108 : : #include "profile.h"
109 : : #include "symbol-summary.h"
110 : : #include "tree-vrp.h"
111 : : #include "sreal.h"
112 : : #include "ipa-cp.h"
113 : : #include "ipa-prop.h"
114 : : #include "ipa-fnsummary.h"
115 : : #include "ipa-inline.h"
116 : : #include "ipa-utils.h"
117 : : #include "auto-profile.h"
118 : : #include "builtins.h"
119 : : #include "fibonacci_heap.h"
120 : : #include "stringpool.h"
121 : : #include "attribs.h"
122 : : #include "asan.h"
123 : : #include "ipa-strub.h"
124 : :
125 : : /* Inliner uses greedy algorithm to inline calls in a priority order.
126 : : Badness is used as the key in a Fibonacci heap which roughly corresponds
127 : : to negation of benefit to cost ratios.
128 : : In case multiple calls has same priority we want to stabilize the outcomes
129 : : for which we use ids. */
130 : : class inline_badness
131 : : {
132 : : public:
133 : : sreal badness;
134 : : int uid;
135 : 955648 : inline_badness ()
136 : 727980 : : badness (sreal::min ()), uid (0)
137 : : {
138 : : }
139 : 3088330 : inline_badness (cgraph_edge *e, sreal b)
140 : 17069 : : badness (b), uid (e->get_uid ())
141 : : {
142 : : }
143 : 652727 : bool operator<= (const inline_badness &other)
144 : : {
145 : 652727 : if (badness != other.badness)
146 : 652727 : return badness <= other.badness;
147 : 0 : return uid <= other.uid;
148 : : }
149 : 727980 : bool operator== (const inline_badness &other)
150 : : {
151 : 1340572 : return badness == other.badness && uid == other.uid;
152 : : }
153 : 0 : bool operator!= (const inline_badness &other)
154 : : {
155 : 727980 : return badness != other.badness || uid != other.uid;
156 : : }
157 : 23747537 : bool operator< (const inline_badness &other)
158 : : {
159 : 23747537 : if (badness != other.badness)
160 : 19318664 : return badness < other.badness;
161 : 4428873 : return uid < other.uid;
162 : : }
163 : 10826625 : bool operator> (const inline_badness &other)
164 : : {
165 : 10826625 : if (badness != other.badness)
166 : 8642234 : return badness > other.badness;
167 : 2184391 : return uid > other.uid;
168 : : }
169 : : };
170 : :
171 : : typedef fibonacci_heap <inline_badness, cgraph_edge> edge_heap_t;
172 : : typedef fibonacci_node <inline_badness, cgraph_edge> edge_heap_node_t;
173 : :
174 : : /* Statistics we collect about inlining algorithm. */
175 : : static int overall_size;
176 : : static profile_count max_count;
177 : : static profile_count spec_rem;
178 : :
179 : : /* Return false when inlining edge E would lead to violating
180 : : limits on function unit growth or stack usage growth.
181 : :
182 : : The relative function body growth limit is present generally
183 : : to avoid problems with non-linear behavior of the compiler.
184 : : To allow inlining huge functions into tiny wrapper, the limit
185 : : is always based on the bigger of the two functions considered.
186 : :
187 : : For stack growth limits we always base the growth in stack usage
188 : : of the callers. We want to prevent applications from segfaulting
189 : : on stack overflow when functions with huge stack frames gets
190 : : inlined. */
191 : :
192 : : static bool
193 : 5537949 : caller_growth_limits (struct cgraph_edge *e)
194 : : {
195 : 5537949 : struct cgraph_node *to = e->caller;
196 : 5537949 : struct cgraph_node *what = e->callee->ultimate_alias_target ();
197 : 5537949 : int newsize;
198 : 5537949 : int limit = 0;
199 : 5537949 : HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
200 : 5537949 : ipa_size_summary *outer_info = ipa_size_summaries->get (to);
201 : :
202 : : /* Look for function e->caller is inlined to. While doing
203 : : so work out the largest function body on the way. As
204 : : described above, we want to base our function growth
205 : : limits based on that. Not on the self size of the
206 : : outer function, not on the self size of inline code
207 : : we immediately inline to. This is the most relaxed
208 : : interpretation of the rule "do not grow large functions
209 : : too much in order to prevent compiler from exploding". */
210 : 7884011 : while (true)
211 : : {
212 : 6710980 : ipa_size_summary *size_info = ipa_size_summaries->get (to);
213 : 6710980 : if (limit < size_info->self_size)
214 : : limit = size_info->self_size;
215 : 6710980 : if (stack_size_limit < size_info->estimated_self_stack_size)
216 : : stack_size_limit = size_info->estimated_self_stack_size;
217 : 6710980 : if (to->inlined_to)
218 : 1173031 : to = to->callers->caller;
219 : : else
220 : : break;
221 : 1173031 : }
222 : :
223 : 5537949 : ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
224 : 5537949 : ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
225 : :
226 : 5537949 : if (limit < what_size_info->self_size)
227 : : limit = what_size_info->self_size;
228 : :
229 : 5537949 : limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
230 : :
231 : : /* Check the size after inlining against the function limits. But allow
232 : : the function to shrink if it went over the limits by forced inlining. */
233 : 5537949 : newsize = estimate_size_after_inlining (to, e);
234 : 5537949 : if (newsize >= ipa_size_summaries->get (what)->size
235 : 5395555 : && newsize > opt_for_fn (to->decl, param_large_function_insns)
236 : 5775586 : && newsize > limit)
237 : : {
238 : 3369 : e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
239 : 3369 : return false;
240 : : }
241 : :
242 : 5534580 : if (!what_info->estimated_stack_size)
243 : : return true;
244 : :
245 : : /* FIXME: Stack size limit often prevents inlining in Fortran programs
246 : : due to large i/o datastructures used by the Fortran front-end.
247 : : We ought to ignore this limit when we know that the edge is executed
248 : : on every invocation of the caller (i.e. its call statement dominates
249 : : exit block). We do not track this information, yet. */
250 : 1616580 : stack_size_limit += ((gcov_type)stack_size_limit
251 : 808290 : * opt_for_fn (to->decl, param_stack_frame_growth)
252 : 808290 : / 100);
253 : :
254 : 808290 : inlined_stack = (ipa_get_stack_frame_offset (to)
255 : 808290 : + outer_info->estimated_self_stack_size
256 : 808290 : + what_info->estimated_stack_size);
257 : : /* Check new stack consumption with stack consumption at the place
258 : : stack is used. */
259 : 808290 : if (inlined_stack > stack_size_limit
260 : : /* If function already has large stack usage from sibling
261 : : inline call, we can inline, too.
262 : : This bit overoptimistically assume that we are good at stack
263 : : packing. */
264 : 250283 : && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
265 : 1048646 : && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
266 : : {
267 : 50761 : e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
268 : 50761 : return false;
269 : : }
270 : : return true;
271 : : }
272 : :
273 : : /* Dump info about why inlining has failed. */
274 : :
275 : : static void
276 : 4610310 : report_inline_failed_reason (struct cgraph_edge *e)
277 : : {
278 : 4610310 : if (dump_enabled_p ())
279 : : {
280 : 2462 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
281 : : " not inlinable: %C -> %C, %s\n",
282 : : e->caller, e->callee,
283 : : cgraph_inline_failed_string (e->inline_failed));
284 : 2462 : if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
285 : 2462 : || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
286 : 2 : && e->caller->lto_file_data
287 : 2462 : && e->callee->ultimate_alias_target ()->lto_file_data)
288 : : {
289 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
290 : : " LTO objects: %s, %s\n",
291 : 0 : e->caller->lto_file_data->file_name,
292 : 0 : e->callee->ultimate_alias_target ()->lto_file_data->file_name);
293 : : }
294 : 2462 : if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
295 : 2 : if (dump_file)
296 : 0 : cl_target_option_print_diff
297 : 0 : (dump_file, 2, target_opts_for_fn (e->caller->decl),
298 : 0 : target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
299 : 2462 : if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
300 : 0 : if (dump_file)
301 : 0 : cl_optimization_print_diff
302 : 0 : (dump_file, 2, opts_for_fn (e->caller->decl),
303 : 0 : opts_for_fn (e->callee->ultimate_alias_target ()->decl));
304 : : }
305 : 4610310 : }
306 : :
307 : : /* Decide whether sanitizer-related attributes allow inlining. */
308 : :
309 : : static bool
310 : 8097303 : sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
311 : : {
312 : 8097303 : if (!caller || !callee)
313 : : return true;
314 : :
315 : : /* Follow clang and allow inlining for always_inline functions. */
316 : 8097303 : if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
317 : : return true;
318 : :
319 : 7748129 : const sanitize_code codes[] =
320 : : {
321 : : SANITIZE_ADDRESS,
322 : : SANITIZE_THREAD,
323 : : SANITIZE_UNDEFINED,
324 : : SANITIZE_UNDEFINED_NONDEFAULT,
325 : : SANITIZE_POINTER_COMPARE,
326 : : SANITIZE_POINTER_SUBTRACT
327 : : };
328 : :
329 : 54235933 : for (unsigned i = 0; i < ARRAY_SIZE (codes); i++)
330 : 92975982 : if (sanitize_flags_p (codes[i], caller)
331 : 46487991 : != sanitize_flags_p (codes[i], callee))
332 : : return false;
333 : :
334 : 7747942 : if (sanitize_coverage_p (caller) != sanitize_coverage_p (callee))
335 : : return false;
336 : :
337 : : return true;
338 : : }
339 : :
: : /* The three check_* macros below are expanded inside
: : can_inline_edge_by_limits_p and rely on the local variables
: : CALLER, CALLEE and ALWAYS_INLINE being in scope at the point
: : of use. */
340 : : /* Used for flags where it is safe to inline when caller's value is
341 : : greater than callee's. */
342 : : #define check_maybe_up(flag) \
343 : : (opts_for_fn (caller->decl)->x_##flag \
344 : : != opts_for_fn (callee->decl)->x_##flag \
345 : : && (!always_inline \
346 : : || opts_for_fn (caller->decl)->x_##flag \
347 : : < opts_for_fn (callee->decl)->x_##flag))
348 : : /* Used for flags where it is safe to inline when caller's value is
349 : : smaller than callee's. */
350 : : #define check_maybe_down(flag) \
351 : : (opts_for_fn (caller->decl)->x_##flag \
352 : : != opts_for_fn (callee->decl)->x_##flag \
353 : : && (!always_inline \
354 : : || opts_for_fn (caller->decl)->x_##flag \
355 : : > opts_for_fn (callee->decl)->x_##flag))
356 : : /* Used for flags where exact match is needed for correctness. */
357 : : #define check_match(flag) \
358 : : (opts_for_fn (caller->decl)->x_##flag \
359 : : != opts_for_fn (callee->decl)->x_##flag)
360 : :
361 : : /* Decide if we can inline the edge and possibly update
362 : : inline_failed reason.
363 : : We check whether inlining is possible at all and whether
364 : : caller growth limits allow doing so.
365 : :
366 : : if REPORT is true, output reason to the dump file. */
367 : :
368 : : static bool
369 : 11961193 : can_inline_edge_p (struct cgraph_edge *e, bool report,
370 : : bool early = false)
371 : : {
372 : 11961193 : gcc_checking_assert (e->inline_failed);
373 : :
374 : 11961193 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
375 : : {
376 : 3386962 : if (report)
377 : 3356820 : report_inline_failed_reason (e);
378 : 3386962 : return false;
379 : : }
380 : :
381 : 8574231 : bool inlinable = true;
382 : 8574231 : enum availability avail;
383 : 17148462 : cgraph_node *caller = (e->caller->inlined_to
384 : 8574231 : ? e->caller->inlined_to : e->caller);
385 : 8574231 : cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
386 : :
387 : 8574231 : if (!callee->definition)
388 : : {
389 : 1414 : e->inline_failed = CIF_BODY_NOT_AVAILABLE;
390 : 1414 : inlinable = false;
391 : : }
392 : 8574231 : if (!early && (!opt_for_fn (callee->decl, optimize)
393 : 5014644 : || !opt_for_fn (caller->decl, optimize)))
394 : : {
395 : 208 : e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
396 : 208 : inlinable = false;
397 : : }
398 : 8574023 : else if (callee->calls_comdat_local)
399 : : {
400 : 17489 : e->inline_failed = CIF_USES_COMDAT_LOCAL;
401 : 17489 : inlinable = false;
402 : : }
403 : 8556534 : else if (avail <= AVAIL_INTERPOSABLE)
404 : : {
405 : 123772 : e->inline_failed = CIF_OVERWRITABLE;
406 : 123772 : inlinable = false;
407 : : }
408 : : /* All edges with call_stmt_cannot_inline_p should have inline_failed
409 : : initialized to one of FINAL_ERROR reasons. */
410 : 8432762 : else if (e->call_stmt_cannot_inline_p)
411 : 0 : gcc_unreachable ();
412 : : /* Don't inline if the functions have different EH personalities. */
413 : 8432762 : else if (DECL_FUNCTION_PERSONALITY (caller->decl)
414 : 2307317 : && DECL_FUNCTION_PERSONALITY (callee->decl)
415 : 8432762 : && (DECL_FUNCTION_PERSONALITY (caller->decl)
416 : 189815 : != DECL_FUNCTION_PERSONALITY (callee->decl)))
417 : : {
418 : 0 : e->inline_failed = CIF_EH_PERSONALITY;
419 : 0 : inlinable = false;
420 : : }
421 : : /* TM pure functions should not be inlined into non-TM_pure
422 : : functions. */
423 : 8432762 : else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
424 : : {
425 : 31 : e->inline_failed = CIF_UNSPECIFIED;
426 : 31 : inlinable = false;
427 : : }
428 : : /* Check compatibility of target optimization options. */
429 : 8432731 : else if (!targetm.target_option.can_inline_p (caller->decl,
430 : : callee->decl))
431 : : {
432 : 447 : e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
433 : 447 : inlinable = false;
434 : : }
435 : 8432284 : else if (ipa_fn_summaries->get (callee) == NULL
436 : 8432282 : || !ipa_fn_summaries->get (callee)->inlinable)
437 : : {
438 : 334981 : e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
439 : 334981 : inlinable = false;
440 : : }
441 : : /* Don't inline a function with mismatched sanitization attributes. */
442 : 8097303 : else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
443 : : {
444 : 187 : e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
445 : 187 : inlinable = false;
446 : : }
447 : :
448 : 8574231 : if (inlinable && !strub_inlinable_to_p (callee, caller))
449 : : {
450 : 1203 : e->inline_failed = CIF_UNSPECIFIED;
451 : 1203 : inlinable = false;
452 : : }
453 : 8574231 : if (!inlinable && report)
454 : 472990 : report_inline_failed_reason (e);
455 : : return inlinable;
456 : : }
457 : :
458 : : /* Return inlining_insns_single limit for function N. If HINT or HINT2 is true
459 : : scale up the bound. */
460 : :
461 : : static int
462 : 6953359 : inline_insns_single (cgraph_node *n, bool hint, bool hint2)
463 : : {
464 : 6953359 : if (hint && hint2)
465 : : {
466 : 2273645 : int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
467 : 2273645 : spd = spd * spd;
468 : 2273645 : if (spd > 1000000)
469 : : spd = 1000000;
470 : 2273645 : return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
471 : : }
472 : 4679714 : if (hint || hint2)
473 : 237144 : return opt_for_fn (n->decl, param_max_inline_insns_single)
474 : 237144 : * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
475 : 4442570 : return opt_for_fn (n->decl, param_max_inline_insns_single);
476 : : }
477 : :
478 : : /* Return inlining_insns_auto limit for function N. If HINT or HINT2 is true
479 : : scale up the bound. */
480 : :
481 : : static int
482 : 5807611 : inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
483 : : {
484 : 5807611 : int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
485 : 5807611 : if (hint && hint2)
486 : : {
487 : 2108811 : int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
488 : 2108811 : spd = spd * spd;
489 : 2108811 : if (spd > 1000000)
490 : : spd = 1000000;
491 : 2108811 : return max_inline_insns_auto * spd / 100;
492 : : }
493 : 3698800 : if (hint || hint2)
494 : 1637060 : return max_inline_insns_auto
495 : 1637060 : * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
496 : : return max_inline_insns_auto;
497 : : }
498 : :
499 : : /* Decide if we can inline the edge and possibly update
500 : : inline_failed reason.
501 : : We check whether inlining is possible at all and whether
502 : : caller growth limits allow doing so.
503 : :
504 : : if REPORT is true, output reason to the dump file.
505 : :
506 : : if DISREGARD_LIMITS is true, ignore size limits. */
507 : :
508 : : static bool
509 : 5892217 : can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
510 : : bool disregard_limits = false, bool early = false)
511 : : {
512 : 5892217 : gcc_checking_assert (e->inline_failed);
513 : :
514 : 5892217 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
515 : : {
516 : 210 : if (report)
517 : 210 : report_inline_failed_reason (e);
518 : 210 : return false;
519 : : }
520 : :
521 : 5892007 : bool inlinable = true;
522 : 5892007 : enum availability avail;
523 : 11784014 : cgraph_node *caller = (e->caller->inlined_to
524 : 5892007 : ? e->caller->inlined_to : e->caller);
525 : 5892007 : cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
526 : 5892007 : tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
527 : 5892007 : tree callee_tree
528 : 5892007 : = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
529 : : /* Check if caller growth allows the inlining. */
530 : 5892007 : if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
531 : 5541416 : && !disregard_limits
532 : 5538089 : && !lookup_attribute ("flatten",
533 : 5538089 : DECL_ATTRIBUTES (caller->decl))
534 : 11429956 : && !caller_growth_limits (e))
535 : : inlinable = false;
536 : 5837877 : else if (callee->externally_visible
537 : 3815769 : && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
538 : 9443889 : && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
539 : : {
540 : 2 : e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
541 : 2 : inlinable = false;
542 : : }
543 : : /* Don't inline a function with a higher optimization level than the
544 : : caller. FIXME: this is really just tip of iceberg of handling
545 : : optimization attribute. */
546 : 5837875 : else if (caller_tree != callee_tree)
547 : : {
548 : 17083 : bool always_inline =
549 : 17083 : (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
550 : 27325 : && lookup_attribute ("always_inline",
551 : 10242 : DECL_ATTRIBUTES (callee->decl)));
552 : 17083 : ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
553 : 17083 : ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
554 : :
555 : : /* Until GCC 4.9 we did not check the semantics-altering flags
556 : : below and inlined across optimization boundaries.
557 : : Enabling checks below breaks several packages by refusing
558 : : to inline library always_inline functions. See PR65873.
559 : : Disable the check for early inlining for now until better solution
560 : : is found. */
561 : 17083 : if (always_inline && early)
562 : : ;
563 : : /* There are some options that change IL semantics which means
564 : : we cannot inline in these cases for correctness reason.
565 : : Not even for always_inline declared functions. */
566 : 6841 : else if (check_match (flag_wrapv)
567 : 6841 : || check_match (flag_trapv)
568 : 6841 : || check_match (flag_pcc_struct_return)
569 : 6841 : || check_maybe_down (optimize_debug)
570 : : /* When caller or callee does FP math, be sure FP codegen flags
571 : : compatible. */
572 : 6837 : || ((caller_info->fp_expressions && callee_info->fp_expressions)
573 : 1256 : && (check_maybe_up (flag_rounding_math)
574 : 1256 : || check_maybe_up (flag_trapping_math)
575 : 1254 : || check_maybe_down (flag_unsafe_math_optimizations)
576 : 1254 : || check_maybe_down (flag_finite_math_only)
577 : 1253 : || check_maybe_up (flag_signaling_nans)
578 : 1253 : || check_maybe_down (flag_cx_limited_range)
579 : 1253 : || check_maybe_up (flag_signed_zeros)
580 : 1253 : || check_maybe_down (flag_associative_math)
581 : 1232 : || check_maybe_down (flag_reciprocal_math)
582 : 1232 : || check_maybe_down (flag_fp_int_builtin_inexact)
583 : : /* Strictly speaking only when the callee contains function
584 : : calls that may end up setting errno. */
585 : 1232 : || check_maybe_up (flag_errno_math)))
586 : : /* We do not want to make code compiled with exceptions to be
587 : : brought into a non-EH function unless we know that the callee
588 : : does not throw.
589 : : This is tracked by DECL_FUNCTION_PERSONALITY. */
590 : 6813 : || (check_maybe_up (flag_non_call_exceptions)
591 : 0 : && DECL_FUNCTION_PERSONALITY (callee->decl))
592 : 6813 : || (check_maybe_up (flag_exceptions)
593 : 16 : && DECL_FUNCTION_PERSONALITY (callee->decl))
594 : : /* When devirtualization is disabled for callee, it is not safe
595 : : to inline it as we possibly mangled the type info.
596 : : Allow early inlining of always inlines. */
597 : 13654 : || (!early && check_maybe_down (flag_devirtualize)))
598 : : {
599 : 36 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
600 : 36 : inlinable = false;
601 : : }
602 : : /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
603 : 6805 : else if (always_inline)
604 : : ;
605 : : /* When user added an attribute to the callee honor it. */
606 : 6805 : else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
607 : 6805 : && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
608 : : {
609 : 2164 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
610 : 2164 : inlinable = false;
611 : : }
612 : : /* If explicit optimize attribute are not used, the mismatch is caused
613 : : by different command line options used to build different units.
614 : : Do not care about COMDAT functions - those are intended to be
615 : : optimized with the optimization flags of module they are used in.
616 : : Also do not care about mixing up size/speed optimization when
617 : : DECL_DISREGARD_INLINE_LIMITS is set. */
618 : 4641 : else if ((callee->merged_comdat
619 : 0 : && !lookup_attribute ("optimize",
620 : 0 : DECL_ATTRIBUTES (caller->decl)))
621 : 4641 : || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
622 : : ;
623 : : /* If mismatch is caused by merging two LTO units with different
624 : : optimization flags we want to be bit nicer. However never inline
625 : : if one of functions is not optimized at all. */
626 : 4641 : else if (!opt_for_fn (callee->decl, optimize)
627 : 4641 : || !opt_for_fn (caller->decl, optimize))
628 : : {
629 : 0 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
630 : 0 : inlinable = false;
631 : : }
632 : : /* If callee is optimized for size and caller is not, allow inlining if
633 : : code shrinks or we are in param_max_inline_insns_single limit and
634 : : callee is inline (and thus likely an unified comdat).
635 : : This will allow caller to run faster. */
636 : 4641 : else if (opt_for_fn (callee->decl, optimize_size)
637 : 4641 : > opt_for_fn (caller->decl, optimize_size))
638 : : {
639 : 120 : int growth = estimate_edge_growth (e);
640 : 120 : if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
641 : 120 : && (!DECL_DECLARED_INLINE_P (callee->decl)
642 : 67 : && growth >= MAX (inline_insns_single (caller, false, false),
643 : : inline_insns_auto (caller, false, false))))
644 : : {
645 : 0 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
646 : 0 : inlinable = false;
647 : : }
648 : : }
649 : : /* If callee is more aggressively optimized for performance than caller,
650 : : we generally want to inline only cheap (runtime wise) functions. */
651 : 4521 : else if (opt_for_fn (callee->decl, optimize_size)
652 : : < opt_for_fn (caller->decl, optimize_size)
653 : 4521 : || (opt_for_fn (callee->decl, optimize)
654 : : > opt_for_fn (caller->decl, optimize)))
655 : : {
656 : 12495 : if (estimate_edge_time (e)
657 : 4165 : >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
658 : : {
659 : 1511 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
660 : 1511 : inlinable = false;
661 : : }
662 : : }
663 : :
664 : : }
665 : :
666 : 5892007 : if (!inlinable && report)
667 : 54545 : report_inline_failed_reason (e);
668 : : return inlinable;
669 : : }
670 : :
671 : :
672 : : /* Return true if the edge E is inlinable during early inlining. */
673 : :
674 : : static bool
675 : 3559670 : can_early_inline_edge_p (struct cgraph_edge *e)
676 : : {
677 : 7119340 : cgraph_node *caller = (e->caller->inlined_to
678 : 3559670 : ? e->caller->inlined_to : e->caller);
679 : 3559670 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
680 : : /* Early inliner might get called at WPA stage when IPA pass adds new
681 : : function. In this case we cannot really do any of early inlining
682 : : because function bodies are missing. */
683 : 3559670 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
684 : : return false;
685 : 3559434 : if (!gimple_has_body_p (callee->decl))
686 : : {
687 : 0 : e->inline_failed = CIF_BODY_NOT_AVAILABLE;
688 : 0 : return false;
689 : : }
690 : 7118868 : gcc_assert (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
691 : : && gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)));
692 : 3557940 : if ((profile_arc_flag || condition_coverage_flag)
693 : 3560932 : && ((lookup_attribute ("no_profile_instrument_function",
694 : 1498 : DECL_ATTRIBUTES (caller->decl)) == NULL_TREE)
695 : 1498 : != (lookup_attribute ("no_profile_instrument_function",
696 : 2996 : DECL_ATTRIBUTES (callee->decl)) == NULL_TREE)))
697 : : return false;
698 : :
699 : 3559432 : if (!can_inline_edge_p (e, true, true)
700 : 3559432 : || !can_inline_edge_by_limits_p (e, true, false, true))
701 : 83580 : return false;
702 : : /* When inlining regular functions into always-inline functions
703 : : during early inlining watch for possible inline cycles. */
704 : 3475852 : if (DECL_DISREGARD_INLINE_LIMITS (caller->decl)
705 : 139492 : && lookup_attribute ("always_inline", DECL_ATTRIBUTES (caller->decl))
706 : 3615335 : && (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
707 : 81100 : || !lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee->decl))))
708 : : {
709 : : /* If there are indirect calls, inlining may produce direct call.
710 : : TODO: We may lift this restriction if we avoid errors on formely
711 : : indirect calls to always_inline functions. Taking address
712 : : of always_inline function is generally bad idea and should
713 : : have been declared as undefined, but sadly we allow this. */
714 : 58383 : if (caller->indirect_calls || e->callee->indirect_calls)
715 : : return false;
716 : 57321 : ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
717 : 57321 : if (callee_info->safe_to_inline_to_always_inline)
718 : 9088 : return callee_info->safe_to_inline_to_always_inline - 1;
719 : 99870 : for (cgraph_edge *e2 = callee->callees; e2; e2 = e2->next_callee)
720 : : {
721 : 51647 : struct cgraph_node *callee2 = e2->callee->ultimate_alias_target ();
722 : : /* As early inliner runs in RPO order, we will see uninlined
723 : : always_inline calls only in the case of cyclic graphs. */
724 : 51647 : if (DECL_DISREGARD_INLINE_LIMITS (callee2->decl)
725 : 51647 : || lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee2->decl)))
726 : : {
727 : 0 : callee_info->safe_to_inline_to_always_inline = 1;
728 : 0 : return false;
729 : : }
730 : : /* With LTO watch for case where function is later replaced
731 : : by always_inline definition.
732 : : TODO: We may either stop treating noninlined cross-module always
733 : : inlines as errors, or we can extend decl merging to produce
734 : : syntacic alias and honor always inline only in units it has
735 : : been declared as such. */
736 : 51647 : if (flag_lto && callee2->externally_visible)
737 : : {
738 : 10 : callee_info->safe_to_inline_to_always_inline = 1;
739 : 10 : return false;
740 : : }
741 : : }
742 : 48223 : callee_info->safe_to_inline_to_always_inline = 2;
743 : : }
744 : : return true;
745 : : }
746 : :
747 : :
748 : : /* Return number of calls in N. Ignore cheap builtins. */
749 : :
750 : : static int
751 : 747281 : num_calls (struct cgraph_node *n)
752 : : {
753 : 747281 : struct cgraph_edge *e;
754 : 747281 : int num = 0;
755 : :
756 : 1463530 : for (e = n->callees; e; e = e->next_callee)
757 : 716249 : if (!is_inexpensive_builtin (e->callee->decl))
758 : 669985 : num++;
759 : 747281 : return num;
760 : : }
761 : :
762 : :
/* Return true if we are interested in inlining small function.
   I.e. decide whether the early inliner wants to inline call edge E;
   on a negative answer record the reason in E->inline_failed where
   applicable.  */

static bool
want_early_inline_function_p (struct cgraph_edge *e)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();

  /* always_inline-style callees bypass all size heuristics.  */
  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  /* For AutoFDO, we need to make sure that before profile summary, all
     hot paths' IR look exactly the same as profiled binary.  As a result,
     in einliner, we will disregard size limit and inline those callsites
     that are:
       * inlined in the profiled binary, and
       * the cloned callee has enough samples to be considered "hot".  */
  else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
    ;
  /* Callee is not declared inline and the caller has small-function
     inlining disabled: not a candidate at all.  */
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      report_inline_failed_reason (e);
      want_inline = false;
    }
  else
    {
      /* First take care of very large functions.  */
      int min_growth = estimate_min_edge_growth (e), growth = 0;
      int n;
      int early_inlining_insns = param_early_inlining_insns;

      /* Use the cheap lower bound on growth first; compute the real
	 (more expensive) growth estimate only when the bound does not
	 already reject the edge.  */
      if (min_growth > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "call is cold and code would grow "
			     "at least by %i\n",
			     e->caller, callee,
			     min_growth);
	  want_inline = false;
	}
      else
	growth = estimate_edge_growth (e);


      /* Growth within param_max_inline_insns_size is always acceptable.  */
      if (!want_inline || growth <= param_max_inline_insns_size)
	;
      /* Cold calls may not grow code at all.  */
      else if (!e->maybe_hot_p ())
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "call is cold and code would grow by %i\n",
			     e->caller, callee,
			     growth);
	  want_inline = false;
	}
      /* Growth over --param early-inlining-insns is rejected outright.  */
      else if (growth > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "growth %i exceeds --param early-inlining-insns\n",
			     e->caller, callee, growth);
	  want_inline = false;
	}
      /* Scale the limit down by the number of calls in the callee:
	 inlining a callee full of further calls multiplies the growth.  */
      else if ((n = num_calls (callee)) != 0
	       && growth * (n + 1) > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "growth %i exceeds --param early-inlining-insns "
			     "divided by number of calls\n",
			     e->caller, callee, growth);
	  want_inline = false;
	}
    }
  return want_inline;
}
845 : :
846 : : /* Compute time of the edge->caller + edge->callee execution when inlining
847 : : does not happen. */
848 : :
849 : : inline sreal
850 : 356084 : compute_uninlined_call_time (struct cgraph_edge *edge,
851 : : sreal uninlined_call_time,
852 : : sreal freq)
853 : : {
854 : 712168 : cgraph_node *caller = (edge->caller->inlined_to
855 : 356084 : ? edge->caller->inlined_to
856 : : : edge->caller);
857 : :
858 : 356084 : if (freq > 0)
859 : 344194 : uninlined_call_time *= freq;
860 : : else
861 : 11890 : uninlined_call_time = uninlined_call_time >> 11;
862 : :
863 : 356084 : sreal caller_time = ipa_fn_summaries->get (caller)->time;
864 : 356084 : return uninlined_call_time + caller_time;
865 : : }
866 : :
/* Same as compute_uninlined_call_time but compute time when inlining
   does happen.  */

inline sreal
compute_inlined_call_time (struct cgraph_edge *edge,
			   sreal time,
			   sreal freq)
{
  /* For already-inlined callers account the toplevel function.  */
  cgraph_node *caller = (edge->caller->inlined_to
			 ? edge->caller->inlined_to
			 : edge->caller);
  sreal caller_time = ipa_fn_summaries->get (caller)->time;

  /* Scale callee time by the call frequency; with no useful frequency
     information, downscale heavily instead.  */
  if (freq > 0)
    time *= freq;
  else
    time = time >> 11;

  /* This calculation should match one in ipa-inline-analysis.cc
     (estimate_edge_size_and_time).  After inlining the call overhead
     itself disappears, so subtract it back out.  */
  time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
  time += caller_time;
  /* Clamp to a small positive value; rounding may make the estimate
     non-positive even though real time cannot be.  */
  if (time <= 0)
    time = ((sreal) 1) >> 8;
  gcc_checking_assert (time >= 0);
  return time;
}
894 : :
/* Determine time saved by inlining EDGE of frequency FREQ
   where callee's runtime w/o inlining is UNINLINED_TIME
   and with inlined is INLINED_TIME.  */

inline sreal
inlining_speedup (struct cgraph_edge *edge,
		  sreal freq,
		  sreal uninlined_time,
		  sreal inlined_time)
{
  sreal speedup = uninlined_time - inlined_time;
  /* Handling of call_time should match one in ipa-inline-fnsummary.c
     (estimate_edge_size_and_time).  */
  sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;

  if (freq > 0)
    {
      /* Inlining also saves the call overhead itself; scale the total
	 per-invocation saving by the call frequency.  */
      speedup = (speedup + call_time);
      if (freq != 1)
	speedup = speedup * freq;
    }
  else if (freq == 0)
    /* Zero frequency: keep a heavily downscaled saving rather than
       dropping it entirely.  */
    speedup = speedup >> 11;
  gcc_checking_assert (speedup >= 0);
  return speedup;
}
921 : :
922 : : /* Return true if the speedup for inlining E is bigger than
923 : : param_inline_min_speedup. */
924 : :
925 : : static bool
926 : 356084 : big_speedup_p (struct cgraph_edge *e)
927 : : {
928 : 356084 : sreal unspec_time;
929 : 356084 : sreal spec_time = estimate_edge_time (e, &unspec_time);
930 : 356084 : sreal freq = e->sreal_frequency ();
931 : 356084 : sreal time = compute_uninlined_call_time (e, unspec_time, freq);
932 : 356084 : sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
933 : 712168 : cgraph_node *caller = (e->caller->inlined_to
934 : 356084 : ? e->caller->inlined_to
935 : : : e->caller);
936 : 356084 : int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
937 : :
938 : 356084 : if ((time - inlined_time) * 100 > time * limit)
939 : : return true;
940 : : return false;
941 : : }
942 : :
/* Return true if we are interested in inlining small function.
   When REPORT is true, report reason to dump file.  */

static bool
want_inline_small_function_p (struct cgraph_edge *e, bool report)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  /* For already-inlined callers account limits on the toplevel function.  */
  cgraph_node *to  = (e->caller->inlined_to
		      ? e->caller->inlined_to : e->caller);

  /* Allow this function to be called before can_inline_edge_p,
     since it's usually cheaper.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    want_inline = false;
  /* always_inline-style callees bypass all size heuristics.  */
  else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      want_inline = false;
    }
  /* Do fast and conservative check if the function can be good
     inline candidate.  Uses min_size, a cheap lower bound on growth,
     against the most permissive limit; failing that means no hint
     could ever make the edge pass.  */
  else if ((!DECL_DECLARED_INLINE_P (callee->decl)
	   && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
	   && ipa_fn_summaries->get (callee)->min_size
		- ipa_call_summaries->get (e)->call_stmt_size
	      > inline_insns_auto (e->caller, true, true))
    {
      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
      want_inline = false;
    }
  /* Same conservative check against the single-call limit for declared
     inlines and edges with a nonzero IPA profile.  */
  else if ((DECL_DECLARED_INLINE_P (callee->decl)
	    || e->count.ipa ().nonzero_p ())
	   && ipa_fn_summaries->get (callee)->min_size
		- ipa_call_summaries->get (e)->call_stmt_size
	      > inline_insns_single (e->caller, true, true))
    {
      e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
			  ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
			  : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      ipa_hints hints = estimate_edge_hints (e);
      /* We have two independent groups of hints.  If one matches in each
	 of groups the limits are increased.  If both groups matches, limit
	 is increased even more.  */
      bool apply_hints = (hints & (INLINE_HINT_indirect_call
				   | INLINE_HINT_known_hot
				   | INLINE_HINT_loop_iterations
				   | INLINE_HINT_loop_stride));
      bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);

      /* Tiny growth is always acceptable.  */
      if (growth <= opt_for_fn (to->decl,
				param_max_inline_insns_size))
	;
      /* Apply param_max_inline_insns_single limit.  Do not do so when
	 hints suggests that inlining given function is very profitable.
	 Avoid computation of big_speedup_p when not necessary to change
	 outcome of decision.  */
      else if (DECL_DECLARED_INLINE_P (callee->decl)
	       && growth >= inline_insns_single (e->caller, apply_hints,
						 apply_hints2)
	       && (apply_hints || apply_hints2
		   || growth >= inline_insns_single (e->caller, true,
						     apply_hints2)
		   || !big_speedup_p (e)))
	{
	  e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
	  want_inline = false;
	}
      /* Not declared inline and -finline-functions is off: only accept
	 small growth that does not enlarge the unit.  */
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
	       && !opt_for_fn (e->caller->decl, flag_inline_functions)
	       && growth >= opt_for_fn (to->decl,
					param_max_inline_insns_small))
	{
	  /* growth_positive_p is expensive, always test it last.  */
	  if (growth >= inline_insns_single (e->caller, false, false)
	      || growth_positive_p (callee, e, growth))
	    {
	      e->inline_failed = CIF_NOT_DECLARED_INLINED;
	      want_inline = false;
	    }
	}
      /* Apply param_max_inline_insns_auto limit for functions not declared
	 inline.  Bypass the limit when speedup seems big.  */
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
	       && growth >= inline_insns_auto (e->caller, apply_hints,
					       apply_hints2)
	       && (apply_hints || apply_hints2
		   || growth >= inline_insns_auto (e->caller, true,
						   apply_hints2)
		   || !big_speedup_p (e)))
	{
	  /* growth_positive_p is expensive, always test it last.  */
	  if (growth >= inline_insns_single (e->caller, false, false)
	      || growth_positive_p (callee, e, growth))
	    {
	      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
	      want_inline = false;
	    }
	}
      /* If call is cold, do not inline when function body would grow.  */
      else if (!e->maybe_hot_p ()
	       && (growth >= inline_insns_single (e->caller, false, false)
		   || growth_positive_p (callee, e, growth)))
	{
	  e->inline_failed = CIF_UNLIKELY_CALL;
	  want_inline = false;
	}
    }
  if (!want_inline && report)
    report_inline_failed_reason (e);
  return want_inline;
}
1063 : :
/* EDGE is self recursive edge.
   We handle two cases - when function A is inlining into itself
   or when function A is being inlined into another inliner copy of function
   A within function B.

   In first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just introduce new recursive calls to appear.

   PEELING distinguishes the two cases (true for the second one);
   DEPTH is the current recursive-inlining depth of EDGE.  */

static bool
want_inline_self_recursive_call_p (struct cgraph_edge *edge,
				   struct cgraph_node *outer_node,
				   bool peeling,
				   int depth)
{
  char const *reason = NULL;
  bool want_inline = true;
  sreal caller_freq = 1;
  int max_depth = opt_for_fn (outer_node->decl,
			      param_max_inline_recursive_depth_auto);

  /* Functions explicitly declared inline get the larger depth budget.  */
  if (DECL_DECLARED_INLINE_P (edge->caller->decl))
    max_depth = opt_for_fn (outer_node->decl,
			    param_max_inline_recursive_depth);

  if (!edge->maybe_hot_p ())
    {
      reason = "recursive call is cold";
      want_inline = false;
    }
  else if (depth > max_depth)
    {
      reason = "--param max-inline-recursive-depth exceeded.";
      want_inline = false;
    }
  else if (outer_node->inlined_to
	   && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
    {
      reason = "caller frequency is 0";
      want_inline = false;
    }

  if (!want_inline)
    ;
  /* Inlining of self recursive function into copy of itself within other
     function is transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make probability
     of actual call to the self recursive function very small.  Be sure that
     the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursion is at most max_depth.  */
  else if (peeling)
    {
      /* Raise the probability bound to the DEPTH-th power: each level of
	 peeling must keep the product of recursion probabilities small.  */
      sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
      int i;
      for (i = 1; i < depth; i++)
	max_prob = max_prob * max_prob;
      if (edge->sreal_frequency () >= max_prob * caller_freq)
	{
	  reason = "frequency of recursive call is too large";
	  want_inline = false;
	}
    }
  /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
     recursion depth is large.  We reduce function call overhead and increase
     chances that things fit in hardware return predictor.

     Recursive inlining might however increase cost of stack frame setup
     actually slowing down functions whose recursion tree is wide rather than
     deep.

     Deciding reliably on when to do recursive inlining without profile feedback
     is tricky.  For now we disable recursive inlining when probability of self
     recursion is low.

     Recursive inlining of self recursive call within loop also results in
     large loop depths that generally optimize badly.  We may want to throttle
     down inlining in those cases.  In particular this seems to happen in one
     of libstdc++ rb tree methods.  */
  else
    {
      if (edge->sreal_frequency () * 100
	  <= caller_freq
	     * opt_for_fn (outer_node->decl,
			   param_min_inline_recursive_probability))
	{
	  reason = "frequency of recursive call is too small";
	  want_inline = false;
	}
    }
  if (!want_inline && dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
		     "   not inlining recursively: %s\n", reason);
  return want_inline;
}
1163 : :
/* Return true when NODE has uninlinable caller;
   set HAS_HOT_CALL if it has hot call.
   Worker for cgraph_for_node_and_aliases.
   The checks are ordered roughly cheapest-first so common rejections
   avoid the more expensive queries.  */

static bool
check_callers (struct cgraph_node *node, void *has_hot_call)
{
  struct cgraph_edge *e;
  for (e = node->callers; e; e = e->next_caller)
    {
      /* Caller must be optimized and allow called-once inlining.  */
      if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
	  || !opt_for_fn (e->caller->decl, optimize))
	return true;
      if (!can_inline_edge_p (e, true))
	return true;
      /* Recursive edges cannot eliminate the offline copy.  */
      if (e->recursive_p ())
	return true;
      if (!can_inline_edge_by_limits_p (e, true))
	return true;
      /* Inlining large functions to large loop depth is often harmful because
	 of register pressure it implies.  */
      if ((int)ipa_call_summaries->get (e)->loop_depth
	  > param_inline_functions_called_once_loop_depth)
	return true;
      /* Do not produce gigantic functions.  */
      if (estimate_size_after_inlining (e->caller->inlined_to ?
					e->caller->inlined_to : e->caller, e)
	  > param_inline_functions_called_once_insns)
	return true;
      /* Record (once) whether any acceptable caller edge is hot.  */
      if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
	*(bool *)has_hot_call = true;
    }
  return false;
}
1198 : :
1199 : : /* If NODE has a caller, return true. */
1200 : :
1201 : : static bool
1202 : 2043509 : has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1203 : : {
1204 : 2043509 : if (node->callers)
1205 : 636566 : return true;
1206 : : return false;
1207 : : }
1208 : :
1209 : : /* Decide if inlining NODE would reduce unit size by eliminating
1210 : : the offline copy of function.
1211 : : When COLD is true the cold calls are considered, too. */
1212 : :
1213 : : static bool
1214 : 4273391 : want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1215 : : {
1216 : 4273391 : bool has_hot_call = false;
1217 : :
1218 : : /* Aliases gets inlined along with the function they alias. */
1219 : 4273391 : if (node->alias)
1220 : : return false;
1221 : : /* Already inlined? */
1222 : 4197292 : if (node->inlined_to)
1223 : : return false;
1224 : : /* Does it have callers? */
1225 : 1994158 : if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1226 : : return false;
1227 : : /* Inlining into all callers would increase size? */
1228 : 636566 : if (growth_positive_p (node, NULL, INT_MIN) > 0)
1229 : : return false;
1230 : : /* All inlines must be possible. */
1231 : 62495 : if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1232 : : true))
1233 : : return false;
1234 : 38709 : if (!cold && !has_hot_call)
1235 : : return false;
1236 : : return true;
1237 : : }
1238 : :
1239 : : /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1240 : : in estimate_edge_badness. */
1241 : :
1242 : : static bool
1243 : 568197 : wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1244 : : {
1245 : 568197 : return size < (DECL_DECLARED_INLINE_P (where->decl)
1246 : 568197 : ? inline_insns_single (where, false, false)
1247 : 378705 : : inline_insns_auto (where, false, false));
1248 : : }
1249 : :
/* A cost model driving the inlining heuristics in a way so the edges with
   smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of nodes affected are recomputed so the
   metrics may accurately depend on values such as number of inlinable callers
   of the function or function body size.
   When DUMP is true, explain the computation in the dump file.  */

static sreal
edge_badness (struct cgraph_edge *edge, bool dump)
{
  sreal badness;
  int growth;
  sreal edge_time, unspec_edge_time;
  struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
  class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
  ipa_hints hints;
  /* For already-inlined callers use the toplevel function.  */
  cgraph_node *caller = (edge->caller->inlined_to
			 ? edge->caller->inlined_to
			 : edge->caller);

  growth = estimate_edge_growth (edge);
  edge_time = estimate_edge_time (edge, &unspec_edge_time);
  hints = estimate_edge_hints (edge);
  gcc_checking_assert (edge_time >= 0);
  /* Check that inlined time is better, but tolerate some roundoff issues.
     FIXME: When callee profile drops to 0 we account calls more.  This
     should be fixed by never doing that.  */
  gcc_checking_assert ((edge_time * 100
			- callee_info->time * 101).to_int () <= 0
			|| callee->count.ipa ().initialized_p ());
  gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);

  if (dump)
    {
      fprintf (dump_file, "    Badness calculation for %s -> %s\n",
	       edge->caller->dump_name (),
	       edge->callee->dump_name ());
      fprintf (dump_file, "      size growth %i, time %f unspec %f ",
	       growth,
	       edge_time.to_double (),
	       unspec_edge_time.to_double ());
      ipa_dump_hints (dump_file, hints);
      if (big_speedup_p (edge))
	fprintf (dump_file, " big_speedup");
      fprintf (dump_file, "\n");
    }

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    {
      /* Most negative representable badness, refined by GROWTH so larger
	 size savings sort first.  */
      badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
      if (dump)
	fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
		 growth);
    }
  /* Inlining into EXTERNAL functions is not going to change anything unless
     they are themselves inlined.  */
  else if (DECL_EXTERNAL (caller->decl))
    {
      if (dump)
	fprintf (dump_file, "      max: function is external\n");
      return sreal::max ();
    }
  /* When profile is available. Compute badness as:

                 time_saved * caller_count
     goodness = -------------------------------------------------
	         growth_of_caller * overall_growth * combined_size

     badness = - goodness

     Again use negative value to make calls with profile appear hotter
     than calls without.
  */
  else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
	   || caller->count.ipa ().nonzero_p ())
    {
      sreal numerator, denominator;
      int overall_growth;
      sreal freq = edge->sreal_frequency ();

      numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
      /* Never let the numerator be non-positive; badness must stay
	 strictly signed.  */
      if (numerator <= 0)
	numerator = ((sreal) 1 >> 8);
      if (caller->count.ipa ().nonzero_p ())
	numerator *= caller->count.ipa ().to_gcov_type ();
      else if (caller->count.ipa ().initialized_p ())
	/* Profile says the caller is never executed; downscale.  */
	numerator = numerator >> 11;
      denominator = growth;

      overall_growth = callee_info->growth;

      /* Look for inliner wrappers of the form:

	 inline_caller ()
	   {
	     do_fast_job...
	     if (need_more_work)
	       noninline_callee ();
	   }
	 Without penalizing this case, we usually inline noninline_callee
	 into the inline_caller because overall_growth is small preventing
	 further inlining of inline_caller.

	 Penalize only callgraph edges to functions with small overall
	 growth ...
	*/
      if (growth > overall_growth
	  /* ... and having only one caller which is not inlined ... */
	  && callee_info->single_caller
	  && !edge->caller->inlined_to
	  /* ... and edges executed only conditionally ... */
	  && freq < 1
	  /* ... consider case where callee is not inline but caller is ... */
	  && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
	       && DECL_DECLARED_INLINE_P (caller->decl))
	      /* ... or when early optimizers decided to split and edge
		 frequency still indicates splitting is a win ... */
	      || (callee->split_part && !caller->split_part
		  && freq * 100
			 < opt_for_fn (caller->decl,
				       param_partial_inlining_entry_probability)
		  /* ... and do not overwrite user specified hints.   */
		  && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
		      || DECL_DECLARED_INLINE_P (caller->decl)))))
	{
	  ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
	  int caller_growth = caller_info->growth;

	  /* Only apply the penalty when caller looks like inline candidate,
	     and it is not called once.  */
	  if (!caller_info->single_caller && overall_growth < caller_growth
	      && caller_info->inlinable
	      && wrapper_heuristics_may_apply
	   	 (caller, ipa_size_summaries->get (caller)->size))
	    {
	      if (dump)
		fprintf (dump_file,
			 "     Wrapper penalty. Increasing growth %i to %i\n",
			 overall_growth, caller_growth);
	      overall_growth = caller_growth;
	    }
	}
      if (overall_growth > 0)
        {
	  /* Strongly prefer functions with few callers that can be inlined
	     fully.  The square root here leads to smaller binaries at average.
	     Watch however for extreme cases and return to linear function
	     when growth is large.  */
	  if (overall_growth < 256)
	    overall_growth *= overall_growth;
	  else
	    overall_growth += 256 * 256 - 256;
	  denominator *= overall_growth;
        }
      denominator *= ipa_size_summaries->get (caller)->size + growth;

      badness = - numerator / denominator;

      if (dump)
	{
	  fprintf (dump_file,
		   "      %f: guessed profile. frequency %f, count %" PRId64
		   " caller count %" PRId64
		   " time saved %f"
		   " overall growth %i (current) %i (original)"
		   " %i (compensated)\n",
		   badness.to_double (),
		   freq.to_double (),
		   edge->count.ipa ().initialized_p ()
		   ? edge->count.ipa ().to_gcov_type () : -1,
		   caller->count.ipa ().initialized_p ()
		   ? caller->count.ipa ().to_gcov_type () : -1,
		   inlining_speedup (edge, freq, unspec_edge_time,
				     edge_time).to_double (),
		   estimate_growth (callee),
		   callee_info->growth, overall_growth);
	}
    }
  /* When function local profile is not available or it does not give
     useful information (i.e. frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for overall number
     of functions fully inlined in program.  */
  else
    {
      int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
      badness = growth;

      /* Decrease badness if call is nested.  */
      if (badness > 0)
	badness = badness >> nest;
      else
	badness = badness << nest;
      if (dump)
	fprintf (dump_file, "      %f: no profile. nest %i\n",
		 badness.to_double (), nest);
    }
  gcc_checking_assert (badness != 0);

  /* Final hint-based adjustments: each shift moves badness towards or
     away from zero depending on its sign, keeping relative ordering of
     positive and negative badnesses intact.  */
  if (edge->recursive_p ())
    badness = badness.shift (badness > 0 ? 4 : -4);
  if ((hints & (INLINE_HINT_indirect_call
		| INLINE_HINT_loop_iterations
		| INLINE_HINT_loop_stride))
      || callee_info->growth <= 0)
    badness = badness.shift (badness > 0 ? -2 : 2);
  if (hints & INLINE_HINT_builtin_constant_p)
    badness = badness.shift (badness > 0 ? -4 : 4);
  if (hints & (INLINE_HINT_same_scc))
    badness = badness.shift (badness > 0 ? 3 : -3);
  else if (hints & (INLINE_HINT_in_scc))
    badness = badness.shift (badness > 0 ? 2 : -2);
  else if (hints & (INLINE_HINT_cross_module))
    badness = badness.shift (badness > 0 ? 1 : -1);
  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    badness = badness.shift (badness > 0 ? -4 : 4);
  else if ((hints & INLINE_HINT_declared_inline))
    badness = badness.shift (badness > 0 ? -3 : 3);
  if (dump)
    fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
  return badness;
}
1471 : :
/* Recompute badness of EDGE and update its key in HEAP if needed.  */
static inline void
update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
{
  sreal badness = edge_badness (edge, false);
  /* EDGE->aux holds its heap node when the edge is already queued.  */
  if (edge->aux)
    {
      edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
      gcc_checking_assert (n->get_data () == edge);

      /* fibonacci_heap::replace_key does busy updating of the
	 heap that is unnecessarily expensive.
	 We do lazy increases: after extracting minimum if the key
	 turns out to be out of date, it is re-inserted into heap
	 with correct value.  */
      if (badness < n->get_key ().badness)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file,
		       "  decreasing badness %s -> %s, %f to %f\n",
		       edge->caller->dump_name (),
		       edge->callee->dump_name (),
		       n->get_key ().badness.to_double (),
		       badness.to_double ());
	    }
	  inline_badness b (edge, badness);
	  heap->decrease_key (n, b);
	}
    }
  else
    {
      /* Edge not yet in the queue: insert it and remember the heap node
	 in EDGE->aux.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file,
		   "  enqueuing call %s -> %s, badness %f\n",
		   edge->caller->dump_name (),
		   edge->callee->dump_name (),
		   badness.to_double ());
	}
      inline_badness b (edge, badness);
      edge->aux = heap->insert (b, edge);
    }
}
1516 : :
1517 : :
1518 : : /* NODE was inlined.
1519 : : All caller edges needs to be reset because
1520 : : size estimates change. Similarly callees needs reset
1521 : : because better context may be known. */
1522 : :
1523 : : static void
1524 : 841829 : reset_edge_caches (struct cgraph_node *node)
1525 : : {
1526 : 841829 : struct cgraph_edge *edge;
1527 : 841829 : struct cgraph_edge *e = node->callees;
1528 : 841829 : struct cgraph_node *where = node;
1529 : 841829 : struct ipa_ref *ref;
1530 : :
 : : /* If NODE is an inline clone, size estimates are accounted to the
 : : function it was inlined into; invalidate caches there. */
1531 : 841829 : if (where->inlined_to)
1532 : 783365 : where = where->inlined_to;
1533 : :
1534 : 841829 : reset_node_cache (where);
1535 : :
 : : /* Drop cached growth estimates of all not-yet-inlined edges
 : : calling WHERE. */
1536 : 841829 : if (edge_growth_cache != NULL)
1537 : 2765489 : for (edge = where->callers; edge; edge = edge->next_caller)
1538 : 1923765 : if (edge->inline_failed)
1539 : 1923765 : edge_growth_cache->remove (edge);
1540 : :
 : : /* Aliases refer to the same body; recurse to reset their caches
 : : too. */
1541 : 898285 : FOR_EACH_ALIAS (where, ref)
1542 : 112912 : reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1543 : :
1544 : 841829 : if (!e)
1545 : : return;
1546 : :
 : : /* Non-recursive walk over all of NODE's call edges, descending
 : : into callees that were already inlined (!inline_failed), and
 : : popping back up via e->caller->callers when a callee list is
 : : exhausted. */
1547 : 2075708 : while (true)
1548 : 2075708 : if (!e->inline_failed && e->callee->callees)
1549 : : e = e->callee->callees;
1550 : : else
1551 : : {
1552 : 1719801 : if (edge_growth_cache != NULL && e->inline_failed)
1553 : 1637520 : edge_growth_cache->remove (e);
1554 : 1719801 : if (e->next_callee)
1555 : : e = e->next_callee;
1556 : : else
1557 : : {
1558 : 1058751 : do
1559 : : {
 : : /* Back at the root of the walk; done. */
1560 : 1058751 : if (e->caller == node)
1561 : : return;
1562 : 355907 : e = e->caller->callers;
1563 : : }
1564 : 355907 : while (!e->next_callee);
1565 : : e = e->next_callee;
1566 : : }
1567 : : }
1568 : : }
1569 : :
1570 : : /* Recompute HEAP nodes for each caller of NODE.
1571 : : UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1572 : : When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1573 : : it is inlinable. Otherwise check all edges. */
1574 : :
1575 : : static void
1576 : 841373 : update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1577 : : bitmap updated_nodes,
1578 : : struct cgraph_edge *check_inlinablity_for)
1579 : : {
1580 : 841373 : struct cgraph_edge *edge;
1581 : 841373 : struct ipa_ref *ref;
1582 : :
 : : /* Nothing to do for non-inlinable functions or for inline
 : : clones (their callers are updated via the clone's origin). */
1583 : 841373 : if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1584 : 830912 : || node->inlined_to)
1585 : 10461 : return;
 : : /* bitmap_set_bit returns false when the bit was already set, so
 : : each node is processed at most once per update round. */
1586 : 830912 : if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1587 : : return;
1588 : :
 : : /* Callers may reach NODE through its aliases; update them too. */
1589 : 886977 : FOR_EACH_ALIAS (node, ref)
1590 : : {
1591 : 56065 : struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1592 : 56065 : update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1593 : : }
1594 : :
1595 : 2648351 : for (edge = node->callers; edge; edge = edge->next_caller)
1596 : 1817439 : if (edge->inline_failed)
1597 : : {
1598 : 1817439 : if (!check_inlinablity_for
1599 : 1817439 : || check_inlinablity_for == edge)
1600 : : {
1601 : 1817439 : if (can_inline_edge_p (edge, false)
1602 : 1785265 : && want_inline_small_function_p (edge, false)
1603 : 2209323 : && can_inline_edge_by_limits_p (edge, false))
1604 : 389017 : update_edge_key (heap, edge);
1605 : 1428422 : else if (edge->aux)
1606 : : {
 : : /* The edge is queued but no longer an inline
 : : candidate; remove it from the heap. */
1607 : 68376 : report_inline_failed_reason (edge);
1608 : 68376 : heap->delete_node ((edge_heap_node_t *) edge->aux);
1609 : 68376 : edge->aux = NULL;
1610 : : }
1611 : : }
 : : /* Other edges only need their badness keys refreshed. */
1612 : 0 : else if (edge->aux)
1613 : 0 : update_edge_key (heap, edge);
1614 : : }
1615 : : }
1616 : :
1617 : : /* Recompute HEAP nodes for each uninlined call in NODE
1618 : : If UPDATE_SINCE is non-NULL check if edges called within that function
1619 : : are inlinable (typically UPDATE_SINCE is the inline clone we introduced
1620 : : where all edges have new context).
1621 : :
1622 : : This is used when we know that edge badnesses are going only to increase
1623 : : (we introduced new call site) and thus all we need is to insert newly
1624 : : created edges into heap. */
1625 : :
1626 : : static void
1627 : 785340 : update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1628 : : struct cgraph_node *update_since,
1629 : : bitmap updated_nodes)
1630 : : {
1631 : 785340 : struct cgraph_edge *e = node->callees;
 : : /* Re-check inlinability only for edges inside UPDATE_SINCE;
 : : the flag is toggled as the walk enters and leaves it. */
1632 : 785340 : bool check_inlinability = update_since == node;
1633 : :
1634 : 785340 : if (!e)
1635 : : return;
 : : /* Non-recursive walk over NODE's calls, descending into bodies
 : : already inlined into it (!inline_failed edges). */
1636 : 23582075 : while (true)
1637 : 23582075 : if (!e->inline_failed && e->callee->callees)
1638 : : {
1639 : 3874972 : if (e->callee == update_since)
1640 : 342730 : check_inlinability = true;
1641 : : e = e->callee->callees;
1642 : : }
1643 : : else
1644 : : {
1645 : 19707103 : enum availability avail;
1646 : 19707103 : struct cgraph_node *callee;
1647 : 19707103 : if (!check_inlinability)
1648 : : {
 : : /* Fast path: just refresh the key of already-queued
 : : edges whose callee was not invalidated. */
1649 : 18007476 : if (e->aux
1650 : 20983048 : && !bitmap_bit_p (updated_nodes,
1651 : 2975572 : e->callee->ultimate_alias_target
1652 : 2975572 : (&avail, e->caller)->get_uid ()))
1653 : 2975572 : update_edge_key (heap, e);
1654 : : }
1655 : : /* We do not reset callee growth cache here. Since we added a new call,
1656 : : growth should have just increased and consequently badness metric
1657 : : don't need updating. */
1658 : 1699627 : else if (e->inline_failed
1659 : 1620143 : && (callee = e->callee->ultimate_alias_target (&avail,
1660 : 1620143 : e->caller))
1661 : 1620143 : && avail >= AVAIL_AVAILABLE
1662 : 506028 : && ipa_fn_summaries->get (callee) != NULL
1663 : 506026 : && ipa_fn_summaries->get (callee)->inlinable
1664 : 2192829 : && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1665 : : {
1666 : 493202 : if (can_inline_edge_p (e, false)
1667 : 489916 : && want_inline_small_function_p (e, false)
1668 : 780933 : && can_inline_edge_by_limits_p (e, false))
1669 : : {
1670 : 287300 : gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1671 : 287300 : gcc_checking_assert (check_inlinability || e->aux);
1672 : 287300 : update_edge_key (heap, e);
1673 : : }
1674 : 205902 : else if (e->aux)
1675 : : {
 : : /* No longer an inline candidate; drop it from the
 : : heap. */
1676 : 4208 : report_inline_failed_reason (e);
1677 : 4208 : heap->delete_node ((edge_heap_node_t *) e->aux);
1678 : 4208 : e->aux = NULL;
1679 : : }
1680 : : }
1681 : : /* In case we redirected to unreachable node we only need to remove the
1682 : : fibheap entry. */
1683 : 1206425 : else if (e->aux)
1684 : : {
1685 : 2655 : heap->delete_node ((edge_heap_node_t *) e->aux);
1686 : 2655 : e->aux = NULL;
1687 : : }
 : : /* Advance the walk: next sibling, or pop back towards NODE. */
1688 : 19707103 : if (e->next_callee)
1689 : : e = e->next_callee;
1690 : : else
1691 : : {
1692 : 4628388 : do
1693 : : {
1694 : 4628388 : if (e->caller == node)
1695 : 753416 : return;
 : : /* Leaving UPDATE_SINCE's body; stop re-checking. */
1696 : 3874972 : if (e->caller == update_since)
1697 : 342730 : check_inlinability = false;
1698 : 3874972 : e = e->caller->callers;
1699 : : }
1700 : 3874972 : while (!e->next_callee);
1701 : : e = e->next_callee;
1702 : : }
1703 : : }
1704 : : }
1705 : :
1706 : : /* Enqueue all recursive calls from NODE into priority queue depending on
1707 : : how likely we want to recursively inline the call. */
1708 : :
1709 : : static void
1710 : 20220 : lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1711 : : edge_heap_t *heap)
1712 : : {
1713 : 20220 : struct cgraph_edge *e;
1714 : 20220 : enum availability avail;
1715 : :
 : : /* Queue every call from WHERE that resolves back to NODE (either
 : : directly or through a non-interposable alias). */
1716 : 55377 : for (e = where->callees; e; e = e->next_callee)
1717 : 35157 : if (e->callee == node
1718 : 35157 : || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1719 : 1142 : && avail > AVAIL_INTERPOSABLE))
1720 : : {
 : : /* Use negated call frequency as the key so that more
 : : frequently executed recursive calls come out of the
 : : min-heap first. */
1721 : 15325 : inline_badness b (e, -e->sreal_frequency ());
1722 : 15325 : heap->insert (b, e);
1723 : : }
 : : /* Also scan bodies of functions already inlined into WHERE. */
1724 : 55377 : for (e = where->callees; e; e = e->next_callee)
1725 : 35157 : if (!e->inline_failed)
1726 : 7422 : lookup_recursive_calls (node, e->callee, heap);
1727 : : }
1728 : :
1729 : : /* Decide on recursive inlining: in the case function has recursive calls,
1730 : : inline until body size reaches given argument. If any new indirect edges
1731 : : are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1732 : : is NULL. */
1733 : :
1734 : : static bool
1735 : 1744 : recursive_inlining (struct cgraph_edge *edge,
1736 : : vec<cgraph_edge *> *new_edges)
1737 : : {
1738 : 3488 : cgraph_node *to = (edge->caller->inlined_to
1739 : 1744 : ? edge->caller->inlined_to : edge->caller);
 : : /* Growth budget for the recursively-inlined body; functions
 : : explicitly declared inline get the larger limit below. */
1740 : 1744 : int limit = opt_for_fn (to->decl,
1741 : : param_max_inline_insns_recursive_auto);
1742 : 1744 : inline_badness b (edge, sreal::min ());
1743 : 1744 : edge_heap_t heap (b);
1744 : 1744 : struct cgraph_node *node;
1745 : 1744 : struct cgraph_edge *e;
1746 : 1744 : struct cgraph_node *master_clone = NULL, *next;
1747 : 1744 : int depth = 0;
 : : /* Number of calls actually inlined; used for dumping only. */
1748 : 1744 : int n = 0;
1749 : :
1750 : 1744 : node = edge->caller;
1751 : 1744 : if (node->inlined_to)
1752 : 505 : node = node->inlined_to;
1753 : :
1754 : 1744 : if (DECL_DECLARED_INLINE_P (node->decl))
1755 : 348 : limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1756 : :
1757 : : /* Make sure that function is small enough to be considered for inlining. */
1758 : 1744 : if (estimate_size_after_inlining (node, edge) >= limit)
1759 : : return false;
1760 : 1744 : lookup_recursive_calls (node, node, &heap);
1761 : 1744 : if (heap.empty ())
1762 : : return false;
1763 : :
1764 : 1744 : if (dump_file)
1765 : 5 : fprintf (dump_file,
1766 : : " Performing recursive inlining on %s\n", node->dump_name ());
1767 : :
1768 : : /* Do the inlining and update list of recursive call during process. */
1769 : 15512 : while (!heap.empty ())
1770 : : {
1771 : 13788 : struct cgraph_edge *curr = heap.extract_min ();
1772 : 13788 : struct cgraph_node *cnode, *dest = curr->callee;
1773 : :
1774 : 13788 : if (!can_inline_edge_p (curr, true)
1775 : 13788 : || !can_inline_edge_by_limits_p (curr, true))
1776 : 0 : continue;
1777 : :
1778 : : /* MASTER_CLONE is produced in the case we already started modified
1779 : : the function. Be sure to redirect edge to the original body before
1780 : : estimating growths otherwise we will be seeing growths after inlining
1781 : : the already modified body. */
1782 : 13788 : if (master_clone)
1783 : : {
1784 : 12040 : curr->redirect_callee (master_clone);
1785 : 12040 : if (edge_growth_cache != NULL)
1786 : 12040 : edge_growth_cache->remove (curr);
1787 : : }
1788 : :
 : : /* Stop once inlining this call would exceed the size budget;
 : : restore the edge to its original destination first. */
1789 : 13788 : if (estimate_size_after_inlining (node, curr) > limit)
1790 : : {
1791 : 20 : curr->redirect_callee (dest);
1792 : 20 : if (edge_growth_cache != NULL)
1793 : 20 : edge_growth_cache->remove (curr);
1794 : : break;
1795 : : }
1796 : :
 : : /* Estimate current recursion depth by walking the chain of
 : : inline clones up from the call site. */
1797 : 13768 : depth = 1;
1798 : 13768 : for (cnode = curr->caller;
1799 : 72784 : cnode->inlined_to; cnode = cnode->callers->caller)
1800 : 118032 : if (node->decl
1801 : 59016 : == curr->callee->ultimate_alias_target ()->decl)
1802 : 59016 : depth++;
1803 : :
1804 : 13768 : if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1805 : : {
1806 : 2714 : curr->redirect_callee (dest);
1807 : 2714 : if (edge_growth_cache != NULL)
1808 : 2714 : edge_growth_cache->remove (curr);
1809 : 2714 : continue;
1810 : : }
1811 : :
1812 : 11054 : if (dump_file)
1813 : : {
1814 : 18 : fprintf (dump_file,
1815 : : " Inlining call of depth %i", depth);
1816 : 36 : if (node->count.nonzero_p () && curr->count.initialized_p ())
1817 : : {
1818 : 2 : fprintf (dump_file, " called approx. %.2f times per call",
1819 : 2 : (double)curr->count.to_gcov_type ()
1820 : 2 : / node->count.to_gcov_type ())
1821 : 2 : ;
1822 : : }
1823 : 18 : fprintf (dump_file, "\n");
1824 : : }
1825 : 11054 : if (!master_clone)
1826 : : {
1827 : : /* We need original clone to copy around. */
1828 : 1448 : master_clone = node->create_clone (node->decl, node->count,
1829 : : false, vNULL, true, NULL, NULL);
1830 : 4100 : for (e = master_clone->callees; e; e = e->next_callee)
1831 : 2652 : if (!e->inline_failed)
1832 : 460 : clone_inlined_nodes (e, true, false, NULL);
1833 : 1448 : curr->redirect_callee (master_clone);
1834 : 1448 : if (edge_growth_cache != NULL)
1835 : 1448 : edge_growth_cache->remove (curr);
1836 : : }
1837 : :
 : : /* Perform the inline and pick up any recursive calls that the
 : : freshly inlined body exposes. */
1838 : 11054 : inline_call (curr, false, new_edges, &overall_size, true);
1839 : 11054 : reset_node_cache (node);
1840 : 11054 : lookup_recursive_calls (node, curr->callee, &heap);
1841 : 11054 : n++;
1842 : : }
1843 : :
1844 : 1744 : if (!heap.empty () && dump_file)
1845 : 0 : fprintf (dump_file, " Recursive inlining growth limit met.\n");
1846 : :
1847 : 1744 : if (!master_clone)
1848 : : return false;
1849 : :
1850 : 1448 : if (dump_enabled_p ())
1851 : 5 : dump_printf_loc (MSG_NOTE, edge->call_stmt,
1852 : : "\n Inlined %i times, "
1853 : : "body grown from size %i to %i, time %f to %f\n", n,
1854 : 5 : ipa_size_summaries->get (master_clone)->size,
1855 : 5 : ipa_size_summaries->get (node)->size,
1856 : 5 : ipa_fn_summaries->get (master_clone)->time.to_double (),
1857 : 5 : ipa_fn_summaries->get (node)->time.to_double ());
1858 : :
1859 : : /* Remove master clone we used for inlining. We rely that clones inlined
1860 : : into master clone gets queued just before master clone so we don't
1861 : : need recursion. */
1862 : 19473 : for (node = symtab->first_function (); node != master_clone;
1863 : : node = next)
1864 : : {
1865 : 16577 : next = symtab->next_function (node);
1866 : 16577 : if (node->inlined_to == master_clone)
1867 : 892 : node->remove ();
1868 : : }
1869 : 1448 : master_clone->remove ();
1870 : 1448 : return true;
1871 : 1744 : }
1871 : :
1872 : :
1873 : : /* Given whole compilation unit estimate of INSNS, compute how large we can
1874 : : allow the unit to grow. */
1875 : :
1876 : : static int64_t
1877 : 831336 : compute_max_insns (cgraph_node *node, int insns)
1878 : : {
1879 : 831336 : int max_insns = insns;
 : : /* Small units are always allowed to grow at least to
 : : param_large_unit_insns. */
1880 : 831336 : if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1881 : : max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1882 : :
 : : /* Permit param_inline_unit_growth percent of growth; done in
 : : 64-bit arithmetic so the multiply cannot overflow. */
1883 : 831336 : return ((int64_t) max_insns
1884 : 831336 : * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1885 : : }
1886 : :
1887 : :
1888 : : /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1889 : :
1890 : : static void
1891 : 784787 : add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> &new_edges)
1892 : : {
 : : /* Drain NEW_EDGES; each edge either gets queued or is simply
 : : dropped from consideration. */
1893 : 1571357 : while (new_edges.length () > 0)
1894 : : {
1895 : 1783 : struct cgraph_edge *edge = new_edges.pop ();
1896 : :
1897 : 1783 : gcc_assert (!edge->aux);
1898 : 1783 : gcc_assert (edge->callee);
 : : /* Only enqueue edges that pass all inlinability and
 : : profitability checks. */
1899 : 1783 : if (edge->inline_failed
1900 : 1783 : && can_inline_edge_p (edge, true)
1901 : 865 : && want_inline_small_function_p (edge, true)
1902 : 2356 : && can_inline_edge_by_limits_p (edge, true))
1903 : : {
1904 : 573 : inline_badness b (edge, edge_badness (edge, false));
1905 : 573 : edge->aux = heap->insert (b, edge);
1906 : : }
1907 : : }
1908 : 784787 : }
1909 : :
1910 : : /* Remove EDGE from the fibheap. */
1911 : :
1912 : : static void
1913 : 5196 : heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1914 : : {
 : : /* DATA is the edge heap supplied when the removal hook was
 : : registered; drop E's queue entry, if any, so the heap never
 : : refers to a removed edge. */
1915 : 5196 : if (e->aux)
1916 : : {
1917 : 14 : ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1918 : 14 : e->aux = NULL;
1919 : : }
1920 : 5196 : }
1921 : :
1922 : : /* Return true if speculation of edge E seems useful.
1923 : : If ANTICIPATE_INLINING is true, be conservative and hope that E
1924 : : may get inlined. */
1925 : :
1926 : : bool
1927 : 49473 : speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1928 : : {
1929 : : /* If we have already decided to inline the edge, it seems useful. */
1930 : 49473 : if (!e->inline_failed)
1931 : : return true;
1932 : :
1933 : 6534 : enum availability avail;
1934 : 13068 : struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1935 : 6534 : e->caller);
1936 : :
1937 : 6534 : gcc_assert (e->speculative && !e->indirect_unknown_callee);
1938 : :
 : : /* Cold speculative calls are not worth keeping. */
1939 : 6534 : if (!e->maybe_hot_p ())
1940 : : return false;
1941 : :
1942 : : /* See if IP optimizations found something potentially useful about the
1943 : : function. For now we look only for CONST/PURE flags. Almost everything
1944 : : else we propagate is useless. */
1945 : 6526 : if (avail >= AVAIL_AVAILABLE)
1946 : : {
1947 : 6517 : int ecf_flags = flags_from_decl_or_type (target->decl);
1948 : 6517 : if (ecf_flags & ECF_CONST)
1949 : : {
 : : /* Speculation is useful when it exposes ECF_CONST that the
 : : indirect edge did not already know about. */
1950 : 320 : if (!(e->speculative_call_indirect_edge ()->indirect_info
1951 : 320 : ->ecf_flags & ECF_CONST))
1952 : : return true;
1953 : : }
1954 : 6197 : else if (ecf_flags & ECF_PURE)
1955 : : {
 : : /* Likewise for ECF_PURE. */
1956 : 2133 : if (!(e->speculative_call_indirect_edge ()->indirect_info
1957 : 2133 : ->ecf_flags & ECF_PURE))
1958 : : return true;
1959 : : }
1960 : : }
1961 : : /* If we did not managed to inline the function nor redirect
1962 : : to an ipa-cp clone (that are seen by having local flag set),
1963 : : it is probably pointless to inline it unless hardware is missing
1964 : : indirect call predictor. */
1965 : 4073 : if (!anticipate_inlining && !target->local)
1966 : : return false;
1967 : : /* For overwritable targets there is not much to do. */
1968 : 3337 : if (!can_inline_edge_p (e, false)
1969 : 3337 : || !can_inline_edge_by_limits_p (e, false, true))
1970 : 10 : return false;
1971 : : /* OK, speculation seems interesting. */
1972 : : return true;
1973 : : }
1974 : :
1975 : : /* We know that EDGE is not going to be inlined.
1976 : : See if we can remove speculation. */
1977 : :
1978 : : static void
1979 : 48413 : resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1980 : : {
1981 : 48413 : if (edge->speculative && !speculation_useful_p (edge, false))
1982 : : {
1983 : 107 : struct cgraph_node *node = edge->caller;
 : : /* Size/time bookkeeping lives on the function EDGE's caller was
 : : inlined into, if any. */
1984 : 214 : struct cgraph_node *where = node->inlined_to
1985 : 107 : ? node->inlined_to : node;
1986 : 107 : auto_bitmap updated_nodes;
1987 : :
 : : /* Account the profile count of the removed speculation. */
1988 : 107 : if (edge->count.ipa ().initialized_p ())
1989 : 0 : spec_rem += edge->count.ipa ();
1990 : 107 : cgraph_edge::resolve_speculation (edge);
 : : /* Removing the speculative call changes WHERE's summaries, so
 : : refresh caches and the heap keys of its callers and callees. */
1991 : 107 : reset_edge_caches (where);
1992 : 107 : ipa_update_overall_fn_summary (where);
1993 : 107 : update_caller_keys (edge_heap, where,
1994 : : updated_nodes, NULL);
1995 : 107 : update_callee_keys (edge_heap, where, NULL,
1996 : : updated_nodes);
1997 : : }
1998 : 48413 : }
1999 : :
2000 : : /* Return true if NODE should be accounted for overall size estimate.
2001 : : Skip all nodes optimized for size so we can measure the growth of hot
2002 : : part of program no matter of the padding. */
2003 : :
2004 : : bool
2005 : 3316176 : inline_account_function_p (struct cgraph_node *node)
2006 : : {
 : : /* Exclude external bodies, functions compiled for size and
 : : functions whose frequency says they are never executed. */
2007 : 3316176 : return (!DECL_EXTERNAL (node->decl)
2008 : 3133555 : && !opt_for_fn (node->decl, optimize_size)
2009 : 6365696 : && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
2010 : : }
2011 : :
2012 : : /* Count number of callers of NODE and store it into DATA (that
2013 : : points to int). Worker for cgraph_for_node_and_aliases. */
2014 : :
2015 : : static bool
2016 : 1362464 : sum_callers (struct cgraph_node *node, void *data)
2017 : : {
2018 : 1362464 : struct cgraph_edge *e;
2019 : 1362464 : int *num_calls = (int *)data;
2020 : :
 : : /* Add the length of NODE's caller list to the running total. */
2021 : 3174202 : for (e = node->callers; e; e = e->next_caller)
2022 : 1811738 : (*num_calls)++;
 : : /* Return false so the walk continues over remaining aliases
 : : (NOTE(review): a true return presumably stops the walk). */
2023 : 1362464 : return false;
2024 : : }
2025 : :
2026 : : /* We only propagate across edges with non-interposable callee. */
2027 : :
2028 : : inline bool
2029 : 6541681 : ignore_edge_p (struct cgraph_edge *e)
2030 : : {
2031 : 6541681 : enum availability avail;
 : : /* Look through aliases/thunks to get the callee's availability
 : : as seen from the caller. */
2032 : 6541681 : e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
2033 : 6541681 : return (avail <= AVAIL_INTERPOSABLE);
2034 : : }
2035 : :
2036 : : /* We use greedy algorithm for inlining of small functions:
2037 : : All inline candidates are put into prioritized heap ordered in
2038 : : increasing badness.
2039 : :
2040 : : The inlining of small functions is bounded by unit growth parameters. */
2041 : :
2042 : : static void
2043 : 227668 : inline_small_functions (void)
2044 : : {
2045 : 227668 : struct cgraph_node *node;
2046 : 227668 : struct cgraph_edge *edge;
2047 : 227668 : inline_badness b;
2048 : 227668 : edge_heap_t edge_heap (b);
2049 : 227668 : auto_bitmap updated_nodes;
2050 : 227668 : int min_size;
2051 : 227668 : auto_vec<cgraph_edge *> new_indirect_edges;
2052 : 227668 : int initial_size = 0;
2053 : 227668 : struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
2054 : 227668 : struct cgraph_edge_hook_list *edge_removal_hook_holder;
2055 : 227668 : new_indirect_edges.create (8);
2056 : :
2057 : 227668 : edge_removal_hook_holder
2058 : 227668 : = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
2059 : :
2060 : : /* Compute overall unit size and other global parameters used by badness
2061 : : metrics. */
2062 : :
2063 : 227668 : max_count = profile_count::uninitialized ();
2064 : 227668 : ipa_reduced_postorder (order, true, ignore_edge_p);
2065 : 227668 : free (order);
2066 : :
2067 : 2030265 : FOR_EACH_DEFINED_FUNCTION (node)
2068 : 1802597 : if (!node->inlined_to)
2069 : : {
2070 : 1802566 : if (!node->alias && node->analyzed
2071 : 1704203 : && (node->has_gimple_body_p () || node->thunk)
2072 : 3506769 : && opt_for_fn (node->decl, optimize))
2073 : : {
2074 : 1264821 : class ipa_fn_summary *info = ipa_fn_summaries->get (node);
2075 : 1264821 : struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
2076 : :
2077 : : /* Do not account external functions, they will be optimized out
2078 : : if not inlined. Also only count the non-cold portion of program. */
2079 : 1264821 : if (inline_account_function_p (node))
2080 : 1163242 : initial_size += ipa_size_summaries->get (node)->size;
2081 : 1264821 : info->growth = estimate_growth (node);
2082 : :
2083 : 1264821 : int num_calls = 0;
2084 : 1264821 : node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2085 : : true);
2086 : 1264821 : if (num_calls == 1)
2087 : 415935 : info->single_caller = true;
2088 : 1264821 : if (dfs && dfs->next_cycle)
2089 : : {
2090 : 6163 : struct cgraph_node *n2;
2091 : 6163 : int id = dfs->scc_no + 1;
2092 : 13677 : for (n2 = node; n2;
2093 : 7514 : n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
2094 : 12327 : if (opt_for_fn (n2->decl, optimize))
2095 : : {
2096 : 12319 : ipa_fn_summary *info2 = ipa_fn_summaries->get
2097 : 12319 : (n2->inlined_to ? n2->inlined_to : n2);
2098 : 12319 : if (info2->scc_no)
2099 : : break;
2100 : 7506 : info2->scc_no = id;
2101 : : }
2102 : : }
2103 : : }
2104 : :
2105 : 3994451 : for (edge = node->callers; edge; edge = edge->next_caller)
2106 : 2191885 : max_count = max_count.max (edge->count.ipa ());
2107 : : }
2108 : 227668 : ipa_free_postorder_info ();
2109 : 227668 : initialize_growth_caches ();
2110 : :
2111 : 227668 : if (dump_file)
2112 : 208 : fprintf (dump_file,
2113 : : "\nDeciding on inlining of small functions. Starting with size %i.\n",
2114 : : initial_size);
2115 : :
2116 : 227668 : overall_size = initial_size;
2117 : 227668 : min_size = overall_size;
2118 : :
2119 : : /* Populate the heap with all edges we might inline. */
2120 : :
2121 : 2030265 : FOR_EACH_DEFINED_FUNCTION (node)
2122 : : {
2123 : 1802597 : bool update = false;
2124 : 1802597 : struct cgraph_edge *next = NULL;
2125 : 1802597 : bool has_speculative = false;
2126 : :
2127 : 1802597 : if (!opt_for_fn (node->decl, optimize)
2128 : : /* With -Og we do not want to perform IPA inlining of small
2129 : : functions since there are no scalar cleanups after it
2130 : : that would realize the anticipated win. All abstraction
2131 : : is removed during early inlining. */
2132 : 1802597 : || opt_for_fn (node->decl, optimize_debug))
2133 : 468814 : continue;
2134 : :
2135 : 1333783 : if (dump_file)
2136 : 999 : fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2137 : :
2138 : 6468377 : for (edge = node->callees; edge; edge = edge->next_callee)
2139 : : {
2140 : 5134594 : if (edge->inline_failed
2141 : 5134563 : && !edge->aux
2142 : 5134488 : && can_inline_edge_p (edge, true)
2143 : 1365927 : && want_inline_small_function_p (edge, true)
2144 : 757914 : && can_inline_edge_by_limits_p (edge, true)
2145 : 5887052 : && edge->inline_failed)
2146 : : {
2147 : 752458 : gcc_assert (!edge->aux);
2148 : 752458 : update_edge_key (&edge_heap, edge);
2149 : : }
2150 : 5134594 : if (edge->speculative)
2151 : 5246 : has_speculative = true;
2152 : : }
2153 : 1333783 : if (has_speculative)
2154 : 21263 : for (edge = node->callees; edge; edge = next)
2155 : : {
2156 : 16942 : next = edge->next_callee;
2157 : 16942 : if (edge->speculative
2158 : 16942 : && !speculation_useful_p (edge, edge->aux != NULL))
2159 : : {
2160 : 542 : cgraph_edge::resolve_speculation (edge);
2161 : 542 : update = true;
2162 : : }
2163 : : }
2164 : 4321 : if (update)
2165 : : {
2166 : 776 : struct cgraph_node *where = node->inlined_to
2167 : 388 : ? node->inlined_to : node;
2168 : 388 : ipa_update_overall_fn_summary (where);
2169 : 388 : reset_edge_caches (where);
2170 : 388 : update_caller_keys (&edge_heap, where,
2171 : : updated_nodes, NULL);
2172 : 388 : update_callee_keys (&edge_heap, where, NULL,
2173 : : updated_nodes);
2174 : 388 : bitmap_clear (updated_nodes);
2175 : : }
2176 : : }
2177 : :
2178 : 227668 : gcc_assert (in_lto_p
2179 : : || !(max_count > 0)
2180 : : || (profile_info && flag_branch_probabilities));
2181 : :
2182 : 2570949 : while (!edge_heap.empty ())
2183 : : {
2184 : 2343281 : int old_size = overall_size;
2185 : 2343281 : struct cgraph_node *where, *callee;
2186 : 2343281 : sreal badness = edge_heap.min_key ().badness;
2187 : 2343281 : sreal current_badness;
2188 : 2343281 : int growth;
2189 : :
2190 : 2343281 : edge = edge_heap.extract_min ();
2191 : 2343281 : gcc_assert (edge->aux);
2192 : 2343281 : edge->aux = NULL;
2193 : 2343281 : if (!edge->inline_failed || !edge->callee->analyzed)
2194 : 1558468 : continue;
2195 : :
2196 : : /* Be sure that caches are maintained consistent.
2197 : : This check is affected by scaling roundoff errors when compiling for
2198 : : IPA this we skip it in that case. */
2199 : 4523180 : if (flag_checking && !edge->callee->count.ipa_p ()
2200 : 4523184 : && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2201 : : {
2202 : 2179990 : sreal cached_badness = edge_badness (edge, false);
2203 : :
2204 : 2179990 : int old_size_est = estimate_edge_size (edge);
2205 : 2179990 : sreal old_time_est = estimate_edge_time (edge);
2206 : 2179990 : int old_hints_est = estimate_edge_hints (edge);
2207 : :
2208 : 2179990 : if (edge_growth_cache != NULL)
2209 : 2179990 : edge_growth_cache->remove (edge);
2210 : 3895486 : reset_node_cache (edge->caller->inlined_to
2211 : : ? edge->caller->inlined_to
2212 : : : edge->caller);
2213 : 2179990 : gcc_assert (old_size_est == estimate_edge_size (edge));
2214 : 2179990 : gcc_assert (old_time_est == estimate_edge_time (edge));
2215 : : /* FIXME:
2216 : :
2217 : : gcc_assert (old_hints_est == estimate_edge_hints (edge));
2218 : :
2219 : : fails with profile feedback because some hints depends on
2220 : : maybe_hot_edge_p predicate and because callee gets inlined to other
2221 : : calls, the edge may become cold.
2222 : : This ought to be fixed by computing relative probabilities
2223 : : for given invocation but that will be better done once whole
2224 : : code is converted to sreals. Disable for now and revert to "wrong"
2225 : : value so enable/disable checking paths agree. */
2226 : 2179990 : edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2227 : :
2228 : : /* When updating the edge costs, we only decrease badness in the keys.
2229 : : Increases of badness are handled lazily; when we see key with out
2230 : : of date value on it, we re-insert it now. */
2231 : 2179990 : current_badness = edge_badness (edge, false);
2232 : 2179990 : gcc_assert (cached_badness == current_badness);
2233 : 2179990 : gcc_assert (current_badness >= badness);
2234 : : }
2235 : : else
2236 : 163204 : current_badness = edge_badness (edge, false);
2237 : 2343194 : if (current_badness != badness)
2238 : : {
2239 : 1654787 : if (edge_heap.min () && current_badness > edge_heap.min_key ().badness)
2240 : : {
2241 : 1509968 : inline_badness b (edge, current_badness);
2242 : 1509968 : edge->aux = edge_heap.insert (b, edge);
2243 : 1509968 : continue;
2244 : 1509968 : }
2245 : : else
2246 : 144819 : badness = current_badness;
2247 : : }
2248 : :
2249 : 833226 : if (!can_inline_edge_p (edge, true)
2250 : 833226 : || !can_inline_edge_by_limits_p (edge, true))
2251 : : {
2252 : 1890 : resolve_noninline_speculation (&edge_heap, edge);
2253 : 1890 : continue;
2254 : : }
2255 : :
2256 : 831336 : callee = edge->callee->ultimate_alias_target ();
2257 : 831336 : growth = estimate_edge_growth (edge);
2258 : 831336 : if (dump_file)
2259 : : {
2260 : 565 : fprintf (dump_file,
2261 : : "\nConsidering %s with %i size\n",
2262 : : callee->dump_name (),
2263 : 565 : ipa_size_summaries->get (callee)->size);
2264 : 1130 : fprintf (dump_file,
2265 : : " to be inlined into %s in %s:%i\n"
2266 : : " Estimated badness is %f, frequency %.2f.\n",
2267 : 565 : edge->caller->dump_name (),
2268 : 565 : edge->call_stmt
2269 : 537 : && (LOCATION_LOCUS (gimple_location ((const gimple *)
2270 : : edge->call_stmt))
2271 : : > BUILTINS_LOCATION)
2272 : 526 : ? gimple_filename ((const gimple *) edge->call_stmt)
2273 : : : "unknown",
2274 : 565 : edge->call_stmt
2275 : 537 : ? gimple_lineno ((const gimple *) edge->call_stmt)
2276 : : : -1,
2277 : : badness.to_double (),
2278 : 565 : edge->sreal_frequency ().to_double ());
2279 : 565 : if (edge->count.ipa ().initialized_p ())
2280 : : {
2281 : 0 : fprintf (dump_file, " Called ");
2282 : 0 : edge->count.ipa ().dump (dump_file);
2283 : 0 : fprintf (dump_file, " times\n");
2284 : : }
2285 : 565 : if (dump_flags & TDF_DETAILS)
2286 : 200 : edge_badness (edge, true);
2287 : : }
2288 : :
2289 : 831336 : where = edge->caller;
2290 : :
2291 : 831336 : if (overall_size + growth > compute_max_insns (where, min_size)
2292 : 831336 : && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2293 : : {
2294 : 42687 : edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2295 : 42687 : report_inline_failed_reason (edge);
2296 : 42687 : resolve_noninline_speculation (&edge_heap, edge);
2297 : 42687 : continue;
2298 : : }
2299 : :
2300 : 788649 : if (!want_inline_small_function_p (edge, true))
2301 : : {
2302 : 2030 : resolve_noninline_speculation (&edge_heap, edge);
2303 : 2030 : continue;
2304 : : }
2305 : :
2306 : 786619 : profile_count old_count = callee->count;
2307 : :
2308 : : /* Heuristics for inlining small functions work poorly for
2309 : : recursive calls where we do effects similar to loop unrolling.
2310 : : When inlining such edge seems profitable, leave decision on
2311 : : specific inliner. */
2312 : 786619 : if (edge->recursive_p ())
2313 : : {
2314 : 1744 : if (where->inlined_to)
2315 : 505 : where = where->inlined_to;
2316 : 1744 : if (!recursive_inlining (edge,
2317 : 1744 : opt_for_fn (edge->caller->decl,
2318 : : flag_indirect_inlining)
2319 : : ? &new_indirect_edges : NULL))
2320 : : {
2321 : 296 : edge->inline_failed = CIF_RECURSIVE_INLINING;
2322 : 296 : resolve_noninline_speculation (&edge_heap, edge);
2323 : 296 : continue;
2324 : : }
2325 : 1448 : reset_edge_caches (where);
2326 : : /* Recursive inliner inlines all recursive calls of the function
2327 : : at once. Consequently we need to update all callee keys. */
2328 : 1448 : if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2329 : 1422 : add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2330 : 1448 : update_callee_keys (&edge_heap, where, where, updated_nodes);
2331 : 1448 : bitmap_clear (updated_nodes);
2332 : : }
2333 : : else
2334 : : {
2335 : 784875 : struct cgraph_node *outer_node = NULL;
2336 : 784875 : int depth = 0;
2337 : :
2338 : : /* Consider the case where self recursive function A is inlined
2339 : : into B. This is desired optimization in some cases, since it
2340 : : leads to effect similar of loop peeling and we might completely
2341 : : optimize out the recursive call. However we must be extra
2342 : : selective. */
2343 : :
2344 : 784875 : where = edge->caller;
2345 : 1209935 : while (where->inlined_to)
2346 : : {
2347 : 425060 : if (where->decl == callee->decl)
2348 : 7461 : outer_node = where, depth++;
2349 : 425060 : where = where->callers->caller;
2350 : : }
2351 : 786385 : if (outer_node
2352 : 784875 : && !want_inline_self_recursive_call_p (edge, outer_node,
2353 : : true, depth))
2354 : : {
2355 : 1510 : edge->inline_failed
2356 : 1510 : = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2357 : 1510 : ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2358 : 1510 : resolve_noninline_speculation (&edge_heap, edge);
2359 : 1510 : continue;
2360 : : }
2361 : 783365 : else if (depth && dump_file)
2362 : 6 : fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2363 : :
2364 : 783365 : gcc_checking_assert (!callee->inlined_to);
2365 : :
2366 : 783365 : int old_size = ipa_size_summaries->get (where)->size;
2367 : 783365 : sreal old_time = ipa_fn_summaries->get (where)->time;
2368 : :
2369 : 783365 : inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2370 : 783365 : reset_edge_caches (edge->callee);
2371 : 783365 : add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2372 : :
2373 : : /* If caller's size and time increased we do not need to update
2374 : : all edges because badness is not going to decrease. */
2375 : 783365 : if (old_size <= ipa_size_summaries->get (where)->size
2376 : 741286 : && old_time <= ipa_fn_summaries->get (where)->time
2377 : : /* Wrapper penalty may be non-monotonous in this respect.
2378 : : Fortunately it only affects small functions. */
2379 : 1307923 : && !wrapper_heuristics_may_apply (where, old_size))
2380 : 390018 : update_callee_keys (&edge_heap, edge->callee, edge->callee,
2381 : : updated_nodes);
2382 : : else
2383 : 393347 : update_callee_keys (&edge_heap, where,
2384 : : edge->callee,
2385 : : updated_nodes);
2386 : : }
2387 : 784813 : where = edge->caller;
2388 : 784813 : if (where->inlined_to)
2389 : 229952 : where = where->inlined_to;
2390 : :
2391 : : /* Our profitability metric can depend on local properties
2392 : : such as number of inlinable calls and size of the function body.
2393 : : After inlining these properties might change for the function we
2394 : : inlined into (since it's body size changed) and for the functions
2395 : : called by function we inlined (since number of it inlinable callers
2396 : : might change). */
2397 : 784813 : update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2398 : : /* Offline copy count has possibly changed, recompute if profile is
2399 : : available. */
2400 : 784813 : struct cgraph_node *n
2401 : 784813 : = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2402 : 540967 : if (n != edge->callee && n->analyzed && !(n->count == old_count)
2403 : 784845 : && n->count.ipa_p ())
2404 : 32 : update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2405 : 784813 : bitmap_clear (updated_nodes);
2406 : :
2407 : 784813 : if (dump_enabled_p ())
2408 : : {
2409 : 573 : ipa_fn_summary *s = ipa_fn_summaries->get (where);
2410 : :
2411 : : /* dump_printf can't handle %+i. */
2412 : 573 : char buf_net_change[100];
2413 : 573 : snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2414 : : overall_size - old_size);
2415 : :
2416 : 1146 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2417 : : " Inlined %C into %C which now has time %f and "
2418 : : "size %i, net change of %s%s.\n",
2419 : : edge->callee, edge->caller,
2420 : : s->time.to_double (),
2421 : 573 : ipa_size_summaries->get (edge->caller)->size,
2422 : : buf_net_change,
2423 : 573 : cross_module_call_p (edge) ? " (cross module)":"");
2424 : : }
2425 : 784813 : if (min_size > overall_size)
2426 : : {
2427 : 179951 : min_size = overall_size;
2428 : :
2429 : 179951 : if (dump_file)
2430 : 421 : fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2431 : : }
2432 : : }
2433 : :
2434 : 227668 : free_growth_caches ();
2435 : 227668 : if (dump_enabled_p ())
2436 : 499 : dump_printf (MSG_NOTE,
2437 : : "Unit growth for small function inlining: %i->%i (%i%%)\n",
2438 : : initial_size, overall_size,
2439 : 225 : initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2440 : 227668 : symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2441 : 227668 : }
2442 : :
2443 : : /* Flatten NODE. Performed both during early inlining and
2444 : : at IPA inlining time. */
2445 : :
2446 : : static void
2447 : 664 : flatten_function (struct cgraph_node *node, bool early, bool update)
2448 : : {
2449 : 664 : struct cgraph_edge *e;
2450 : :
2451 : : /* We shouldn't be called recursively when we are being processed. */
2452 : 664 : gcc_assert (node->aux == NULL);
2453 : :
2454 : 664 : node->aux = (void *) node;
2455 : :
2456 : 1548 : for (e = node->callees; e; e = e->next_callee)
2457 : : {
2458 : 884 : struct cgraph_node *orig_callee;
2459 : 884 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2460 : :
2461 : : /* We've hit cycle? It is time to give up. */
2462 : 884 : if (callee->aux)
2463 : : {
2464 : 15 : if (dump_enabled_p ())
2465 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2466 : : "Not inlining %C into %C to avoid cycle.\n",
2467 : : callee, e->caller);
2468 : 15 : if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2469 : 15 : e->inline_failed = CIF_RECURSIVE_INLINING;
2470 : 15 : continue;
2471 : : }
2472 : :
2473 : : /* When the edge is already inlined, we just need to recurse into
2474 : : it in order to fully flatten the leaves. */
2475 : 869 : if (!e->inline_failed)
2476 : : {
2477 : 325 : flatten_function (callee, early, false);
2478 : 325 : continue;
2479 : : }
2480 : :
2481 : : /* Flatten attribute needs to be processed during late inlining. For
2482 : : extra code quality we however do flattening during early optimization,
2483 : : too. */
2484 : 299 : if (!early
2485 : 544 : ? !can_inline_edge_p (e, true)
2486 : 245 : && !can_inline_edge_by_limits_p (e, true)
2487 : 299 : : !can_early_inline_edge_p (e))
2488 : 415 : continue;
2489 : :
2490 : 129 : if (e->recursive_p ())
2491 : : {
2492 : 0 : if (dump_enabled_p ())
2493 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2494 : : "Not inlining: recursive call.\n");
2495 : 0 : continue;
2496 : : }
2497 : :
2498 : 129 : if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2499 : 258 : != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2500 : : {
2501 : 4 : if (dump_enabled_p ())
2502 : 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2503 : : "Not inlining: SSA form does not match.\n");
2504 : 4 : continue;
2505 : : }
2506 : :
2507 : : /* Inline the edge and flatten the inline clone. Avoid
2508 : : recursing through the original node if the node was cloned. */
2509 : 125 : if (dump_enabled_p ())
2510 : 3 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2511 : : " Inlining %C into %C.\n",
2512 : : callee, e->caller);
2513 : 125 : orig_callee = callee;
2514 : 125 : inline_call (e, true, NULL, NULL, false);
2515 : 125 : if (e->callee != orig_callee)
2516 : 90 : orig_callee->aux = (void *) node;
2517 : 125 : flatten_function (e->callee, early, false);
2518 : 125 : if (e->callee != orig_callee)
2519 : 90 : orig_callee->aux = NULL;
2520 : : }
2521 : :
2522 : 664 : node->aux = NULL;
2523 : 664 : cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2524 : 664 : if (update && opt_for_fn (where->decl, optimize))
2525 : 203 : ipa_update_overall_fn_summary (where);
2526 : 664 : }
2527 : :
2528 : : /* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases.
2529 : : DATA points to number of calls originally found so we avoid infinite
2530 : : recursion. */
2531 : :
2532 : : static bool
2533 : 26634 : inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2534 : : hash_set<cgraph_node *> *callers)
2535 : : {
2536 : 26634 : int *num_calls = (int *)data;
2537 : 26634 : bool callee_removed = false;
2538 : :
2539 : 56727 : while (node->callers && !node->inlined_to)
2540 : : {
2541 : 30816 : struct cgraph_node *caller = node->callers->caller;
2542 : :
2543 : 30816 : if (!can_inline_edge_p (node->callers, true)
2544 : 30816 : || !can_inline_edge_by_limits_p (node->callers, true)
2545 : 61632 : || node->callers->recursive_p ())
2546 : : {
2547 : 0 : if (dump_file)
2548 : 0 : fprintf (dump_file, "Uninlinable call found; giving up.\n");
2549 : 0 : *num_calls = 0;
2550 : 0 : return false;
2551 : : }
2552 : :
2553 : 30816 : if (dump_file)
2554 : : {
2555 : 4 : cgraph_node *ultimate = node->ultimate_alias_target ();
2556 : 4 : fprintf (dump_file,
2557 : : "\nInlining %s size %i.\n",
2558 : : ultimate->dump_name (),
2559 : 4 : ipa_size_summaries->get (ultimate)->size);
2560 : 4 : fprintf (dump_file,
2561 : : " Called once from %s %i insns.\n",
2562 : : node->callers->caller->dump_name (),
2563 : 4 : ipa_size_summaries->get (node->callers->caller)->size);
2564 : : }
2565 : :
2566 : : /* Remember which callers we inlined to, delaying updating the
2567 : : overall summary. */
2568 : 30816 : callers->add (node->callers->caller);
2569 : 30816 : inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2570 : 30816 : if (dump_file)
2571 : 4 : fprintf (dump_file,
2572 : : " Inlined into %s which now has %i size\n",
2573 : : caller->dump_name (),
2574 : 4 : ipa_size_summaries->get (caller)->size);
2575 : 30816 : if (!(*num_calls)--)
2576 : : {
2577 : 0 : if (dump_file)
2578 : 0 : fprintf (dump_file, "New calls found; giving up.\n");
2579 : 0 : return callee_removed;
2580 : : }
2581 : 30816 : if (callee_removed)
2582 : : return true;
2583 : : }
2584 : : return false;
2585 : : }
2586 : :
2587 : : /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2588 : : update. */
2589 : :
2590 : : static bool
2591 : 26634 : inline_to_all_callers (struct cgraph_node *node, void *data)
2592 : : {
2593 : 26634 : hash_set<cgraph_node *> callers;
2594 : 26634 : bool res = inline_to_all_callers_1 (node, data, &callers);
2595 : : /* Perform the delayed update of the overall summary of all callers
2596 : : processed. This avoids quadratic behavior in the cases where
2597 : : we have a lot of calls to the same function. */
2598 : 52430 : for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2599 : 78226 : i != callers.end (); ++i)
2600 : 25796 : ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2601 : 26634 : return res;
2602 : 26634 : }
2603 : :
2604 : : /* Output overall time estimate. */
2605 : : static void
2606 : 416 : dump_overall_stats (void)
2607 : : {
2608 : 416 : sreal sum_weighted = 0, sum = 0;
2609 : 416 : struct cgraph_node *node;
2610 : :
2611 : 2725 : FOR_EACH_DEFINED_FUNCTION (node)
2612 : 2309 : if (!node->inlined_to
2613 : 1684 : && !node->alias)
2614 : : {
2615 : 1551 : ipa_fn_summary *s = ipa_fn_summaries->get (node);
2616 : 1551 : if (s != NULL)
2617 : : {
2618 : 1435 : sum += s->time;
2619 : 1435 : if (node->count.ipa ().initialized_p ())
2620 : 14 : sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2621 : : }
2622 : : }
2623 : 416 : fprintf (dump_file, "Overall time estimate: "
2624 : : "%f weighted by profile: "
2625 : : "%f\n", sum.to_double (), sum_weighted.to_double ());
2626 : 416 : }
2627 : :
2628 : : /* Output some useful stats about inlining. */
2629 : :
2630 : : static void
2631 : 208 : dump_inline_stats (void)
2632 : : {
2633 : 208 : int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2634 : 208 : int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2635 : 208 : int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2636 : 208 : int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2637 : 208 : int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2638 : 208 : int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2639 : 208 : int64_t reason[CIF_N_REASONS][2];
2640 : 6448 : sreal reason_freq[CIF_N_REASONS];
2641 : 208 : int i;
2642 : 208 : struct cgraph_node *node;
2643 : :
2644 : 208 : memset (reason, 0, sizeof (reason));
2645 : 6448 : for (i=0; i < CIF_N_REASONS; i++)
2646 : 6240 : reason_freq[i] = 0;
2647 : 1445 : FOR_EACH_DEFINED_FUNCTION (node)
2648 : : {
2649 : 1237 : struct cgraph_edge *e;
2650 : 6277 : for (e = node->callees; e; e = e->next_callee)
2651 : : {
2652 : 5040 : if (e->inline_failed)
2653 : : {
2654 : 4418 : if (e->count.ipa ().initialized_p ())
2655 : 2628 : reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2656 : 4418 : reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2657 : 4418 : reason[(int) e->inline_failed][1] ++;
2658 : 4418 : if (DECL_VIRTUAL_P (e->callee->decl)
2659 : 4418 : && e->count.ipa ().initialized_p ())
2660 : : {
2661 : 0 : if (e->indirect_inlining_edge)
2662 : 0 : noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2663 : : else
2664 : 0 : noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2665 : : }
2666 : 4418 : else if (e->count.ipa ().initialized_p ())
2667 : : {
2668 : 2628 : if (e->indirect_inlining_edge)
2669 : 0 : noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2670 : : else
2671 : 2628 : noninlined_cnt += e->count.ipa ().to_gcov_type ();
2672 : : }
2673 : : }
2674 : 622 : else if (e->count.ipa ().initialized_p ())
2675 : : {
2676 : 0 : if (e->speculative)
2677 : : {
2678 : 0 : if (DECL_VIRTUAL_P (e->callee->decl))
2679 : 0 : inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2680 : : else
2681 : 0 : inlined_speculative += e->count.ipa ().to_gcov_type ();
2682 : : }
2683 : 0 : else if (DECL_VIRTUAL_P (e->callee->decl))
2684 : : {
2685 : 0 : if (e->indirect_inlining_edge)
2686 : 0 : inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2687 : : else
2688 : 0 : inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2689 : : }
2690 : : else
2691 : : {
2692 : 0 : if (e->indirect_inlining_edge)
2693 : 0 : inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2694 : : else
2695 : 0 : inlined_cnt += e->count.ipa ().to_gcov_type ();
2696 : : }
2697 : : }
2698 : : }
2699 : 1351 : for (e = node->indirect_calls; e; e = e->next_callee)
2700 : 114 : if (e->indirect_info->polymorphic
2701 : 114 : & e->count.ipa ().initialized_p ())
2702 : 0 : indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2703 : 114 : else if (e->count.ipa ().initialized_p ())
2704 : 0 : indirect_cnt += e->count.ipa ().to_gcov_type ();
2705 : : }
2706 : 208 : if (max_count.initialized_p ())
2707 : : {
2708 : 0 : fprintf (dump_file,
2709 : : "Inlined %" PRId64 " + speculative "
2710 : : "%" PRId64 " + speculative polymorphic "
2711 : : "%" PRId64 " + previously indirect "
2712 : : "%" PRId64 " + virtual "
2713 : : "%" PRId64 " + virtual and previously indirect "
2714 : : "%" PRId64 "\n" "Not inlined "
2715 : : "%" PRId64 " + previously indirect "
2716 : : "%" PRId64 " + virtual "
2717 : : "%" PRId64 " + virtual and previously indirect "
2718 : : "%" PRId64 " + still indirect "
2719 : : "%" PRId64 " + still indirect polymorphic "
2720 : : "%" PRId64 "\n", inlined_cnt,
2721 : : inlined_speculative, inlined_speculative_ply,
2722 : : inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2723 : : noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2724 : : noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2725 : 0 : fprintf (dump_file, "Removed speculations ");
2726 : 0 : spec_rem.dump (dump_file);
2727 : 0 : fprintf (dump_file, "\n");
2728 : : }
2729 : 208 : dump_overall_stats ();
2730 : 208 : fprintf (dump_file, "\nWhy inlining failed?\n");
2731 : 6656 : for (i = 0; i < CIF_N_REASONS; i++)
2732 : 6240 : if (reason[i][1])
2733 : 182 : fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2734 : : cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2735 : : (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2736 : 208 : }
2737 : :
2738 : : /* Called when node is removed. */
2739 : :
2740 : : static void
2741 : 0 : flatten_remove_node_hook (struct cgraph_node *node, void *data)
2742 : : {
2743 : 0 : if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2744 : : return;
2745 : :
2746 : 0 : hash_set<struct cgraph_node *> *removed
2747 : : = (hash_set<struct cgraph_node *> *) data;
2748 : 0 : removed->add (node);
2749 : : }
2750 : :
2751 : : /* Decide on the inlining. We do so in the topological order to avoid
2752 : : expenses on updating data structures. */
2753 : :
2754 : : static unsigned int
2755 : 227668 : ipa_inline (void)
2756 : : {
2757 : 227668 : struct cgraph_node *node;
2758 : 227668 : int nnodes;
2759 : 227668 : struct cgraph_node **order;
2760 : 227668 : int i, j;
2761 : 227668 : int cold;
2762 : 227668 : bool remove_functions = false;
2763 : :
2764 : 227668 : order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2765 : :
2766 : 227668 : if (dump_file)
2767 : 208 : ipa_dump_fn_summaries (dump_file);
2768 : :
2769 : 227668 : nnodes = ipa_reverse_postorder (order);
2770 : 227668 : spec_rem = profile_count::zero ();
2771 : :
2772 : 7503434 : FOR_EACH_FUNCTION (node)
2773 : : {
2774 : 3524049 : node->aux = 0;
2775 : :
2776 : : /* Recompute the default reasons for inlining because they may have
2777 : : changed during merging. */
2778 : 3524049 : if (in_lto_p)
2779 : : {
2780 : 425346 : for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2781 : : {
2782 : 325502 : gcc_assert (e->inline_failed);
2783 : 325502 : initialize_inline_failed (e);
2784 : : }
2785 : 101018 : for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2786 : 1174 : initialize_inline_failed (e);
2787 : : }
2788 : : }
2789 : :
2790 : 227668 : if (dump_file)
2791 : 208 : fprintf (dump_file, "\nFlattening functions:\n");
2792 : :
2793 : : /* First shrink order array, so that it only contains nodes with
2794 : : flatten attribute. */
2795 : 3751717 : for (i = nnodes - 1, j = i; i >= 0; i--)
2796 : : {
2797 : 3524049 : node = order[i];
2798 : 3524049 : if (node->definition
2799 : : /* Do not try to flatten aliases. These may happen for example when
2800 : : creating local aliases. */
2801 : 3524049 : && !node->alias
2802 : 5228265 : && lookup_attribute ("flatten",
2803 : 1704216 : DECL_ATTRIBUTES (node->decl)) != NULL)
2804 : 86 : order[j--] = order[i];
2805 : : }
2806 : :
2807 : : /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2808 : : nodes with flatten attribute. If there is more than one such
2809 : : node, we need to register a node removal hook, as flatten_function
2810 : : could remove other nodes with flatten attribute. See PR82801. */
2811 : 227668 : struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2812 : 227668 : hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2813 : 227668 : if (j < nnodes - 2)
2814 : : {
2815 : 15 : flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2816 : 15 : node_removal_hook_holder
2817 : 15 : = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2818 : : flatten_removed_nodes);
2819 : : }
2820 : :
2821 : : /* In the first pass handle functions to be flattened. Do this with
2822 : : a priority so none of our later choices will make this impossible. */
2823 : 227754 : for (i = nnodes - 1; i > j; i--)
2824 : : {
2825 : 86 : node = order[i];
2826 : 86 : if (flatten_removed_nodes
2827 : 86 : && flatten_removed_nodes->contains (node))
2828 : 0 : continue;
2829 : :
2830 : : /* Handle nodes to be flattened.
2831 : : Ideally when processing callees we stop inlining at the
2832 : : entry of cycles, possibly cloning that entry point and
2833 : : try to flatten itself turning it into a self-recursive
2834 : : function. */
2835 : 86 : if (dump_file)
2836 : 4 : fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2837 : 86 : flatten_function (node, false, true);
2838 : : }
2839 : :
2840 : 227668 : if (j < nnodes - 2)
2841 : : {
2842 : 15 : symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2843 : 30 : delete flatten_removed_nodes;
2844 : : }
2845 : 227668 : free (order);
2846 : :
2847 : 227668 : if (dump_file)
2848 : 208 : dump_overall_stats ();
2849 : :
2850 : 227668 : inline_small_functions ();
2851 : :
2852 : 227668 : gcc_assert (symtab->state == IPA_SSA);
2853 : 227668 : symtab->state = IPA_SSA_AFTER_INLINING;
2854 : : /* Do first after-inlining removal. We want to remove all "stale" extern
2855 : : inline functions and virtual functions so we really know what is called
2856 : : once. */
2857 : 227668 : symtab->remove_unreachable_nodes (dump_file);
2858 : :
2859 : : /* Inline functions with a property that after inlining into all callers the
2860 : : code size will shrink because the out-of-line copy is eliminated.
2861 : : We do this regardless on the callee size as long as function growth limits
2862 : : are met. */
2863 : 227668 : if (dump_file)
2864 : 208 : fprintf (dump_file,
2865 : : "\nDeciding on functions to be inlined into all callers and "
2866 : : "removing useless speculations:\n");
2867 : :
2868 : : /* Inlining one function called once has good chance of preventing
2869 : : inlining other function into the same callee. Ideally we should
2870 : : work in priority order, but probably inlining hot functions first
2871 : : is good cut without the extra pain of maintaining the queue.
2872 : :
2873 : : ??? this is not really fitting the bill perfectly: inlining function
2874 : : into callee often leads to better optimization of callee due to
2875 : : increased context for optimization.
2876 : : For example if main() function calls a function that outputs help
2877 : : and then function that does the main optimization, we should inline
2878 : : the second with priority even if both calls are cold by themselves.
2879 : :
2880 : : We probably want to implement new predicate replacing our use of
2881 : : maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2882 : : to be hot. */
2883 : 683004 : for (cold = 0; cold <= 1; cold ++)
2884 : : {
2885 : 5664515 : FOR_EACH_DEFINED_FUNCTION (node)
2886 : : {
2887 : 5209179 : struct cgraph_edge *edge, *next;
2888 : 5209179 : bool update=false;
2889 : :
2890 : 5209179 : if (!opt_for_fn (node->decl, optimize)
2891 : 5209179 : || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2892 : 935788 : continue;
2893 : :
2894 : 17159695 : for (edge = node->callees; edge; edge = next)
2895 : : {
2896 : 12886304 : next = edge->next_callee;
2897 : 12886304 : if (edge->speculative && !speculation_useful_p (edge, false))
2898 : : {
2899 : 96 : if (edge->count.ipa ().initialized_p ())
2900 : 0 : spec_rem += edge->count.ipa ();
2901 : 96 : cgraph_edge::resolve_speculation (edge);
2902 : 96 : update = true;
2903 : 96 : remove_functions = true;
2904 : : }
2905 : : }
2906 : 4273391 : if (update)
2907 : : {
2908 : 130 : struct cgraph_node *where = node->inlined_to
2909 : 65 : ? node->inlined_to : node;
2910 : 65 : reset_edge_caches (where);
2911 : 65 : ipa_update_overall_fn_summary (where);
2912 : : }
2913 : 4273391 : if (want_inline_function_to_all_callers_p (node, cold))
2914 : : {
2915 : 24000 : int num_calls = 0;
2916 : 24000 : node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2917 : : true);
2918 : 24000 : while (node->call_for_symbol_and_aliases
2919 : 24723 : (inline_to_all_callers, &num_calls, true))
2920 : : ;
2921 : 24000 : remove_functions = true;
2922 : : }
2923 : : }
2924 : : }
2925 : :
2926 : 227668 : if (dump_enabled_p ())
2927 : 274 : dump_printf (MSG_NOTE,
2928 : : "\nInlined %i calls, eliminated %i functions\n\n",
2929 : : ncalls_inlined, nfunctions_inlined);
2930 : 227668 : if (dump_file)
2931 : 208 : dump_inline_stats ();
2932 : :
2933 : 227668 : if (dump_file)
2934 : 208 : ipa_dump_fn_summaries (dump_file);
2935 : 227668 : return remove_functions ? TODO_remove_functions : 0;
2936 : : }
2937 : :
2938 : : /* Inline always-inline function calls in NODE
2939 : : (which itself is possibly inline). */
2940 : :
2941 : : static bool
2942 : 3015174 : inline_always_inline_functions (struct cgraph_node *node)
2943 : : {
2944 : 3015174 : struct cgraph_edge *e;
2945 : 3015174 : bool inlined = false;
2946 : :
2947 : 12002610 : for (e = node->callees; e; e = e->next_callee)
2948 : : {
2949 : 8987436 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2950 : 8987436 : gcc_checking_assert (!callee->aux || callee->aux == (void *)(size_t)1);
2951 : 8987436 : if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
2952 : : /* Watch for self-recursive cycles. */
2953 : 8987436 : || callee->aux)
2954 : 8641661 : continue;
2955 : :
2956 : 345775 : if (e->recursive_p ())
2957 : : {
2958 : 7 : if (dump_enabled_p ())
2959 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2960 : : " Not inlining recursive call to %C.\n",
2961 : : e->callee);
2962 : 7 : e->inline_failed = CIF_RECURSIVE_INLINING;
2963 : 7 : continue;
2964 : : }
2965 : 345768 : if (callee->definition
2966 : 345738 : && !ipa_fn_summaries->get (callee))
2967 : 250 : compute_fn_summary (callee, true);
2968 : :
2969 : 345768 : if (!can_early_inline_edge_p (e))
2970 : : {
2971 : : /* Set inlined to true if the callee is marked "always_inline" but
2972 : : is not inlinable. This will allow flagging an error later in
2973 : : expand_call_inline in tree-inline.cc. */
2974 : 334 : if (lookup_attribute ("always_inline",
2975 : 334 : DECL_ATTRIBUTES (callee->decl)) != NULL)
2976 : 27 : inlined = true;
2977 : 334 : continue;
2978 : : }
2979 : :
2980 : 345434 : if (dump_enabled_p ())
2981 : 9 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2982 : : " Inlining %C into %C (always_inline).\n",
2983 : : e->callee, e->caller);
2984 : 345434 : inline_call (e, true, NULL, NULL, false);
2985 : 345434 : callee->aux = (void *)(size_t)1;
2986 : : /* Inline recursively to handle the case where always_inline function was
2987 : : not optimized yet since it is a part of a cycle in callgraph. */
2988 : 345434 : inline_always_inline_functions (e->callee);
2989 : 345434 : callee->aux = NULL;
2990 : 345434 : inlined = true;
2991 : : }
2992 : 3015174 : return inlined;
2993 : : }
2994 : :
2995 : : /* Decide on the inlining. We do so in the topological order to avoid
2996 : : expenses on updating data structures. */
2997 : :
2998 : : static bool
2999 : 2188573 : early_inline_small_functions (struct cgraph_node *node)
3000 : : {
3001 : 2188573 : struct cgraph_edge *e;
3002 : 2188573 : bool inlined = false;
3003 : :
3004 : 9362868 : for (e = node->callees; e; e = e->next_callee)
3005 : : {
3006 : 7174295 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
3007 : :
3008 : : /* We can encounter not-yet-analyzed function during
3009 : : early inlining on callgraphs with strongly
3010 : : connected components. */
3011 : 7174295 : ipa_fn_summary *s = ipa_fn_summaries->get (callee);
3012 : 7174295 : if (s == NULL || !s->inlinable || !e->inline_failed)
3013 : 3917603 : continue;
3014 : :
3015 : : /* Do not consider functions not declared inline. */
3016 : 3256692 : if (!DECL_DECLARED_INLINE_P (callee->decl)
3017 : 833355 : && !opt_for_fn (node->decl, flag_inline_small_functions)
3018 : 3299933 : && !opt_for_fn (node->decl, flag_inline_functions))
3019 : 43089 : continue;
3020 : :
3021 : 3213603 : if (dump_enabled_p ())
3022 : 179 : dump_printf_loc (MSG_NOTE, e->call_stmt,
3023 : : "Considering inline candidate %C.\n",
3024 : : callee);
3025 : :
3026 : 3213603 : if (!can_early_inline_edge_p (e))
3027 : 84357 : continue;
3028 : :
3029 : 3129246 : if (e->recursive_p ())
3030 : : {
3031 : 7700 : if (dump_enabled_p ())
3032 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
3033 : : " Not inlining: recursive call.\n");
3034 : 7700 : continue;
3035 : : }
3036 : :
3037 : 3121546 : if (!want_early_inline_function_p (e))
3038 : 924407 : continue;
3039 : :
3040 : 2197139 : if (dump_enabled_p ())
3041 : 126 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
3042 : : " Inlining %C into %C.\n",
3043 : : callee, e->caller);
3044 : 2197139 : inline_call (e, true, NULL, NULL, false);
3045 : 2197139 : inlined = true;
3046 : : }
3047 : :
3048 : 2188573 : if (inlined)
3049 : 766173 : ipa_update_overall_fn_summary (node);
3050 : :
3051 : 2188573 : return inlined;
3052 : : }
3053 : :
3054 : : unsigned int
3055 : 2669754 : early_inliner (function *fun)
3056 : : {
3057 : 2669754 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
3058 : 2669754 : struct cgraph_edge *edge;
3059 : 2669754 : unsigned int todo = 0;
3060 : 2669754 : int iterations = 0;
3061 : 2669754 : bool inlined = false;
3062 : :
3063 : 2669754 : if (seen_error ())
3064 : : return 0;
3065 : :
3066 : : /* Do nothing if datastructures for ipa-inliner are already computed. This
3067 : : happens when some pass decides to construct new function and
3068 : : cgraph_add_new_function calls lowering passes and early optimization on
3069 : : it. This may confuse ourself when early inliner decide to inline call to
3070 : : function clone, because function clones don't have parameter list in
3071 : : ipa-prop matching their signature. */
3072 : 2669748 : if (ipa_node_params_sum)
3073 : : return 0;
3074 : :
3075 : 2669740 : if (flag_checking)
3076 : 2669703 : node->verify ();
3077 : 2669740 : node->remove_all_references ();
3078 : :
3079 : : /* Even when not optimizing or not inlining inline always-inline
3080 : : functions. */
3081 : 2669740 : inlined = inline_always_inline_functions (node);
3082 : :
3083 : 2669740 : if (!optimize
3084 : 2225358 : || flag_no_inline
3085 : 2190314 : || !flag_early_inlining)
3086 : : ;
3087 : 2188596 : else if (lookup_attribute ("flatten",
3088 : 2188596 : DECL_ATTRIBUTES (node->decl)) != NULL)
3089 : : {
3090 : : /* When the function is marked to be flattened, recursively inline
3091 : : all calls in it. */
3092 : 128 : if (dump_enabled_p ())
3093 : 0 : dump_printf (MSG_OPTIMIZED_LOCATIONS,
3094 : : "Flattening %C\n", node);
3095 : 128 : flatten_function (node, true, true);
3096 : 128 : inlined = true;
3097 : : }
3098 : : else
3099 : : {
3100 : : /* If some always_inline functions was inlined, apply the changes.
3101 : : This way we will not account always inline into growth limits and
3102 : : moreover we will inline calls from always inlines that we skipped
3103 : : previously because of conditional in can_early_inline_edge_p
3104 : : which prevents some inlining to always_inline. */
3105 : 2188468 : if (inlined)
3106 : : {
3107 : 183536 : timevar_push (TV_INTEGRATION);
3108 : 183536 : todo |= optimize_inline_calls (current_function_decl);
3109 : : /* optimize_inline_calls call above might have introduced new
3110 : : statements that don't have inline parameters computed. */
3111 : 969005 : for (edge = node->callees; edge; edge = edge->next_callee)
3112 : : {
3113 : : /* We can enounter not-yet-analyzed function during
3114 : : early inlining on callgraphs with strongly
3115 : : connected components. */
3116 : 785469 : ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3117 : 785469 : es->call_stmt_size
3118 : 785469 : = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3119 : 785469 : es->call_stmt_time
3120 : 785469 : = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3121 : : }
3122 : 183536 : ipa_update_overall_fn_summary (node);
3123 : 183536 : inlined = false;
3124 : 183536 : timevar_pop (TV_INTEGRATION);
3125 : : }
3126 : : /* We iterate incremental inlining to get trivial cases of indirect
3127 : : inlining. */
3128 : 5909282 : while (iterations < opt_for_fn (node->decl,
3129 : : param_early_inliner_max_iterations)
3130 : 2954641 : && early_inline_small_functions (node))
3131 : : {
3132 : 766173 : timevar_push (TV_INTEGRATION);
3133 : 766173 : todo |= optimize_inline_calls (current_function_decl);
3134 : :
3135 : : /* Technically we ought to recompute inline parameters so the new
3136 : : iteration of early inliner works as expected. We however have
3137 : : values approximately right and thus we only need to update edge
3138 : : info that might be cleared out for newly discovered edges. */
3139 : 2976567 : for (edge = node->callees; edge; edge = edge->next_callee)
3140 : : {
3141 : : /* We have no summary for new bound store calls yet. */
3142 : 2210394 : ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3143 : 2210394 : es->call_stmt_size
3144 : 2210394 : = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3145 : 2210394 : es->call_stmt_time
3146 : 2210394 : = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3147 : : }
3148 : 766173 : if (iterations < opt_for_fn (node->decl,
3149 : 766173 : param_early_inliner_max_iterations) - 1)
3150 : 105 : ipa_update_overall_fn_summary (node);
3151 : 766173 : timevar_pop (TV_INTEGRATION);
3152 : 766173 : iterations++;
3153 : 766173 : inlined = false;
3154 : : }
3155 : 2188468 : if (dump_file)
3156 : 222 : fprintf (dump_file, "Iterations: %i\n", iterations);
3157 : : }
3158 : :
3159 : 2669740 : if (inlined)
3160 : : {
3161 : 17203 : timevar_push (TV_INTEGRATION);
3162 : 17203 : todo |= optimize_inline_calls (current_function_decl);
3163 : 17203 : timevar_pop (TV_INTEGRATION);
3164 : : }
3165 : :
3166 : 2669740 : fun->always_inline_functions_inlined = true;
3167 : :
3168 : 2669740 : return todo;
3169 : : }
3170 : :
3171 : : /* Do inlining of small functions. Doing so early helps profiling and other
3172 : : passes to be somewhat more effective and avoids some code duplication in
3173 : : later real inlining pass for testcases with very many function calls. */
3174 : :
3175 : : namespace {
3176 : :
/* Pass descriptor for the early inliner.  A GIMPLE pass run per function
   in topological order of the callgraph; it requires functions to be in
   SSA form (PROP_ssa) and schedules no extra TODO work itself.  */
const pass_data pass_data_early_inline =
{
  GIMPLE_PASS, /* type */
  "einline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_EARLY_INLINING, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
3189 : :
/* Pass class for early inlining; the actual work is done by
   early_inliner, invoked from the execute method below.  */
class pass_early_inline : public gimple_opt_pass
{
public:
  pass_early_inline (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_inline, ctxt)
  {}

  /* opt_pass methods: */
  unsigned int execute (function *) final override;

}; // class pass_early_inline
3201 : :
3202 : : unsigned int
3203 : 2669754 : pass_early_inline::execute (function *fun)
3204 : : {
3205 : 2669754 : return early_inliner (fun);
3206 : : }
3207 : :
3208 : : } // anon namespace
3209 : :
3210 : : gimple_opt_pass *
3211 : 281914 : make_pass_early_inline (gcc::context *ctxt)
3212 : : {
3213 : 281914 : return new pass_early_inline (ctxt);
3214 : : }
3215 : :
3216 : : namespace {
3217 : :
/* Pass descriptor for the main IPA inliner.  An IPA pass run once over
   the whole compilation unit; after it finishes the symbol table is
   dumped (TODO_dump_symtab).  */
const pass_data pass_data_ipa_inline =
{
  IPA_PASS, /* type */
  "inline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_INLINING, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};
3230 : :
/* Pass class for the main IPA inliner.  It provides no LTO summary
   streaming hooks (all NULL); inline_transform applies the recorded
   inlining decisions to each function body.  */
class pass_ipa_inline : public ipa_opt_pass_d
{
public:
  pass_ipa_inline (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
		      NULL, /* generate_summary */
		      NULL, /* write_summary */
		      NULL, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      inline_transform, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  unsigned int execute (function *) final override { return ipa_inline (); }

}; // class pass_ipa_inline
3251 : :
3252 : : } // anon namespace
3253 : :
3254 : : ipa_opt_pass_d *
3255 : 281914 : make_pass_ipa_inline (gcc::context *ctxt)
3256 : : {
3257 : 281914 : return new pass_ipa_inline (ctxt);
3258 : : }
|