Branch data Line data Source code
1 : : /* Inlining decision heuristics.
2 : : Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 : : Contributed by Jan Hubicka
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : /* Inlining decision heuristics
22 : :
23 : : The implementation of inliner is organized as follows:
24 : :
25 : : inlining heuristics limits
26 : :
27 : : can_inline_edge_p allow to check that particular inlining is allowed
28 : : by the limits specified by user (allowed function growth, growth and so
29 : : on).
30 : :
31 : : Functions are inlined when it is obvious the result is profitable (such
32 : : as functions called once or when inlining reduce code size).
33 : : In addition to that we perform inlining of small functions and recursive
34 : : inlining.
35 : :
36 : : inlining heuristics
37 : :
38 : : The inliner itself is split into two passes:
39 : :
40 : : pass_early_inlining
41 : :
42 : : Simple local inlining pass inlining callees into current function.
43 : : This pass makes no use of whole unit analysis and thus it can do only
44 : : very simple decisions based on local properties.
45 : :
46 : : The strength of the pass is that it is run in topological order
47 : : (reverse postorder) on the callgraph. Functions are converted into SSA
48 : : form just before this pass and optimized subsequently. As a result, the
49 : : callees of the function seen by the early inliner was already optimized
50 : : and results of early inlining adds a lot of optimization opportunities
51 : : for the local optimization.
52 : :
53 : : The pass handle the obvious inlining decisions within the compilation
54 : : unit - inlining auto inline functions, inlining for size and
55 : : flattening.
56 : :
57 : : main strength of the pass is the ability to eliminate abstraction
58 : : penalty in C++ code (via combination of inlining and early
59 : : optimization) and thus improve quality of analysis done by real IPA
60 : : optimizers.
61 : :
62 : : Because of lack of whole unit knowledge, the pass cannot really make
63 : : good code size/performance tradeoffs. It however does very simple
64 : : speculative inlining allowing code size to grow by
65 : : EARLY_INLINING_INSNS when callee is leaf function. In this case the
66 : : optimizations performed later are very likely to eliminate the cost.
67 : :
68 : : pass_ipa_inline
69 : :
70 : : This is the real inliner able to handle inlining with whole program
71 : : knowledge. It performs following steps:
72 : :
73 : : 1) inlining of small functions. This is implemented by greedy
74 : : algorithm ordering all inlinable cgraph edges by their badness and
75 : : inlining them in this order as long as inline limits allows doing so.
76 : :
77 : : This heuristics is not very good on inlining recursive calls. Recursive
78 : : calls can be inlined with results similar to loop unrolling. To do so,
79 : : special purpose recursive inliner is executed on function when
80 : : recursive edge is met as viable candidate.
81 : :
82 : : 2) Unreachable functions are removed from callgraph. Inlining leads
83 : : to devirtualization and other modification of callgraph so functions
84 : : may become unreachable during the process. Also functions declared as
85 : : extern inline or virtual functions are removed, since after inlining
86 : : we no longer need the offline bodies.
87 : :
88 : : 3) Functions called once and not exported from the unit are inlined.
89 : : This should almost always lead to reduction of code size by eliminating
90 : : the need for offline copy of the function. */
91 : :
92 : : #include "config.h"
93 : : #include "system.h"
94 : : #include "coretypes.h"
95 : : #include "backend.h"
96 : : #include "target.h"
97 : : #include "rtl.h"
98 : : #include "tree.h"
99 : : #include "gimple.h"
100 : : #include "alloc-pool.h"
101 : : #include "tree-pass.h"
102 : : #include "gimple-ssa.h"
103 : : #include "cgraph.h"
104 : : #include "lto-streamer.h"
105 : : #include "trans-mem.h"
106 : : #include "calls.h"
107 : : #include "tree-inline.h"
108 : : #include "profile.h"
109 : : #include "symbol-summary.h"
110 : : #include "tree-vrp.h"
111 : : #include "sreal.h"
112 : : #include "ipa-cp.h"
113 : : #include "ipa-prop.h"
114 : : #include "ipa-fnsummary.h"
115 : : #include "ipa-inline.h"
116 : : #include "ipa-utils.h"
117 : : #include "auto-profile.h"
118 : : #include "builtins.h"
119 : : #include "fibonacci_heap.h"
120 : : #include "stringpool.h"
121 : : #include "attribs.h"
122 : : #include "asan.h"
123 : : #include "ipa-strub.h"
124 : :
125 : : /* Inliner uses greedy algorithm to inline calls in a priority order.
126 : : Badness is used as the key in a Fibonacci heap which roughly corresponds
127 : : to negation of benefit to cost ratios.
128 : : In case multiple calls has same priority we want to stabilize the outcomes
129 : : for which we use ids. */
130 : : class inline_badness
131 : : {
132 : : public:
133 : : sreal badness;
134 : : int uid;
135 : 991041 : inline_badness ()
136 : 767073 : : badness (sreal::min ()), uid (0)
137 : : {
138 : : }
139 : 2895836 : inline_badness (cgraph_edge *e, sreal b)
140 : 17119 : : badness (b), uid (e->get_uid ())
141 : : {
142 : : }
143 : 688705 : bool operator<= (const inline_badness &other)
144 : : {
145 : 688705 : if (badness != other.badness)
146 : 688705 : return badness <= other.badness;
147 : 0 : return uid <= other.uid;
148 : : }
149 : 767073 : bool operator== (const inline_badness &other)
150 : : {
151 : 1401513 : return badness == other.badness && uid == other.uid;
152 : : }
153 : 0 : bool operator!= (const inline_badness &other)
154 : : {
155 : 767073 : return badness != other.badness || uid != other.uid;
156 : : }
157 : 22248689 : bool operator< (const inline_badness &other)
158 : : {
159 : 22248689 : if (badness != other.badness)
160 : 18657099 : return badness < other.badness;
161 : 3591590 : return uid < other.uid;
162 : : }
163 : 10120792 : bool operator> (const inline_badness &other)
164 : : {
165 : 10120792 : if (badness != other.badness)
166 : 8388269 : return badness > other.badness;
167 : 1732523 : return uid > other.uid;
168 : : }
169 : : };
170 : :
171 : : typedef fibonacci_heap <inline_badness, cgraph_edge> edge_heap_t;
172 : : typedef fibonacci_node <inline_badness, cgraph_edge> edge_heap_node_t;
173 : :
174 : : /* Statistics we collect about inlining algorithm. */
175 : : static int overall_size;
176 : : static profile_count max_count;
177 : : static profile_count spec_rem;
178 : :
179 : : /* Return false when inlining edge E would lead to violating
180 : : limits on function unit growth or stack usage growth.
181 : :
182 : : The relative function body growth limit is present generally
183 : : to avoid problems with non-linear behavior of the compiler.
184 : : To allow inlining huge functions into tiny wrapper, the limit
185 : : is always based on the bigger of the two functions considered.
186 : :
187 : : For stack growth limits we always base the growth in stack usage
188 : : of the callers. We want to prevent applications from segfaulting
189 : : on stack overflow when functions with huge stack frames gets
190 : : inlined. */
191 : :
192 : : static bool
193 : 5541777 : caller_growth_limits (struct cgraph_edge *e)
194 : : {
195 : 5541777 : struct cgraph_node *to = e->caller;
196 : 5541777 : struct cgraph_node *what = e->callee->ultimate_alias_target ();
197 : 5541777 : int newsize;
198 : 5541777 : int limit = 0;
199 : 5541777 : HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
200 : 5541777 : ipa_size_summary *outer_info = ipa_size_summaries->get (to);
201 : :
202 : : /* Look for function e->caller is inlined to. While doing
203 : : so work out the largest function body on the way. As
204 : : described above, we want to base our function growth
205 : : limits based on that. Not on the self size of the
206 : : outer function, not on the self size of inline code
207 : : we immediately inline to. This is the most relaxed
208 : : interpretation of the rule "do not grow large functions
209 : : too much in order to prevent compiler from exploding". */
210 : 7805805 : while (true)
211 : : {
212 : 6673791 : ipa_size_summary *size_info = ipa_size_summaries->get (to);
213 : 6673791 : if (limit < size_info->self_size)
214 : : limit = size_info->self_size;
215 : 6673791 : if (stack_size_limit < size_info->estimated_self_stack_size)
216 : : stack_size_limit = size_info->estimated_self_stack_size;
217 : 6673791 : if (to->inlined_to)
218 : 1132014 : to = to->callers->caller;
219 : : else
220 : : break;
221 : 1132014 : }
222 : :
223 : 5541777 : ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
224 : 5541777 : ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
225 : :
226 : 5541777 : if (limit < what_size_info->self_size)
227 : : limit = what_size_info->self_size;
228 : :
229 : 5541777 : limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
230 : :
231 : : /* Check the size after inlining against the function limits. But allow
232 : : the function to shrink if it went over the limits by forced inlining. */
233 : 5541777 : newsize = estimate_size_after_inlining (to, e);
234 : 5541777 : if (newsize >= ipa_size_summaries->get (what)->size
235 : 5394786 : && newsize > opt_for_fn (to->decl, param_large_function_insns)
236 : 5777229 : && newsize > limit)
237 : : {
238 : 2464 : e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
239 : 2464 : return false;
240 : : }
241 : :
242 : 5539313 : if (!what_info->estimated_stack_size)
243 : : return true;
244 : :
245 : : /* FIXME: Stack size limit often prevents inlining in Fortran programs
246 : : due to large i/o datastructures used by the Fortran front-end.
247 : : We ought to ignore this limit when we know that the edge is executed
248 : : on every invocation of the caller (i.e. its call statement dominates
249 : : exit block). We do not track this information, yet. */
250 : 1831158 : stack_size_limit += ((gcov_type)stack_size_limit
251 : 915579 : * opt_for_fn (to->decl, param_stack_frame_growth)
252 : 915579 : / 100);
253 : :
254 : 915579 : inlined_stack = (ipa_get_stack_frame_offset (to)
255 : 915579 : + outer_info->estimated_self_stack_size
256 : 915579 : + what_info->estimated_stack_size);
257 : : /* Check new stack consumption with stack consumption at the place
258 : : stack is used. */
259 : 915579 : if (inlined_stack > stack_size_limit
260 : : /* If function already has large stack usage from sibling
261 : : inline call, we can inline, too.
262 : : This bit overoptimistically assume that we are good at stack
263 : : packing. */
264 : 273752 : && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
265 : 1178648 : && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
266 : : {
267 : 55228 : e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
268 : 55228 : return false;
269 : : }
270 : : return true;
271 : : }
272 : :
273 : : /* Dump info about why inlining has failed. */
274 : :
275 : : static void
276 : 4714860 : report_inline_failed_reason (struct cgraph_edge *e)
277 : : {
278 : 4714860 : if (dump_enabled_p ())
279 : : {
280 : 2389 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
281 : : " not inlinable: %C -> %C, %s\n",
282 : : e->caller, e->callee,
283 : : cgraph_inline_failed_string (e->inline_failed));
284 : 2389 : if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
285 : 2389 : || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
286 : 2 : && e->caller->lto_file_data
287 : 2389 : && e->callee->ultimate_alias_target ()->lto_file_data)
288 : : {
289 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
290 : : " LTO objects: %s, %s\n",
291 : 0 : e->caller->lto_file_data->file_name,
292 : 0 : e->callee->ultimate_alias_target ()->lto_file_data->file_name);
293 : : }
294 : 2389 : if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
295 : 2 : if (dump_file)
296 : 0 : cl_target_option_print_diff
297 : 0 : (dump_file, 2, target_opts_for_fn (e->caller->decl),
298 : 0 : target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
299 : 2389 : if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
300 : 0 : if (dump_file)
301 : 0 : cl_optimization_print_diff
302 : 0 : (dump_file, 2, opts_for_fn (e->caller->decl),
303 : 0 : opts_for_fn (e->callee->ultimate_alias_target ()->decl));
304 : : }
305 : 4714860 : }
306 : :
307 : : /* Decide whether sanitizer-related attributes allow inlining. */
308 : :
309 : : static bool
310 : 8077864 : sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
311 : : {
312 : 8077864 : if (!caller || !callee)
313 : : return true;
314 : :
315 : : /* Follow clang and allow inlining for always_inline functions. */
316 : 8077864 : if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
317 : : return true;
318 : :
319 : 7609646 : const sanitize_code codes[] =
320 : : {
321 : : SANITIZE_ADDRESS,
322 : : SANITIZE_THREAD,
323 : : SANITIZE_UNDEFINED,
324 : : SANITIZE_UNDEFINED_NONDEFAULT,
325 : : SANITIZE_POINTER_COMPARE,
326 : : SANITIZE_POINTER_SUBTRACT
327 : : };
328 : :
329 : 53266482 : for (unsigned i = 0; i < ARRAY_SIZE (codes); i++)
330 : 91314068 : if (sanitize_flags_p (codes[i], caller)
331 : 45657034 : != sanitize_flags_p (codes[i], callee))
332 : : return false;
333 : :
334 : 7609448 : if (sanitize_coverage_p (caller) != sanitize_coverage_p (callee))
335 : : return false;
336 : :
337 : : return true;
338 : : }
339 : :
340 : : /* Used for flags where it is safe to inline when caller's value is
341 : : grater than callee's. */
342 : : #define check_maybe_up(flag) \
343 : : (opts_for_fn (caller->decl)->x_##flag \
344 : : != opts_for_fn (callee->decl)->x_##flag \
345 : : && (!always_inline \
346 : : || opts_for_fn (caller->decl)->x_##flag \
347 : : < opts_for_fn (callee->decl)->x_##flag))
348 : : /* Used for flags where it is safe to inline when caller's value is
349 : : smaller than callee's. */
350 : : #define check_maybe_down(flag) \
351 : : (opts_for_fn (caller->decl)->x_##flag \
352 : : != opts_for_fn (callee->decl)->x_##flag \
353 : : && (!always_inline \
354 : : || opts_for_fn (caller->decl)->x_##flag \
355 : : > opts_for_fn (callee->decl)->x_##flag))
356 : : /* Used for flags where exact match is needed for correctness. */
357 : : #define check_match(flag) \
358 : : (opts_for_fn (caller->decl)->x_##flag \
359 : : != opts_for_fn (callee->decl)->x_##flag)
360 : :
361 : : /* Decide if we can inline the edge and possibly update
362 : : inline_failed reason.
363 : : We check whether inlining is possible at all and whether
364 : : caller growth limits allow doing so.
365 : :
366 : : if REPORT is true, output reason to the dump file. */
367 : :
368 : : static bool
369 : 12028478 : can_inline_edge_p (struct cgraph_edge *e, bool report,
370 : : bool early = false)
371 : : {
372 : 12028478 : gcc_checking_assert (e->inline_failed);
373 : :
374 : 12028478 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
375 : : {
376 : 3477532 : if (report)
377 : 3447627 : report_inline_failed_reason (e);
378 : 3477532 : return false;
379 : : }
380 : :
381 : 8550946 : bool inlinable = true;
382 : 8550946 : enum availability avail;
383 : 17101892 : cgraph_node *caller = (e->caller->inlined_to
384 : 8550946 : ? e->caller->inlined_to : e->caller);
385 : 8550946 : cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
386 : :
387 : 8550946 : if (!callee->definition)
388 : : {
389 : 874 : e->inline_failed = CIF_BODY_NOT_AVAILABLE;
390 : 874 : inlinable = false;
391 : : }
392 : 8550946 : if (!early && (!opt_for_fn (callee->decl, optimize)
393 : 4894276 : || !opt_for_fn (caller->decl, optimize)))
394 : : {
395 : 305 : e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
396 : 305 : inlinable = false;
397 : : }
398 : 8550641 : else if (callee->calls_comdat_local)
399 : : {
400 : 18146 : e->inline_failed = CIF_USES_COMDAT_LOCAL;
401 : 18146 : inlinable = false;
402 : : }
403 : 8532495 : else if (avail <= AVAIL_INTERPOSABLE)
404 : : {
405 : 126533 : e->inline_failed = CIF_OVERWRITABLE;
406 : 126533 : inlinable = false;
407 : : }
408 : : /* All edges with call_stmt_cannot_inline_p should have inline_failed
409 : : initialized to one of FINAL_ERROR reasons. */
410 : 8405962 : else if (e->call_stmt_cannot_inline_p)
411 : 0 : gcc_unreachable ();
412 : : /* Don't inline if the functions have different EH personalities. */
413 : 8405962 : else if (DECL_FUNCTION_PERSONALITY (caller->decl)
414 : 2354257 : && DECL_FUNCTION_PERSONALITY (callee->decl)
415 : 8405962 : && (DECL_FUNCTION_PERSONALITY (caller->decl)
416 : 188859 : != DECL_FUNCTION_PERSONALITY (callee->decl)))
417 : : {
418 : 0 : e->inline_failed = CIF_EH_PERSONALITY;
419 : 0 : inlinable = false;
420 : : }
421 : : /* TM pure functions should not be inlined into non-TM_pure
422 : : functions. */
423 : 8405962 : else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
424 : : {
425 : 30 : e->inline_failed = CIF_UNSPECIFIED;
426 : 30 : inlinable = false;
427 : : }
428 : : /* Check compatibility of target optimization options. */
429 : 8405932 : else if (!targetm.target_option.can_inline_p (caller->decl,
430 : : callee->decl))
431 : : {
432 : 448 : e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
433 : 448 : inlinable = false;
434 : : }
435 : 8405484 : else if (ipa_fn_summaries->get (callee) == NULL
436 : 8405482 : || !ipa_fn_summaries->get (callee)->inlinable)
437 : : {
438 : 327620 : e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
439 : 327620 : inlinable = false;
440 : : }
441 : : /* Don't inline a function with mismatched sanitization attributes. */
442 : 8077864 : else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
443 : : {
444 : 198 : e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
445 : 198 : inlinable = false;
446 : : }
447 : :
448 : 8550946 : if (inlinable && !strub_inlinable_to_p (callee, caller))
449 : : {
450 : 1097 : e->inline_failed = CIF_UNSPECIFIED;
451 : 1097 : inlinable = false;
452 : : }
453 : 8550946 : if (!inlinable && report)
454 : 469008 : report_inline_failed_reason (e);
455 : : return inlinable;
456 : : }
457 : :
458 : : /* Return inlining_insns_single limit for function N. If HINT or HINT2 is true
459 : : scale up the bound. */
460 : :
461 : : static int
462 : 6896120 : inline_insns_single (cgraph_node *n, bool hint, bool hint2)
463 : : {
464 : 6896120 : if (hint && hint2)
465 : : {
466 : 2279791 : int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
467 : 2279791 : spd = spd * spd;
468 : 2279791 : if (spd > 1000000)
469 : : spd = 1000000;
470 : 2279791 : return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
471 : : }
472 : 4616329 : if (hint || hint2)
473 : 304634 : return opt_for_fn (n->decl, param_max_inline_insns_single)
474 : 304634 : * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
475 : 4311695 : return opt_for_fn (n->decl, param_max_inline_insns_single);
476 : : }
477 : :
478 : : /* Return inlining_insns_auto limit for function N. If HINT or HINT2 is true
479 : : scale up the bound. */
480 : :
481 : : static int
482 : 5360455 : inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
483 : : {
484 : 5360455 : int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
485 : 5360455 : if (hint && hint2)
486 : : {
487 : 1974762 : int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
488 : 1974762 : spd = spd * spd;
489 : 1974762 : if (spd > 1000000)
490 : : spd = 1000000;
491 : 1974762 : return max_inline_insns_auto * spd / 100;
492 : : }
493 : 3385693 : if (hint || hint2)
494 : 1499697 : return max_inline_insns_auto
495 : 1499697 : * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
496 : : return max_inline_insns_auto;
497 : : }
498 : :
499 : : enum can_inline_edge_by_limits_flags
500 : : {
501 : : /* True if we are early inlining. */
502 : : CAN_INLINE_EARLY = 1,
503 : : /* Ignore size limits. */
504 : : CAN_INLINE_DISREGARD_LIMITS = 2,
505 : : /* Force size limits (ignore always_inline). This is used for
506 : : recrusive inlining where always_inline may lead to inline bombs
507 : : and technically it is non-sential anyway. */
508 : : CAN_INLINE_FORCE_LIMITS = 4,
509 : : /* Report decision to dump file. */
510 : : CAN_INLINE_REPORT = 8,
511 : : };
512 : :
513 : : /* Decide if we can inline the edge and possibly update
514 : : inline_failed reason.
515 : : We check whether inlining is possible at all and whether
516 : : caller growth limits allow doing so. */
517 : :
518 : : static bool
519 : 6014693 : can_inline_edge_by_limits_p (struct cgraph_edge *e, int flags)
520 : : {
521 : 6014693 : gcc_checking_assert (e->inline_failed);
522 : :
523 : 6014693 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
524 : : {
525 : 210 : if (flags & CAN_INLINE_REPORT)
526 : 210 : report_inline_failed_reason (e);
527 : 210 : return false;
528 : : }
529 : :
530 : 6014483 : bool inlinable = true;
531 : 6014483 : enum availability avail;
532 : 12028966 : cgraph_node *caller = (e->caller->inlined_to
533 : 6014483 : ? e->caller->inlined_to : e->caller);
534 : 6014483 : cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
535 : 6014483 : tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
536 : 6014483 : tree callee_tree
537 : 6014483 : = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
538 : : /* Check if caller growth allows the inlining. */
539 : 6014483 : if (!(flags & CAN_INLINE_DISREGARD_LIMITS)
540 : 6011369 : && ((flags & CAN_INLINE_FORCE_LIMITS)
541 : 5980488 : || (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
542 : 5511034 : && !lookup_attribute ("flatten",
543 : 5511034 : DECL_ATTRIBUTES (caller->decl))))
544 : 11556260 : && !caller_growth_limits (e))
545 : : inlinable = false;
546 : 5956791 : else if (callee->externally_visible
547 : 4031031 : && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
548 : 9642526 : && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
549 : : {
550 : 2 : e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
551 : 2 : inlinable = false;
552 : : }
553 : : /* Don't inline a function with a higher optimization level than the
554 : : caller. FIXME: this is really just tip of iceberg of handling
555 : : optimization attribute. */
556 : 5956789 : else if (caller_tree != callee_tree)
557 : : {
558 : 17716 : bool always_inline =
559 : 17716 : (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
560 : 28573 : && lookup_attribute ("always_inline",
561 : 10857 : DECL_ATTRIBUTES (callee->decl)));
562 : 17716 : ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
563 : 17716 : ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
564 : :
565 : : /* Until GCC 4.9 we did not check the semantics-altering flags
566 : : below and inlined across optimization boundaries.
567 : : Enabling checks below breaks several packages by refusing
568 : : to inline library always_inline functions. See PR65873.
569 : : Disable the check for early inlining for now until better solution
570 : : is found. */
571 : 17716 : if (always_inline && (flags & CAN_INLINE_EARLY))
572 : : ;
573 : : /* There are some options that change IL semantics which means
574 : : we cannot inline in these cases for correctness reason.
575 : : Not even for always_inline declared functions. */
576 : 6859 : else if (check_match (flag_wrapv)
577 : 6859 : || check_match (flag_trapv)
578 : 6859 : || check_match (flag_pcc_struct_return)
579 : 6859 : || check_maybe_down (optimize_debug)
580 : : /* When caller or callee does FP math, be sure FP codegen flags
581 : : compatible. */
582 : 6853 : || ((caller_info->fp_expressions && callee_info->fp_expressions)
583 : 1251 : && (check_maybe_up (flag_rounding_math)
584 : 1251 : || check_maybe_up (flag_trapping_math)
585 : 1249 : || check_maybe_down (flag_unsafe_math_optimizations)
586 : 1249 : || check_maybe_down (flag_finite_math_only)
587 : 1248 : || check_maybe_up (flag_signaling_nans)
588 : 1248 : || check_maybe_down (flag_cx_limited_range)
589 : 1248 : || check_maybe_up (flag_signed_zeros)
590 : 1248 : || check_maybe_down (flag_associative_math)
591 : 1232 : || check_maybe_down (flag_reciprocal_math)
592 : 1232 : || check_maybe_down (flag_fp_int_builtin_inexact)
593 : : /* Strictly speaking only when the callee contains function
594 : : calls that may end up setting errno. */
595 : 1232 : || check_maybe_up (flag_errno_math)))
596 : : /* We do not want to make code compiled with exceptions to be
597 : : brought into a non-EH function unless we know that the callee
598 : : does not throw.
599 : : This is tracked by DECL_FUNCTION_PERSONALITY. */
600 : 6834 : || (check_maybe_up (flag_non_call_exceptions)
601 : 0 : && DECL_FUNCTION_PERSONALITY (callee->decl))
602 : 6834 : || (check_maybe_up (flag_exceptions)
603 : 16 : && DECL_FUNCTION_PERSONALITY (callee->decl))
604 : : /* When devirtualization is disabled for callee, it is not safe
605 : : to inline it as we possibly mangled the type info.
606 : : Allow early inlining of always inlines. */
607 : 13693 : || (!(flags & CAN_INLINE_EARLY) && check_maybe_down (flag_devirtualize)))
608 : : {
609 : 33 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
610 : 33 : inlinable = false;
611 : : }
612 : : /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
613 : 6826 : else if (always_inline)
614 : : ;
615 : : /* When user added an attribute to the callee honor it. */
616 : 6826 : else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
617 : 6826 : && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
618 : : {
619 : 2162 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
620 : 2162 : inlinable = false;
621 : : }
622 : : /* If explicit optimize attribute are not used, the mismatch is caused
623 : : by different command line options used to build different units.
624 : : Do not care about COMDAT functions - those are intended to be
625 : : optimized with the optimization flags of module they are used in.
626 : : Also do not care about mixing up size/speed optimization when
627 : : DECL_DISREGARD_INLINE_LIMITS is set. */
628 : 4664 : else if ((callee->merged_comdat
629 : 0 : && !lookup_attribute ("optimize",
630 : 0 : DECL_ATTRIBUTES (caller->decl)))
631 : 4664 : || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
632 : : ;
633 : : /* If mismatch is caused by merging two LTO units with different
634 : : optimization flags we want to be bit nicer. However never inline
635 : : if one of functions is not optimized at all. */
636 : 4664 : else if (!opt_for_fn (callee->decl, optimize)
637 : 4664 : || !opt_for_fn (caller->decl, optimize))
638 : : {
639 : 0 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
640 : 0 : inlinable = false;
641 : : }
642 : : /* If callee is optimized for size and caller is not, allow inlining if
643 : : code shrinks or we are in param_max_inline_insns_single limit and
644 : : callee is inline (and thus likely an unified comdat).
645 : : This will allow caller to run faster. */
646 : 4664 : else if (opt_for_fn (callee->decl, optimize_size)
647 : 4664 : > opt_for_fn (caller->decl, optimize_size))
648 : : {
649 : 120 : int growth = estimate_edge_growth (e);
650 : 120 : if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
651 : 120 : && (!DECL_DECLARED_INLINE_P (callee->decl)
652 : 67 : && growth >= MAX (inline_insns_single (caller, false, false),
653 : : inline_insns_auto (caller, false, false))))
654 : : {
655 : 0 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
656 : 0 : inlinable = false;
657 : : }
658 : : }
659 : : /* If callee is more aggressively optimized for performance than caller,
660 : : we generally want to inline only cheap (runtime wise) functions. */
661 : 4544 : else if (opt_for_fn (callee->decl, optimize_size)
662 : : < opt_for_fn (caller->decl, optimize_size)
663 : 4544 : || (opt_for_fn (callee->decl, optimize)
664 : : > opt_for_fn (caller->decl, optimize)))
665 : : {
666 : 12576 : if (estimate_edge_time (e)
667 : 4192 : >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
668 : : {
669 : 1518 : e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
670 : 1518 : inlinable = false;
671 : : }
672 : : }
673 : :
674 : : }
675 : :
676 : 61407 : if (!inlinable && (flags & CAN_INLINE_REPORT))
677 : 58042 : report_inline_failed_reason (e);
678 : : return inlinable;
679 : : }
680 : :
681 : :
682 : : /* Return true if the edge E is inlinable during early inlining. */
683 : :
684 : : static bool
685 : 3656663 : can_early_inline_edge_p (struct cgraph_edge *e)
686 : : {
687 : 7313326 : cgraph_node *caller = (e->caller->inlined_to
688 : 3656663 : ? e->caller->inlined_to : e->caller);
689 : 3656663 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
690 : : /* Early inliner might get called at WPA stage when IPA pass adds new
691 : : function. In this case we cannot really do any of early inlining
692 : : because function bodies are missing. */
693 : 3656663 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
694 : : return false;
695 : 3656424 : if (!gimple_has_body_p (callee->decl))
696 : : {
697 : 0 : e->inline_failed = CIF_BODY_NOT_AVAILABLE;
698 : 0 : return false;
699 : : }
700 : 7312848 : gcc_assert (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
701 : : && gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)));
702 : 3654893 : if ((profile_arc_flag || condition_coverage_flag)
703 : 3657959 : && ((lookup_attribute ("no_profile_instrument_function",
704 : 1535 : DECL_ATTRIBUTES (caller->decl)) == NULL_TREE)
705 : 1535 : != (lookup_attribute ("no_profile_instrument_function",
706 : 3070 : DECL_ATTRIBUTES (callee->decl)) == NULL_TREE)))
707 : : return false;
708 : :
709 : 3656422 : if (!can_inline_edge_p (e, true, true)
710 : 3656422 : || !can_inline_edge_by_limits_p (e, CAN_INLINE_EARLY | CAN_INLINE_REPORT))
711 : 85318 : return false;
712 : : /* When inlining regular functions into always-inline functions
713 : : during early inlining watch for possible inline cycles. */
714 : 3571104 : if (DECL_DISREGARD_INLINE_LIMITS (caller->decl)
715 : 196015 : && lookup_attribute ("always_inline", DECL_ATTRIBUTES (caller->decl))
716 : 3767110 : && (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
717 : 125605 : || !lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee->decl))))
718 : : {
719 : : /* If there are indirect calls, inlining may produce direct call.
720 : : TODO: We may lift this restriction if we avoid errors on formely
721 : : indirect calls to always_inline functions. Taking address
722 : : of always_inline function is generally bad idea and should
723 : : have been declared as undefined, but sadly we allow this. */
724 : 70402 : if (caller->indirect_calls || e->callee->indirect_calls)
725 : : return false;
726 : 69324 : ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
727 : 69324 : if (callee_info->safe_to_inline_to_always_inline)
728 : 15731 : return callee_info->safe_to_inline_to_always_inline - 1;
729 : 113601 : for (cgraph_edge *e2 = callee->callees; e2; e2 = e2->next_callee)
730 : : {
731 : 60020 : struct cgraph_node *callee2 = e2->callee->ultimate_alias_target ();
732 : : /* As early inliner runs in RPO order, we will see uninlined
733 : : always_inline calls only in the case of cyclic graphs. */
734 : 60020 : if (DECL_DISREGARD_INLINE_LIMITS (callee2->decl)
735 : 60020 : || lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee2->decl)))
736 : : {
737 : 0 : callee_info->safe_to_inline_to_always_inline = 1;
738 : 0 : return false;
739 : : }
740 : : /* With LTO watch for case where function is later replaced
741 : : by always_inline definition.
742 : : TODO: We may either stop treating noninlined cross-module always
743 : : inlines as errors, or we can extend decl merging to produce
744 : : syntacic alias and honor always inline only in units it has
745 : : been declared as such. */
746 : 60020 : if (flag_lto && callee2->externally_visible)
747 : : {
748 : 12 : callee_info->safe_to_inline_to_always_inline = 1;
749 : 12 : return false;
750 : : }
751 : : }
752 : 53581 : callee_info->safe_to_inline_to_always_inline = 2;
753 : : }
754 : : return true;
755 : : }
756 : :
757 : :
758 : : /* Return number of calls in N. Ignore cheap builtins. */
759 : :
760 : : static int
761 : 750423 : num_calls (struct cgraph_node *n)
762 : : {
763 : 750423 : struct cgraph_edge *e;
764 : 750423 : int num = 0;
765 : :
766 : 1473195 : for (e = n->callees; e; e = e->next_callee)
767 : 722772 : if (!is_inexpensive_builtin (e->callee->decl))
768 : 662297 : num++;
769 : 750423 : return num;
770 : : }
771 : :
772 : :
773 : : /* Return true if we are interested in inlining small function. */
774 : :
775 : : static bool
776 : 3097769 : want_early_inline_function_p (struct cgraph_edge *e)
777 : : {
778 : 3097769 : bool want_inline = true;
779 : 3097769 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
780 : :
781 : 3097769 : if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
782 : : ;
783 : : /* For AutoFDO, we need to make sure that before profile summary, all
784 : : hot paths' IR look exactly the same as profiled binary. As a result,
785 : : in einliner, we will disregard size limit and inline those callsites
786 : : that are:
787 : : * inlined in the profiled binary, and
788 : : * the cloned callee has enough samples to be considered "hot". */
789 : 3097762 : else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
790 : : ;
791 : 3097762 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
792 : 3097762 : && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
793 : : {
794 : 153 : e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
795 : 153 : report_inline_failed_reason (e);
796 : 153 : want_inline = false;
797 : : }
798 : : else
799 : : {
800 : : /* First take care of very large functions. */
801 : 3097609 : int min_growth = estimate_min_edge_growth (e), growth = 0;
802 : 3097609 : int n;
803 : 3097609 : int early_inlining_insns = param_early_inlining_insns;
804 : :
805 : 3097609 : if (min_growth > early_inlining_insns)
806 : : {
807 : 392533 : if (dump_enabled_p ())
808 : 40 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
809 : : " will not early inline: %C->%C, "
810 : : "call is cold and code would grow "
811 : : "at least by %i\n",
812 : : e->caller, callee,
813 : : min_growth);
814 : : want_inline = false;
815 : : }
816 : : else
817 : 2705076 : growth = estimate_edge_growth (e);
818 : :
819 : :
820 : 2705076 : if (!want_inline || growth <= param_max_inline_insns_size)
821 : : ;
822 : 1097622 : else if (!e->maybe_hot_p ())
823 : : {
824 : 17981 : if (dump_enabled_p ())
825 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
826 : : " will not early inline: %C->%C, "
827 : : "call is cold and code would grow by %i\n",
828 : : e->caller, callee,
829 : : growth);
830 : : want_inline = false;
831 : : }
832 : 1079641 : else if (growth > early_inlining_insns)
833 : : {
834 : 329218 : if (dump_enabled_p ())
835 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
836 : : " will not early inline: %C->%C, "
837 : : "growth %i exceeds --param early-inlining-insns\n",
838 : : e->caller, callee, growth);
839 : : want_inline = false;
840 : : }
841 : 750423 : else if ((n = num_calls (callee)) != 0
842 : 750423 : && growth * (n + 1) > early_inlining_insns)
843 : : {
844 : 218771 : if (dump_enabled_p ())
845 : 11 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
846 : : " will not early inline: %C->%C, "
847 : : "growth %i exceeds --param early-inlining-insns "
848 : : "divided by number of calls\n",
849 : : e->caller, callee, growth);
850 : : want_inline = false;
851 : : }
852 : : }
853 : 3097769 : return want_inline;
854 : : }
855 : :
856 : : /* Compute time of the edge->caller + edge->callee execution when inlining
857 : : does not happen. */
858 : :
859 : : inline sreal
860 : 375322 : compute_uninlined_call_time (struct cgraph_edge *edge,
861 : : sreal uninlined_call_time,
862 : : sreal freq)
863 : : {
864 : 750644 : cgraph_node *caller = (edge->caller->inlined_to
865 : 375322 : ? edge->caller->inlined_to
866 : : : edge->caller);
867 : :
868 : 375322 : if (freq > 0)
869 : 366004 : uninlined_call_time *= freq;
870 : : else
871 : 9318 : uninlined_call_time = uninlined_call_time >> 11;
872 : :
873 : 375322 : sreal caller_time = ipa_fn_summaries->get (caller)->time;
874 : 375322 : return uninlined_call_time + caller_time;
875 : : }
876 : :
877 : : /* Same as compute_uinlined_call_time but compute time when inlining
878 : : does happen. */
879 : :
880 : : inline sreal
881 : 375322 : compute_inlined_call_time (struct cgraph_edge *edge,
882 : : sreal time,
883 : : sreal freq)
884 : : {
885 : 750644 : cgraph_node *caller = (edge->caller->inlined_to
886 : 375322 : ? edge->caller->inlined_to
887 : : : edge->caller);
888 : 375322 : sreal caller_time = ipa_fn_summaries->get (caller)->time;
889 : :
890 : 375322 : if (freq > 0)
891 : 366004 : time *= freq;
892 : : else
893 : 9318 : time = time >> 11;
894 : :
895 : : /* This calculation should match one in ipa-inline-analysis.cc
896 : : (estimate_edge_size_and_time). */
897 : 375322 : time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
898 : 375322 : time += caller_time;
899 : 375322 : if (time <= 0)
900 : 0 : time = ((sreal) 1) >> 8;
901 : 375322 : gcc_checking_assert (time >= 0);
902 : 375322 : return time;
903 : : }
904 : :
905 : : /* Determine time saved by inlining EDGE of frequency FREQ
906 : : where callee's runtime w/o inlining is UNINLINED_TYPE
907 : : and with inlined is INLINED_TYPE. */
908 : :
909 : : inline sreal
910 : 7693646 : inlining_speedup (struct cgraph_edge *edge,
911 : : sreal freq,
912 : : sreal uninlined_time,
913 : : sreal inlined_time)
914 : : {
915 : 7693646 : sreal speedup = uninlined_time - inlined_time;
916 : : /* Handling of call_time should match one in ipa-inline-fnsummary.c
917 : : (estimate_edge_size_and_time). */
918 : 7693646 : sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
919 : :
920 : 7693646 : if (freq > 0)
921 : : {
922 : 7662710 : speedup = (speedup + call_time);
923 : 9572621 : if (freq != 1)
924 : 5752799 : speedup = speedup * freq;
925 : : }
926 : 30936 : else if (freq == 0)
927 : 30936 : speedup = speedup >> 11;
928 : 7693646 : gcc_checking_assert (speedup >= 0);
929 : 7693646 : return speedup;
930 : : }
931 : :
932 : : /* Return true if the speedup for inlining E is bigger than
933 : : param_inline_min_speedup. */
934 : :
935 : : static bool
936 : 375322 : big_speedup_p (struct cgraph_edge *e)
937 : : {
938 : 375322 : sreal unspec_time;
939 : 375322 : sreal spec_time = estimate_edge_time (e, &unspec_time);
940 : 375322 : sreal freq = e->sreal_frequency ();
941 : 375322 : sreal time = compute_uninlined_call_time (e, unspec_time, freq);
942 : 375322 : sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
943 : 750644 : cgraph_node *caller = (e->caller->inlined_to
944 : 375322 : ? e->caller->inlined_to
945 : : : e->caller);
946 : 375322 : int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
947 : :
948 : 375322 : if ((time - inlined_time) * 100 > time * limit)
949 : : return true;
950 : : return false;
951 : : }
952 : :
953 : : /* Return true if we are interested in inlining small function.
954 : : When REPORT is true, report reason to dump file. */
955 : :
956 : : static bool
957 : 4304061 : want_inline_small_function_p (struct cgraph_edge *e, bool report)
958 : : {
959 : 4304061 : bool want_inline = true;
960 : 4304061 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
961 : 8608122 : cgraph_node *to = (e->caller->inlined_to
962 : 4304061 : ? e->caller->inlined_to : e->caller);
963 : :
964 : : /* Allow this function to be called before can_inline_edge_p,
965 : : since it's usually cheaper. */
966 : 4304061 : if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
967 : : want_inline = false;
968 : 4304061 : else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
969 : : ;
970 : 4298977 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
971 : 4298977 : && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
972 : : {
973 : 44554 : e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
974 : 44554 : want_inline = false;
975 : : }
976 : : /* Do fast and conservative check if the function can be good
977 : : inline candidate. */
978 : 4254423 : else if ((!DECL_DECLARED_INLINE_P (callee->decl)
979 : 1974866 : && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
980 : 6229185 : && ipa_fn_summaries->get (callee)->min_size
981 : 1974762 : - ipa_call_summaries->get (e)->call_stmt_size
982 : 1974762 : > inline_insns_auto (e->caller, true, true))
983 : : {
984 : 948 : e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
985 : 948 : want_inline = false;
986 : : }
987 : 4253475 : else if ((DECL_DECLARED_INLINE_P (callee->decl)
988 : 1973918 : || e->count.ipa ().nonzero_p ())
989 : 6533266 : && ipa_fn_summaries->get (callee)->min_size
990 : 2279791 : - ipa_call_summaries->get (e)->call_stmt_size
991 : 2279791 : > inline_insns_single (e->caller, true, true))
992 : : {
993 : 0 : e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
994 : 0 : ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
995 : : : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
996 : 0 : want_inline = false;
997 : : }
998 : : else
999 : : {
1000 : 4253475 : int growth = estimate_edge_growth (e);
1001 : 4253475 : ipa_hints hints = estimate_edge_hints (e);
1002 : : /* We have two independent groups of hints. If one matches in each
1003 : : of groups the limits are inreased. If both groups matches, limit
1004 : : is increased even more. */
1005 : 4253475 : bool apply_hints = (hints & (INLINE_HINT_indirect_call
1006 : : | INLINE_HINT_known_hot
1007 : : | INLINE_HINT_loop_iterations
1008 : : | INLINE_HINT_loop_stride));
1009 : 4253475 : bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);
1010 : :
1011 : 4253475 : if (growth <= opt_for_fn (to->decl,
1012 : : param_max_inline_insns_size))
1013 : : ;
1014 : : /* Apply param_max_inline_insns_single limit. Do not do so when
1015 : : hints suggests that inlining given function is very profitable.
1016 : : Avoid computation of big_speedup_p when not necessary to change
1017 : : outcome of decision. */
1018 : 4137624 : else if (DECL_DECLARED_INLINE_P (callee->decl)
1019 : 2231478 : && growth >= inline_insns_single (e->caller, apply_hints,
1020 : : apply_hints2)
1021 : 4428102 : && (apply_hints || apply_hints2
1022 : 285027 : || growth >= inline_insns_single (e->caller, true,
1023 : : apply_hints2)
1024 : 144505 : || !big_speedup_p (e)))
1025 : : {
1026 : 290217 : e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
1027 : 290217 : want_inline = false;
1028 : : }
1029 : 3847407 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
1030 : 1906146 : && !opt_for_fn (e->caller->decl, flag_inline_functions)
1031 : 3852715 : && growth >= opt_for_fn (to->decl,
1032 : : param_max_inline_insns_small))
1033 : : {
1034 : : /* growth_positive_p is expensive, always test it last. */
1035 : 5308 : if (growth >= inline_insns_single (e->caller, false, false)
1036 : 5308 : || growth_positive_p (callee, e, growth))
1037 : : {
1038 : 4957 : e->inline_failed = CIF_NOT_DECLARED_INLINED;
1039 : 4957 : want_inline = false;
1040 : : }
1041 : : }
1042 : : /* Apply param_max_inline_insns_auto limit for functions not declared
1043 : : inline. Bypass the limit when speedup seems big. */
1044 : 3842099 : else if (!DECL_DECLARED_INLINE_P (callee->decl)
1045 : 1900838 : && growth >= inline_insns_auto (e->caller, apply_hints,
1046 : : apply_hints2)
1047 : 5003167 : && (apply_hints || apply_hints2
1048 : 1144770 : || growth >= inline_insns_auto (e->caller, true,
1049 : : apply_hints2)
1050 : 230653 : || !big_speedup_p (e)))
1051 : : {
1052 : : /* growth_positive_p is expensive, always test it last. */
1053 : 1144689 : if (growth >= inline_insns_single (e->caller, false, false)
1054 : 1144689 : || growth_positive_p (callee, e, growth))
1055 : : {
1056 : 1059550 : e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
1057 : 1059550 : want_inline = false;
1058 : : }
1059 : : }
1060 : : /* If call is cold, do not inline when function body would grow. */
1061 : 2697410 : else if (!e->maybe_hot_p ()
1062 : 2697410 : && (growth >= inline_insns_single (e->caller, false, false)
1063 : 744711 : || growth_positive_p (callee, e, growth)))
1064 : : {
1065 : 680806 : e->inline_failed = CIF_UNLIKELY_CALL;
1066 : 680806 : want_inline = false;
1067 : : }
1068 : : }
1069 : 4304061 : if (!want_inline && report)
1070 : 615448 : report_inline_failed_reason (e);
1071 : 4304061 : return want_inline;
1072 : : }
1073 : :
1074 : : /* EDGE is self recursive edge.
1075 : : We handle two cases - when function A is inlining into itself
1076 : : or when function A is being inlined into another inliner copy of function
1077 : : A within function B.
1078 : :
1079 : : In first case OUTER_NODE points to the toplevel copy of A, while
1080 : : in the second case OUTER_NODE points to the outermost copy of A in B.
1081 : :
1082 : : In both cases we want to be extra selective since
1083 : : inlining the call will just introduce new recursive calls to appear. */
1084 : :
1085 : : static bool
1086 : 17058 : want_inline_self_recursive_call_p (struct cgraph_edge *edge,
1087 : : struct cgraph_node *outer_node,
1088 : : bool peeling,
1089 : : int depth)
1090 : : {
1091 : 17058 : char const *reason = NULL;
1092 : 17058 : bool want_inline = true;
1093 : 17058 : sreal caller_freq = 1;
1094 : 17058 : int max_depth = opt_for_fn (outer_node->decl,
1095 : : param_max_inline_recursive_depth_auto);
1096 : :
1097 : 17058 : if (DECL_DECLARED_INLINE_P (edge->caller->decl))
1098 : 2204 : max_depth = opt_for_fn (outer_node->decl,
1099 : : param_max_inline_recursive_depth);
1100 : :
1101 : 17058 : if (!edge->maybe_hot_p ())
1102 : : {
1103 : : reason = "recursive call is cold";
1104 : : want_inline = false;
1105 : : }
1106 : 16996 : else if (depth > max_depth)
1107 : : {
1108 : : reason = "--param max-inline-recursive-depth exceeded.";
1109 : : want_inline = false;
1110 : : }
1111 : 14901 : else if (outer_node->inlined_to
1112 : 18120 : && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
1113 : : {
1114 : 0 : reason = "caller frequency is 0";
1115 : 0 : want_inline = false;
1116 : : }
1117 : :
1118 : 0 : if (!want_inline)
1119 : : ;
1120 : : /* Inlining of self recursive function into copy of itself within other
1121 : : function is transformation similar to loop peeling.
1122 : :
1123 : : Peeling is profitable if we can inline enough copies to make probability
1124 : : of actual call to the self recursive function very small. Be sure that
1125 : : the probability of recursion is small.
1126 : :
1127 : : We ensure that the frequency of recursing is at most 1 - (1/max_depth).
1128 : : This way the expected number of recursion is at most max_depth. */
1129 : 14901 : else if (peeling)
1130 : : {
1131 : 3219 : sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1132 : 3219 : int i;
1133 : 7192 : for (i = 1; i < depth; i++)
1134 : 3973 : max_prob = max_prob * max_prob;
1135 : 3219 : if (edge->sreal_frequency () >= max_prob * caller_freq)
1136 : : {
1137 : 1508 : reason = "frequency of recursive call is too large";
1138 : 1508 : want_inline = false;
1139 : : }
1140 : : }
1141 : : /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
1142 : : recursion depth is large. We reduce function call overhead and increase
1143 : : chances that things fit in hardware return predictor.
1144 : :
1145 : : Recursive inlining might however increase cost of stack frame setup
1146 : : actually slowing down functions whose recursion tree is wide rather than
1147 : : deep.
1148 : :
1149 : : Deciding reliably on when to do recursive inlining without profile feedback
1150 : : is tricky. For now we disable recursive inlining when probability of self
1151 : : recursion is low.
1152 : :
1153 : : Recursive inlining of self recursive call within loop also results in
1154 : : large loop depths that generally optimize badly. We may want to throttle
1155 : : down inlining in those cases. In particular this seems to happen in one
1156 : : of libstdc++ rb tree methods. */
1157 : : else
1158 : : {
1159 : 11682 : if (edge->sreal_frequency () * 100
1160 : 11682 : <= caller_freq
1161 : 23364 : * opt_for_fn (outer_node->decl,
1162 : : param_min_inline_recursive_probability))
1163 : : {
1164 : 581 : reason = "frequency of recursive call is too small";
1165 : 581 : want_inline = false;
1166 : : }
1167 : : }
1168 : 17058 : if (!can_inline_edge_by_limits_p (edge, CAN_INLINE_FORCE_LIMITS | CAN_INLINE_REPORT))
1169 : : {
1170 : : reason = "inline limits exceeded for always_inline function";
1171 : : want_inline = false;
1172 : : }
1173 : 17058 : if (!want_inline && dump_enabled_p ())
1174 : 7 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1175 : : " not inlining recursively: %s\n", reason);
1176 : 17058 : return want_inline;
1177 : : }
1178 : :
1179 : : /* Return true when NODE has uninlinable caller;
1180 : : set HAS_HOT_CALL if it has hot call.
1181 : : Worker for cgraph_for_node_and_aliases. */
1182 : :
1183 : : static bool
1184 : 70363 : check_callers (struct cgraph_node *node, void *has_hot_call)
1185 : : {
1186 : 70363 : struct cgraph_edge *e;
1187 : 122109 : for (e = node->callers; e; e = e->next_caller)
1188 : : {
1189 : 77662 : if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1190 : 77662 : || !opt_for_fn (e->caller->decl, optimize))
1191 : : return true;
1192 : 77662 : if (!can_inline_edge_p (e, true))
1193 : : return true;
1194 : 77635 : if (e->recursive_p ())
1195 : : return true;
1196 : 77635 : if (!can_inline_edge_by_limits_p (e, CAN_INLINE_REPORT))
1197 : : return true;
1198 : : /* Inlining large functions to large loop depth is often harmful because
1199 : : of register pressure it implies. */
1200 : 51792 : if ((int)ipa_call_summaries->get (e)->loop_depth
1201 : 51792 : > param_inline_functions_called_once_loop_depth)
1202 : : return true;
1203 : : /* Do not produce gigantic functions. */
1204 : 95540 : if (estimate_size_after_inlining (e->caller->inlined_to ?
1205 : : e->caller->inlined_to : e->caller, e)
1206 : 51792 : > param_inline_functions_called_once_insns)
1207 : : return true;
1208 : 51746 : if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1209 : 9936 : *(bool *)has_hot_call = true;
1210 : : }
1211 : : return false;
1212 : : }
1213 : :
1214 : : /* If NODE has a caller, return true. */
1215 : :
1216 : : static bool
1217 : 2064417 : has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1218 : : {
1219 : 2064417 : if (node->callers)
1220 : 639982 : return true;
1221 : : return false;
1222 : : }
1223 : :
1224 : : /* Decide if inlining NODE would reduce unit size by eliminating
1225 : : the offline copy of function.
1226 : : When COLD is true the cold calls are considered, too. */
1227 : :
1228 : : static bool
1229 : 4297388 : want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1230 : : {
1231 : 4297388 : bool has_hot_call = false;
1232 : :
1233 : : /* Aliases gets inlined along with the function they alias. */
1234 : 4297388 : if (node->alias)
1235 : : return false;
1236 : : /* Already inlined? */
1237 : 4223937 : if (node->inlined_to)
1238 : : return false;
1239 : : /* Does it have callers? */
1240 : 2017584 : if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1241 : : return false;
1242 : : /* Inlining into all callers would increase size? */
1243 : 639982 : if (growth_positive_p (node, NULL, INT_MIN) > 0)
1244 : : return false;
1245 : : /* All inlines must be possible. */
1246 : 66272 : if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1247 : : true))
1248 : : return false;
1249 : 40356 : if (!cold && !has_hot_call)
1250 : : return false;
1251 : : return true;
1252 : : }
1253 : :
1254 : : /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1255 : : in estimate_edge_badness. */
1256 : :
1257 : : static bool
1258 : 544962 : wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1259 : : {
1260 : 544962 : return size < (DECL_DECLARED_INLINE_P (where->decl)
1261 : 544962 : ? inline_insns_single (where, false, false)
1262 : 340018 : : inline_insns_auto (where, false, false));
1263 : : }
1264 : :
1265 : : /* A cost model driving the inlining heuristics in a way so the edges with
1266 : : smallest badness are inlined first. After each inlining is performed
1267 : : the costs of all caller edges of nodes affected are recomputed so the
1268 : : metrics may accurately depend on values such as number of inlinable callers
1269 : : of the function or function body size. */
1270 : :
1271 : : static sreal
1272 : 8195884 : edge_badness (struct cgraph_edge *edge, bool dump)
1273 : : {
1274 : 8195884 : sreal badness;
1275 : 8195884 : int growth;
1276 : 8195884 : sreal edge_time, unspec_edge_time;
1277 : 8195884 : struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1278 : 8195884 : class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1279 : 8195884 : ipa_hints hints;
1280 : 16391768 : cgraph_node *caller = (edge->caller->inlined_to
1281 : 8195884 : ? edge->caller->inlined_to
1282 : : : edge->caller);
1283 : :
1284 : 8195884 : growth = estimate_edge_growth (edge);
1285 : 8195884 : edge_time = estimate_edge_time (edge, &unspec_edge_time);
1286 : 8195884 : hints = estimate_edge_hints (edge);
1287 : 8195884 : gcc_checking_assert (edge_time >= 0);
1288 : : /* Check that inlined time is better, but tolerate some roundoff issues.
1289 : : FIXME: When callee profile drops to 0 we account calls more. This
1290 : : should be fixed by never doing that. */
1291 : 8195884 : gcc_checking_assert ((edge_time * 100
1292 : : - callee_info->time * 101).to_int () <= 0
1293 : : || callee->count.ipa ().initialized_p ());
1294 : 8195884 : gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1295 : :
1296 : 8195884 : if (dump)
1297 : : {
1298 : 164 : fprintf (dump_file, " Badness calculation for %s -> %s\n",
1299 : 164 : edge->caller->dump_name (),
1300 : 164 : edge->callee->dump_name ());
1301 : 164 : fprintf (dump_file, " size growth %i, time %f unspec %f ",
1302 : : growth,
1303 : : edge_time.to_double (),
1304 : : unspec_edge_time.to_double ());
1305 : 164 : ipa_dump_hints (dump_file, hints);
1306 : 164 : if (big_speedup_p (edge))
1307 : 133 : fprintf (dump_file, " big_speedup");
1308 : 164 : fprintf (dump_file, "\n");
1309 : : }
1310 : :
1311 : : /* Always prefer inlining saving code size. */
1312 : 8195884 : if (growth <= 0)
1313 : : {
1314 : 469978 : badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1315 : 469978 : if (dump)
1316 : 87 : fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1317 : : growth);
1318 : : }
1319 : : /* Inlining into EXTERNAL functions is not going to change anything unless
1320 : : they are themselves inlined. */
1321 : 7725906 : else if (DECL_EXTERNAL (caller->decl))
1322 : : {
1323 : 31025 : if (dump)
1324 : 0 : fprintf (dump_file, " max: function is external\n");
1325 : 31025 : return sreal::max ();
1326 : : }
1327 : : /* When profile is available. Compute badness as:
1328 : :
1329 : : time_saved * caller_count
1330 : : goodness = -------------------------------------------------
1331 : : growth_of_caller * overall_growth * combined_size
1332 : :
1333 : : badness = - goodness
1334 : :
1335 : : Again use negative value to make calls with profile appear hotter
1336 : : then calls without.
1337 : : */
1338 : 7694881 : else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1339 : 7694881 : || caller->count.ipa ().nonzero_p ())
1340 : : {
1341 : 7693569 : sreal numerator, denominator;
1342 : 7693569 : int overall_growth;
1343 : 7693569 : sreal freq = edge->sreal_frequency ();
1344 : :
1345 : 7693569 : numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1346 : 7693569 : if (numerator <= 0)
1347 : 6323 : numerator = ((sreal) 1 >> 8);
1348 : 7693569 : if (caller->count.ipa ().nonzero_p ())
1349 : 101 : numerator *= caller->count.ipa ().to_gcov_type ();
1350 : 7693468 : else if (caller->count.ipa ().initialized_p ())
1351 : 679 : numerator = numerator >> 11;
1352 : 7693569 : denominator = growth;
1353 : :
1354 : 7693569 : overall_growth = callee_info->growth;
1355 : :
1356 : : /* Look for inliner wrappers of the form:
1357 : :
1358 : : inline_caller ()
1359 : : {
1360 : : do_fast_job...
1361 : : if (need_more_work)
1362 : : noninline_callee ();
1363 : : }
1364 : : Without penalizing this case, we usually inline noninline_callee
1365 : : into the inline_caller because overall_growth is small preventing
1366 : : further inlining of inline_caller.
1367 : :
1368 : : Penalize only callgraph edges to functions with small overall
1369 : : growth ...
1370 : : */
1371 : 7693569 : if (growth > overall_growth
1372 : : /* ... and having only one caller which is not inlined ... */
1373 : 1694192 : && callee_info->single_caller
1374 : 1035859 : && !edge->caller->inlined_to
1375 : : /* ... and edges executed only conditionally ... */
1376 : 673121 : && freq < 1
1377 : : /* ... consider case where callee is not inline but caller is ... */
1378 : 8014889 : && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1379 : 97999 : && DECL_DECLARED_INLINE_P (caller->decl))
1380 : : /* ... or when early optimizers decided to split and edge
1381 : : frequency still indicates splitting is a win ... */
1382 : 312695 : || (callee->split_part && !caller->split_part
1383 : 73160 : && freq * 100
1384 : 7759110 : < opt_for_fn (caller->decl,
1385 : : param_partial_inlining_entry_probability)
1386 : : /* ... and do not overwrite user specified hints. */
1387 : 72840 : && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1388 : 50384 : || DECL_DECLARED_INLINE_P (caller->decl)))))
1389 : : {
1390 : 80459 : ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1391 : 80459 : int caller_growth = caller_info->growth;
1392 : :
1393 : : /* Only apply the penalty when caller looks like inline candidate,
1394 : : and it is not called once. */
1395 : 45600 : if (!caller_info->single_caller && overall_growth < caller_growth
1396 : 43513 : && caller_info->inlinable
1397 : 123918 : && wrapper_heuristics_may_apply
1398 : 43459 : (caller, ipa_size_summaries->get (caller)->size))
1399 : : {
1400 : 34356 : if (dump)
1401 : 1 : fprintf (dump_file,
1402 : : " Wrapper penalty. Increasing growth %i to %i\n",
1403 : : overall_growth, caller_growth);
1404 : : overall_growth = caller_growth;
1405 : : }
1406 : : }
1407 : 7693569 : if (overall_growth > 0)
1408 : : {
1409 : : /* Strongly prefer functions with few callers that can be inlined
1410 : : fully. The square root here leads to smaller binaries at average.
1411 : : Watch however for extreme cases and return to linear function
1412 : : when growth is large. */
1413 : 6494011 : if (overall_growth < 256)
1414 : 3354849 : overall_growth *= overall_growth;
1415 : : else
1416 : 3139162 : overall_growth += 256 * 256 - 256;
1417 : 6494011 : denominator *= overall_growth;
1418 : : }
1419 : 7693569 : denominator *= ipa_size_summaries->get (caller)->size + growth;
1420 : :
1421 : 7693569 : badness = - numerator / denominator;
1422 : :
1423 : 7693569 : if (dump)
1424 : : {
1425 : 308 : fprintf (dump_file,
1426 : : " %f: guessed profile. frequency %f, count %" PRId64
1427 : : " caller count %" PRId64
1428 : : " time saved %f"
1429 : : " overall growth %i (current) %i (original)"
1430 : : " %i (compensated)\n",
1431 : : badness.to_double (),
1432 : : freq.to_double (),
1433 : 77 : edge->count.ipa ().initialized_p ()
1434 : 0 : ? edge->count.ipa ().to_gcov_type () : -1,
1435 : 77 : caller->count.ipa ().initialized_p ()
1436 : 0 : ? caller->count.ipa ().to_gcov_type () : -1,
1437 : 154 : inlining_speedup (edge, freq, unspec_edge_time,
1438 : : edge_time).to_double (),
1439 : : estimate_growth (callee),
1440 : : callee_info->growth, overall_growth);
1441 : : }
1442 : : }
1443 : : /* When function local profile is not available or it does not give
1444 : : useful information (i.e. frequency is zero), base the cost on
1445 : : loop nest and overall size growth, so we optimize for overall number
1446 : : of functions fully inlined in program. */
1447 : : else
1448 : : {
1449 : 1312 : int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1450 : 1312 : badness = growth;
1451 : :
1452 : : /* Decrease badness if call is nested. */
1453 : 1312 : if (badness > 0)
1454 : 1312 : badness = badness >> nest;
1455 : : else
1456 : 0 : badness = badness << nest;
1457 : 1312 : if (dump)
1458 : 0 : fprintf (dump_file, " %f: no profile. nest %i\n",
1459 : : badness.to_double (), nest);
1460 : : }
1461 : 8164859 : gcc_checking_assert (badness != 0);
1462 : :
1463 : 8164859 : if (edge->recursive_p ())
1464 : 13833 : badness = badness.shift (badness > 0 ? 4 : -4);
1465 : 8164859 : if ((hints & (INLINE_HINT_indirect_call
1466 : : | INLINE_HINT_loop_iterations
1467 : : | INLINE_HINT_loop_stride))
1468 : 7336985 : || callee_info->growth <= 0)
1469 : 4126136 : badness = badness.shift (badness > 0 ? -2 : 2);
1470 : 8164859 : if (hints & INLINE_HINT_builtin_constant_p)
1471 : 21318 : badness = badness.shift (badness > 0 ? -4 : 4);
1472 : 8164859 : if (hints & (INLINE_HINT_same_scc))
1473 : 73166 : badness = badness.shift (badness > 0 ? 3 : -3);
1474 : 8128275 : else if (hints & (INLINE_HINT_in_scc))
1475 : 81610 : badness = badness.shift (badness > 0 ? 2 : -2);
1476 : 8087470 : else if (hints & (INLINE_HINT_cross_module))
1477 : 3386 : badness = badness.shift (badness > 0 ? 1 : -1);
1478 : 8164859 : if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1479 : 15244 : badness = badness.shift (badness > 0 ? -4 : 4);
1480 : 8157237 : else if ((hints & INLINE_HINT_declared_inline))
1481 : 12120487 : badness = badness.shift (badness > 0 ? -3 : 3);
1482 : 8164859 : if (dump)
1483 : 164 : fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1484 : 8164859 : return badness;
1485 : : }
1486 : :
1487 : : /* Recompute badness of EDGE and update its key in HEAP if needed. */
1488 : : static inline void
1489 : 4126417 : update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1490 : : {
1491 : 4126417 : sreal badness = edge_badness (edge, false);
1492 : 4126417 : if (edge->aux)
1493 : : {
1494 : 3237363 : edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1495 : 3237363 : gcc_checking_assert (n->get_data () == edge);
1496 : :
1497 : : /* fibonacci_heap::replace_key does busy updating of the
1498 : : heap that is unnecessarily expensive.
1499 : : We do lazy increases: after extracting minimum if the key
1500 : : turns out to be out of date, it is re-inserted into heap
1501 : : with correct value. */
1502 : 3237363 : if (badness < n->get_key ().badness)
1503 : : {
1504 : 688705 : if (dump_file && (dump_flags & TDF_DETAILS))
1505 : : {
1506 : 78 : fprintf (dump_file,
1507 : : " decreasing badness %s -> %s, %f to %f\n",
1508 : 39 : edge->caller->dump_name (),
1509 : 39 : edge->callee->dump_name (),
1510 : 78 : n->get_key ().badness.to_double (),
1511 : : badness.to_double ());
1512 : : }
1513 : 688705 : inline_badness b (edge, badness);
1514 : 688705 : heap->decrease_key (n, b);
1515 : : }
1516 : : }
1517 : : else
1518 : : {
1519 : 889054 : if (dump_file && (dump_flags & TDF_DETAILS))
1520 : : {
1521 : 266 : fprintf (dump_file,
1522 : : " enqueuing call %s -> %s, badness %f\n",
1523 : 133 : edge->caller->dump_name (),
1524 : 133 : edge->callee->dump_name (),
1525 : : badness.to_double ());
1526 : : }
1527 : 889054 : inline_badness b (edge, badness);
1528 : 889054 : edge->aux = heap->insert (b, edge);
1529 : : }
1530 : 4126417 : }
1531 : :
1532 : :
1533 : : /* NODE was inlined.
1534 : : All caller edges needs to be reset because
1535 : : size estimates change. Similarly callees needs reset
1536 : : because better context may be known. */
1537 : :
1538 : : static void
1539 : 810571 : reset_edge_caches (struct cgraph_node *node)
1540 : : {
1541 : 810571 : struct cgraph_edge *edge;
1542 : 810571 : struct cgraph_edge *e = node->callees;
1543 : 810571 : struct cgraph_node *where = node;
1544 : 810571 : struct ipa_ref *ref;
1545 : :
1546 : 810571 : if (where->inlined_to)
1547 : 755652 : where = where->inlined_to;
1548 : :
1549 : 810571 : reset_node_cache (where);
1550 : :
1551 : 810571 : if (edge_growth_cache != NULL)
1552 : 2649529 : for (edge = where->callers; edge; edge = edge->next_caller)
1553 : 1839039 : if (edge->inline_failed)
1554 : 1839039 : edge_growth_cache->remove (edge);
1555 : :
1556 : 863508 : FOR_EACH_ALIAS (where, ref)
1557 : 105874 : reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1558 : :
1559 : 810571 : if (!e)
1560 : : return;
1561 : :
1562 : 2192405 : while (true)
1563 : 2192405 : if (!e->inline_failed && e->callee->callees)
1564 : : e = e->callee->callees;
1565 : : else
1566 : : {
1567 : 1799254 : if (edge_growth_cache != NULL && e->inline_failed)
1568 : 1714541 : edge_growth_cache->remove (e);
1569 : 1799254 : if (e->next_callee)
1570 : : e = e->next_callee;
1571 : : else
1572 : : {
1573 : 1065064 : do
1574 : : {
1575 : 1065064 : if (e->caller == node)
1576 : : return;
1577 : 393151 : e = e->caller->callers;
1578 : : }
1579 : 393151 : while (!e->next_callee);
1580 : : e = e->next_callee;
1581 : : }
1582 : : }
1583 : : }
1584 : :
1585 : : /* Recompute HEAP nodes for each of caller of NODE.
1586 : : UPDATED_NODES track nodes we already visited, to avoid redundant work.
1587 : : When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1588 : : it is inlinable. Otherwise check all edges. */
1589 : :
1590 : : static void
1591 : 810184 : update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1592 : : bitmap updated_nodes,
1593 : : struct cgraph_edge *check_inlinablity_for)
1594 : : {
1595 : 810184 : struct cgraph_edge *edge;
1596 : 810184 : struct ipa_ref *ref;
1597 : :
1598 : 810184 : if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1599 : 799554 : || node->inlined_to)
1600 : 10630 : return;
1601 : 799554 : if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1602 : : return;
1603 : :
1604 : 852155 : FOR_EACH_ALIAS (node, ref)
1605 : : {
1606 : 52601 : struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1607 : 52601 : update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1608 : : }
1609 : :
1610 : 2531293 : for (edge = node->callers; edge; edge = edge->next_caller)
1611 : 1731739 : if (edge->inline_failed)
1612 : : {
1613 : 1731739 : if (!check_inlinablity_for
1614 : 1731739 : || check_inlinablity_for == edge)
1615 : : {
1616 : 1731739 : if (can_inline_edge_p (edge, false)
1617 : 1699854 : && want_inline_small_function_p (edge, false)
1618 : 2162909 : && can_inline_edge_by_limits_p (edge, 0))
1619 : 428367 : update_edge_key (heap, edge);
1620 : 1303372 : else if (edge->aux)
1621 : : {
1622 : 70515 : report_inline_failed_reason (edge);
1623 : 70515 : heap->delete_node ((edge_heap_node_t *) edge->aux);
1624 : 70515 : edge->aux = NULL;
1625 : : }
1626 : : }
1627 : 0 : else if (edge->aux)
1628 : 0 : update_edge_key (heap, edge);
1629 : : }
1630 : : }
1631 : :
1632 : : /* Recompute HEAP nodes for each uninlined call in NODE
1633 : : If UPDATE_SINCE is non-NULL check if edges called within that function
1634 : : are inlinable (typically UPDATE_SINCE is the inline clone we introduced
1635 : : where all edges have new context).
1636 : :
1637 : : This is used when we know that edge badnesses are going only to increase
1638 : : (we introduced new call site) and thus all we need is to insert newly
1639 : : created edges into heap. */
1640 : :
1641 : : static void
1642 : 757617 : update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1643 : : struct cgraph_node *update_since,
1644 : : bitmap updated_nodes)
1645 : : {
1646 : 757617 : struct cgraph_edge *e = node->callees;
1647 : 757617 : bool check_inlinability = update_since == node;
1648 : :
1649 : 757617 : if (!e)
1650 : : return;
1651 : 22672358 : while (true)
1652 : 22672358 : if (!e->inline_failed && e->callee->callees)
1653 : : {
1654 : 3576105 : if (e->callee == update_since)
1655 : 339461 : check_inlinability = true;
1656 : : e = e->callee->callees;
1657 : : }
1658 : : else
1659 : : {
1660 : 19096253 : enum availability avail;
1661 : 19096253 : struct cgraph_node *callee;
1662 : 19096253 : if (!check_inlinability)
1663 : : {
1664 : 17313725 : if (e->aux
1665 : 19986286 : && !bitmap_bit_p (updated_nodes,
1666 : 2672561 : e->callee->ultimate_alias_target
1667 : 2672561 : (&avail, e->caller)->get_uid ()))
1668 : 2672561 : update_edge_key (heap, e);
1669 : : }
1670 : : /* We do not reset callee growth cache here. Since we added a new call,
1671 : : growth should have just increased and consequently badness metric
1672 : : don't need updating. */
1673 : 1782528 : else if (e->inline_failed
1674 : 1700104 : && (callee = e->callee->ultimate_alias_target (&avail,
1675 : 1700104 : e->caller))
1676 : 1700104 : && avail >= AVAIL_AVAILABLE
1677 : 469966 : && ipa_fn_summaries->get (callee) != NULL
1678 : 469964 : && ipa_fn_summaries->get (callee)->inlinable
1679 : 2239540 : && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1680 : : {
1681 : 457012 : if (can_inline_edge_p (e, false)
1682 : 453632 : && want_inline_small_function_p (e, false)
1683 : 713744 : && can_inline_edge_by_limits_p (e, 0))
1684 : : {
1685 : 256170 : gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1686 : 256170 : gcc_checking_assert (check_inlinability || e->aux);
1687 : 256170 : update_edge_key (heap, e);
1688 : : }
1689 : 200842 : else if (e->aux)
1690 : : {
1691 : 4836 : report_inline_failed_reason (e);
1692 : 4836 : heap->delete_node ((edge_heap_node_t *) e->aux);
1693 : 4836 : e->aux = NULL;
1694 : : }
1695 : : }
1696 : : /* In case we redirected to unreachable node we only need to remove the
1697 : : fibheap entry. */
1698 : 1325516 : else if (e->aux)
1699 : : {
1700 : 3003 : heap->delete_node ((edge_heap_node_t *) e->aux);
1701 : 3003 : e->aux = NULL;
1702 : : }
1703 : 19096253 : if (e->next_callee)
1704 : : e = e->next_callee;
1705 : : else
1706 : : {
1707 : 4300991 : do
1708 : : {
1709 : 4300991 : if (e->caller == node)
1710 : 724886 : return;
1711 : 3576105 : if (e->caller == update_since)
1712 : 339461 : check_inlinability = false;
1713 : 3576105 : e = e->caller->callers;
1714 : : }
1715 : 3576105 : while (!e->next_callee);
1716 : : e = e->next_callee;
1717 : : }
1718 : : }
1719 : : }
1720 : :
1721 : : /* Enqueue all recursive calls from NODE into priority queue depending on
1722 : : how likely we want to recursively inline the call. */
1723 : :
1724 : : static void
1725 : 20471 : lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1726 : : edge_heap_t *heap)
1727 : : {
1728 : 20471 : struct cgraph_edge *e;
1729 : 20471 : enum availability avail;
1730 : :
1731 : 56287 : for (e = where->callees; e; e = e->next_callee)
1732 : 35816 : if (e->callee == node
1733 : 35816 : || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1734 : 1123 : && avail > AVAIL_INTERPOSABLE))
1735 : : {
1736 : 15360 : inline_badness b (e, -e->sreal_frequency ());
1737 : 15360 : heap->insert (b, e);
1738 : : }
1739 : 56287 : for (e = where->callees; e; e = e->next_callee)
1740 : 35816 : if (!e->inline_failed)
1741 : 7611 : lookup_recursive_calls (node, e->callee, heap);
1742 : 20471 : }
1743 : :
1744 : : /* Decide on recursive inlining: in the case function has recursive calls,
1745 : : inline until body size reaches given argument. If any new indirect edges
1746 : : are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1747 : : is NULL. */
1748 : :
1749 : : static bool
1750 : 1759 : recursive_inlining (struct cgraph_edge *edge,
1751 : : vec<cgraph_edge *> *new_edges)
1752 : : {
1753 : 3518 : cgraph_node *to = (edge->caller->inlined_to
1754 : 1759 : ? edge->caller->inlined_to : edge->caller);
1755 : 1759 : int limit = opt_for_fn (to->decl,
1756 : : param_max_inline_insns_recursive_auto);
1757 : 1759 : inline_badness b (edge, sreal::min ());
1758 : 1759 : edge_heap_t heap (b);
1759 : 1759 : struct cgraph_node *node;
1760 : 1759 : struct cgraph_edge *e;
1761 : 1759 : struct cgraph_node *master_clone = NULL, *next;
1762 : 1759 : int depth = 0;
1763 : 1759 : int n = 0;
1764 : :
1765 : 1759 : node = edge->caller;
1766 : 1759 : if (node->inlined_to)
1767 : 511 : node = node->inlined_to;
1768 : :
1769 : 1759 : if (DECL_DECLARED_INLINE_P (node->decl))
1770 : 353 : limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1771 : :
1772 : : /* Make sure that function is small enough to be considered for inlining. */
1773 : 1759 : if (estimate_size_after_inlining (node, edge) >= limit)
1774 : : return false;
1775 : 1759 : lookup_recursive_calls (node, node, &heap);
1776 : 1759 : if (heap.empty ())
1777 : : return false;
1778 : :
1779 : 1759 : if (dump_file)
1780 : 4 : fprintf (dump_file,
1781 : : " Performing recursive inlining on %s\n", node->dump_name ());
1782 : :
1783 : : /* Do the inlining and update list of recursive call during process. */
1784 : 15562 : while (!heap.empty ())
1785 : : {
1786 : 13823 : struct cgraph_edge *curr = heap.extract_min ();
1787 : 13823 : struct cgraph_node *cnode, *dest = curr->callee;
1788 : :
1789 : 13823 : if (!can_inline_edge_p (curr, true)
1790 : 13823 : || !can_inline_edge_by_limits_p (curr, CAN_INLINE_REPORT | CAN_INLINE_FORCE_LIMITS))
1791 : 0 : continue;
1792 : :
1793 : : /* MASTER_CLONE is produced in the case we already started modified
1794 : : the function. Be sure to redirect edge to the original body before
1795 : : estimating growths otherwise we will be seeing growths after inlining
1796 : : the already modified body. */
1797 : 13823 : if (master_clone)
1798 : : {
1799 : 12060 : curr->redirect_callee (master_clone);
1800 : 12060 : if (edge_growth_cache != NULL)
1801 : 12060 : edge_growth_cache->remove (curr);
1802 : : }
1803 : :
1804 : 13823 : if (estimate_size_after_inlining (node, curr) > limit)
1805 : : {
1806 : 20 : curr->redirect_callee (dest);
1807 : 20 : if (edge_growth_cache != NULL)
1808 : 20 : edge_growth_cache->remove (curr);
1809 : : break;
1810 : : }
1811 : :
1812 : 13803 : depth = 1;
1813 : 13803 : for (cnode = curr->caller;
1814 : 72969 : cnode->inlined_to; cnode = cnode->callers->caller)
1815 : 118332 : if (node->decl
1816 : 59166 : == curr->callee->ultimate_alias_target ()->decl)
1817 : 59166 : depth++;
1818 : :
1819 : 13803 : if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1820 : : {
1821 : 2702 : curr->redirect_callee (dest);
1822 : 2702 : if (edge_growth_cache != NULL)
1823 : 2702 : edge_growth_cache->remove (curr);
1824 : 2702 : continue;
1825 : : }
1826 : :
1827 : 11101 : if (dump_file)
1828 : : {
1829 : 14 : fprintf (dump_file,
1830 : : " Inlining call of depth %i", depth);
1831 : 28 : if (node->count.nonzero_p () && curr->count.initialized_p ())
1832 : : {
1833 : 2 : fprintf (dump_file, " called approx. %.2f times per call",
1834 : 2 : (double)curr->count.to_gcov_type ()
1835 : 2 : / node->count.to_gcov_type ());
1836 : : }
1837 : 14 : fprintf (dump_file, "\n");
1838 : : }
1839 : 11101 : if (!master_clone)
1840 : : {
1841 : : /* We need original clone to copy around. */
1842 : 1459 : master_clone = node->create_clone (node->decl, node->count,
1843 : : false, vNULL, true, NULL, NULL);
1844 : 4149 : for (e = master_clone->callees; e; e = e->next_callee)
1845 : 2690 : if (!e->inline_failed)
1846 : 466 : clone_inlined_nodes (e, true, false, NULL);
1847 : 1459 : curr->redirect_callee (master_clone);
1848 : 1459 : if (edge_growth_cache != NULL)
1849 : 1459 : edge_growth_cache->remove (curr);
1850 : : }
1851 : :
1852 : 11101 : inline_call (curr, false, new_edges, &overall_size, true);
1853 : 11101 : reset_node_cache (node);
1854 : 11101 : lookup_recursive_calls (node, curr->callee, &heap);
1855 : 11101 : n++;
1856 : : }
1857 : :
1858 : 1759 : if (!heap.empty () && dump_file)
1859 : 0 : fprintf (dump_file, " Recursive inlining growth limit met.\n");
1860 : :
1861 : 1759 : if (!master_clone)
1862 : : return false;
1863 : :
1864 : 1459 : if (dump_enabled_p ())
1865 : 4 : dump_printf_loc (MSG_NOTE, edge->call_stmt,
1866 : : "\n Inlined %i times, "
1867 : : "body grown from size %i to %i, time %f to %f\n", n,
1868 : 4 : ipa_size_summaries->get (master_clone)->size,
1869 : 4 : ipa_size_summaries->get (node)->size,
1870 : 4 : ipa_fn_summaries->get (master_clone)->time.to_double (),
1871 : 4 : ipa_fn_summaries->get (node)->time.to_double ());
1872 : :
1873 : : /* Remove master clone we used for inlining. We rely that clones inlined
1874 : : into master clone gets queued just before master clone so we don't
1875 : : need recursion. */
1876 : 19702 : for (node = symtab->first_function (); node != master_clone;
1877 : : node = next)
1878 : : {
1879 : 16784 : next = symtab->next_function (node);
1880 : 16784 : if (node->inlined_to == master_clone)
1881 : 934 : node->remove ();
1882 : : }
1883 : 1459 : master_clone->remove ();
1884 : 1459 : return true;
1885 : 1759 : }
1886 : :
1887 : :
1888 : : /* Given whole compilation unit estimate of INSNS, compute how large we can
1889 : : allow the unit to grow. */
1890 : :
1891 : : static int64_t
1892 : 809899 : compute_max_insns (cgraph_node *node, int insns)
1893 : : {
1894 : 809899 : int max_insns = insns;
1895 : 809899 : if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1896 : : max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1897 : :
1898 : 809899 : return ((int64_t) max_insns
1899 : 809899 : * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1900 : : }
1901 : :
1902 : :
1903 : : /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1904 : :
1905 : : static void
1906 : 757086 : add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> &new_edges)
1907 : : {
1908 : 1515890 : while (new_edges.length () > 0)
1909 : : {
1910 : 1718 : struct cgraph_edge *edge = new_edges.pop ();
1911 : :
1912 : 1718 : gcc_assert (!edge->aux);
1913 : 1718 : gcc_assert (edge->callee);
1914 : 1718 : if (edge->inline_failed
1915 : 1718 : && can_inline_edge_p (edge, true)
1916 : 837 : && want_inline_small_function_p (edge, true)
1917 : 2268 : && can_inline_edge_by_limits_p (edge, CAN_INLINE_REPORT))
1918 : : {
1919 : 550 : inline_badness b (edge, edge_badness (edge, false));
1920 : 550 : edge->aux = heap->insert (b, edge);
1921 : : }
1922 : : }
1923 : 757086 : }
1924 : :
1925 : : /* Remove EDGE from the fibheap. */
1926 : :
1927 : : static void
1928 : 5270 : heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1929 : : {
1930 : 5270 : if (e->aux)
1931 : : {
1932 : 14 : ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1933 : 14 : e->aux = NULL;
1934 : : }
1935 : 5270 : }
1936 : :
1937 : : /* Return true if speculation of edge E seems useful.
1938 : : If ANTICIPATE_INLINING is true, be conservative and hope that E
1939 : : may get inlined. */
1940 : :
1941 : : bool
1942 : 40575 : speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1943 : : {
1944 : : /* If we have already decided to inline the edge, it seems useful. */
1945 : 40575 : if (!e->inline_failed)
1946 : : return true;
1947 : :
1948 : 5955 : enum availability avail;
1949 : 11910 : struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1950 : 5955 : e->caller);
1951 : :
1952 : 5955 : gcc_assert (e->speculative && !e->indirect_unknown_callee);
1953 : :
1954 : 5955 : if (!e->maybe_hot_p ())
1955 : : return false;
1956 : :
1957 : : /* See if IP optimizations found something potentially useful about the
1958 : : function. For now we look only for CONST/PURE flags. Almost everything
1959 : : else we propagate is useless. */
1960 : 5947 : if (avail >= AVAIL_AVAILABLE)
1961 : : {
1962 : 5938 : int ecf_flags = flags_from_decl_or_type (target->decl);
1963 : 5938 : if (ecf_flags & ECF_CONST)
1964 : : {
1965 : 201 : if (!(e->speculative_call_indirect_edge ()->indirect_info
1966 : 201 : ->ecf_flags & ECF_CONST))
1967 : : return true;
1968 : : }
1969 : 5737 : else if (ecf_flags & ECF_PURE)
1970 : : {
1971 : 1943 : if (!(e->speculative_call_indirect_edge ()->indirect_info
1972 : 1943 : ->ecf_flags & ECF_PURE))
1973 : : return true;
1974 : : }
1975 : : }
1976 : : /* If we did not managed to inline the function nor redirect
1977 : : to an ipa-cp clone (that are seen by having local flag set),
1978 : : it is probably pointless to inline it unless hardware is missing
1979 : : indirect call predictor. */
1980 : 3803 : if (!anticipate_inlining && !target->local)
1981 : : return false;
1982 : : /* For overwritable targets there is not much to do. */
1983 : 3123 : if (!can_inline_edge_p (e, false)
1984 : 3123 : || !can_inline_edge_by_limits_p (e, CAN_INLINE_DISREGARD_LIMITS))
1985 : 9 : return false;
1986 : : /* OK, speculation seems interesting. */
1987 : : return true;
1988 : : }
1989 : :
1990 : : /* We know that EDGE is not going to be inlined.
1991 : : See if we can remove speculation. */
1992 : :
1993 : : static void
1994 : 54041 : resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1995 : : {
1996 : 54041 : if (edge->speculative && !speculation_useful_p (edge, false))
1997 : : {
1998 : 89 : struct cgraph_node *node = edge->caller;
1999 : 178 : struct cgraph_node *where = node->inlined_to
2000 : 89 : ? node->inlined_to : node;
2001 : 89 : auto_bitmap updated_nodes;
2002 : :
2003 : 89 : if (edge->count.ipa ().initialized_p ())
2004 : 0 : spec_rem += edge->count.ipa ();
2005 : 89 : cgraph_edge::resolve_speculation (edge);
2006 : 89 : reset_edge_caches (where);
2007 : 89 : ipa_update_overall_fn_summary (where);
2008 : 89 : update_caller_keys (edge_heap, where,
2009 : : updated_nodes, NULL);
2010 : 89 : update_callee_keys (edge_heap, where, NULL,
2011 : : updated_nodes);
2012 : 89 : }
2013 : 54041 : }
2014 : :
2015 : : /* Return true if NODE should be accounted for overall size estimate.
2016 : : Skip all nodes optimized for size so we can measure the growth of hot
2017 : : part of program no matter of the padding. */
2018 : :
2019 : : bool
2020 : 3316068 : inline_account_function_p (struct cgraph_node *node)
2021 : : {
2022 : 3316068 : return (!DECL_EXTERNAL (node->decl)
2023 : 3142000 : && !opt_for_fn (node->decl, optimize_size)
2024 : 6371422 : && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
2025 : : }
2026 : :
2027 : : /* Count number of callers of NODE and store it into DATA (that
2028 : : points to int. Worker for cgraph_for_node_and_aliases. */
2029 : :
2030 : : static bool
2031 : 1380017 : sum_callers (struct cgraph_node *node, void *data)
2032 : : {
2033 : 1380017 : struct cgraph_edge *e;
2034 : 1380017 : int *num_calls = (int *)data;
2035 : :
2036 : 3211602 : for (e = node->callers; e; e = e->next_caller)
2037 : 1831585 : (*num_calls)++;
2038 : 1380017 : return false;
2039 : : }
2040 : :
2041 : : /* We only propagate across edges with non-interposable callee. */
2042 : :
2043 : : inline bool
2044 : 6588765 : ignore_edge_p (struct cgraph_edge *e)
2045 : : {
2046 : 6588765 : enum availability avail;
2047 : 6588765 : e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
2048 : 6588765 : return (avail <= AVAIL_INTERPOSABLE);
2049 : : }
2050 : :
2051 : : /* We use greedy algorithm for inlining of small functions:
2052 : : All inline candidates are put into prioritized heap ordered in
2053 : : increasing badness.
2054 : :
2055 : : The inlining of small functions is bounded by unit growth parameters. */
2056 : :
2057 : : static void
2058 : 223968 : inline_small_functions (void)
2059 : : {
2060 : 223968 : struct cgraph_node *node;
2061 : 223968 : struct cgraph_edge *edge;
2062 : 223968 : inline_badness b;
2063 : 223968 : edge_heap_t edge_heap (b);
2064 : 223968 : auto_bitmap updated_nodes;
2065 : 223968 : int min_size;
2066 : 223968 : auto_vec<cgraph_edge *> new_indirect_edges;
2067 : 223968 : int initial_size = 0;
2068 : 223968 : struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
2069 : 223968 : struct cgraph_edge_hook_list *edge_removal_hook_holder;
2070 : 223968 : new_indirect_edges.create (8);
2071 : :
2072 : 223968 : edge_removal_hook_holder
2073 : 223968 : = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
2074 : :
2075 : : /* Compute overall unit size and other global parameters used by badness
2076 : : metrics. */
2077 : :
2078 : 223968 : max_count = profile_count::uninitialized ();
2079 : 223968 : ipa_reduced_postorder (order, true, ignore_edge_p);
2080 : 223968 : free (order);
2081 : :
2082 : 2018496 : FOR_EACH_DEFINED_FUNCTION (node)
2083 : 1794528 : if (!node->inlined_to)
2084 : : {
2085 : 1794497 : if (!node->alias && node->analyzed
2086 : 1697950 : && (node->has_gimple_body_p () || node->thunk)
2087 : 3492447 : && opt_for_fn (node->decl, optimize))
2088 : : {
2089 : 1281562 : class ipa_fn_summary *info = ipa_fn_summaries->get (node);
2090 : 1281562 : struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
2091 : :
2092 : : /* Do not account external functions, they will be optimized out
2093 : : if not inlined. Also only count the non-cold portion of program. */
2094 : 1281562 : if (inline_account_function_p (node))
2095 : 1180648 : initial_size += ipa_size_summaries->get (node)->size;
2096 : 1281562 : info->growth = estimate_growth (node);
2097 : :
2098 : 1281562 : int num_calls = 0;
2099 : 1281562 : node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2100 : : true);
2101 : 1281562 : if (num_calls == 1)
2102 : 417606 : info->single_caller = true;
2103 : 1281562 : if (dfs && dfs->next_cycle)
2104 : : {
2105 : 6036 : struct cgraph_node *n2;
2106 : 6036 : int id = dfs->scc_no + 1;
2107 : 13409 : for (n2 = node; n2;
2108 : 7373 : n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
2109 : 12073 : if (opt_for_fn (n2->decl, optimize))
2110 : : {
2111 : 12065 : ipa_fn_summary *info2 = ipa_fn_summaries->get
2112 : 12065 : (n2->inlined_to ? n2->inlined_to : n2);
2113 : 12065 : if (info2->scc_no)
2114 : : break;
2115 : 7365 : info2->scc_no = id;
2116 : : }
2117 : : }
2118 : : }
2119 : :
2120 : 3996074 : for (edge = node->callers; edge; edge = edge->next_caller)
2121 : 2201577 : max_count = max_count.max (edge->count.ipa ());
2122 : : }
2123 : 223968 : ipa_free_postorder_info ();
2124 : 223968 : initialize_growth_caches ();
2125 : :
2126 : 223968 : if (dump_file)
2127 : 178 : fprintf (dump_file,
2128 : : "\nDeciding on inlining of small functions. Starting with size %i.\n",
2129 : : initial_size);
2130 : :
2131 : 223968 : overall_size = initial_size;
2132 : 223968 : min_size = overall_size;
2133 : :
2134 : : /* Populate the heap with all edges we might inline. */
2135 : :
2136 : 2018496 : FOR_EACH_DEFINED_FUNCTION (node)
2137 : : {
2138 : 1794528 : bool update = false;
2139 : 1794528 : struct cgraph_edge *next = NULL;
2140 : 1794528 : bool has_speculative = false;
2141 : :
2142 : 1794528 : if (!opt_for_fn (node->decl, optimize)
2143 : : /* With -Og we do not want to perform IPA inlining of small
2144 : : functions since there are no scalar cleanups after it
2145 : : that would realize the anticipated win. All abstraction
2146 : : is removed during early inlining. */
2147 : 1794528 : || opt_for_fn (node->decl, optimize_debug))
2148 : 444152 : continue;
2149 : :
2150 : 1350376 : if (dump_file)
2151 : 806 : fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2152 : :
2153 : 6594185 : for (edge = node->callees; edge; edge = edge->next_callee)
2154 : : {
2155 : 5243809 : if (edge->inline_failed
2156 : 5243778 : && !edge->aux
2157 : 5243702 : && can_inline_edge_p (edge, true)
2158 : 1388860 : && want_inline_small_function_p (edge, true)
2159 : 775622 : && can_inline_edge_by_limits_p (edge, CAN_INLINE_REPORT)
2160 : 6013128 : && edge->inline_failed)
2161 : : {
2162 : 769319 : gcc_assert (!edge->aux);
2163 : 769319 : update_edge_key (&edge_heap, edge);
2164 : : }
2165 : 5243809 : if (edge->speculative)
2166 : 4809 : has_speculative = true;
2167 : : }
2168 : 1350376 : if (has_speculative)
2169 : 20367 : for (edge = node->callees; edge; edge = next)
2170 : : {
2171 : 16461 : next = edge->next_callee;
2172 : 16461 : if (edge->speculative
2173 : 16461 : && !speculation_useful_p (edge, edge->aux != NULL))
2174 : : {
2175 : 533 : cgraph_edge::resolve_speculation (edge);
2176 : 533 : update = true;
2177 : : }
2178 : : }
2179 : 3906 : if (update)
2180 : : {
2181 : 766 : struct cgraph_node *where = node->inlined_to
2182 : 383 : ? node->inlined_to : node;
2183 : 383 : ipa_update_overall_fn_summary (where);
2184 : 383 : reset_edge_caches (where);
2185 : 383 : update_caller_keys (&edge_heap, where,
2186 : : updated_nodes, NULL);
2187 : 383 : update_callee_keys (&edge_heap, where, NULL,
2188 : : updated_nodes);
2189 : 383 : bitmap_clear (updated_nodes);
2190 : : }
2191 : : }
2192 : :
2193 : 223968 : gcc_assert (in_lto_p
2194 : : || !(max_count > 0)
2195 : : || (profile_info && flag_branch_probabilities));
2196 : :
2197 : 2335612 : while (!edge_heap.empty ())
2198 : : {
2199 : 2111644 : int old_size = overall_size;
2200 : 2111644 : struct cgraph_node *where, *callee;
2201 : 2111644 : sreal badness = edge_heap.min_key ().badness;
2202 : 2111644 : sreal current_badness;
2203 : 2111644 : int growth;
2204 : :
2205 : 2111644 : edge = edge_heap.extract_min ();
2206 : 2111644 : gcc_assert (edge->aux);
2207 : 2111644 : edge->aux = NULL;
2208 : 2111644 : if (!edge->inline_failed || !edge->callee->analyzed)
2209 : 1354533 : continue;
2210 : :
2211 : : /* Be sure that caches are maintained consistent.
2212 : : This check is affected by scaling roundoff errors when compiling for
2213 : : IPA this we skip it in that case. */
2214 : 4068750 : if (flag_checking && !edge->callee->count.ipa_p ()
2215 : 4068753 : && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2216 : : {
2217 : 1957193 : sreal cached_badness = edge_badness (edge, false);
2218 : :
2219 : 1957193 : int old_size_est = estimate_edge_size (edge);
2220 : 1957193 : sreal old_time_est = estimate_edge_time (edge);
2221 : 1957193 : int old_hints_est = estimate_edge_hints (edge);
2222 : :
2223 : 1957193 : if (edge_growth_cache != NULL)
2224 : 1957193 : edge_growth_cache->remove (edge);
2225 : 3478353 : reset_node_cache (edge->caller->inlined_to
2226 : : ? edge->caller->inlined_to
2227 : : : edge->caller);
2228 : 1957193 : gcc_assert (old_size_est == estimate_edge_size (edge));
2229 : 1957193 : gcc_assert (old_time_est == estimate_edge_time (edge));
2230 : : /* FIXME:
2231 : :
2232 : : gcc_assert (old_hints_est == estimate_edge_hints (edge));
2233 : :
2234 : : fails with profile feedback because some hints depends on
2235 : : maybe_hot_edge_p predicate and because callee gets inlined to other
2236 : : calls, the edge may become cold.
2237 : : This ought to be fixed by computing relative probabilities
2238 : : for given invocation but that will be better done once whole
2239 : : code is converted to sreals. Disable for now and revert to "wrong"
2240 : : value so enable/disable checking paths agree. */
2241 : 1957193 : edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2242 : :
2243 : : /* When updating the edge costs, we only decrease badness in the keys.
2244 : : Increases of badness are handled lazily; when we see key with out
2245 : : of date value on it, we re-insert it now. */
2246 : 1957193 : current_badness = edge_badness (edge, false);
2247 : 1957193 : gcc_assert (cached_badness == current_badness);
2248 : 1957193 : gcc_assert (current_badness >= badness);
2249 : : }
2250 : : else
2251 : 154367 : current_badness = edge_badness (edge, false);
2252 : 2111560 : if (current_badness != badness)
2253 : : {
2254 : 1435630 : if (edge_heap.min () && current_badness > edge_heap.min_key ().badness)
2255 : : {
2256 : 1300408 : inline_badness b (edge, current_badness);
2257 : 1300408 : edge->aux = edge_heap.insert (b, edge);
2258 : 1300408 : continue;
2259 : 1300408 : }
2260 : : else
2261 : 135222 : badness = current_badness;
2262 : : }
2263 : :
2264 : 811152 : if (!can_inline_edge_p (edge, true)
2265 : 811152 : || !can_inline_edge_by_limits_p (edge, CAN_INLINE_REPORT))
2266 : : {
2267 : 1253 : resolve_noninline_speculation (&edge_heap, edge);
2268 : 1253 : continue;
2269 : : }
2270 : :
2271 : 809899 : callee = edge->callee->ultimate_alias_target ();
2272 : 809899 : growth = estimate_edge_growth (edge);
2273 : 809899 : if (dump_file)
2274 : : {
2275 : 454 : fprintf (dump_file,
2276 : : "\nConsidering %s with %i size\n",
2277 : : callee->dump_name (),
2278 : 454 : ipa_size_summaries->get (callee)->size);
2279 : 908 : fprintf (dump_file,
2280 : : " to be inlined into %s in %s:%i\n"
2281 : : " Estimated badness is %f, frequency %.2f.\n",
2282 : 454 : edge->caller->dump_name (),
2283 : 454 : edge->call_stmt
2284 : 432 : && (LOCATION_LOCUS (gimple_location ((const gimple *)
2285 : : edge->call_stmt))
2286 : : > BUILTINS_LOCATION)
2287 : 423 : ? gimple_filename ((const gimple *) edge->call_stmt)
2288 : : : "unknown",
2289 : 454 : edge->call_stmt
2290 : 432 : ? gimple_lineno ((const gimple *) edge->call_stmt)
2291 : : : -1,
2292 : : badness.to_double (),
2293 : 454 : edge->sreal_frequency ().to_double ());
2294 : 454 : if (edge->count.ipa ().initialized_p ())
2295 : : {
2296 : 0 : fprintf (dump_file, " Called ");
2297 : 0 : edge->count.ipa ().dump (dump_file);
2298 : 0 : fprintf (dump_file, " times\n");
2299 : : }
2300 : 454 : if (dump_flags & TDF_DETAILS)
2301 : 164 : edge_badness (edge, true);
2302 : : }
2303 : :
2304 : 809899 : where = edge->caller;
2305 : :
2306 : 809899 : if (overall_size + growth > compute_max_insns (where, min_size)
2307 : 809899 : && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2308 : : {
2309 : 49021 : edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2310 : 49021 : report_inline_failed_reason (edge);
2311 : 49021 : resolve_noninline_speculation (&edge_heap, edge);
2312 : 49021 : continue;
2313 : : }
2314 : :
2315 : 760878 : if (!want_inline_small_function_p (edge, true))
2316 : : {
2317 : 1923 : resolve_noninline_speculation (&edge_heap, edge);
2318 : 1923 : continue;
2319 : : }
2320 : :
2321 : 758955 : profile_count old_count = callee->count;
2322 : :
2323 : : /* Heuristics for inlining small functions work poorly for
2324 : : recursive calls where we do effects similar to loop unrolling.
2325 : : When inlining such edge seems profitable, leave decision on
2326 : : specific inliner. */
2327 : 758955 : if (edge->recursive_p ())
2328 : : {
2329 : 1759 : if (where->inlined_to)
2330 : 511 : where = where->inlined_to;
2331 : :
2332 : : /* Disable always_inline on self recursive functions.
2333 : : This prevents some inlining bombs such as one in PR113291
2334 : : from exploding.
2335 : : It is not enough to stop inlining in self recursive always_inlines
2336 : : since they may grow large enough so always inlining them even
2337 : : with recursin depth 0 is too much.
2338 : :
2339 : : All sane uses of always_inline should be handled during
2340 : : early optimizations. */
2341 : 1759 : DECL_DISREGARD_INLINE_LIMITS (where->decl) = false;
2342 : :
2343 : 1759 : if (!recursive_inlining (edge,
2344 : 1759 : opt_for_fn (edge->caller->decl,
2345 : : flag_indirect_inlining)
2346 : : ? &new_indirect_edges : NULL))
2347 : : {
2348 : 300 : edge->inline_failed = CIF_RECURSIVE_INLINING;
2349 : 300 : resolve_noninline_speculation (&edge_heap, edge);
2350 : 300 : continue;
2351 : : }
2352 : 1459 : reset_edge_caches (where);
2353 : : /* Recursive inliner inlines all recursive calls of the function
2354 : : at once. Consequently we need to update all callee keys. */
2355 : 1459 : if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2356 : 1434 : add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2357 : 1459 : update_callee_keys (&edge_heap, where, where, updated_nodes);
2358 : 1459 : bitmap_clear (updated_nodes);
2359 : : }
2360 : : else
2361 : : {
2362 : 757196 : struct cgraph_node *outer_node = NULL;
2363 : 757196 : int depth = 0;
2364 : :
2365 : : /* Consider the case where self recursive function A is inlined
2366 : : into B. This is desired optimization in some cases, since it
2367 : : leads to effect similar of loop peeling and we might completely
2368 : : optimize out the recursive call. However we must be extra
2369 : : selective. */
2370 : :
2371 : 757196 : where = edge->caller;
2372 : 1122939 : while (where->inlined_to)
2373 : : {
2374 : 365743 : if (where->decl == callee->decl)
2375 : 7268 : outer_node = where, depth++;
2376 : 365743 : where = where->callers->caller;
2377 : : }
2378 : 758740 : if (outer_node
2379 : 757196 : && !want_inline_self_recursive_call_p (edge, outer_node,
2380 : : true, depth))
2381 : : {
2382 : 1544 : edge->inline_failed
2383 : 1544 : = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2384 : 1544 : ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2385 : 1544 : resolve_noninline_speculation (&edge_heap, edge);
2386 : 1544 : continue;
2387 : : }
2388 : 755652 : else if (depth && dump_file)
2389 : 6 : fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2390 : :
2391 : 755652 : gcc_checking_assert (!callee->inlined_to);
2392 : :
2393 : 755652 : int old_size = ipa_size_summaries->get (where)->size;
2394 : 755652 : sreal old_time = ipa_fn_summaries->get (where)->time;
2395 : :
2396 : 755652 : inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2397 : 755652 : reset_edge_caches (edge->callee);
2398 : 755652 : add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2399 : :
2400 : : /* If caller's size and time increased we do not need to update
2401 : : all edges because badness is not going to decrease. */
2402 : 755652 : if (old_size <= ipa_size_summaries->get (where)->size
2403 : 708927 : && old_time <= ipa_fn_summaries->get (where)->time
2404 : : /* Wrapper penalty may be non-monotonous in this respect.
2405 : : Fortunately it only affects small functions. */
2406 : 1257155 : && !wrapper_heuristics_may_apply (where, old_size))
2407 : 363189 : update_callee_keys (&edge_heap, edge->callee, edge->callee,
2408 : : updated_nodes);
2409 : : else
2410 : 392463 : update_callee_keys (&edge_heap, where,
2411 : : edge->callee,
2412 : : updated_nodes);
2413 : : }
2414 : 757111 : where = edge->caller;
2415 : 757111 : if (where->inlined_to)
2416 : 193066 : where = where->inlined_to;
2417 : :
2418 : : /* Our profitability metric can depend on local properties
2419 : : such as number of inlinable calls and size of the function body.
2420 : : After inlining these properties might change for the function we
2421 : : inlined into (since it's body size changed) and for the functions
2422 : : called by function we inlined (since number of it inlinable callers
2423 : : might change). */
2424 : 757111 : update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2425 : : /* Offline copy count has possibly changed, recompute if profile is
2426 : : available. */
2427 : 757111 : struct cgraph_node *n
2428 : 757111 : = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2429 : 507699 : if (n != edge->callee && n->analyzed && !(n->count == old_count)
2430 : 757145 : && n->count.ipa_p ())
2431 : 34 : update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2432 : 757111 : bitmap_clear (updated_nodes);
2433 : :
2434 : 757111 : if (dump_enabled_p ())
2435 : : {
2436 : 463 : ipa_fn_summary *s = ipa_fn_summaries->get (where);
2437 : :
2438 : : /* dump_printf can't handle %+i. */
2439 : 463 : char buf_net_change[100];
2440 : 463 : snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2441 : : overall_size - old_size);
2442 : :
2443 : 926 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2444 : : " Inlined %C into %C which now has time %f and "
2445 : : "size %i, net change of %s%s.\n",
2446 : : edge->callee, edge->caller,
2447 : : s->time.to_double (),
2448 : 463 : ipa_size_summaries->get (edge->caller)->size,
2449 : : buf_net_change,
2450 : 463 : cross_module_call_p (edge)
2451 : : ? " (cross module)" : "");
2452 : : }
2453 : 757111 : if (min_size > overall_size)
2454 : : {
2455 : 186593 : min_size = overall_size;
2456 : :
2457 : 186593 : if (dump_file)
2458 : 332 : fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2459 : : }
2460 : : }
2461 : :
2462 : 223968 : free_growth_caches ();
2463 : 223968 : if (dump_enabled_p ())
2464 : 436 : dump_printf (MSG_NOTE,
2465 : : "Unit growth for small function inlining: %i->%i (%i%%)\n",
2466 : : initial_size, overall_size,
2467 : 194 : initial_size ? overall_size * 100 / (initial_size) - 100 : 0);
2468 : 223968 : symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2469 : 223968 : }
2470 : :
2471 : : /* Flatten NODE. Performed both during early inlining and
2472 : : at IPA inlining time. */
2473 : :
2474 : : static void
2475 : 703 : flatten_function (struct cgraph_node *node, bool early, bool update)
2476 : : {
2477 : 703 : struct cgraph_edge *e;
2478 : :
2479 : : /* We shouldn't be called recursively when we are being processed. */
2480 : 703 : gcc_assert (node->aux == NULL);
2481 : :
2482 : 703 : node->aux = (void *) node;
2483 : :
2484 : 1624 : for (e = node->callees; e; e = e->next_callee)
2485 : : {
2486 : 921 : struct cgraph_node *orig_callee;
2487 : 921 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2488 : :
2489 : : /* We've hit cycle? It is time to give up. */
2490 : 921 : if (callee->aux)
2491 : : {
2492 : 15 : if (dump_enabled_p ())
2493 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2494 : : "Not inlining %C into %C to avoid cycle.\n",
2495 : : callee, e->caller);
2496 : 15 : if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2497 : 15 : e->inline_failed = CIF_RECURSIVE_INLINING;
2498 : 15 : continue;
2499 : : }
2500 : :
2501 : : /* When the edge is already inlined, we just need to recurse into
2502 : : it in order to fully flatten the leaves. */
2503 : 906 : if (!e->inline_failed)
2504 : : {
2505 : 350 : flatten_function (callee, early, false);
2506 : 350 : continue;
2507 : : }
2508 : :
2509 : : /* Flatten attribute needs to be processed during late inlining. For
2510 : : extra code quality we however do flattening during early optimization,
2511 : : too. */
2512 : 311 : if (!early
2513 : 556 : ? !can_inline_edge_p (e, true)
2514 : 245 : && !can_inline_edge_by_limits_p (e, CAN_INLINE_REPORT)
2515 : 311 : : !can_early_inline_edge_p (e))
2516 : 419 : continue;
2517 : :
2518 : 137 : if (e->recursive_p ())
2519 : : {
2520 : 0 : if (dump_enabled_p ())
2521 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2522 : : "Not inlining: recursive call.\n");
2523 : 0 : continue;
2524 : : }
2525 : :
2526 : 137 : if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2527 : 274 : != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2528 : : {
2529 : 4 : if (dump_enabled_p ())
2530 : 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2531 : : "Not inlining: SSA form does not match.\n");
2532 : 4 : continue;
2533 : : }
2534 : :
2535 : : /* Inline the edge and flatten the inline clone. Avoid
2536 : : recursing through the original node if the node was cloned. */
2537 : 133 : if (dump_enabled_p ())
2538 : 3 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2539 : : " Inlining %C into %C.\n",
2540 : : callee, e->caller);
2541 : 133 : orig_callee = callee;
2542 : 133 : inline_call (e, true, NULL, NULL, false);
2543 : 133 : if (e->callee != orig_callee)
2544 : 94 : orig_callee->aux = (void *) node;
2545 : 133 : flatten_function (e->callee, early, false);
2546 : 133 : if (e->callee != orig_callee)
2547 : 94 : orig_callee->aux = NULL;
2548 : : }
2549 : :
2550 : 703 : node->aux = NULL;
2551 : 703 : cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2552 : 703 : if (update && opt_for_fn (where->decl, optimize))
2553 : 209 : ipa_update_overall_fn_summary (where);
2554 : 703 : }
2555 : :
2556 : : /* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases.
2557 : : DATA points to number of calls originally found so we avoid infinite
2558 : : recursion. */
2559 : :
2560 : : static bool
2561 : 27867 : inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2562 : : hash_set<cgraph_node *> *callers)
2563 : : {
2564 : 27867 : int *num_calls = (int *)data;
2565 : 27867 : bool callee_removed = false;
2566 : :
2567 : 58805 : while (node->callers && !node->inlined_to)
2568 : : {
2569 : 31880 : struct cgraph_node *caller = node->callers->caller;
2570 : :
2571 : 31880 : if (!can_inline_edge_p (node->callers, true)
2572 : 31880 : || !can_inline_edge_by_limits_p (node->callers, CAN_INLINE_REPORT)
2573 : 63760 : || node->callers->recursive_p ())
2574 : : {
2575 : 0 : if (dump_file)
2576 : 0 : fprintf (dump_file, "Uninlinable call found; giving up.\n");
2577 : 0 : *num_calls = 0;
2578 : 0 : return false;
2579 : : }
2580 : :
2581 : 31880 : if (dump_file)
2582 : : {
2583 : 4 : cgraph_node *ultimate = node->ultimate_alias_target ();
2584 : 4 : fprintf (dump_file,
2585 : : "\nInlining %s size %i.\n",
2586 : : ultimate->dump_name (),
2587 : 4 : ipa_size_summaries->get (ultimate)->size);
2588 : 4 : fprintf (dump_file,
2589 : : " Called once from %s %i insns.\n",
2590 : : node->callers->caller->dump_name (),
2591 : 4 : ipa_size_summaries->get (node->callers->caller)->size);
2592 : : }
2593 : :
2594 : : /* Remember which callers we inlined to, delaying updating the
2595 : : overall summary. */
2596 : 31880 : callers->add (node->callers->caller);
2597 : 31880 : inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2598 : 31880 : if (dump_file)
2599 : 4 : fprintf (dump_file,
2600 : : " Inlined into %s which now has %i size\n",
2601 : : caller->dump_name (),
2602 : 4 : ipa_size_summaries->get (caller)->size);
2603 : 31880 : if (!(*num_calls)--)
2604 : : {
2605 : 0 : if (dump_file)
2606 : 0 : fprintf (dump_file, "New calls found; giving up.\n");
2607 : 0 : return callee_removed;
2608 : : }
2609 : 31880 : if (callee_removed)
2610 : : return true;
2611 : : }
2612 : : return false;
2613 : : }
2614 : :
2615 : : /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2616 : : update. */
2617 : :
2618 : : static bool
2619 : 27867 : inline_to_all_callers (struct cgraph_node *node, void *data)
2620 : : {
2621 : 27867 : hash_set<cgraph_node *> callers;
2622 : 27867 : bool res = inline_to_all_callers_1 (node, data, &callers);
2623 : : /* Perform the delayed update of the overall summary of all callers
2624 : : processed. This avoids quadratic behavior in the cases where
2625 : : we have a lot of calls to the same function. */
2626 : 54669 : for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2627 : 81471 : i != callers.end (); ++i)
2628 : 26802 : ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2629 : 27867 : return res;
2630 : 27867 : }
2631 : :
2632 : : /* Output overall time estimate. */
2633 : : static void
2634 : 356 : dump_overall_stats (void)
2635 : : {
2636 : 356 : sreal sum_weighted = 0, sum = 0;
2637 : 356 : struct cgraph_node *node;
2638 : :
2639 : 2234 : FOR_EACH_DEFINED_FUNCTION (node)
2640 : 1878 : if (!node->inlined_to
2641 : 1376 : && !node->alias)
2642 : : {
2643 : 1274 : ipa_fn_summary *s = ipa_fn_summaries->get (node);
2644 : 1274 : if (s != NULL)
2645 : : {
2646 : 1170 : sum += s->time;
2647 : 1170 : if (node->count.ipa ().initialized_p ())
2648 : 14 : sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2649 : : }
2650 : : }
2651 : 356 : fprintf (dump_file, "Overall time estimate: "
2652 : : "%f weighted by profile: "
2653 : : "%f\n", sum.to_double (), sum_weighted.to_double ());
2654 : 356 : }
2655 : :
2656 : : /* Output some useful stats about inlining. */
2657 : :
2658 : : static void
2659 : 178 : dump_inline_stats (void)
2660 : : {
2661 : 178 : int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2662 : 178 : int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2663 : 178 : int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2664 : 178 : int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2665 : 178 : int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2666 : 178 : int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2667 : 178 : int64_t reason[CIF_N_REASONS][2];
2668 : 5518 : sreal reason_freq[CIF_N_REASONS];
2669 : 178 : int i;
2670 : 178 : struct cgraph_node *node;
2671 : :
2672 : 178 : memset (reason, 0, sizeof (reason));
2673 : 5518 : for (i=0; i < CIF_N_REASONS; i++)
2674 : 5340 : reason_freq[i] = 0;
2675 : 1185 : FOR_EACH_DEFINED_FUNCTION (node)
2676 : : {
2677 : 1007 : struct cgraph_edge *e;
2678 : 5839 : for (e = node->callees; e; e = e->next_callee)
2679 : : {
2680 : 4832 : if (e->inline_failed)
2681 : : {
2682 : 4333 : if (e->count.ipa ().initialized_p ())
2683 : 2611 : reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2684 : 4333 : reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2685 : 4333 : reason[(int) e->inline_failed][1] ++;
2686 : 4333 : if (DECL_VIRTUAL_P (e->callee->decl)
2687 : 4333 : && e->count.ipa ().initialized_p ())
2688 : : {
2689 : 0 : if (e->indirect_inlining_edge)
2690 : 0 : noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2691 : : else
2692 : 0 : noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2693 : : }
2694 : 4333 : else if (e->count.ipa ().initialized_p ())
2695 : : {
2696 : 2611 : if (e->indirect_inlining_edge)
2697 : 0 : noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2698 : : else
2699 : 2611 : noninlined_cnt += e->count.ipa ().to_gcov_type ();
2700 : : }
2701 : : }
2702 : 499 : else if (e->count.ipa ().initialized_p ())
2703 : : {
2704 : 0 : if (e->speculative)
2705 : : {
2706 : 0 : if (DECL_VIRTUAL_P (e->callee->decl))
2707 : 0 : inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2708 : : else
2709 : 0 : inlined_speculative += e->count.ipa ().to_gcov_type ();
2710 : : }
2711 : 0 : else if (DECL_VIRTUAL_P (e->callee->decl))
2712 : : {
2713 : 0 : if (e->indirect_inlining_edge)
2714 : 0 : inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2715 : : else
2716 : 0 : inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2717 : : }
2718 : : else
2719 : : {
2720 : 0 : if (e->indirect_inlining_edge)
2721 : 0 : inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2722 : : else
2723 : 0 : inlined_cnt += e->count.ipa ().to_gcov_type ();
2724 : : }
2725 : : }
2726 : : }
2727 : 1100 : for (e = node->indirect_calls; e; e = e->next_callee)
2728 : 93 : if (e->indirect_info->polymorphic
2729 : 93 : & e->count.ipa ().initialized_p ())
2730 : 0 : indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2731 : 93 : else if (e->count.ipa ().initialized_p ())
2732 : 0 : indirect_cnt += e->count.ipa ().to_gcov_type ();
2733 : : }
2734 : 178 : if (max_count.initialized_p ())
2735 : : {
2736 : 0 : fprintf (dump_file,
2737 : : "Inlined %" PRId64 " + speculative "
2738 : : "%" PRId64 " + speculative polymorphic "
2739 : : "%" PRId64 " + previously indirect "
2740 : : "%" PRId64 " + virtual "
2741 : : "%" PRId64 " + virtual and previously indirect "
2742 : : "%" PRId64 "\n" "Not inlined "
2743 : : "%" PRId64 " + previously indirect "
2744 : : "%" PRId64 " + virtual "
2745 : : "%" PRId64 " + virtual and previously indirect "
2746 : : "%" PRId64 " + still indirect "
2747 : : "%" PRId64 " + still indirect polymorphic "
2748 : : "%" PRId64 "\n", inlined_cnt,
2749 : : inlined_speculative, inlined_speculative_ply,
2750 : : inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2751 : : noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2752 : : noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2753 : 0 : fprintf (dump_file, "Removed speculations ");
2754 : 0 : spec_rem.dump (dump_file);
2755 : 0 : fprintf (dump_file, "\n");
2756 : : }
2757 : 178 : dump_overall_stats ();
2758 : 178 : fprintf (dump_file, "\nWhy inlining failed?\n");
2759 : 5696 : for (i = 0; i < CIF_N_REASONS; i++)
2760 : 5340 : if (reason[i][1])
2761 : 149 : fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2762 : : cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2763 : : (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2764 : 178 : }
2765 : :
2766 : : /* Called when node is removed. */
2767 : :
2768 : : static void
2769 : 0 : flatten_remove_node_hook (struct cgraph_node *node, void *data)
2770 : : {
2771 : 0 : if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2772 : : return;
2773 : :
2774 : 0 : hash_set<struct cgraph_node *> *removed
2775 : : = (hash_set<struct cgraph_node *> *) data;
2776 : 0 : removed->add (node);
2777 : : }
2778 : :
2779 : : /* Decide on the inlining. We do so in the topological order to avoid
2780 : : expenses on updating data structures. */
2781 : :
2782 : : static unsigned int
2783 : 223968 : ipa_inline (void)
2784 : : {
2785 : 223968 : struct cgraph_node *node;
2786 : 223968 : int nnodes;
2787 : 223968 : struct cgraph_node **order;
2788 : 223968 : int i, j;
2789 : 223968 : int cold;
2790 : 223968 : bool remove_functions = false;
2791 : :
2792 : 223968 : order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2793 : :
2794 : 223968 : if (dump_file)
2795 : 178 : ipa_dump_fn_summaries (dump_file);
2796 : :
2797 : 223968 : nnodes = ipa_reverse_postorder (order);
2798 : 223968 : spec_rem = profile_count::zero ();
2799 : :
2800 : 7459538 : FOR_EACH_FUNCTION (node)
2801 : : {
2802 : 3505801 : node->aux = 0;
2803 : :
2804 : : /* Recompute the default reasons for inlining because they may have
2805 : : changed during merging. */
2806 : 3505801 : if (in_lto_p)
2807 : : {
2808 : 429212 : for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2809 : : {
2810 : 326622 : gcc_assert (e->inline_failed);
2811 : 326622 : initialize_inline_failed (e);
2812 : : }
2813 : 103788 : for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2814 : 1198 : initialize_inline_failed (e);
2815 : : }
2816 : : }
2817 : :
2818 : 223968 : if (dump_file)
2819 : 178 : fprintf (dump_file, "\nFlattening functions:\n");
2820 : :
2821 : : /* First shrink order array, so that it only contains nodes with
2822 : : flatten attribute. */
2823 : 3729769 : for (i = nnodes - 1, j = i; i >= 0; i--)
2824 : : {
2825 : 3505801 : node = order[i];
2826 : 3505801 : if (node->definition
2827 : : /* Do not try to flatten aliases. These may happen for example when
2828 : : creating local aliases. */
2829 : 3505801 : && !node->alias
2830 : 5203764 : && lookup_attribute ("flatten",
2831 : 1697963 : DECL_ATTRIBUTES (node->decl)) != NULL)
2832 : 85 : order[j--] = order[i];
2833 : : }
2834 : :
2835 : : /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2836 : : nodes with flatten attribute. If there is more than one such
2837 : : node, we need to register a node removal hook, as flatten_function
2838 : : could remove other nodes with flatten attribute. See PR82801. */
2839 : 223968 : struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2840 : 223968 : hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2841 : 223968 : if (j < nnodes - 2)
2842 : : {
2843 : 15 : flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2844 : 15 : node_removal_hook_holder
2845 : 15 : = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2846 : : flatten_removed_nodes);
2847 : : }
2848 : :
2849 : : /* In the first pass handle functions to be flattened. Do this with
2850 : : a priority so none of our later choices will make this impossible. */
2851 : 224053 : for (i = nnodes - 1; i > j; i--)
2852 : : {
2853 : 85 : node = order[i];
2854 : 85 : if (flatten_removed_nodes
2855 : 85 : && flatten_removed_nodes->contains (node))
2856 : 0 : continue;
2857 : :
2858 : : /* Handle nodes to be flattened.
2859 : : Ideally when processing callees we stop inlining at the
2860 : : entry of cycles, possibly cloning that entry point and
2861 : : try to flatten itself turning it into a self-recursive
2862 : : function. */
2863 : 85 : if (dump_file)
2864 : 4 : fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2865 : 85 : flatten_function (node, false, true);
2866 : : }
2867 : :
2868 : 223968 : if (j < nnodes - 2)
2869 : : {
2870 : 15 : symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2871 : 30 : delete flatten_removed_nodes;
2872 : : }
2873 : 223968 : free (order);
2874 : :
2875 : 223968 : if (dump_file)
2876 : 178 : dump_overall_stats ();
2877 : :
2878 : 223968 : inline_small_functions ();
2879 : :
2880 : 223968 : gcc_assert (symtab->state == IPA_SSA);
2881 : 223968 : symtab->state = IPA_SSA_AFTER_INLINING;
2882 : : /* Do first after-inlining removal. We want to remove all "stale" extern
2883 : : inline functions and virtual functions so we really know what is called
2884 : : once. */
2885 : 223968 : symtab->remove_unreachable_nodes (dump_file);
2886 : :
2887 : : /* Inline functions with a property that after inlining into all callers the
2888 : : code size will shrink because the out-of-line copy is eliminated.
2889 : : We do this regardless on the callee size as long as function growth limits
2890 : : are met. */
2891 : 223968 : if (dump_file)
2892 : 178 : fprintf (dump_file,
2893 : : "\nDeciding on functions to be inlined into all callers and "
2894 : : "removing useless speculations:\n");
2895 : :
2896 : : /* Inlining one function called once has good chance of preventing
2897 : : inlining other function into the same callee. Ideally we should
2898 : : work in priority order, but probably inlining hot functions first
2899 : : is good cut without the extra pain of maintaining the queue.
2900 : :
2901 : : ??? this is not really fitting the bill perfectly: inlining function
2902 : : into callee often leads to better optimization of callee due to
2903 : : increased context for optimization.
2904 : : For example if main() function calls a function that outputs help
2905 : : and then function that does the main optimization, we should inline
2906 : : the second with priority even if both calls are cold by themselves.
2907 : :
2908 : : We probably want to implement new predicate replacing our use of
2909 : : maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2910 : : to be hot. */
2911 : 671904 : for (cold = 0; cold <= 1; cold ++)
2912 : : {
2913 : 5633706 : FOR_EACH_DEFINED_FUNCTION (node)
2914 : : {
2915 : 5185770 : struct cgraph_edge *edge, *next;
2916 : 5185770 : bool update=false;
2917 : :
2918 : 5185770 : if (!opt_for_fn (node->decl, optimize)
2919 : 5185770 : || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2920 : 888382 : continue;
2921 : :
2922 : 17507751 : for (edge = node->callees; edge; edge = next)
2923 : : {
2924 : 13210363 : next = edge->next_callee;
2925 : 13210363 : if (edge->speculative && !speculation_useful_p (edge, false))
2926 : : {
2927 : 66 : if (edge->count.ipa ().initialized_p ())
2928 : 0 : spec_rem += edge->count.ipa ();
2929 : 66 : cgraph_edge::resolve_speculation (edge);
2930 : 66 : update = true;
2931 : 66 : remove_functions = true;
2932 : : }
2933 : : }
2934 : 4297388 : if (update)
2935 : : {
2936 : 102 : struct cgraph_node *where = node->inlined_to
2937 : 51 : ? node->inlined_to : node;
2938 : 51 : reset_edge_caches (where);
2939 : 51 : ipa_update_overall_fn_summary (where);
2940 : : }
2941 : 4297388 : if (want_inline_function_to_all_callers_p (node, cold))
2942 : : {
2943 : 24843 : int num_calls = 0;
2944 : 24843 : node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2945 : : true);
2946 : 24843 : while (node->call_for_symbol_and_aliases
2947 : 25785 : (inline_to_all_callers, &num_calls, true))
2948 : : ;
2949 : 24843 : remove_functions = true;
2950 : : }
2951 : : }
2952 : : }
2953 : :
2954 : 223968 : if (dump_enabled_p ())
2955 : 242 : dump_printf (MSG_NOTE,
2956 : : "\nInlined %i calls, eliminated %i functions\n\n",
2957 : : ncalls_inlined, nfunctions_inlined);
2958 : 223968 : if (dump_file)
2959 : 178 : dump_inline_stats ();
2960 : :
2961 : 223968 : if (dump_file)
2962 : 178 : ipa_dump_fn_summaries (dump_file);
2963 : 223968 : return remove_functions ? TODO_remove_functions : 0;
2964 : : }
2965 : :
2966 : : /* Inline always-inline function calls in NODE
2967 : : (which itself is possibly inline). */
2968 : :
2969 : : static bool
2970 : 3127492 : inline_always_inline_functions (struct cgraph_node *node)
2971 : : {
2972 : 3127492 : struct cgraph_edge *e;
2973 : 3127492 : bool inlined = false;
2974 : :
2975 : 12192568 : for (e = node->callees; e; e = e->next_callee)
2976 : : {
2977 : 9065076 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2978 : 9065076 : gcc_checking_assert (!callee->aux || callee->aux == (void *)(size_t)1);
2979 : 9065076 : if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
2980 : : /* Watch for self-recursive cycles. */
2981 : 9065076 : || callee->aux)
2982 : 8600364 : continue;
2983 : :
2984 : 464712 : if (e->recursive_p ())
2985 : : {
2986 : 6 : if (dump_enabled_p ())
2987 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2988 : : " Not inlining recursive call to %C.\n",
2989 : : e->callee);
2990 : 6 : e->inline_failed = CIF_RECURSIVE_INLINING;
2991 : 6 : continue;
2992 : : }
2993 : 464706 : if (callee->definition
2994 : 464677 : && !ipa_fn_summaries->get (callee))
2995 : 221 : compute_fn_summary (callee, true);
2996 : :
2997 : 464706 : if (!can_early_inline_edge_p (e))
2998 : : {
2999 : : /* Set inlined to true if the callee is marked "always_inline" but
3000 : : is not inlinable. This will allow flagging an error later in
3001 : : expand_call_inline in tree-inline.cc. */
3002 : 348 : if (lookup_attribute ("always_inline",
3003 : 348 : DECL_ATTRIBUTES (callee->decl)) != NULL)
3004 : 27 : inlined = true;
3005 : 348 : continue;
3006 : : }
3007 : :
3008 : 464358 : if (dump_enabled_p ())
3009 : 11 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
3010 : : " Inlining %C into %C (always_inline).\n",
3011 : : e->callee, e->caller);
3012 : 464358 : inline_call (e, true, NULL, NULL, false);
3013 : 464358 : callee->aux = (void *)(size_t)1;
3014 : : /* Inline recursively to handle the case where always_inline function was
3015 : : not optimized yet since it is a part of a cycle in callgraph. */
3016 : 464358 : inline_always_inline_functions (e->callee);
3017 : 464358 : callee->aux = NULL;
3018 : 464358 : inlined = true;
3019 : : }
3020 : 3127492 : return inlined;
3021 : : }
3022 : :
3023 : : /* Decide on the inlining. We do so in the topological order to avoid
3024 : : expenses on updating data structures. */
3025 : :
3026 : : static bool
3027 : 2214823 : early_inline_small_functions (struct cgraph_node *node)
3028 : : {
3029 : 2214823 : struct cgraph_edge *e;
3030 : 2214823 : bool inlined = false;
3031 : :
3032 : 9427639 : for (e = node->callees; e; e = e->next_callee)
3033 : : {
3034 : 7212816 : struct cgraph_node *callee = e->callee->ultimate_alias_target ();
3035 : :
3036 : : /* We can encounter not-yet-analyzed function during
3037 : : early inlining on callgraphs with strongly
3038 : : connected components. */
3039 : 7212816 : ipa_fn_summary *s = ipa_fn_summaries->get (callee);
3040 : 7212816 : if (s == NULL || !s->inlinable || !e->inline_failed)
3041 : 3976542 : continue;
3042 : :
3043 : : /* Do not consider functions not declared inline. */
3044 : 3236274 : if (!DECL_DECLARED_INLINE_P (callee->decl)
3045 : 836650 : && !opt_for_fn (node->decl, flag_inline_small_functions)
3046 : 3281068 : && !opt_for_fn (node->decl, flag_inline_functions))
3047 : 44628 : continue;
3048 : :
3049 : 3191646 : if (dump_enabled_p ())
3050 : 155 : dump_printf_loc (MSG_NOTE, e->call_stmt,
3051 : : "Considering inline candidate %C.\n",
3052 : : callee);
3053 : :
3054 : 3191646 : if (!can_early_inline_edge_p (e))
3055 : 86104 : continue;
3056 : :
3057 : 3105542 : if (e->recursive_p ())
3058 : : {
3059 : 7773 : if (dump_enabled_p ())
3060 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
3061 : : " Not inlining: recursive call.\n");
3062 : 7773 : continue;
3063 : : }
3064 : :
3065 : 3097769 : if (!want_early_inline_function_p (e))
3066 : 958656 : continue;
3067 : :
3068 : 2139113 : if (dump_enabled_p ())
3069 : 102 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
3070 : : " Inlining %C into %C.\n",
3071 : : callee, e->caller);
3072 : 2139113 : inline_call (e, true, NULL, NULL, false);
3073 : 2139113 : inlined = true;
3074 : : }
3075 : :
3076 : 2214823 : if (inlined)
3077 : 744271 : ipa_update_overall_fn_summary (node);
3078 : :
3079 : 2214823 : return inlined;
3080 : : }
3081 : :
3082 : : unsigned int
3083 : 2663148 : early_inliner (function *fun)
3084 : : {
3085 : 2663148 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
3086 : 2663148 : struct cgraph_edge *edge;
3087 : 2663148 : unsigned int todo = 0;
3088 : 2663148 : int iterations = 0;
3089 : 2663148 : bool inlined = false;
3090 : :
3091 : 2663148 : if (seen_error ())
3092 : : return 0;
3093 : :
3094 : : /* Do nothing if datastructures for ipa-inliner are already computed. This
3095 : : happens when some pass decides to construct new function and
3096 : : cgraph_add_new_function calls lowering passes and early optimization on
3097 : : it. This may confuse ourself when early inliner decide to inline call to
3098 : : function clone, because function clones don't have parameter list in
3099 : : ipa-prop matching their signature. */
3100 : 2663142 : if (ipa_node_params_sum)
3101 : : return 0;
3102 : :
3103 : 2663134 : if (flag_checking)
3104 : 2663104 : node->verify ();
3105 : 2663134 : node->remove_all_references ();
3106 : :
3107 : : /* Even when not optimizing or not inlining inline always-inline
3108 : : functions. */
3109 : 2663134 : inlined = inline_always_inline_functions (node);
3110 : :
3111 : 2663134 : if (!optimize
3112 : 2238875 : || flag_no_inline
3113 : 2216352 : || !flag_early_inlining)
3114 : : ;
3115 : 2214864 : else if (lookup_attribute ("flatten",
3116 : 2214864 : DECL_ATTRIBUTES (node->decl)) != NULL)
3117 : : {
3118 : : /* When the function is marked to be flattened, recursively inline
3119 : : all calls in it. */
3120 : 135 : if (dump_enabled_p ())
3121 : 0 : dump_printf (MSG_OPTIMIZED_LOCATIONS,
3122 : : "Flattening %C\n", node);
3123 : 135 : flatten_function (node, true, true);
3124 : 135 : inlined = true;
3125 : : }
3126 : : else
3127 : : {
3128 : : /* If some always_inline functions was inlined, apply the changes.
3129 : : This way we will not account always inline into growth limits and
3130 : : moreover we will inline calls from always inlines that we skipped
3131 : : previously because of conditional in can_early_inline_edge_p
3132 : : which prevents some inlining to always_inline. */
3133 : 2214729 : if (inlined)
3134 : : {
3135 : 243077 : timevar_push (TV_INTEGRATION);
3136 : 243077 : todo |= optimize_inline_calls (current_function_decl);
3137 : : /* optimize_inline_calls call above might have introduced new
3138 : : statements that don't have inline parameters computed. */
3139 : 1171222 : for (edge = node->callees; edge; edge = edge->next_callee)
3140 : : {
3141 : : /* We can enounter not-yet-analyzed function during
3142 : : early inlining on callgraphs with strongly
3143 : : connected components. */
3144 : 928145 : ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3145 : 928145 : es->call_stmt_size
3146 : 928145 : = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3147 : 928145 : es->call_stmt_time
3148 : 928145 : = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3149 : : }
3150 : 243077 : ipa_update_overall_fn_summary (node);
3151 : 243077 : inlined = false;
3152 : 243077 : timevar_pop (TV_INTEGRATION);
3153 : : }
3154 : : /* We iterate incremental inlining to get trivial cases of indirect
3155 : : inlining. */
3156 : 5918000 : while (iterations < opt_for_fn (node->decl,
3157 : : param_early_inliner_max_iterations)
3158 : 2959000 : && early_inline_small_functions (node))
3159 : : {
3160 : 744271 : timevar_push (TV_INTEGRATION);
3161 : 744271 : todo |= optimize_inline_calls (current_function_decl);
3162 : :
3163 : : /* Technically we ought to recompute inline parameters so the new
3164 : : iteration of early inliner works as expected. We however have
3165 : : values approximately right and thus we only need to update edge
3166 : : info that might be cleared out for newly discovered edges. */
3167 : 3044915 : for (edge = node->callees; edge; edge = edge->next_callee)
3168 : : {
3169 : : /* We have no summary for new bound store calls yet. */
3170 : 2300644 : ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3171 : 2300644 : es->call_stmt_size
3172 : 2300644 : = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3173 : 2300644 : es->call_stmt_time
3174 : 2300644 : = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3175 : : }
3176 : 744271 : if (iterations < opt_for_fn (node->decl,
3177 : 744271 : param_early_inliner_max_iterations) - 1)
3178 : 94 : ipa_update_overall_fn_summary (node);
3179 : 744271 : timevar_pop (TV_INTEGRATION);
3180 : 744271 : iterations++;
3181 : 744271 : inlined = false;
3182 : : }
3183 : 2214729 : if (dump_file)
3184 : 191 : fprintf (dump_file, "Iterations: %i\n", iterations);
3185 : : }
3186 : :
3187 : 2663134 : if (inlined)
3188 : : {
3189 : 19994 : timevar_push (TV_INTEGRATION);
3190 : 19994 : todo |= optimize_inline_calls (current_function_decl);
3191 : 19994 : timevar_pop (TV_INTEGRATION);
3192 : : }
3193 : :
3194 : 2663134 : fun->always_inline_functions_inlined = true;
3195 : :
3196 : 2663134 : return todo;
3197 : : }
3198 : :
3199 : : /* Do inlining of small functions. Doing so early helps profiling and other
3200 : : passes to be somewhat more effective and avoids some code duplication in
3201 : : later real inlining pass for testcases with very many function calls. */
3202 : :
3203 : : namespace {
3204 : :
3205 : : const pass_data pass_data_early_inline =
3206 : : {
3207 : : GIMPLE_PASS, /* type */
3208 : : "einline", /* name */
3209 : : OPTGROUP_INLINE, /* optinfo_flags */
3210 : : TV_EARLY_INLINING, /* tv_id */
3211 : : PROP_ssa, /* properties_required */
3212 : : 0, /* properties_provided */
3213 : : 0, /* properties_destroyed */
3214 : : 0, /* todo_flags_start */
3215 : : 0, /* todo_flags_finish */
3216 : : };
3217 : :
3218 : : class pass_early_inline : public gimple_opt_pass
3219 : : {
3220 : : public:
3221 : 280114 : pass_early_inline (gcc::context *ctxt)
3222 : 560228 : : gimple_opt_pass (pass_data_early_inline, ctxt)
3223 : : {}
3224 : :
3225 : : /* opt_pass methods: */
3226 : : unsigned int execute (function *) final override;
3227 : :
3228 : : }; // class pass_early_inline
3229 : :
3230 : : unsigned int
3231 : 2663148 : pass_early_inline::execute (function *fun)
3232 : : {
3233 : 2663148 : return early_inliner (fun);
3234 : : }
3235 : :
3236 : : } // anon namespace
3237 : :
3238 : : gimple_opt_pass *
3239 : 280114 : make_pass_early_inline (gcc::context *ctxt)
3240 : : {
3241 : 280114 : return new pass_early_inline (ctxt);
3242 : : }
3243 : :
3244 : : namespace {
3245 : :
3246 : : const pass_data pass_data_ipa_inline =
3247 : : {
3248 : : IPA_PASS, /* type */
3249 : : "inline", /* name */
3250 : : OPTGROUP_INLINE, /* optinfo_flags */
3251 : : TV_IPA_INLINING, /* tv_id */
3252 : : 0, /* properties_required */
3253 : : 0, /* properties_provided */
3254 : : 0, /* properties_destroyed */
3255 : : 0, /* todo_flags_start */
3256 : : ( TODO_dump_symtab ), /* todo_flags_finish */
3257 : : };
3258 : :
3259 : : class pass_ipa_inline : public ipa_opt_pass_d
3260 : : {
3261 : : public:
3262 : 280114 : pass_ipa_inline (gcc::context *ctxt)
3263 : : : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3264 : : NULL, /* generate_summary */
3265 : : NULL, /* write_summary */
3266 : : NULL, /* read_summary */
3267 : : NULL, /* write_optimization_summary */
3268 : : NULL, /* read_optimization_summary */
3269 : : NULL, /* stmt_fixup */
3270 : : 0, /* function_transform_todo_flags_start */
3271 : : inline_transform, /* function_transform */
3272 : 280114 : NULL) /* variable_transform */
3273 : 280114 : {}
3274 : :
3275 : : /* opt_pass methods: */
3276 : 223968 : unsigned int execute (function *) final override { return ipa_inline (); }
3277 : :
3278 : : }; // class pass_ipa_inline
3279 : :
3280 : : } // anon namespace
3281 : :
3282 : : ipa_opt_pass_d *
3283 : 280114 : make_pass_ipa_inline (gcc::context *ctxt)
3284 : : {
3285 : 280114 : return new pass_ipa_inline (ctxt);
3286 : : }
|