Line data Source code
1 : /* Utilities for ipa analysis.
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 : Contributed by Kenneth Zadeck <zadeck@naturalbridge.com>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "config.h"
22 : #include "system.h"
23 : #include "coretypes.h"
24 : #include "backend.h"
25 : #include "tree.h"
26 : #include "gimple.h"
27 : #include "predict.h"
28 : #include "alloc-pool.h"
29 : #include "cgraph.h"
30 : #include "lto-streamer.h"
31 : #include "dumpfile.h"
32 : #include "splay-tree.h"
33 : #include "ipa-utils.h"
34 : #include "symbol-summary.h"
35 : #include "tree-vrp.h"
36 : #include "sreal.h"
37 : #include "ipa-cp.h"
38 : #include "ipa-prop.h"
39 : #include "ipa-fnsummary.h"
40 : #include "tree-eh.h"
41 : #include "gimple-iterator.h"
42 : #include "ipa-modref-tree.h"
43 : #include "ipa-modref.h"
44 : #include "tree-ssa-loop-niter.h"
45 : #include "calls.h"
46 : #include "cfgloop.h"
47 : #include "cfganal.h"
48 :
49 : /* Debugging function for postorder and inorder code. NOTE is a string
50 : that is printed before the nodes are printed. ORDER is an array of
51 : cgraph_nodes that has COUNT useful nodes in it. */
52 :
53 : void
54 82 : ipa_print_order (FILE* out,
55 : const char * note,
56 : struct cgraph_node** order,
57 : int count)
58 : {
59 82 : int i;
60 82 : fprintf (out, "\n\n ordered call graph: %s\n", note);
61 :
62 261 : for (i = count - 1; i >= 0; i--)
63 179 : order[i]->dump (out);
64 82 : fprintf (out, "\n");
65 82 : fflush (out);
66 82 : }
67 :
68 :
/* Environment for the strongly-connected-component walk in searchc.
   Passed explicitly because the recursive walker cannot use nested
   functions in C/C++.  */

struct searchc_env {
  struct cgraph_node **stack;		/* DFS stack of nodes being visited.  */
  struct cgraph_node **result;		/* Output array filled in postorder.  */
  int stack_size;			/* Number of live entries in STACK.  */
  int order_pos;			/* Next free slot in RESULT.  */
  splay_tree nodes_marked_new;		/* Not-yet-visited nodes, keyed by UID.  */
  bool reduce;				/* If true, emit one entry per SCC.  */
  int count;				/* DFS visit (preorder) counter.  */
};
78 :
/* This is an implementation of Tarjan's strongly connected region
   finder as reprinted in Aho Hopcraft and Ullman's The Design and
   Analysis of Computer Programs (1975) pages 192-193.  This version
   has been customized for cgraph_nodes.  The env parameter is because
   it is recursive and there are no nested functions here.  This
   function should only be called from itself or
   ipa_reduced_postorder.  ENV is a stack env and would be
   unnecessary if C had nested functions.  V is the node to start
   searching from.  */

static void
searchc (struct searchc_env* env, struct cgraph_node *v,
	 bool (*ignore_edge) (struct cgraph_edge *))
{
  struct cgraph_edge *edge;
  struct ipa_dfs_info *v_info = (struct ipa_dfs_info *) v->aux;

  /* mark node as old */
  v_info->new_node = false;
  splay_tree_remove (env->nodes_marked_new, v->get_uid ());

  /* Assign DFS number; low_link starts equal to it and is lowered as
     back/cross edges inside the current SCC are discovered.  */
  v_info->dfn_number = env->count;
  v_info->low_link = env->count;
  env->count++;
  env->stack[(env->stack_size)++] = v;
  v_info->on_stack = true;

  for (edge = v->callees; edge; edge = edge->next_callee)
    {
      struct ipa_dfs_info * w_info;
      enum availability avail;
      struct cgraph_node *w = edge->callee->ultimate_alias_target (&avail);

      /* Skip targets we cannot see and edges the caller told us to ignore.  */
      if (!w || (ignore_edge && ignore_edge (edge)))
        continue;

      if (w->aux
	  && (avail >= AVAIL_INTERPOSABLE))
	{
	  w_info = (struct ipa_dfs_info *) w->aux;
	  if (w_info->new_node)
	    {
	      /* Tree edge: recurse and inherit the child's low link.  */
	      searchc (env, w, ignore_edge);
	      v_info->low_link =
		(v_info->low_link < w_info->low_link) ?
		v_info->low_link : w_info->low_link;
	    }
	  else
	    /* Back or cross edge into a node still on the stack:
	       it belongs to the same SCC.  */
	    if ((w_info->dfn_number < v_info->dfn_number)
		&& (w_info->on_stack))
	      v_info->low_link =
		(w_info->dfn_number < v_info->low_link) ?
		w_info->dfn_number : v_info->low_link;
	}
    }


  /* V is the root of an SCC: pop its members off the stack.  */
  if (v_info->low_link == v_info->dfn_number)
    {
      struct cgraph_node *last = NULL;
      struct cgraph_node *x;
      struct ipa_dfs_info *x_info;
      do {
	x = env->stack[--(env->stack_size)];
	x_info = (struct ipa_dfs_info *) x->aux;
	x_info->on_stack = false;
	x_info->scc_no = v_info->dfn_number;

	if (env->reduce)
	  {
	    /* Chain SCC members via next_cycle; only the root goes
	       into the result array.  */
	    x_info->next_cycle = last;
	    last = x;
	  }
	else
	  env->result[env->order_pos++] = x;
      }
      while (v != x);
      if (env->reduce)
	env->result[env->order_pos++] = v;
    }
}
160 :
161 : /* Topsort the call graph by caller relation. Put the result in ORDER.
162 :
163 : The REDUCE flag is true if you want the cycles reduced to single nodes.
164 : You can use ipa_get_nodes_in_cycle to obtain a vector containing all real
165 : call graph nodes in a reduced node.
166 :
167 : Set ALLOW_OVERWRITABLE if nodes with such availability should be included.
168 : IGNORE_EDGE, if non-NULL is a hook that may make some edges insignificant
169 : for the topological sort. */
170 :
171 : int
172 1084054 : ipa_reduced_postorder (struct cgraph_node **order,
173 : bool reduce,
174 : bool (*ignore_edge) (struct cgraph_edge *))
175 : {
176 1084054 : struct cgraph_node *node;
177 1084054 : struct searchc_env env;
178 1084054 : splay_tree_node result;
179 1084054 : env.stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
180 1084054 : env.stack_size = 0;
181 1084054 : env.result = order;
182 1084054 : env.order_pos = 0;
183 1084054 : env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
184 1084054 : env.count = 1;
185 1084054 : env.reduce = reduce;
186 :
187 15285458 : FOR_EACH_DEFINED_FUNCTION (node)
188 : {
189 14201404 : enum availability avail = node->get_availability ();
190 :
191 14201404 : if (avail > AVAIL_INTERPOSABLE
192 14201404 : || avail == AVAIL_INTERPOSABLE)
193 : {
194 : /* Reuse the info if it is already there. */
195 14201376 : struct ipa_dfs_info *info = (struct ipa_dfs_info *) node->aux;
196 14201376 : if (!info)
197 14201376 : info = XCNEW (struct ipa_dfs_info);
198 14201376 : info->new_node = true;
199 14201376 : info->on_stack = false;
200 14201376 : info->next_cycle = NULL;
201 14201376 : node->aux = info;
202 :
203 14201376 : splay_tree_insert (env.nodes_marked_new,
204 14201376 : (splay_tree_key)node->get_uid (),
205 : (splay_tree_value)node);
206 : }
207 : else
208 28 : node->aux = NULL;
209 : }
210 1084054 : result = splay_tree_min (env.nodes_marked_new);
211 10495370 : while (result)
212 : {
213 8327262 : node = (struct cgraph_node *)result->value;
214 8327262 : searchc (&env, node, ignore_edge);
215 8327262 : result = splay_tree_min (env.nodes_marked_new);
216 : }
217 1084054 : splay_tree_delete (env.nodes_marked_new);
218 1084054 : free (env.stack);
219 :
220 1084054 : return env.order_pos;
221 : }
222 :
223 : /* Deallocate all ipa_dfs_info structures pointed to by the aux pointer of call
224 : graph nodes. */
225 :
226 : void
227 1235673 : ipa_free_postorder_info (void)
228 : {
229 1235673 : struct cgraph_node *node;
230 17868613 : FOR_EACH_DEFINED_FUNCTION (node)
231 : {
232 : /* Get rid of the aux information. */
233 16632940 : if (node->aux)
234 : {
235 14201376 : free (node->aux);
236 14201376 : node->aux = NULL;
237 : }
238 : }
239 1235673 : }
240 :
241 : /* Get the set of nodes for the cycle in the reduced call graph starting
242 : from NODE. */
243 :
244 : vec<cgraph_node *>
245 6356843 : ipa_get_nodes_in_cycle (struct cgraph_node *node)
246 : {
247 6356843 : vec<cgraph_node *> v = vNULL;
248 6356843 : struct ipa_dfs_info *node_dfs_info;
249 12749085 : while (node)
250 : {
251 6392242 : v.safe_push (node);
252 6392242 : node_dfs_info = (struct ipa_dfs_info *) node->aux;
253 6392242 : node = node_dfs_info->next_cycle;
254 : }
255 6356843 : return v;
256 : }
257 :
258 : /* Return true iff the CS is an edge within a strongly connected component as
259 : computed by ipa_reduced_postorder. */
260 :
261 : bool
262 16419016 : ipa_edge_within_scc (struct cgraph_edge *cs)
263 : {
264 16419016 : struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux;
265 16419016 : struct ipa_dfs_info *callee_dfs;
266 16419016 : struct cgraph_node *callee = cs->callee->function_symbol ();
267 :
268 16419016 : callee_dfs = (struct ipa_dfs_info *) callee->aux;
269 16419016 : return (caller_dfs
270 16419016 : && callee_dfs
271 16419016 : && caller_dfs->scc_no == callee_dfs->scc_no);
272 : }
273 :
/* One frame of the explicit DFS stack used by ipa_reverse_postorder.  */

struct postorder_stack
{
  struct cgraph_node *node;	/* Node this frame visits.  */
  struct cgraph_edge *edge;	/* Next caller edge left to scan.  */
  int ref;			/* Next referring-alias index to scan.  */
};
280 :
/* Fill array order with all nodes with output flag set in the reverse
   topological order.  Return the number of elements in the array.
   FIXME: While walking, consider aliases, too.  */

int
ipa_reverse_postorder (struct cgraph_node **order)
{
  struct cgraph_node *node, *node2;
  int stack_size = 0;
  int order_pos = 0;
  struct cgraph_edge *edge;
  int pass;
  struct ipa_ref *ref = NULL;

  struct postorder_stack *stack =
    XCNEWVEC (struct postorder_stack, symtab->cgraph_count);

  /* We have to deal with cycles nicely, so use a depth first traversal
     output algorithm.  Ignore the fact that some functions won't need
     to be output and put them into order as well, so we get dependencies
     right through inline functions.  */
  FOR_EACH_FUNCTION (node)
    node->aux = NULL;
  /* Pass 0 starts DFS only from "root" nodes (not address taken, not
     inlined, not aliases, not only-called-directly); pass 1 mops up
     whatever remains, e.g. nodes only reachable through cycles.  */
  for (pass = 0; pass < 2; pass++)
    FOR_EACH_FUNCTION (node)
      if (!node->aux
	  && (pass
	      || (!node->address_taken
		  && !node->inlined_to
		  && !node->alias && !node->thunk
		  && !node->only_called_directly_p ())))
	{
	  stack_size = 0;
	  stack[stack_size].node = node;
	  stack[stack_size].edge = node->callers;
	  stack[stack_size].ref = 0;
	  node->aux = (void *)(size_t)1;	/* Mark as visited.  */
	  while (stack_size >= 0)
	    {
	      while (true)
		{
		  node2 = NULL;
		  /* Find the next unprocessed caller of the top node.  */
		  while (stack[stack_size].edge && !node2)
		    {
		      edge = stack[stack_size].edge;
		      node2 = edge->caller;
		      stack[stack_size].edge = edge->next_caller;
		    }
		  /* Also walk into nodes that refer to this one as an
		     alias, so the alias target is emitted first.  */
		  for (; stack[stack_size].node->iterate_referring (
						       stack[stack_size].ref,
						       ref) && !node2;
		       stack[stack_size].ref++)
		    {
		      if (ref->use == IPA_REF_ALIAS)
			node2 = dyn_cast <cgraph_node *> (ref->referring);
		    }
		  if (!node2)
		    break;
		  /* Push unvisited predecessor and continue the DFS there.  */
		  if (!node2->aux)
		    {
		      stack[++stack_size].node = node2;
		      stack[stack_size].edge = node2->callers;
		      stack[stack_size].ref = 0;
		      node2->aux = (void *)(size_t)1;
		    }
		}
	      /* All predecessors done: emit in postorder and pop.  */
	      order[order_pos++] = stack[stack_size--].node;
	    }
	}
  free (stack);
  FOR_EACH_FUNCTION (node)
    node->aux = NULL;
  return order_pos;
}
355 :
356 :
357 :
358 : /* Given a memory reference T, will return the variable at the bottom
359 : of the access. Unlike get_base_address, this will recurse through
360 : INDIRECT_REFS. */
361 :
362 : tree
363 4633851 : get_base_var (tree t)
364 : {
365 4633851 : while (!SSA_VAR_P (t)
366 6878821 : && (!CONSTANT_CLASS_P (t))
367 : && TREE_CODE (t) != LABEL_DECL
368 : && TREE_CODE (t) != FUNCTION_DECL
369 : && TREE_CODE (t) != CONST_DECL
370 9477954 : && TREE_CODE (t) != CONSTRUCTOR)
371 : {
372 4844103 : t = TREE_OPERAND (t, 0);
373 : }
374 4633851 : return t;
375 : }
376 :
377 : /* Scale function of calls in NODE by ratio ORIG_COUNT/NODE->count. */
378 :
379 : void
380 0 : scale_ipa_profile_for_fn (struct cgraph_node *node, profile_count orig_count)
381 : {
382 0 : profile_count to = node->count;
383 0 : profile_count::adjust_for_ipa_scaling (&to, &orig_count);
384 0 : struct cgraph_edge *e;
385 :
386 0 : for (e = node->callees; e; e = e->next_callee)
387 0 : e->count = e->count.apply_scale (to, orig_count);
388 0 : for (e = node->indirect_calls; e; e = e->next_callee)
389 0 : e->count = e->count.apply_scale (to, orig_count);
390 0 : }
391 :
/* SRC and DST are going to be merged.  Take SRC's profile and merge it into
   DST so it is not going to be lost.  Possibly destroy SRC's body on the way
   unless PRESERVE_BODY is set.  */

void
ipa_merge_profiles (struct cgraph_node *dst,
		    struct cgraph_node *src,
		    bool preserve_body)
{
  tree oldsrcdecl = src->decl;
  struct function *srccfun, *dstcfun;
  bool match = true;
  bool copy_counts = false;

  /* Nothing to merge unless both symbols have a definition.  */
  if (!src->definition
      || !dst->definition)
    return;

  if (src->frequency < dst->frequency)
    src->frequency = dst->frequency;

  /* Time profiles are merged.  */
  if (dst->tp_first_run > src->tp_first_run && src->tp_first_run)
    dst->tp_first_run = src->tp_first_run;

  if (src->profile_id && !dst->profile_id)
    dst->profile_id = src->profile_id;

  /* Merging zero profile to dst is no-op.  */
  if (src->count.ipa () == profile_count::zero ())
    return;

  /* FIXME when we merge in unknown profile, we ought to set counts as
     unsafe.  */
  if (!src->count.initialized_p ()
      || !(src->count.ipa () == src->count))
    return;
  profile_count orig_count = dst->count;

  /* Either sum the profiles if both are IPA and not global0, or
     pick more informative one (that is nonzero IPA if other is
     uninitialized, guessed or global0).   */

  if ((dst->count.ipa ().nonzero_p ()
       || src->count.ipa ().nonzero_p ())
      && dst->count.ipa ().initialized_p ()
      && src->count.ipa ().initialized_p ())
    dst->count = dst->count.ipa () + src->count.ipa ();
  else if (dst->count.ipa ().initialized_p ())
    ;
  else if (src->count.ipa ().initialized_p ())
    {
      /* DST had no useful profile; take SRC's counts wholesale.  */
      copy_counts = true;
      dst->count = src->count.ipa ();
    }

  /* If no updating needed return early.  */
  if (dst->count == orig_count)
    return;

  if (symtab->dump_file)
    {
      fprintf (symtab->dump_file, "Merging profiles of %s count:",
	       src->dump_name ());
      src->count.dump (symtab->dump_file);
      fprintf (symtab->dump_file, " to %s count:",
	       dst->dump_name ());
      orig_count.dump (symtab->dump_file);
      fprintf (symtab->dump_file, " resulting count:");
      dst->count.dump (symtab->dump_file);
      fprintf (symtab->dump_file, "\n");
    }

  /* First handle functions with no gimple body.  */
  if (dst->thunk || dst->alias
      || src->thunk || src->alias)
    {
      scale_ipa_profile_for_fn (dst, orig_count);
      return;
    }

  /* This is ugly.  We need to get both function bodies into memory.
     If declaration is merged, we need to duplicate it to be able
     to load body that is being replaced.  This makes symbol table
     temporarily inconsistent.  */
  if (src->decl == dst->decl)
    {
      struct lto_in_decl_state temp;
      struct lto_in_decl_state *state;

      /* We are going to move the decl, we want to remove its file decl data.
	 and link these with the new decl.  */
      temp.fn_decl = src->decl;
      lto_in_decl_state **slot
	= src->lto_file_data->function_decl_states->find_slot (&temp,
							       NO_INSERT);
      state = *slot;
      src->lto_file_data->function_decl_states->clear_slot (slot);
      gcc_assert (state);

      /* Duplicate the decl and be sure it does not link into body of DST.  */
      src->decl = copy_node (src->decl);
      DECL_STRUCT_FUNCTION (src->decl) = NULL;
      DECL_ARGUMENTS (src->decl) = NULL;
      DECL_INITIAL (src->decl) = NULL;
      DECL_RESULT (src->decl) = NULL;

      /* Associate the decl state with new declaration, so LTO streamer
	 can look it up.  */
      state->fn_decl = src->decl;
      slot
	= src->lto_file_data->function_decl_states->find_slot (state, INSERT);
      gcc_assert (!*slot);
      *slot = state;
    }
  src->get_untransformed_body ();
  dst->get_untransformed_body ();
  srccfun = DECL_STRUCT_FUNCTION (src->decl);
  dstcfun = DECL_STRUCT_FUNCTION (dst->decl);
  /* The CFG-level merge below requires the two bodies to have identical
     CFG shape; verify that before touching anything.  */
  if (n_basic_blocks_for_fn (srccfun)
      != n_basic_blocks_for_fn (dstcfun))
    {
      if (symtab->dump_file)
	fprintf (symtab->dump_file,
		 "Giving up; number of basic block mismatch.\n");
      match = false;
    }
  else if (last_basic_block_for_fn (srccfun)
	   != last_basic_block_for_fn (dstcfun))
    {
      if (symtab->dump_file)
	fprintf (symtab->dump_file,
		 "Giving up; last block mismatch.\n");
      match = false;
    }
  else
    {
      basic_block srcbb, dstbb;
      struct cgraph_edge *e, *e2;

      for (e = dst->callees, e2 = src->callees; e && e2 && match;
	   e2 = e2->next_callee, e = e->next_callee)
	{
	  if (gimple_bb (e->call_stmt)->index
	      != gimple_bb (e2->call_stmt)->index)
	    {
	      if (symtab->dump_file)
		fprintf (symtab->dump_file,
			 "Giving up; call stmt mismatch.\n");
	      match = false;
	    }
	}
      if (e || e2)
	{
	  if (symtab->dump_file)
	    fprintf (symtab->dump_file,
		     "Giving up; number of calls differs.\n");
	  match = false;
	}
      for (e = dst->indirect_calls, e2 = src->indirect_calls; e && e2 && match;
	   e2 = e2->next_callee, e = e->next_callee)
	{
	  if (gimple_bb (e->call_stmt)->index
	      != gimple_bb (e2->call_stmt)->index)
	    {
	      if (symtab->dump_file)
		fprintf (symtab->dump_file,
			 "Giving up; indirect call stmt mismatch.\n");
	      match = false;
	    }
	}
      if (e || e2)
	{
	  if (symtab->dump_file)
	    fprintf (symtab->dump_file,
		     "Giving up; number of indirect calls differs.\n");
	  match=false;
	}

      /* Finally check that the basic blocks pair up with identical
	 successor structure.  */
      if (match)
	FOR_ALL_BB_FN (srcbb, srccfun)
	  {
	    unsigned int i;

	    dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
	    if (dstbb == NULL)
	      {
		if (symtab->dump_file)
		  fprintf (symtab->dump_file,
			   "No matching block for bb %i.\n",
			   srcbb->index);
		match = false;
		break;
	      }
	    if (EDGE_COUNT (srcbb->succs) != EDGE_COUNT (dstbb->succs))
	      {
		if (symtab->dump_file)
		  fprintf (symtab->dump_file,
			   "Edge count mismatch for bb %i.\n",
			   srcbb->index);
		match = false;
		break;
	      }
	    for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
	      {
		edge srce = EDGE_SUCC (srcbb, i);
		edge dste = EDGE_SUCC (dstbb, i);
		if (srce->dest->index != dste->dest->index)
		  {
		    if (symtab->dump_file)
		      fprintf (symtab->dump_file,
			       "Succ edge mismatch for bb %i.\n",
			       srce->dest->index);
		    match = false;
		    break;
		  }
	      }
	  }
    }
  if (match)
    {
      struct cgraph_edge *e, *e2;
      basic_block srcbb, dstbb;

      /* Function and global profile may be out of sync.  First scale it same
	 way as fixup_cfg would.  */
      profile_count srcnum = src->count;
      profile_count srcden = ENTRY_BLOCK_PTR_FOR_FN (srccfun)->count;
      bool srcscale = srcnum.initialized_p () && !(srcnum == srcden);
      profile_count dstnum = orig_count;
      profile_count dstden = ENTRY_BLOCK_PTR_FOR_FN (dstcfun)->count;
      bool dstscale = !copy_counts
		      && dstnum.initialized_p () && !(dstnum == dstden);

      /* TODO: merge also statement histograms.  */
      FOR_ALL_BB_FN (srcbb, srccfun)
	{
	  unsigned int i;

	  dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);

	  /* Either sum the counts and rescale edge probabilities
	     proportionally, or (when copying) take SRC's values.  */
	  profile_count srccount = srcbb->count;
	  if (srcscale)
	    srccount = srccount.apply_scale (srcnum, srcden);
	  if (dstscale)
	    dstbb->count = dstbb->count.apply_scale (dstnum, dstden);

	  if (copy_counts)
	    {
	      dstbb->count = srccount;
	      for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
		{
		  edge srce = EDGE_SUCC (srcbb, i);
		  edge dste = EDGE_SUCC (dstbb, i);
		  if (srce->probability.initialized_p ())
		    dste->probability = srce->probability;
		}
	    }
	  else
	    {
	      for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
		{
		  edge srce = EDGE_SUCC (srcbb, i);
		  edge dste = EDGE_SUCC (dstbb, i);
		  /* Weighted average of the two edge probabilities,
		     weighted by the respective block counts.  */
		  profile_count sum =
		    dstbb->count.ipa () + srccount.ipa ();
		  if (sum.nonzero_p ())
		    dste->probability =
		      dste->probability * dstbb->count.ipa ().probability_in
						 (sum)
		      + srce->probability * srcbb->count.ipa ().probability_in
						 (sum);
		}
	      dstbb->count = dstbb->count.ipa () + srccount.ipa ();
	    }
	}
      push_cfun (dstcfun);
      update_max_bb_count ();
      compute_function_frequency ();
      pop_cfun ();
      /* Refresh call edge counts from the merged block counts.  */
      for (e = dst->callees; e; e = e->next_callee)
	{
	  if (e->speculative)
	    continue;
	  e->count = gimple_bb (e->call_stmt)->count;
	}
      for (e = dst->indirect_calls, e2 = src->indirect_calls; e;
	   e2 = (e2 ? e2->next_callee : NULL), e = e->next_callee)
	{
	  if (!e->speculative && !e2->speculative)
	    {
	      /* FIXME: we need to also merge ipa-profile histograms
		 because with LTO merging happens from lto-symtab before
		 these are converted to indirect edges.  */
	      e->count = gimple_bb (e->call_stmt)->count;
	      continue;
	    }

	  /* When copying just remove all speculations on dst and then copy
	     one from src.  */
	  if (copy_counts)
	    {
	      while (e->speculative)
		cgraph_edge::resolve_speculation (e, NULL);
	      e->count = gimple_bb (e->call_stmt)->count;
	      if (e2->speculative)
		{
		  for (cgraph_edge *e3 = e2->first_speculative_call_target ();
		       e3;
		       e3 = e3->next_speculative_call_target ())
		    {
		      cgraph_edge *ns;
		      ns = e->make_speculative
			     (dyn_cast <cgraph_node *>
				(e3->speculative_call_target_ref ()->referred),
			      e3->count, e3->speculative_id);
		      /* Target may differ from ref (for example it may be
			 redirected to local alias.  */
		      ns->redirect_callee (e3->callee);
		    }
		}
	      continue;
	    }

	  /* Iterate all speculations in SRC, see if corresponding ones exist
	     in DST and if so, sum the counts.  Otherwise create new
	     speculation.  */
	  int max_spec = 0;
	  for (cgraph_edge *e3 = e->first_speculative_call_target ();
	       e3;
	       e3 = e3->next_speculative_call_target ())
	    if (e3->speculative_id > max_spec)
	      max_spec = e3->speculative_id;
	  for (cgraph_edge *e3 = e2->first_speculative_call_target ();
	       e3;
	       e3 = e3->next_speculative_call_target ())
	    {
	      cgraph_edge *te
		 = e->speculative_call_for_target
			 (dyn_cast <cgraph_node *>
			    (e3->speculative_call_target_ref ()->referred));
	      if (te)
		te->count = te->count + e3->count;
	      else
		{
		  e->count = e->count + e3->count;
		  cgraph_edge *ns;
		  /* Offset new speculative IDs past the existing ones so
		     they stay unique on the merged edge.  */
		  ns = e->make_speculative
			 (dyn_cast <cgraph_node *>
			    (e3->speculative_call_target_ref ()
			     ->referred),
			  e3->count,
			  e3->speculative_id + max_spec + 1);
		  /* Target may differ from ref (for example it may be
		     redirected to local alias.  */
		  ns->redirect_callee (e3->callee);
		}
	    }
	}
      if (!preserve_body)
	src->release_body ();
      /* Update summary.  */
      compute_fn_summary (dst, 0);
    }
  /* We can't update CFG profile, but we can scale IPA profile.  CFG
     will be scaled according to dst->count after IPA passes.  */
  else
    scale_ipa_profile_for_fn (dst, orig_count);
  src->decl = oldsrcdecl;
}
762 :
/* Return true if a call to DEST is known to be a self-recursive
   call within FUNC.  */

bool
recursive_call_p (tree func, tree dest)
{
  struct cgraph_node *dest_node = cgraph_node::get_create (dest);
  struct cgraph_node *cnode = cgraph_node::get_create (func);
  ipa_ref *alias;
  enum availability avail;

  gcc_assert (!cnode->alias);
  /* Not recursive unless DEST ultimately resolves to FUNC itself.  */
  if (cnode != dest_node->ultimate_alias_target (&avail))
    return false;
  if (avail >= AVAIL_AVAILABLE)
    return true;
  /* Interposable target: only recursive if no interposition can change
     semantics.  */
  if (!dest_node->semantically_equivalent_p (cnode))
    return false;
  /* If there is only one way to call the function or we know all of them
     are semantically equivalent, we still can consider call recursive.  */
  FOR_EACH_ALIAS (cnode, alias)
    if (!dest_node->semantically_equivalent_p (alias->referring))
      return false;
  return true;
}
788 :
789 : /* Return true if stmt may terminate execution of function.
790 : If assume_return_or_eh we can further assume that the function ends
791 : either by retrn statement or EH (no trapping or infinite loops). */
792 :
793 : bool
794 71942907 : stmt_may_terminate_function_p (function *fun, gimple *stmt, bool assume_return_or_eh)
795 : {
796 71942907 : if (stmt_can_throw_external (fun, stmt))
797 : return true;
798 69884762 : if (assume_return_or_eh)
799 : return false;
800 814771 : gasm *astmt = dyn_cast <gasm *> (stmt);
801 20 : if (astmt && gimple_asm_volatile_p (astmt))
802 : return true;
803 814751 : if (gimple_could_trap_p (stmt))
804 : return true;
805 753591 : if (gcall *call = dyn_cast <gcall *> (stmt))
806 : {
807 205237 : int flags = gimple_call_flags (call);
808 205237 : if (flags & (ECF_PURE | ECF_CONST) && ! (flags & ECF_LOOPING_CONST_OR_PURE))
809 : return false;
810 72922 : modref_summary *s = get_modref_function_summary (call, NULL);
811 72922 : if (s && !s->side_effects)
812 : return false;
813 : return true;
814 : }
815 : return false;
816 : }
817 :
/* Return bitmap of all basic blocks whose first statements are known to
   execute on every invocation of the function.

   If assume_return_or_eh we can further assume that the function ends
   either by a return statement or EH (no trapping or infinite loops).
   This is useful when summarizing a function in passes like ipa-modref.

   Setting assume_return_or_eh to false is used to prove that a given
   statement will be executed even if the function gets into an infinite
   loop or traps.  */
828 : bitmap
829 4806398 : find_always_executed_bbs (function *fun, bool assume_return_or_eh)
830 : {
831 4806398 : auto_vec<basic_block, 20> stack;
832 4806398 : auto_vec<basic_block, 20> terminating_bbs;
833 4806398 : hash_set<basic_block> visited;
834 4806398 : hash_set<basic_block> terminating_bbs_set;
835 4806398 : edge e;
836 4806398 : edge_iterator ei;
837 4806398 : int flags = flags_from_decl_or_type (fun->decl);
838 : /* PUre and const functions always return. */
839 4806398 : assume_return_or_eh |= (flags & (ECF_CONST|ECF_PURE)) && !(flags & ECF_LOOPING_CONST_OR_PURE);
840 3628804 : if (!assume_return_or_eh)
841 54249 : mark_dfs_back_edges (fun);
842 :
843 : /* First walk all BBs reachable from entry stopping on statements that may
844 : terminate execution. Everything past this statement is not going to be executed
845 : each invocation. */
846 4806398 : stack.safe_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
847 44787502 : while (!stack.is_empty ())
848 : {
849 35174706 : basic_block bb = stack.pop ();
850 35174706 : bool found = false, found_exit = false;
851 35174706 : if (bb->index == EXIT_BLOCK)
852 3389386 : continue;
853 68260298 : FOR_EACH_EDGE (e, ei, bb->succs)
854 : {
855 40733058 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
856 : {
857 : found_exit = true;
858 : break;
859 : }
860 : /* Watch for infinite loops. */
861 36474978 : if (!found
862 682448 : && !assume_return_or_eh && (e->flags & EDGE_DFS_BACK))
863 : {
864 43870 : if (!dom_info_available_p (CDI_DOMINATORS))
865 935 : calculate_dominance_info (CDI_DOMINATORS);
866 : /* If this is not a loop latch edge it is an irreducible region.
867 : Assume that it is infinite.
868 : TODO: with C++ forced progression we can still walk the
869 : irreducible region and see if it contains any side effects.
870 : Similarly for loops. -ffinite-loops does not really imply
871 : this since we allow inlining across -ffinite-loops bondary
872 : and thus it can be used only as a loop flag. */
873 43870 : if (e->dest->loop_father->header != e->dest
874 43870 : || !dominated_by_p (CDI_DOMINATORS, bb, e->dest))
875 : found = true;
876 43870 : else if (!finite_loop_p (e->dest->loop_father))
877 8 : found = true;
878 : }
879 : }
880 31785320 : if (!assume_return_or_eh
881 31785320 : && (EDGE_COUNT (bb->succs) == 0 || (bb->flags & BB_IRREDUCIBLE_LOOP)))
882 : found = true;
883 31785320 : for (gimple_stmt_iterator si = gsi_start_nondebug_after_labels_bb (bb);
884 101424168 : !gsi_end_p (si) && !found; gsi_next_nondebug (&si))
885 71826224 : if (stmt_may_terminate_function_p (fun, gsi_stmt (si), assume_return_or_eh))
886 : {
887 : found = true;
888 : break;
889 : }
890 31785320 : if (found)
891 : {
892 2198289 : visited.add (EXIT_BLOCK_PTR_FOR_FN (fun));
893 2198289 : if (!found_exit)
894 : {
895 1456335 : terminating_bbs.safe_push (bb);
896 1456335 : terminating_bbs_set.add (bb);
897 : }
898 : }
899 : else
900 68066653 : FOR_EACH_EDGE (e, ei, bb->succs)
901 38479622 : if (!visited.add (e->dest))
902 30368308 : stack.safe_push (e->dest);
903 : }
904 :
905 : /* Next walk from exit block and find all articulations in the CFG.
906 : Add all terminating basic blocks as "fake" predecessors of the
907 : exit block. */
908 :
909 4806398 : bitmap ret = BITMAP_ALLOC (NULL);
910 4806398 : bitmap_tree_view (ret);
911 : /* A degenerated case when there is no path to exit. */
912 4806398 : if (!visited.contains (EXIT_BLOCK_PTR_FOR_FN (fun)))
913 : {
914 114386 : bitmap_set_bit (ret,
915 : single_succ_edge
916 57193 : (ENTRY_BLOCK_PTR_FOR_FN (fun))->dest->index);
917 57193 : return ret;
918 : }
919 :
920 4749205 : struct astate
921 : {
922 : unsigned int dfs_preorder;
923 : unsigned int dfs_postorder;
924 :
925 : unsigned int low, high;
926 : };
927 :
928 4749205 : struct worklist
929 : {
930 : basic_block bb;
931 : astate *cstate;
932 : };
933 :
934 4749205 : struct obstack state_obstack;
935 4749205 : gcc_obstack_init (&state_obstack);
936 4749205 : hash_map<basic_block, astate *> state;
937 4749205 : auto_vec<worklist, 32> worklist_vec;
938 4749205 : unsigned int next_dfs_num = 1;
939 :
940 : /* Always executed blocks are blocks that are on every path from entry to exit.
941 : We proceed in two steps. First we do backward DFS walk (so we know that entry
942 : is always reached) and record preorder and postorder visiting times.
943 :
944 : In second step we proceed in postorder and for every block A we compute
945 : minimal preorder (A.low) and maximal postorder (A.high) of block reachable
946 : from the BBs in DFS subtree of A. If A is always executed there are no
947 : edges out of this subtree. This can be tested by checking that A.low == A.preorder
948 :    and A.high == A.postorder.
949 :
950 : This is first step. Do backward DFS walk and record preorder, postorder
951 : and predecessor info. Initialize stack in postorder. */
952 4749205 : worklist we = {EXIT_BLOCK_PTR_FOR_FN (fun), NULL};
953 4749205 : worklist_vec.safe_push (we);
954 76750863 : while (!worklist_vec.is_empty ())
955 : {
956 67252453 : worklist &w = worklist_vec.last ();
957 67252453 : basic_block bb = w.bb;
958 67252453 : astate *cstate = w.cstate;
959 :
960 67252453 : if (!cstate)
961 : {
962 37899227 : astate **slot = &state.get_or_insert (bb);
963 :
964 37899227 : cstate = *slot;
965 : /* Already processed by DFS? */
966 37899227 : if (cstate)
967 : {
968 8546001 : worklist_vec.pop ();
969 37899227 : continue;
970 : }
971 : /* DFS is visiting BB for first time. */
972 29353226 : *slot = cstate = XOBNEW (&state_obstack, struct astate);
973 29353226 : cstate->low = cstate->high = cstate->dfs_preorder = next_dfs_num++;
974 29353226 : w.cstate = cstate;
975 : /* Exit block is special; process all fake edges we identified. */
976 29353226 : if (bb == EXIT_BLOCK_PTR_FOR_FN (fun))
977 15703950 : for (basic_block bb2 : terminating_bbs)
978 : {
979 1456335 : worklist we = {bb2, NULL};
980 1456335 : worklist_vec.safe_push (we);
981 : }
982 66552953 : FOR_EACH_EDGE (e, ei, bb->preds)
983 37199727 : if (visited.contains (e->src))
984 : {
985 31693687 : worklist we = {e->src, NULL};
986 31693687 : worklist_vec.safe_push (we);
987 : }
988 : /* Keep BB on worklist so we process it last time. */
989 29353226 : continue;
990 29353226 : }
991 : /* We are finished with processing reachable BBs, see if we have articulation. */
992 29353226 : worklist_vec.pop ();
993 29353226 : cstate->high = cstate->dfs_postorder = next_dfs_num++;
994 29353226 : stack.safe_push (bb);
995 : }
996 : /* This is the final postorder walk. Determine low and high values and mark
997 : always executed blocks. */
998 43600841 : for (basic_block bb : stack)
999 : {
1000 29353226 : astate *cstate = *state.get (bb);
1001 66552953 : FOR_EACH_EDGE (e, ei, bb->preds)
1002 : {
1003 37199727 : astate **cstate2 = state.get (e->src);
1004 : /* We skip walking part of CFG reached only after first edge to exit.
1005 : No BB reachable from the skipped part is always executed */
1006 37199727 : if (!cstate2)
1007 : {
1008 5506040 : if (e->src != ENTRY_BLOCK_PTR_FOR_FN (fun))
1009 756835 : cstate->low = 0;
1010 5506040 : continue;
1011 : }
1012 31693687 : cstate->low = MIN (cstate->low, (*cstate2)->low);
1013 31693687 : cstate->high = MAX (cstate->high, (*cstate2)->high);
1014 : }
1015 697 : if (dump_file && (dump_flags & TDF_DETAILS)
1016 29353240 : && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
1017 14 : fprintf (dump_file,
1018 : "BB %i %s preorder %i posorder %i low %i high %i\n",
1019 : bb->index,
1020 7 : terminating_bbs_set.contains (bb) ? "(terminating)" : "",
1021 : cstate->dfs_preorder, cstate->dfs_postorder, cstate->low,
1022 : cstate->high);
1023 29353226 : if (cstate->low == cstate->dfs_preorder
1024 15211232 : && cstate->high == cstate->dfs_postorder
1025 12361320 : && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
1026 8288553 : bitmap_set_bit (ret, bb->index);
1027 29353226 : if (terminating_bbs_set.contains (bb))
1028 1456335 : cstate->low = 0;
1029 : else
1030 61001776 : FOR_EACH_EDGE (e, ei, bb->succs)
1031 : {
1032 33104885 : astate **cstate2 = state.get (e->dest);
1033 33104885 : if (!cstate2)
1034 1774357 : continue;
1035 31330528 : cstate->low = MIN (cstate->low, (*cstate2)->low);
1036 31330528 : cstate->high = MAX (cstate->high, (*cstate2)->high);
1037 : }
1038 : }
1039 4749205 : obstack_free (&state_obstack, NULL);
1040 4749205 : if (dump_file)
1041 : {
1042 179 : fprintf (dump_file, "Always executed bbbs %s: ",
1043 : assume_return_or_eh ? "(assuming return or EH)" : "");
1044 169 : bitmap_print (dump_file, ret, "", "\n");
1045 : }
1046 :
1047 4749205 : return ret;
1048 9555603 : }
|