Branch data Line data Source code
1 : : /* Utilities for ipa analysis.
2 : : Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 : : Contributed by Kenneth Zadeck <zadeck@naturalbridge.com>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : #include "config.h"
22 : : #include "system.h"
23 : : #include "coretypes.h"
24 : : #include "backend.h"
25 : : #include "tree.h"
26 : : #include "gimple.h"
27 : : #include "predict.h"
28 : : #include "alloc-pool.h"
29 : : #include "cgraph.h"
30 : : #include "lto-streamer.h"
31 : : #include "dumpfile.h"
32 : : #include "splay-tree.h"
33 : : #include "ipa-utils.h"
34 : : #include "symbol-summary.h"
35 : : #include "tree-vrp.h"
36 : : #include "sreal.h"
37 : : #include "ipa-cp.h"
38 : : #include "ipa-prop.h"
39 : : #include "ipa-fnsummary.h"
40 : : #include "tree-eh.h"
41 : : #include "gimple-iterator.h"
42 : : #include "ipa-modref-tree.h"
43 : : #include "ipa-modref.h"
44 : : #include "tree-ssa-loop-niter.h"
45 : : #include "calls.h"
46 : : #include "cfgloop.h"
47 : : #include "cfganal.h"
48 : :
49 : : /* Debugging function for postorder and inorder code. NOTE is a string
50 : : that is printed before the nodes are printed. ORDER is an array of
51 : : cgraph_nodes that has COUNT useful nodes in it. */
52 : :
53 : : void
54 : 79 : ipa_print_order (FILE* out,
55 : : const char * note,
56 : : struct cgraph_node** order,
57 : : int count)
58 : : {
59 : 79 : int i;
60 : 79 : fprintf (out, "\n\n ordered call graph: %s\n", note);
61 : :
62 : 252 : for (i = count - 1; i >= 0; i--)
63 : 173 : order[i]->dump (out);
64 : 79 : fprintf (out, "\n");
65 : 79 : fflush (out);
66 : 79 : }
67 : :
68 : :
69 : : struct searchc_env {
70 : : struct cgraph_node **stack;
71 : : struct cgraph_node **result;
72 : : int stack_size;
73 : : int order_pos;
74 : : splay_tree nodes_marked_new;
75 : : bool reduce;
76 : : int count;
77 : : };
78 : :
79 : : /* This is an implementation of Tarjan's strongly connected region
80 : : finder as reprinted in Aho Hopcraft and Ullman's The Design and
81 : : finder as reprinted in Aho, Hopcroft and Ullman's The Design and
82 : : Analysis of Computer Algorithms (1974) pages 192-193. This version
83 : : has been customized for cgraph_nodes. Because this function is
84 : : recursive and C has no nested functions, the traversal state is
85 : : passed explicitly in the ENV parameter. This function should only
86 : : be called from itself or from ipa_reduced_postorder.
87 : : V is the node to start
88 : :
89 : : static void
90 : 13005807 : searchc (struct searchc_env* env, struct cgraph_node *v,
91 : : bool (*ignore_edge) (struct cgraph_edge *))
92 : : {
93 : 13005807 : struct cgraph_edge *edge;
94 : 13005807 : struct ipa_dfs_info *v_info = (struct ipa_dfs_info *) v->aux;
95 : :
96 : : /* mark node as old */
97 : 13005807 : v_info->new_node = false;
98 : 13005807 : splay_tree_remove (env->nodes_marked_new, v->get_uid ());
99 : :
100 : 13005807 : v_info->dfn_number = env->count;
101 : 13005807 : v_info->low_link = env->count;
102 : 13005807 : env->count++;
103 : 13005807 : env->stack[(env->stack_size)++] = v;
104 : 13005807 : v_info->on_stack = true;
105 : :
106 : 55930140 : for (edge = v->callees; edge; edge = edge->next_callee)
107 : : {
108 : 42924333 : struct ipa_dfs_info * w_info;
109 : 42924333 : enum availability avail;
110 : 42924333 : struct cgraph_node *w = edge->callee->ultimate_alias_target (&avail);
111 : :
112 : 42924333 : if (!w || (ignore_edge && ignore_edge (edge)))
113 : 26983234 : continue;
114 : :
115 : 15941099 : if (w->aux
116 : 12782195 : && (avail >= AVAIL_INTERPOSABLE))
117 : : {
118 : 12782195 : w_info = (struct ipa_dfs_info *) w->aux;
119 : 12782195 : if (w_info->new_node)
120 : : {
121 : 5008419 : searchc (env, w, ignore_edge);
122 : 5008419 : v_info->low_link =
123 : 5008419 : (v_info->low_link < w_info->low_link) ?
124 : : v_info->low_link : w_info->low_link;
125 : : }
126 : : else
127 : 7773776 : if ((w_info->dfn_number < v_info->dfn_number)
128 : 7002417 : && (w_info->on_stack))
129 : 67344 : v_info->low_link =
130 : 67344 : (w_info->dfn_number < v_info->low_link) ?
131 : : w_info->dfn_number : v_info->low_link;
132 : : }
133 : : }
134 : :
135 : :
136 : 13005807 : if (v_info->low_link == v_info->dfn_number)
137 : : {
138 : : struct cgraph_node *last = NULL;
139 : 13005807 : struct cgraph_node *x;
140 : 13005807 : struct ipa_dfs_info *x_info;
141 : 13005807 : do {
142 : 13005807 : x = env->stack[--(env->stack_size)];
143 : 13005807 : x_info = (struct ipa_dfs_info *) x->aux;
144 : 13005807 : x_info->on_stack = false;
145 : 13005807 : x_info->scc_no = v_info->dfn_number;
146 : :
147 : 13005807 : if (env->reduce)
148 : : {
149 : 13005807 : x_info->next_cycle = last;
150 : 13005807 : last = x;
151 : : }
152 : : else
153 : 0 : env->result[env->order_pos++] = x;
154 : : }
155 : 13005807 : while (v != x);
156 : 12921726 : if (env->reduce)
157 : 12921726 : env->result[env->order_pos++] = v;
158 : : }
159 : 13005807 : }
160 : :
161 : : /* Topsort the call graph by caller relation. Put the result in ORDER.
162 : :
163 : : The REDUCE flag is true if you want the cycles reduced to single nodes.
164 : : You can use ipa_get_nodes_in_cycle to obtain a vector containing all real
165 : : call graph nodes in a reduced node.
166 : :
167 : : Nodes with AVAIL_INTERPOSABLE or better availability are included.
168 : : IGNORE_EDGE, if non-NULL, is a hook that may make some edges insignificant
169 : : for the topological sort. */
170 : :
171 : : int
172 : 1055681 : ipa_reduced_postorder (struct cgraph_node **order,
173 : : bool reduce,
174 : : bool (*ignore_edge) (struct cgraph_edge *))
175 : : {
176 : 1055681 : struct cgraph_node *node;
177 : 1055681 : struct searchc_env env;
178 : 1055681 : splay_tree_node result;
179 : 1055681 : env.stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
180 : 1055681 : env.stack_size = 0;
181 : 1055681 : env.result = order;
182 : 1055681 : env.order_pos = 0;
183 : 1055681 : env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
184 : 1055681 : env.count = 1;
185 : 1055681 : env.reduce = reduce;
186 : :
187 : 14061516 : FOR_EACH_DEFINED_FUNCTION (node)
188 : : {
189 : 13005835 : enum availability avail = node->get_availability ();
190 : :
191 : 13005835 : if (avail > AVAIL_INTERPOSABLE
192 : 13005835 : || avail == AVAIL_INTERPOSABLE)
193 : : {
194 : : /* Reuse the info if it is already there. */
195 : 13005807 : struct ipa_dfs_info *info = (struct ipa_dfs_info *) node->aux;
196 : 13005807 : if (!info)
197 : 13005807 : info = XCNEW (struct ipa_dfs_info);
198 : 13005807 : info->new_node = true;
199 : 13005807 : info->on_stack = false;
200 : 13005807 : info->next_cycle = NULL;
201 : 13005807 : node->aux = info;
202 : :
203 : 13005807 : splay_tree_insert (env.nodes_marked_new,
204 : 13005807 : (splay_tree_key)node->get_uid (),
205 : : (splay_tree_value)node);
206 : : }
207 : : else
208 : 28 : node->aux = NULL;
209 : : }
210 : 1055681 : result = splay_tree_min (env.nodes_marked_new);
211 : 10108750 : while (result)
212 : : {
213 : 7997388 : node = (struct cgraph_node *)result->value;
214 : 7997388 : searchc (&env, node, ignore_edge);
215 : 7997388 : result = splay_tree_min (env.nodes_marked_new);
216 : : }
217 : 1055681 : splay_tree_delete (env.nodes_marked_new);
218 : 1055681 : free (env.stack);
219 : :
220 : 1055681 : return env.order_pos;
221 : : }
222 : :
223 : : /* Deallocate all ipa_dfs_info structures pointed to by the aux pointer of call
224 : : graph nodes. */
225 : :
226 : : void
227 : 1203591 : ipa_free_postorder_info (void)
228 : : {
229 : 1203591 : struct cgraph_node *node;
230 : 16393538 : FOR_EACH_DEFINED_FUNCTION (node)
231 : : {
232 : : /* Get rid of the aux information. */
233 : 15189947 : if (node->aux)
234 : : {
235 : 13005807 : free (node->aux);
236 : 13005807 : node->aux = NULL;
237 : : }
238 : : }
239 : 1203591 : }
240 : :
241 : : /* Get the set of nodes for the cycle in the reduced call graph starting
242 : : from NODE. */
243 : :
244 : : vec<cgraph_node *>
245 : 5900802 : ipa_get_nodes_in_cycle (struct cgraph_node *node)
246 : : {
247 : 5900802 : vec<cgraph_node *> v = vNULL;
248 : 5900802 : struct ipa_dfs_info *node_dfs_info;
249 : 11841870 : while (node)
250 : : {
251 : 5941068 : v.safe_push (node);
252 : 5941068 : node_dfs_info = (struct ipa_dfs_info *) node->aux;
253 : 5941068 : node = node_dfs_info->next_cycle;
254 : : }
255 : 5900802 : return v;
256 : : }
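
A typical consumer of ipa_reduced_postorder and ipa_get_nodes_in_cycle looks roughly like the sketch below (a hedged illustration, not code from this file; dumping to stderr is an arbitrary placeholder for per-node work):

    cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
    int order_pos = ipa_reduced_postorder (order, true, NULL);
    for (int i = 0; i < order_pos; i++)
      {
        /* With REDUCE set, each entry stands for a whole SCC.  */
        vec<cgraph_node *> cycle = ipa_get_nodes_in_cycle (order[i]);
        for (cgraph_node *n : cycle)
          n->dump (stderr);          /* Visit every real node of the SCC.  */
        cycle.release ();
      }
    ipa_free_postorder_info ();      /* Free the ipa_dfs_info stored in ->aux.  */
    free (order);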
257 : :
258 : : /* Return true iff the CS is an edge within a strongly connected component as
259 : : computed by ipa_reduced_postorder. */
260 : :
261 : : bool
262 : 15340857 : ipa_edge_within_scc (struct cgraph_edge *cs)
263 : : {
264 : 15340857 : struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux;
265 : 15340857 : struct ipa_dfs_info *callee_dfs;
266 : 15340857 : struct cgraph_node *callee = cs->callee->function_symbol ();
267 : :
268 : 15340857 : callee_dfs = (struct ipa_dfs_info *) callee->aux;
269 : 15340857 : return (caller_dfs
270 : 15340857 : && callee_dfs
271 : 15340857 : && caller_dfs->scc_no == callee_dfs->scc_no);
272 : : }
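
As an illustration only (the helper name below is made up), a pass that has just built the reduced postorder could use this predicate to count how many of a node's direct calls stay within its own SCC:

    /* Count NODE's direct calls whose callee belongs to the same strongly
       connected component.  Assumes ipa_reduced_postorder has populated
       the ipa_dfs_info pointers in ->aux.  */
    static int
    count_intra_scc_calls (cgraph_node *node)
    {
      int n = 0;
      for (cgraph_edge *e = node->callees; e; e = e->next_callee)
        if (ipa_edge_within_scc (e))
          n++;
      return n;
    }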
273 : :
274 : : struct postorder_stack
275 : : {
276 : : struct cgraph_node *node;
277 : : struct cgraph_edge *edge;
278 : : int ref;
279 : : };
280 : :
281 : : /* Fill array order with all nodes with output flag set in the reverse
282 : : topological order. Return the number of elements in the array.
283 : : FIXME: While walking, consider aliases, too. */
284 : :
285 : : int
286 : 1222951 : ipa_reverse_postorder (struct cgraph_node **order)
287 : : {
288 : 1222951 : struct cgraph_node *node, *node2;
289 : 1222951 : int stack_size = 0;
290 : 1222951 : int order_pos = 0;
291 : 1222951 : struct cgraph_edge *edge;
292 : 1222951 : int pass;
293 : 1222951 : struct ipa_ref *ref = NULL;
294 : :
295 : 1222951 : struct postorder_stack *stack =
296 : 1222951 : XCNEWVEC (struct postorder_stack, symtab->cgraph_count);
297 : :
298 : : /* We have to deal with cycles nicely, so use a depth first traversal
299 : : output algorithm. Ignore the fact that some functions won't need
300 : : to be output and put them into order as well, so we get dependencies
301 : : right through inline functions. */
302 : 26366572 : FOR_EACH_FUNCTION (node)
303 : 23920670 : node->aux = NULL;
304 : 3668853 : for (pass = 0; pass < 2; pass++)
305 : 100574484 : FOR_EACH_FUNCTION (node)
306 : 47841340 : if (!node->aux
307 : 47841340 : && (pass
308 : 21554399 : || (!node->address_taken
309 : 14902528 : && !node->inlined_to
310 : 12728766 : && !node->alias && !node->thunk
311 : 12205783 : && !node->only_called_directly_p ())))
312 : : {
313 : 17662141 : stack_size = 0;
314 : 17662141 : stack[stack_size].node = node;
315 : 17662141 : stack[stack_size].edge = node->callers;
316 : 17662141 : stack[stack_size].ref = 0;
317 : 17662141 : node->aux = (void *)(size_t)1;
318 : 41582811 : while (stack_size >= 0)
319 : : {
320 : 67698616 : while (true)
321 : : {
322 : 67698616 : node2 = NULL;
323 : 110536946 : while (stack[stack_size].edge && !node2)
324 : : {
325 : 42838330 : edge = stack[stack_size].edge;
326 : 42838330 : node2 = edge->caller;
327 : 42838330 : stack[stack_size].edge = edge->next_caller;
328 : : }
329 : 163943381 : for (; stack[stack_size].node->iterate_referring (
330 : 77213999 : stack[stack_size].ref,
331 : 77213999 : ref) && !node2;
332 : : stack[stack_size].ref++)
333 : : {
334 : 9515383 : if (ref->use == IPA_REF_ALIAS)
335 : 10454999 : node2 = dyn_cast <cgraph_node *> (ref->referring);
336 : : }
337 : 67698616 : if (!node2)
338 : : break;
339 : 43777946 : if (!node2->aux)
340 : : {
341 : 6258529 : stack[++stack_size].node = node2;
342 : 6258529 : stack[stack_size].edge = node2->callers;
343 : 6258529 : stack[stack_size].ref = 0;
344 : 6258529 : node2->aux = (void *)(size_t)1;
345 : : }
346 : : }
347 : 23920670 : order[order_pos++] = stack[stack_size--].node;
348 : : }
349 : : }
350 : 1222951 : free (stack);
351 : 26366572 : FOR_EACH_FUNCTION (node)
352 : 23920670 : node->aux = NULL;
353 : 1222951 : return order_pos;
354 : : }
355 : :
356 : :
357 : :
358 : : /* Given a memory reference T, return the variable at the bottom
359 : : of the access. Unlike get_base_address, this will recurse through
360 : : INDIRECT_REFS. */
361 : :
362 : : tree
363 : 4612325 : get_base_var (tree t)
364 : : {
365 : 4612325 : while (!SSA_VAR_P (t)
366 : 6841608 : && (!CONSTANT_CLASS_P (t))
367 : : && TREE_CODE (t) != LABEL_DECL
368 : : && TREE_CODE (t) != FUNCTION_DECL
369 : : && TREE_CODE (t) != CONST_DECL
370 : 9427920 : && TREE_CODE (t) != CONSTRUCTOR)
371 : : {
372 : 4815595 : t = TREE_OPERAND (t, 0);
373 : : }
374 : 4612325 : return t;
375 : : }
376 : :
377 : : /* Scale the counts of calls in NODE by the ratio NODE->count / ORIG_COUNT. */
378 : :
379 : : void
380 : 0 : scale_ipa_profile_for_fn (struct cgraph_node *node, profile_count orig_count)
381 : : {
382 : 0 : profile_count to = node->count;
383 : 0 : profile_count::adjust_for_ipa_scaling (&to, &orig_count);
384 : 0 : struct cgraph_edge *e;
385 : :
386 : 0 : for (e = node->callees; e; e = e->next_callee)
387 : 0 : e->count = e->count.apply_scale (to, orig_count);
388 : 0 : for (e = node->indirect_calls; e; e = e->next_callee)
389 : 0 : e->count = e->count.apply_scale (to, orig_count);
390 : 0 : }
391 : :
392 : : /* SRC and DST are going to be merged. Take SRC's profile and merge it into
393 : : DST so it is not going to be lost. Possibly destroy SRC's body on the way
394 : : unless PRESERVE_BODY is set. */
395 : :
396 : : void
397 : 31910 : ipa_merge_profiles (struct cgraph_node *dst,
398 : : struct cgraph_node *src,
399 : : bool preserve_body)
400 : : {
401 : 31910 : tree oldsrcdecl = src->decl;
402 : 31910 : struct function *srccfun, *dstcfun;
403 : 31910 : bool match = true;
404 : 31910 : bool copy_counts = false;
405 : :
406 : 31910 : if (!src->definition
407 : 30075 : || !dst->definition)
408 : 31906 : return;
409 : :
410 : 30075 : if (src->frequency < dst->frequency)
411 : 69 : src->frequency = dst->frequency;
412 : :
413 : : /* Time profiles are merged. */
414 : 30075 : if (dst->tp_first_run > src->tp_first_run && src->tp_first_run)
415 : 1 : dst->tp_first_run = src->tp_first_run;
416 : :
417 : 30075 : if (src->profile_id && !dst->profile_id)
418 : 0 : dst->profile_id = src->profile_id;
419 : :
420 : : /* Merging a zero profile into DST is a no-op. */
421 : 30075 : if (src->count.ipa () == profile_count::zero ())
422 : 123 : return;
423 : :
424 : : /* FIXME: when we merge in an unknown profile, we ought to set counts as
425 : : unsafe. */
426 : 29952 : if (!src->count.initialized_p ()
427 : 29952 : || !(src->count.ipa () == src->count))
428 : 29948 : return;
429 : 4 : profile_count orig_count = dst->count;
430 : :
431 : : /* Either sum the profiles if both are IPA and not global0, or
432 : : pick the more informative one (that is, nonzero IPA if the other is
433 : : uninitialized, guessed or global0). */
434 : :
435 : 4 : if ((dst->count.ipa ().nonzero_p ()
436 : 0 : || src->count.ipa ().nonzero_p ())
437 : 4 : && dst->count.ipa ().initialized_p ()
438 : 4 : && src->count.ipa ().initialized_p ())
439 : 4 : dst->count = dst->count.ipa () + src->count.ipa ();
440 : 0 : else if (dst->count.ipa ().initialized_p ())
441 : : ;
442 : 0 : else if (src->count.ipa ().initialized_p ())
443 : : {
444 : 0 : copy_counts = true;
445 : 0 : dst->count = src->count.ipa ();
446 : : }
447 : :
448 : : /* If no updating is needed, return early. */
449 : 31910 : if (dst->count == orig_count)
450 : : return;
451 : :
452 : 4 : if (symtab->dump_file)
453 : : {
454 : 0 : fprintf (symtab->dump_file, "Merging profiles of %s count:",
455 : : src->dump_name ());
456 : 0 : src->count.dump (symtab->dump_file);
457 : 0 : fprintf (symtab->dump_file, " to %s count:",
458 : : dst->dump_name ());
459 : 0 : orig_count.dump (symtab->dump_file);
460 : 0 : fprintf (symtab->dump_file, " resulting count:");
461 : 0 : dst->count.dump (symtab->dump_file);
462 : 0 : fprintf (symtab->dump_file, "\n");
463 : : }
464 : :
465 : : /* First handle functions with no gimple body. */
466 : 4 : if (dst->thunk || dst->alias
467 : 4 : || src->thunk || src->alias)
468 : : {
469 : 0 : scale_ipa_profile_for_fn (dst, orig_count);
470 : 0 : return;
471 : : }
472 : :
473 : : /* This is ugly. We need to get both function bodies into memory.
474 : : If the declaration is merged, we need to duplicate it to be able
475 : : to load the body that is being replaced. This makes the symbol table
476 : : temporarily inconsistent. */
477 : 4 : if (src->decl == dst->decl)
478 : : {
479 : 0 : struct lto_in_decl_state temp;
480 : 0 : struct lto_in_decl_state *state;
481 : :
482 : : /* We are going to move the decl; we want to remove its file decl data
483 : : and link it with the new decl. */
484 : 0 : temp.fn_decl = src->decl;
485 : 0 : lto_in_decl_state **slot
486 : 0 : = src->lto_file_data->function_decl_states->find_slot (&temp,
487 : : NO_INSERT);
488 : 0 : state = *slot;
489 : 0 : src->lto_file_data->function_decl_states->clear_slot (slot);
490 : 0 : gcc_assert (state);
491 : :
492 : : /* Duplicate the decl and be sure it does not link into body of DST. */
493 : 0 : src->decl = copy_node (src->decl);
494 : 0 : DECL_STRUCT_FUNCTION (src->decl) = NULL;
495 : 0 : DECL_ARGUMENTS (src->decl) = NULL;
496 : 0 : DECL_INITIAL (src->decl) = NULL;
497 : 0 : DECL_RESULT (src->decl) = NULL;
498 : :
499 : : /* Associate the decl state with new declaration, so LTO streamer
500 : : can look it up. */
501 : 0 : state->fn_decl = src->decl;
502 : 0 : slot
503 : 0 : = src->lto_file_data->function_decl_states->find_slot (state, INSERT);
504 : 0 : gcc_assert (!*slot);
505 : 0 : *slot = state;
506 : : }
507 : 4 : src->get_untransformed_body ();
508 : 4 : dst->get_untransformed_body ();
509 : 4 : srccfun = DECL_STRUCT_FUNCTION (src->decl);
510 : 4 : dstcfun = DECL_STRUCT_FUNCTION (dst->decl);
511 : 4 : if (n_basic_blocks_for_fn (srccfun)
512 : 4 : != n_basic_blocks_for_fn (dstcfun))
513 : : {
514 : 0 : if (symtab->dump_file)
515 : 0 : fprintf (symtab->dump_file,
516 : : "Giving up; number of basic block mismatch.\n");
517 : : match = false;
518 : : }
519 : 4 : else if (last_basic_block_for_fn (srccfun)
520 : 4 : != last_basic_block_for_fn (dstcfun))
521 : : {
522 : 0 : if (symtab->dump_file)
523 : 0 : fprintf (symtab->dump_file,
524 : : "Giving up; last block mismatch.\n");
525 : : match = false;
526 : : }
527 : : else
528 : : {
529 : 4 : basic_block srcbb, dstbb;
530 : 4 : struct cgraph_edge *e, *e2;
531 : :
532 : 5 : for (e = dst->callees, e2 = src->callees; e && e2 && match;
533 : 1 : e2 = e2->next_callee, e = e->next_callee)
534 : : {
535 : 1 : if (gimple_bb (e->call_stmt)->index
536 : 1 : != gimple_bb (e2->call_stmt)->index)
537 : : {
538 : 0 : if (symtab->dump_file)
539 : 0 : fprintf (symtab->dump_file,
540 : : "Giving up; call stmt mismatch.\n");
541 : : match = false;
542 : : }
543 : : }
544 : 4 : if (e || e2)
545 : : {
546 : 0 : if (symtab->dump_file)
547 : 0 : fprintf (symtab->dump_file,
548 : : "Giving up; number of calls differs.\n");
549 : : match = false;
550 : : }
551 : 4 : for (e = dst->indirect_calls, e2 = src->indirect_calls; e && e2 && match;
552 : 0 : e2 = e2->next_callee, e = e->next_callee)
553 : : {
554 : 0 : if (gimple_bb (e->call_stmt)->index
555 : 0 : != gimple_bb (e2->call_stmt)->index)
556 : : {
557 : 0 : if (symtab->dump_file)
558 : 0 : fprintf (symtab->dump_file,
559 : : "Giving up; indirect call stmt mismatch.\n");
560 : : match = false;
561 : : }
562 : : }
563 : 4 : if (e || e2)
564 : : {
565 : 0 : if (symtab->dump_file)
566 : 0 : fprintf (symtab->dump_file,
567 : : "Giving up; number of indirect calls differs.\n");
568 : : match = false;
569 : : }
570 : :
571 : 4 : if (match)
572 : 18 : FOR_ALL_BB_FN (srcbb, srccfun)
573 : : {
574 : 14 : unsigned int i;
575 : :
576 : 14 : dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
577 : 14 : if (dstbb == NULL)
578 : : {
579 : 0 : if (symtab->dump_file)
580 : 0 : fprintf (symtab->dump_file,
581 : : "No matching block for bb %i.\n",
582 : : srcbb->index);
583 : : match = false;
584 : : break;
585 : : }
586 : 34 : if (EDGE_COUNT (srcbb->succs) != EDGE_COUNT (dstbb->succs))
587 : : {
588 : 0 : if (symtab->dump_file)
589 : 0 : fprintf (symtab->dump_file,
590 : : "Edge count mismatch for bb %i.\n",
591 : : srcbb->index);
592 : : match = false;
593 : : break;
594 : : }
595 : 25 : for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
596 : : {
597 : 11 : edge srce = EDGE_SUCC (srcbb, i);
598 : 11 : edge dste = EDGE_SUCC (dstbb, i);
599 : 11 : if (srce->dest->index != dste->dest->index)
600 : : {
601 : 0 : if (symtab->dump_file)
602 : 0 : fprintf (symtab->dump_file,
603 : : "Succ edge mismatch for bb %i.\n",
604 : : srce->dest->index);
605 : : match = false;
606 : : break;
607 : : }
608 : : }
609 : : }
610 : : }
611 : 4 : if (match)
612 : : {
613 : 4 : struct cgraph_edge *e, *e2;
614 : 4 : basic_block srcbb, dstbb;
615 : :
616 : : /* Function and global profile may be out of sync. First scale it the
617 : : same way as fixup_cfg would. */
618 : 4 : profile_count srcnum = src->count;
619 : 4 : profile_count srcden = ENTRY_BLOCK_PTR_FOR_FN (srccfun)->count;
620 : 4 : bool srcscale = srcnum.initialized_p () && !(srcnum == srcden);
621 : 4 : profile_count dstnum = orig_count;
622 : 4 : profile_count dstden = ENTRY_BLOCK_PTR_FOR_FN (dstcfun)->count;
623 : 4 : bool dstscale = !copy_counts
624 : 4 : && dstnum.initialized_p () && !(dstnum == dstden);
625 : :
626 : : /* TODO: merge also statement histograms. */
627 : 18 : FOR_ALL_BB_FN (srcbb, srccfun)
628 : : {
629 : 14 : unsigned int i;
630 : :
631 : 14 : dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
632 : :
633 : 14 : profile_count srccount = srcbb->count;
634 : 14 : if (srcscale)
635 : 0 : srccount = srccount.apply_scale (srcnum, srcden);
636 : 14 : if (dstscale)
637 : 0 : dstbb->count = dstbb->count.apply_scale (dstnum, dstden);
638 : :
639 : 14 : if (copy_counts)
640 : : {
641 : 0 : dstbb->count = srccount;
642 : 14 : for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
643 : : {
644 : 0 : edge srce = EDGE_SUCC (srcbb, i);
645 : 0 : edge dste = EDGE_SUCC (dstbb, i);
646 : 0 : if (srce->probability.initialized_p ())
647 : 0 : dste->probability = srce->probability;
648 : : }
649 : : }
650 : : else
651 : : {
652 : 25 : for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
653 : : {
654 : 11 : edge srce = EDGE_SUCC (srcbb, i);
655 : 11 : edge dste = EDGE_SUCC (dstbb, i);
656 : 11 : profile_count sum =
657 : 11 : dstbb->count.ipa () + srccount.ipa ();
658 : 22 : if (sum.nonzero_p ())
659 : 10 : dste->probability =
660 : 20 : dste->probability * dstbb->count.ipa ().probability_in
661 : 10 : (sum)
662 : 10 : + srce->probability * srcbb->count.ipa ().probability_in
663 : 20 : (sum);
664 : : }
665 : 14 : dstbb->count = dstbb->count.ipa () + srccount.ipa ();
666 : : }
667 : : }
668 : 4 : push_cfun (dstcfun);
669 : 4 : update_max_bb_count ();
670 : 4 : compute_function_frequency ();
671 : 4 : pop_cfun ();
672 : 5 : for (e = dst->callees; e; e = e->next_callee)
673 : : {
674 : 1 : if (e->speculative)
675 : 0 : continue;
676 : 1 : e->count = gimple_bb (e->call_stmt)->count;
677 : : }
678 : 4 : for (e = dst->indirect_calls, e2 = src->indirect_calls; e;
679 : 0 : e2 = (e2 ? e2->next_callee : NULL), e = e->next_callee)
680 : : {
681 : 0 : if (!e->speculative && !e2->speculative)
682 : : {
683 : : /* FIXME: we need to also merge ipa-profile histograms
684 : : because with LTO merging happens from lto-symtab before
685 : : these are converted to indirect edges. */
686 : 0 : e->count = gimple_bb (e->call_stmt)->count;
687 : 0 : continue;
688 : : }
689 : :
690 : : /* When copying, just remove all speculations on DST and then copy
691 : : the ones from SRC. */
692 : 0 : if (copy_counts)
693 : : {
694 : 0 : while (e->speculative)
695 : 0 : cgraph_edge::resolve_speculation (e, NULL);
696 : 0 : e->count = gimple_bb (e->call_stmt)->count;
697 : 0 : if (e2->speculative)
698 : : {
699 : 0 : for (cgraph_edge *e3 = e2->first_speculative_call_target ();
700 : 0 : e3;
701 : 0 : e3 = e3->next_speculative_call_target ())
702 : : {
703 : 0 : cgraph_edge *ns;
704 : 0 : ns = e->make_speculative
705 : 0 : (dyn_cast <cgraph_node *>
706 : 0 : (e3->speculative_call_target_ref ()->referred),
707 : 0 : e3->count, e3->speculative_id);
708 : : /* Target may differ from ref (for example it may be
709 : : redirected to a local alias). */
710 : 0 : ns->redirect_callee (e3->callee);
711 : : }
712 : : }
713 : 0 : continue;
714 : 0 : }
715 : :
716 : : /* Iterate all speculations in SRC, see if corresponding ones exist
717 : : in DST and if so, sum the counts. Otherwise create a new
718 : : speculation. */
719 : 0 : int max_spec = 0;
720 : 0 : for (cgraph_edge *e3 = e->first_speculative_call_target ();
721 : 0 : e3;
722 : 0 : e3 = e3->next_speculative_call_target ())
723 : 0 : if (e3->speculative_id > max_spec)
724 : : max_spec = e3->speculative_id;
725 : 0 : for (cgraph_edge *e3 = e2->first_speculative_call_target ();
726 : 0 : e3;
727 : 0 : e3 = e3->next_speculative_call_target ())
728 : : {
729 : 0 : cgraph_edge *te
730 : : = e->speculative_call_for_target
731 : 0 : (dyn_cast <cgraph_node *>
732 : 0 : (e3->speculative_call_target_ref ()->referred));
733 : 0 : if (te)
734 : 0 : te->count = te->count + e3->count;
735 : : else
736 : : {
737 : 0 : e->count = e->count + e3->count;
738 : 0 : cgraph_edge *ns;
739 : 0 : ns = e->make_speculative
740 : 0 : (dyn_cast <cgraph_node *>
741 : 0 : (e3->speculative_call_target_ref ()
742 : : ->referred),
743 : : e3->count,
744 : 0 : e3->speculative_id + max_spec + 1);
745 : : /* Target may differ from ref (for example it may be
746 : : redirected to a local alias). */
747 : 0 : ns->redirect_callee (e3->callee);
748 : : }
749 : : }
750 : : }
751 : 4 : if (!preserve_body)
752 : 1 : src->release_body ();
753 : : /* Update summary. */
754 : 4 : compute_fn_summary (dst, 0);
755 : : }
756 : : /* We can't update CFG profile, but we can scale IPA profile. CFG
757 : : will be scaled according to dst->count after IPA passes. */
758 : : else
759 : 0 : scale_ipa_profile_for_fn (dst, orig_count);
760 : 4 : src->decl = oldsrcdecl;
761 : : }
762 : :
763 : : /* Return true if the call to DEST is known to be a self-recursive
764 : : call within FUNC. */
765 : :
766 : : bool
767 : 28023537 : recursive_call_p (tree func, tree dest)
768 : : {
769 : 28023537 : struct cgraph_node *dest_node = cgraph_node::get_create (dest);
770 : 28023537 : struct cgraph_node *cnode = cgraph_node::get_create (func);
771 : 28023537 : ipa_ref *alias;
772 : 28023537 : enum availability avail;
773 : :
774 : 28023537 : gcc_assert (!cnode->alias);
775 : 28023537 : if (cnode != dest_node->ultimate_alias_target (&avail))
776 : : return false;
777 : 66811 : if (avail >= AVAIL_AVAILABLE)
778 : : return true;
779 : 2387 : if (!dest_node->semantically_equivalent_p (cnode))
780 : : return false;
781 : : /* If there is only one way to call the function or we know all of them
782 : : are semantically equivalent, we can still consider the call recursive. */
783 : 2426 : FOR_EACH_ALIAS (cnode, alias)
784 : 64 : if (!dest_node->semantically_equivalent_p (alias->referring))
785 : : return false;
786 : : return true;
787 : : }
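
Callers usually start from a call statement rather than a bare declaration; a minimal sketch of such a wrapper (the function name is hypothetical, not part of this file) might be:

    /* Return true if CALL, found in the function currently being compiled,
       is known to be a self-recursive call.  */
    static bool
    call_is_self_recursive_p (gcall *call)
    {
      tree callee = gimple_call_fndecl (call);
      return callee && recursive_call_p (current_function_decl, callee);
    }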
788 : :
789 : : /* Return true if stmt may terminate execution of function.
790 : : If ASSUME_RETURN_OR_EH is true we can further assume that the function ends
791 : : either by a return statement or EH (no trapping or infinite loops). */
792 : :
793 : : bool
794 : 67735715 : stmt_may_terminate_function_p (function *fun, gimple *stmt, bool assume_return_or_eh)
795 : : {
796 : 67735715 : if (stmt_can_throw_external (fun, stmt))
797 : : return true;
798 : 65845778 : if (assume_return_or_eh)
799 : : return false;
800 : 755157 : gasm *astmt = dyn_cast <gasm *> (stmt);
801 : 20 : if (astmt && gimple_asm_volatile_p (astmt))
802 : : return true;
803 : 755137 : if (gimple_could_trap_p (stmt))
804 : : return true;
805 : 690004 : if (gcall *call = dyn_cast <gcall *> (stmt))
806 : : {
807 : 202400 : int flags = gimple_call_flags (call);
808 : 202400 : if (flags & (ECF_PURE | ECF_CONST) && ! (flags & ECF_LOOPING_CONST_OR_PURE))
809 : : return false;
810 : 69988 : modref_summary *s = get_modref_function_summary (call, NULL);
811 : 69988 : if (s && !s->side_effects)
812 : : return false;
813 : : return true;
814 : : }
815 : : return false;
816 : : }
817 : :
818 : : /* Return bitmap of all basic blocks whose first statements are known to
819 : : execute on every invocation of the function.
820 : :
821 : : If ASSUME_RETURN_OR_EH is true we can further assume that the function ends
822 : : either by a return statement or EH (no trapping or infinite loops).
823 : : This is useful when summarizing a function in passes like ipa-modref.
824 : :
825 : : Setting ASSUME_RETURN_OR_EH to false is used to prove that a given
826 : : statement will be executed even if the function gets into an infinite
827 : : loop or traps. */
828 : : bitmap
829 : 4515520 : find_always_executed_bbs (function *fun, bool assume_return_or_eh)
830 : : {
831 : 4515520 : auto_vec<basic_block, 20> stack;
832 : 4515520 : auto_vec<basic_block, 20> terminating_bbs;
833 : 4515520 : hash_set<basic_block> visited;
834 : 4515520 : hash_set<basic_block> terminating_bbs_set;
835 : 4515520 : edge e;
836 : 4515520 : edge_iterator ei;
837 : 4515520 : int flags = flags_from_decl_or_type (fun->decl);
838 : : /* PUre and const functions always return. */
839 : : /* Pure and const functions always return. */
840 : 3413440 : if (!assume_return_or_eh)
841 : 51244 : mark_dfs_back_edges (fun);
842 : :
843 : : /* First walk all BBs reachable from entry stopping on statements that may
844 : : terminate execution. Everything past such a statement is not going to be
845 : : executed on each invocation. */
846 : 4515520 : stack.safe_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
847 : 41916604 : while (!stack.is_empty ())
848 : : {
849 : 32885564 : basic_block bb = stack.pop ();
850 : 32885564 : bool found = false, found_exit = false;
851 : 32885564 : if (bb->index == EXIT_BLOCK)
852 : 3201594 : continue;
853 : 63759609 : FOR_EACH_EDGE (e, ei, bb->succs)
854 : : {
855 : 38057968 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
856 : : {
857 : : found_exit = true;
858 : : break;
859 : : }
860 : : /* Watch for infinite loops. */
861 : 34075639 : if (!found
862 : 661600 : && !assume_return_or_eh && (e->flags & EDGE_DFS_BACK))
863 : : {
864 : 43824 : if (!dom_info_available_p (CDI_DOMINATORS))
865 : 919 : calculate_dominance_info (CDI_DOMINATORS);
866 : : /* If this is not a loop latch edge it is an irreducible region.
867 : : Assume that it is infinite.
868 : : TODO: with C++ forced progression we can still walk the
869 : : irreducible region and see if it contains any side effects.
870 : : Similarly for loops. -ffinite-loops does not really imply
871 : : this since we allow inlining across the -ffinite-loops boundary
872 : : and thus it can be used only as a loop flag. */
873 : 43824 : if (e->dest->loop_father->header != e->dest
874 : 43824 : || !dominated_by_p (CDI_DOMINATORS, bb, e->dest))
875 : : found = true;
876 : 43824 : else if (!finite_loop_p (e->dest->loop_father))
877 : 8 : found = true;
878 : : }
879 : : }
880 : 29683970 : if (!assume_return_or_eh
881 : 29683970 : && (EDGE_COUNT (bb->succs) == 0 || (bb->flags & BB_IRREDUCIBLE_LOOP)))
882 : : found = true;
883 : 29683970 : for (gimple_stmt_iterator si = gsi_start_nondebug_after_labels_bb (bb);
884 : 95300812 : !gsi_end_p (si) && !found; gsi_next_nondebug (&si))
885 : 67637431 : if (stmt_may_terminate_function_p (fun, gsi_stmt (si), assume_return_or_eh))
886 : : {
887 : : found = true;
888 : : break;
889 : : }
890 : 29683970 : if (found)
891 : : {
892 : 2023525 : visited.add (EXIT_BLOCK_PTR_FOR_FN (fun));
893 : 2023525 : if (!found_exit)
894 : : {
895 : 1339391 : terminating_bbs.safe_push (bb);
896 : 1339391 : terminating_bbs_set.add (bb);
897 : : }
898 : : }
899 : : else
900 : 63615193 : FOR_EACH_EDGE (e, ei, bb->succs)
901 : 35954748 : if (!visited.add (e->dest))
902 : 28370044 : stack.safe_push (e->dest);
903 : : }
904 : :
905 : : /* Next walk from exit block and find all articulations in the CFG.
906 : : Add all terminating basic blocks as "fake" predecessors of the
907 : : exit block. */
908 : :
909 : 4515520 : bitmap ret = BITMAP_ALLOC (NULL);
910 : 4515520 : bitmap_tree_view (ret);
911 : : /* A degenerate case when there is no path to exit. */
912 : 4515520 : if (!visited.contains (EXIT_BLOCK_PTR_FOR_FN (fun)))
913 : : {
914 : 109740 : bitmap_set_bit (ret,
915 : : single_succ_edge
916 : 54870 : (ENTRY_BLOCK_PTR_FOR_FN (fun))->dest->index);
917 : 54870 : return ret;
918 : : }
919 : :
920 : 4460650 : struct astate
921 : : {
922 : : unsigned int dfs_preorder;
923 : : unsigned int dfs_postorder;
924 : :
925 : : unsigned int low, high;
926 : : };
927 : :
928 : 4460650 : struct worklist
929 : : {
930 : : basic_block bb;
931 : : astate *cstate;
932 : : };
933 : :
934 : 4460650 : struct obstack state_obstack;
935 : 4460650 : gcc_obstack_init (&state_obstack);
936 : 4460650 : hash_map<basic_block, astate *> state;
937 : 4460650 : auto_vec<worklist, 32> worklist_vec;
938 : 4460650 : unsigned int next_dfs_num = 1;
939 : :
940 : : /* Always executed blocks are blocks that are on every path from entry to exit.
941 : : We proceed in two steps. First we do backward DFS walk (so we know that entry
942 : : is always reached) and record preorder and postorder visiting times.
943 : :
944 : : In the second step we proceed in postorder and for every block A we compute
945 : : the minimal preorder (A.low) and maximal postorder (A.high) of blocks reachable
946 : : from the BBs in the DFS subtree of A. If A is always executed there are no
947 : : edges out of this subtree. This can be tested by checking that A.low == A.preorder
948 : : and A.high == A.postorder.
949 : :
950 : : This is the first step. Do the backward DFS walk and record preorder, postorder
951 : : and predecessor info. Initialize stack in postorder. */
952 : 4460650 : worklist we = {EXIT_BLOCK_PTR_FOR_FN (fun), NULL};
953 : 4460650 : worklist_vec.safe_push (we);
954 : 71785647 : while (!worklist_vec.is_empty ())
955 : : {
956 : 62864347 : worklist &w = worklist_vec.last ();
957 : 62864347 : basic_block bb = w.bb;
958 : 62864347 : astate *cstate = w.cstate;
959 : :
960 : 62864347 : if (!cstate)
961 : : {
962 : 35414158 : astate **slot = &state.get_or_insert (bb);
963 : :
964 : 35414158 : cstate = *slot;
965 : : /* Already processed by DFS? */
966 : 35414158 : if (cstate)
967 : : {
968 : 7963969 : worklist_vec.pop ();
969 : 35414158 : continue;
970 : : }
971 : : /* DFS is visiting BB for first time. */
972 : 27450189 : *slot = cstate = XOBNEW (&state_obstack, struct astate);
973 : 27450189 : cstate->low = cstate->high = cstate->dfs_preorder = next_dfs_num++;
974 : 27450189 : w.cstate = cstate;
975 : : /* Exit block is special; process all fake edges we identified. */
976 : 27450189 : if (bb == EXIT_BLOCK_PTR_FOR_FN (fun))
977 : 14721341 : for (basic_block bb2 : terminating_bbs)
978 : : {
979 : 1339391 : worklist we = {bb2, NULL};
980 : 1339391 : worklist_vec.safe_push (we);
981 : : }
982 : 62224488 : FOR_EACH_EDGE (e, ei, bb->preds)
983 : 34774299 : if (visited.contains (e->src))
984 : : {
985 : 29614117 : worklist we = {e->src, NULL};
986 : 29614117 : worklist_vec.safe_push (we);
987 : : }
988 : : /* Keep BB on the worklist so we process it again once its predecessors are done. */
989 : 27450189 : continue;
990 : 27450189 : }
991 : : /* We are finished with processing reachable BBs, see if we have an articulation. */
992 : 27450189 : worklist_vec.pop ();
993 : 27450189 : cstate->high = cstate->dfs_postorder = next_dfs_num++;
994 : 27450189 : stack.safe_push (bb);
995 : : }
996 : : /* This is the final postorder walk. Determine low and high values and mark
997 : : always executed blocks. */
998 : 40832139 : for (basic_block bb : stack)
999 : : {
1000 : 27450189 : astate *cstate = *state.get (bb);
1001 : 62224488 : FOR_EACH_EDGE (e, ei, bb->preds)
1002 : : {
1003 : 34774299 : astate **cstate2 = state.get (e->src);
1004 : : /* We skip walking the part of the CFG reached only after the first edge to exit.
1005 : : No BB reachable from the skipped part is always executed. */
1006 : 34774299 : if (!cstate2)
1007 : : {
1008 : 5160182 : if (e->src != ENTRY_BLOCK_PTR_FOR_FN (fun))
1009 : 699532 : cstate->low = 0;
1010 : 5160182 : continue;
1011 : : }
1012 : 29614117 : cstate->low = MIN (cstate->low, (*cstate2)->low);
1013 : 29614117 : cstate->high = MAX (cstate->high, (*cstate2)->high);
1014 : : }
1015 : 699 : if (dump_file && (dump_flags & TDF_DETAILS)
1016 : 27450203 : && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
1017 : 14 : fprintf (dump_file,
1018 : : "BB %i %s preorder %i postorder %i low %i high %i\n",
1019 : : bb->index,
1020 : 7 : terminating_bbs_set.contains (bb) ? "(terminating)" : "",
1021 : : cstate->dfs_preorder, cstate->dfs_postorder, cstate->low,
1022 : : cstate->high);
1023 : 27450189 : if (cstate->low == cstate->dfs_preorder
1024 : 14480589 : && cstate->high == cstate->dfs_postorder
1025 : 11564022 : && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
1026 : 7744122 : bitmap_set_bit (ret, bb->index);
1027 : 27450189 : if (terminating_bbs_set.contains (bb))
1028 : 1339391 : cstate->low = 0;
1029 : : else
1030 : 56973429 : FOR_EACH_EDGE (e, ei, bb->succs)
1031 : : {
1032 : 30862631 : astate **cstate2 = state.get (e->dest);
1033 : 30862631 : if (!cstate2)
1034 : 1575632 : continue;
1035 : 29286999 : cstate->low = MIN (cstate->low, (*cstate2)->low);
1036 : 29286999 : cstate->high = MAX (cstate->high, (*cstate2)->high);
1037 : : }
1038 : : }
1039 : 4460650 : obstack_free (&state_obstack, NULL);
1040 : 4460650 : if (dump_file)
1041 : : {
1042 : 178 : fprintf (dump_file, "Always executed bbs %s: ",
1043 : : assume_return_or_eh ? "(assuming return or EH)" : "");
1044 : 168 : bitmap_print (dump_file, ret, "", "\n");
1045 : : }
1046 : :
1047 : 4460650 : return ret;
1048 : 8976170 : }
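
A minimal sketch of how a summary pass might consume the returned bitmap (the wrapper below is illustrative and not part of this file):

    /* Return true if the first statement of BB executes on every invocation
       of FUN, assuming FUN always terminates by a return or by EH.  */
    static bool
    first_stmt_always_executed_p (function *fun, basic_block bb)
    {
      bitmap always = find_always_executed_bbs (fun, /*assume_return_or_eh=*/true);
      bool ret = bitmap_bit_p (always, bb->index);
      BITMAP_FREE (always);
      return ret;
    }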