Line data Source code
1 : /* Interprocedural constant propagation
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 :
4 : Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
5 : <mjambor@suse.cz>
6 :
7 : This file is part of GCC.
8 :
9 : GCC is free software; you can redistribute it and/or modify it under
10 : the terms of the GNU General Public License as published by the Free
11 : Software Foundation; either version 3, or (at your option) any later
12 : version.
13 :
14 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 : for more details.
18 :
19 : You should have received a copy of the GNU General Public License
20 : along with GCC; see the file COPYING3. If not see
21 : <http://www.gnu.org/licenses/>. */
22 :
23 : /* Interprocedural constant propagation (IPA-CP).
24 :
25 : The goal of this transformation is to
26 :
27 : 1) discover functions which are always invoked with some arguments with the
28 : same known constant values and modify the functions so that the
29 : subsequent optimizations can take advantage of the knowledge, and
30 :
31 : 2) partial specialization - create specialized versions of functions
32 : transformed in this way if some parameters are known constants only in
33 : certain contexts but the estimated tradeoff between speedup and cost size
34 : is deemed good.
35 :
36 : The algorithm also propagates types and attempts to perform type based
37 : devirtualization. Types are propagated much like constants.
38 :
39 : The algorithm basically consists of three stages. In the first, functions
40 : are analyzed one at a time and jump functions are constructed for all known
41 : call-sites. In the second phase, the pass propagates information from the
42 : jump functions across the call to reveal what values are available at what
43 : call sites, performs estimations of effects of known values on functions and
44 : their callees, and finally decides what specialized extra versions should be
45 : created. In the third, the special versions materialize and appropriate
46 : calls are redirected.
47 :
48 : The algorithm used is to a certain extent based on "Interprocedural Constant
49 : Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 : Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 : Cooper, Mary W. Hall, and Ken Kennedy.
52 :
53 :
54 : First stage - intraprocedural analysis
55 : =======================================
56 :
57 : This phase computes jump_function and modification flags.
58 :
59 : A jump function for a call-site represents the values passed as an actual
60 : arguments of a given call-site. In principle, there are three types of
61 : values:
62 :
63 : Pass through - the caller's formal parameter is passed as an actual
64 : argument, plus an operation on it can be performed.
65 : Constant - a constant is passed as an actual argument.
66 : Unknown - neither of the above.
67 :
68 : All jump function types are described in detail in ipa-prop.h, together with
69 : the data structures that represent them and methods of accessing them.
70 :
71 : ipcp_generate_summary() is the main function of the first stage.
72 :
73 : Second stage - interprocedural analysis
74 : ========================================
75 :
76 : This stage is itself divided into two phases. In the first, we propagate
77 : known values over the call graph, in the second, we make cloning decisions.
78 : It uses a different algorithm than the original Callahan's paper.
79 :
80 : First, we traverse the functions topologically from callers to callees and,
81 : for each strongly connected component (SCC), we propagate constants
82 : according to previously computed jump functions. We also record what known
83 : values depend on other known values and estimate local effects. Finally, we
84 : propagate cumulative information about these effects from dependent values
85 : to those on which they depend.
86 :
87 : Second, we again traverse the call graph in the same topological order and
88 : make clones for functions which we know are called with the same values in
89 : all contexts and decide about extra specialized clones of functions just for
90 : some contexts - these decisions are based on both local estimates and
91 : cumulative estimates propagated from callees.
92 :
93 : ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
94 : second stage.
95 :
96 : Third phase - materialization of clones, call statement updates.
97 : ============================================
98 :
99 : This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 : and tree-inline.cc) according to instructions inserted to the call graph by
101 : the second stage. */
102 :
103 : #define INCLUDE_ALGORITHM
104 : #include "config.h"
105 : #include "system.h"
106 : #include "coretypes.h"
107 : #include "backend.h"
108 : #include "tree.h"
109 : #include "gimple-expr.h"
110 : #include "gimple.h"
111 : #include "predict.h"
112 : #include "sreal.h"
113 : #include "alloc-pool.h"
114 : #include "tree-pass.h"
115 : #include "cgraph.h"
116 : #include "diagnostic.h"
117 : #include "fold-const.h"
118 : #include "gimple-iterator.h"
119 : #include "gimple-fold.h"
120 : #include "symbol-summary.h"
121 : #include "tree-vrp.h"
122 : #include "ipa-cp.h"
123 : #include "ipa-prop.h"
124 : #include "tree-pretty-print.h"
125 : #include "tree-inline.h"
126 : #include "ipa-fnsummary.h"
127 : #include "ipa-utils.h"
128 : #include "tree-ssa-ccp.h"
129 : #include "stringpool.h"
130 : #include "attribs.h"
131 : #include "dbgcnt.h"
132 : #include "symtab-clones.h"
133 : #include "gimple-range.h"
134 : #include "attr-callback.h"
135 :
136 : /* Allocation pools for values and their sources in ipa-cp. */
137 :
138 : object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
139 : ("IPA-CP constant values");
140 :
141 : object_allocator<ipcp_value<ipa_polymorphic_call_context> >
142 : ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
143 :
144 : object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
145 : ("IPA-CP value sources");
146 :
147 : object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
148 : ("IPA_CP aggregate lattices");
149 :
150 : /* Original overall size of the program. */
151 :
152 : static long overall_size, orig_overall_size;
153 :
154 : /* The maximum number of IPA-CP decision sweeps that any node requested in its
155 : param. */
156 : static int max_number_sweeps;
157 :
158 : /* Node name to unique clone suffix number map. */
159 : static hash_map<const char *, unsigned> *clone_num_suffixes;
160 :
161 : /* Return the param lattices structure corresponding to the Ith formal
162 : parameter of the function described by INFO. */
163 : static inline class ipcp_param_lattices *
164 33192412 : ipa_get_parm_lattices (class ipa_node_params *info, int i)
165 : {
166 66384824 : gcc_assert (i >= 0 && i < ipa_get_param_count (info));
 : /* Lattices are only kept for original nodes; IPA-CP clones (with
 : ipcp_orig_node set) must never be queried here. */
167 33192412 : gcc_checking_assert (!info->ipcp_orig_node);
168 33192412 : return &(info->lattices[i]);
169 : }
170 :
171 : /* Return the lattice corresponding to the scalar value of the Ith formal
172 : parameter of the function described by INFO. */
173 : static inline ipcp_lattice<tree> *
174 5829077 : ipa_get_scalar_lat (class ipa_node_params *info, int i)
175 : {
176 6010284 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
 : /* The scalar constant lattice is the "itself" member of the
 : per-parameter lattice bundle. */
177 5829077 : return &plats->itself;
178 : }
179 :
180 : /* Return the lattice corresponding to the polymorphic call context of the
181 : Ith formal parameter of the function described by INFO. */
182 : static inline ipcp_lattice<ipa_polymorphic_call_context> *
183 761040 : ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
184 : {
185 761040 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
186 761040 : return &plats->ctxlat;
187 : }
188 :
189 : /* Return whether LAT is a lattice with a single constant and without an
190 : undefined value. */
191 :
192 : template <typename valtype>
193 : inline bool
194 14788735 : ipcp_lattice<valtype>::is_single_const ()
195 : {
 : /* Note that TOP (no values recorded at all) also yields false here. */
196 3199075 : if (bottom || contains_variable || values_count != 1)
197 : return false;
198 : else
199 : return true;
200 : }
201 :
202 : /* Return true iff X and Y should be considered equal values by IPA-CP. */
203 :
204 : bool
205 1383587 : values_equal_for_ipcp_p (tree x, tree y)
206 : {
207 1383587 : gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
208 :
209 1383587 : if (x == y)
210 : return true;
211 :
 : /* Addresses of CONST_DECLs and of variables in the constant pool are
 : compared by their initializers as well, since equal constants can end
 : up in distinct pool entries in different translation units. */
212 618166 : if (TREE_CODE (x) == ADDR_EXPR
213 218710 : && TREE_CODE (y) == ADDR_EXPR
214 217869 : && (TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
215 172307 : || (TREE_CODE (TREE_OPERAND (x, 0)) == VAR_DECL
216 92123 : && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x, 0))))
217 663728 : && (TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL
218 13 : || (TREE_CODE (TREE_OPERAND (y, 0)) == VAR_DECL
219 8 : && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y, 0)))))
220 45549 : return TREE_OPERAND (x, 0) == TREE_OPERAND (y, 0)
221 90950 : || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
222 45401 : DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
223 : else
224 572617 : return operand_equal_p (x, y, 0);
225 : }
226 :
227 : /* Print V which is extracted from a value in a lattice to F. This overloaded
228 : function is used to print tree constants. */
229 :
230 : static void
231 693 : print_ipcp_constant_value (FILE * f, tree v)
232 : {
 : /* Delegate to the shared ipa-prop printer so the output format matches
 : other IPA dumps. */
233 0 : ipa_print_constant_value (f, v);
234 36 : }
235 :
236 : /* Print V which is extracted from a value in a lattice to F. This overloaded
237 : function is used to print constant polymorphic call contexts. */
238 :
239 : static void
240 214 : print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
241 : {
 : /* NOTE(review): the second argument presumably requests a terse dump -
 : confirm against ipa_polymorphic_call_context::dump. */
242 214 : v.dump(f, false);
243 0 : }
244 :
245 : /* Print a lattice LAT to F. If DUMP_SOURCES, also print which callers each
 : value came from; if DUMP_BENEFITS, print estimated time/size effects. */
246 :
247 : template <typename valtype>
248 : void
249 1979 : ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
250 : {
251 : ipcp_value<valtype> *val;
252 1979 : bool prev = false;
253 :
 : /* BOTTOM and TOP have fixed spellings and no further detail. */
254 1979 : if (bottom)
255 : {
256 838 : fprintf (f, "BOTTOM\n");
257 838 : return;
258 : }
259 :
260 1141 : if (!values_count && !contains_variable)
261 : {
262 0 : fprintf (f, "TOP\n");
263 0 : return;
264 : }
265 :
266 1141 : if (contains_variable)
267 : {
268 861 : fprintf (f, "VARIABLE");
269 861 : prev = true;
270 861 : if (dump_benefits)
271 861 : fprintf (f, "\n");
272 : }
273 :
274 1770 : for (val = values; val; val = val->next)
275 : {
 : /* PREV tracks whether a separator is needed before this entry. */
276 629 : if (dump_benefits && prev)
277 349 : fprintf (f, " ");
278 280 : else if (!dump_benefits && prev)
279 0 : fprintf (f, ", ");
280 : else
281 : prev = true;
282 :
283 629 : print_ipcp_constant_value (f, val->value);
284 :
285 629 : if (dump_sources)
286 : {
287 : ipcp_value_source<valtype> *s;
288 :
289 174 : if (val->self_recursion_generated_p ())
290 27 : fprintf (f, " [self_gen(%i), from:",
291 : val->self_recursion_generated_level);
292 : else
293 147 : fprintf (f, " [scc: %i, from:", val->scc_no);
294 366 : for (s = val->sources; s; s = s->next)
295 192 : fprintf (f, " %i(%f)", s->cs->caller->get_uid (),
296 384 : s->cs->sreal_frequency ().to_double ());
297 174 : fprintf (f, "]");
298 : }
299 :
300 629 : if (dump_benefits)
301 629 : fprintf (f, " [loc_time: %g, loc_size: %i, "
302 : "prop_time: %g, prop_size: %i]\n",
303 : val->local_time_benefit.to_double (), val->local_size_cost,
304 : val->prop_time_benefit.to_double (), val->prop_size_cost);
305 : }
306 1141 : if (!dump_benefits)
307 0 : fprintf (f, "\n");
308 : }
309 :
310 : /* Print VALUE to F in a form which in usual cases does not take thousands of
311 : characters. */
312 :
313 : static void
314 1476 : ipcp_print_widest_int (FILE *f, const widest_int &value)
315 : {
316 1476 : if (value == -1)
317 0 : fprintf (f, "-1");
 : /* Values whose bits above the low 128 are all ones are compressed to
 : "0xf..f" followed by the interesting low-order hex digits. */
318 1476 : else if (wi::arshift (value, 128) == -1)
319 : {
 : /* print_hex writes "0x" plus up to 32 hex digits; P skips the "0x". */
320 333 : char buf[35], *p = buf + 2;
321 333 : widest_int v = wi::zext (value, 128);
322 333 : size_t len;
323 333 : print_hex (v, buf);
324 333 : len = strlen (p);
325 333 : if (len == 32)
326 : {
 : /* Full-width value: fold the leading run of 'f's into "0xf..f". */
327 333 : fprintf (f, "0xf..f");
328 9831 : while (*p == 'f')
329 9165 : ++p;
330 : }
331 : else
332 0 : fprintf (f, "0xf..f%0*d", (int) (32 - len), 0);
333 333 : fputs (p, f);
334 333 : }
335 : else
336 1143 : print_hex (value, f);
337 1476 : }
338 :
/* Print the known-bits lattice to F. */
339 : void
340 914 : ipcp_bits_lattice::print (FILE *f)
341 : {
342 914 : if (bottom_p ())
343 : {
344 604 : fprintf (f, " Bits unusable (BOTTOM)\n");
345 604 : return;
346 : }
347 :
348 310 : if (top_p ())
349 0 : fprintf (f, " Bits unknown (TOP)");
350 : else
351 : {
352 310 : fprintf (f, " Bits: value = ");
353 310 : ipcp_print_widest_int (f, get_value ());
354 310 : fprintf (f, ", mask = ");
355 310 : ipcp_print_widest_int (f, get_mask ());
356 : }
357 :
358 310 : if (m_recipient_only)
359 136 : fprintf (f, " (recipient only)");
360 310 : fprintf (f, "\n");
361 : }
362 :
363 : /* Print value range lattice to F. */
364 :
365 : void
366 914 : ipcp_vr_lattice::print (FILE * f)
367 : {
 : /* NOTE(review): m_recipient_only appears to mark lattices that only
 : receive values - confirm semantics in ipa-cp.h. */
368 914 : if (m_recipient_only)
369 263 : fprintf (f, "(recipient only) ")
370 914 : m_vr.dump (f);
371 914 : }
372 :
373 : /* Print all ipcp_lattices of all functions to F. */
374 :
375 : static void
376 161 : print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
377 : {
378 161 : struct cgraph_node *node;
379 161 : int i, count;
380 :
381 161 : fprintf (f, "\nLattices:\n");
382 886 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
383 : {
384 725 : class ipa_node_params *info;
385 :
386 725 : info = ipa_node_params_sum->get (node);
387 : /* Skip unoptimized functions and constprop clones since we don't make
388 : lattices for them. */
389 725 : if (!info || info->ipcp_orig_node)
390 0 : continue;
391 725 : fprintf (f, " Node: %s:\n", node->dump_name ());
392 725 : count = ipa_get_param_count (info);
393 1639 : for (i = 0; i < count; i++)
394 : {
395 914 : struct ipcp_agg_lattice *aglat;
396 914 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
397 914 : fprintf (f, " param [%d]: ", i);
398 914 : plats->itself.print (f, dump_sources, dump_benefits);
399 914 : fprintf (f, " ctxs: ");
400 914 : plats->ctxlat.print (f, dump_sources, dump_benefits);
401 914 : plats->bits_lattice.print (f);
402 914 : fprintf (f, " ");
403 914 : plats->m_value_range.print (f);
404 914 : fprintf (f, "\n");
405 914 : if (plats->virt_call)
406 75 : fprintf (f, " virt_call flag set\n");
407 :
 : /* Aggregate lattices: either all-BOTTOM, or optionally VARIABLE
 : plus a list of per-offset lattices. */
408 914 : if (plats->aggs_bottom)
409 : {
410 439 : fprintf (f, " AGGS BOTTOM\n");
411 439 : continue;
412 : }
413 475 : if (plats->aggs_contain_variable)
414 437 : fprintf (f, " AGGS VARIABLE\n");
415 626 : for (aglat = plats->aggs; aglat; aglat = aglat->next)
416 : {
417 151 : fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
418 151 : plats->aggs_by_ref ? "ref " : "", aglat->offset);
419 151 : aglat->print (f, dump_sources, dump_benefits);
420 : }
421 : }
422 : }
423 161 : }
424 :
425 : /* Determine whether it is at all technically possible to create clones of NODE
426 : and store this information in the ipa_node_params structure associated
427 : with NODE. */
428 :
429 : static void
430 1254697 : determine_versionability (struct cgraph_node *node,
431 : class ipa_node_params *info)
432 : {
 : /* REASON stays NULL when the node is versionable; otherwise it holds a
 : human-readable explanation for the dump file. */
433 1254697 : const char *reason = NULL;
434 :
435 : /* There are a number of generic reasons functions cannot be versioned. We
436 : also cannot remove parameters if there are type attributes such as fnspec
437 : present. */
438 1254697 : if (node->alias || node->thunk)
439 : reason = "alias or thunk";
440 1254697 : else if (!node->versionable)
441 : reason = "not a tree_versionable_function";
442 1126088 : else if (node->get_availability () <= AVAIL_INTERPOSABLE)
443 : reason = "insufficient body availability";
444 1059561 : else if (!opt_for_fn (node->decl, optimize)
445 1059561 : || !opt_for_fn (node->decl, flag_ipa_cp))
446 : reason = "non-optimized function";
447 1059561 : else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
448 : {
449 : /* Ideally we should clone the SIMD clones themselves and create
450 : vector copies of them, so IPA-cp and SIMD clones can happily
451 : coexist, but that may not be worth the effort. */
452 : reason = "function has SIMD clones";
453 : }
454 1059201 : else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
455 : {
456 : /* Ideally we should clone the target clones themselves and create
457 : copies of them, so IPA-cp and target clones can happily
458 : coexist, but that may not be worth the effort. */
459 : reason = "function target_clones attribute";
460 : }
461 : /* Don't clone decls local to a comdat group; it breaks and for C++
462 : decloned constructors, inlining is always better anyway. */
463 1059201 : else if (node->comdat_local_p ())
464 : reason = "comdat-local function";
465 1057054 : else if (node->calls_comdat_local)
466 : {
467 : /* TODO: call is versionable if we make sure that all
468 : callers are inside of a comdat group. */
469 2249 : reason = "calls comdat-local function";
470 : }
471 :
472 : /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
473 : work only when inlined. Cloning them may still lead to better code
474 : because ipa-cp will not give up on cloning further. If the function is
475 : external this however leads to wrong code because we may end up producing
476 : offline copy of the function. */
477 1254697 : if (DECL_EXTERNAL (node->decl))
478 81419 : for (cgraph_edge *edge = node->callees; !reason && edge;
479 55350 : edge = edge->next_callee)
480 55350 : if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
481 : {
482 5475 : if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
483 0 : reason = "external function which calls va_arg_pack";
484 5475 : if (DECL_FUNCTION_CODE (edge->callee->decl)
485 : == BUILT_IN_VA_ARG_PACK_LEN)
486 0 : reason = "external function which calls va_arg_pack_len";
487 : }
488 :
 : /* Aliases and thunks are common and expected; do not clutter the dump
 : file with messages about them. */
489 1254697 : if (reason && dump_file && !node->alias && !node->thunk)
490 56 : fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
491 : node->dump_name (), reason);
492 :
493 1254697 : info->versionable = (reason == NULL);
494 1254697 : }
495 :
496 : /* Return true if it is at all technically possible to create clones of a
497 : NODE. */
498 :
499 : static bool
500 6008953 : ipcp_versionable_function_p (struct cgraph_node *node)
501 : {
 : /* Nodes without an ipa_node_params summary are conservatively treated
 : as non-versionable. */
502 6008953 : ipa_node_params *info = ipa_node_params_sum->get (node);
503 6008953 : return info && info->versionable;
504 : }
505 :
506 : /* Structure holding accumulated information about callers of a node. Reset
 : with init_caller_stats and filled in by gather_caller_stats. */
507 :
508 3360243 : struct caller_statistics
509 : {
510 : /* If requested (see below), self-recursive call counts are summed into this
511 : field. */
512 : profile_count rec_count_sum;
513 : /* The sum of all ipa counts of all the other (non-recursive) calls. */
514 : profile_count count_sum;
515 : /* Sum of all frequencies for all calls. */
516 : sreal freq_sum;
517 : /* Number of calls and calls considered interesting respectively. */
518 : int n_calls, n_interesting_calls;
519 : /* If itself is set up, also count the number of non-self-recursive
520 : calls. */
521 : int n_nonrec_calls;
522 : /* If non-NULL, this is the node itself and calls from it should have their
523 : counts included in rec_count_sum and not count_sum. */
524 : cgraph_node *itself;
525 : /* True if there is a caller that has no IPA profile. */
526 : bool called_without_ipa_profile;
527 : };
528 :
529 : /* Initialize fields of STATS to zeroes and optionally set it up so that
530 : calls coming from ITSELF are counted as self-recursive (rec_count_sum). */
531 :
532 : static inline void
533 2624151 : init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
534 : {
535 2624151 : stats->rec_count_sum = profile_count::zero ();
536 2624151 : stats->count_sum = profile_count::zero ();
537 2624151 : stats->n_calls = 0;
538 2624151 : stats->n_interesting_calls = 0;
539 2624151 : stats->n_nonrec_calls = 0;
540 2624151 : stats->freq_sum = 0;
541 2624151 : stats->itself = itself;
542 2624151 : stats->called_without_ipa_profile = false;
543 2624151 : }
544 :
545 : /* We want to propagate across edges that may be executed, however
546 : we do not want to check maybe_hot, since call itself may be cold
547 : while callee contains some heavy loop which makes propagation still
548 : relevant.
549 :
550 : In particular, even edge called once may lead to significant
551 : improvement. */
552 :
553 : static bool
554 4662778 : cs_interesting_for_ipcp_p (cgraph_edge *e)
555 : {
556 : /* If profile says the edge is executed, we want to optimize. */
557 4662778 : if (e->count.ipa ().nonzero_p ())
558 773 : return true;
559 : /* If local (possibly guessed or adjusted 0 profile) claims edge is
560 : not executed, do not propagate.
561 : Do not trust AFDO since branch needs to be executed multiple
562 : times to count while we want to propagate even call called
563 : once during the train run if callee is important. */
564 4662005 : if (e->count.initialized_p () && !e->count.nonzero_p ()
565 5318485 : && e->count.quality () != AFDO)
566 : return false;
567 : /* If we have zero IPA profile, still consider edge for cloning
568 : in case we do partial training. */
569 4005525 : if (e->count.ipa ().initialized_p ()
570 4005525 : && e->count.ipa ().quality () != AFDO
571 4005540 : && !opt_for_fn (e->callee->decl,flag_profile_partial_training))
572 15 : return false;
573 : return true;
574 : }
575 :
576 : /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
577 : non-thunk incoming edges to NODE. */
578 :
579 : static bool
580 2795734 : gather_caller_stats (struct cgraph_node *node, void *data)
581 : {
582 2795734 : struct caller_statistics *stats = (struct caller_statistics *) data;
583 2795734 : struct cgraph_edge *cs;
584 :
585 7262389 : for (cs = node->callers; cs; cs = cs->next_caller)
586 4466655 : if (!cs->caller->thunk)
587 : {
588 4463144 : ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
 : /* Edges from nodes already known to be dead do not contribute. */
589 4463144 : if (info && info->node_dead)
590 163130 : continue;
591 :
592 4300014 : if (cs->count.ipa ().initialized_p ())
593 : {
 : /* Self-recursive counts go into rec_count_sum when the caller
 : requested it via init_caller_stats's ITSELF argument. */
594 325598 : if (stats->itself && stats->itself == cs->caller)
595 0 : stats->rec_count_sum += cs->count.ipa ();
596 : else
597 325598 : stats->count_sum += cs->count.ipa ();
598 : }
599 : else
600 3974416 : stats->called_without_ipa_profile = true;
601 4300014 : stats->freq_sum += cs->sreal_frequency ();
602 4300014 : stats->n_calls++;
603 4300014 : if (stats->itself && stats->itself != cs->caller)
604 11 : stats->n_nonrec_calls++;
605 :
606 : /* If profile known to be zero, we do not want to clone for performance.
607 : However if call is cold, the called function may still contain
608 : important hot loops. */
609 4300014 : if (cs_interesting_for_ipcp_p (cs))
610 3688905 : stats->n_interesting_calls++;
611 : }
 : /* Always return false so that all aliases are visited too. */
612 2795734 : return false;
613 :
614 : }
615 :
616 : /* Return true if this NODE is viable candidate for cloning. */
617 :
618 : static bool
619 781827 : ipcp_cloning_candidate_p (struct cgraph_node *node)
620 : {
621 781827 : struct caller_statistics stats;
622 :
623 781827 : gcc_checking_assert (node->has_gimple_body_p ());
624 :
625 781827 : if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
626 : {
627 731127 : if (dump_file)
628 31 : fprintf (dump_file, "Not considering %s for cloning; "
629 : "-fipa-cp-clone disabled.\n",
630 : node->dump_name ());
631 731127 : return false;
632 : }
633 :
634 : /* Do not use profile here since cold wrappers can wrap
635 : hot functions. */
636 50700 : if (opt_for_fn (node->decl, optimize_size))
637 : {
638 10 : if (dump_file)
639 0 : fprintf (dump_file, "Not considering %s for cloning; "
640 : "optimizing it for size.\n",
641 : node->dump_name ());
642 10 : return false;
643 : }
644 :
645 50690 : init_caller_stats (&stats);
646 50690 : node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);
647 :
 : /* Cloning is clearly worthwhile when the body is smaller than the
 : number of calls it would serve. */
648 50690 : if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
649 : {
650 298 : if (dump_file)
651 0 : fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
652 : node->dump_name ());
653 298 : return true;
654 : }
655 50392 : if (!stats.n_interesting_calls)
656 : {
657 38825 : if (dump_file)
658 198 : fprintf (dump_file, "Not considering %s for cloning; "
659 : "no calls considered interesting by profile.\n",
660 : node->dump_name ());
661 38825 : return false;
662 : }
663 11567 : if (dump_file)
664 186 : fprintf (dump_file, "Considering %s for cloning.\n",
665 : node->dump_name ());
666 : return true;
667 : }
668 :
669 : template <typename valtype>
670 : class value_topo_info
671 : {
672 : public:
673 : /* Head of the linked list of topologically sorted values. */
674 : ipcp_value<valtype> *values_topo;
675 : /* Stack for creating SCCs, represented by a linked list too. */
676 : ipcp_value<valtype> *stack;
677 : /* Counter driving the algorithm in add_val_to_toposort. */
678 : int dfs_counter;
679 :
680 127757 : value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
681 : {}
 : /* Add CUR_VAL to the topological sort. */
682 : void add_val (ipcp_value<valtype> *cur_val);
 : /* Propagate accumulated effects along the computed order. */
683 : void propagate_effects ();
684 : };
685 :
686 : /* Arrays representing a topological ordering of call graph nodes and a stack
687 : of nodes used during constant propagation and also data required to perform
688 : topological sort of values and propagation of benefits in the determined
689 : order. */
690 :
691 : class ipa_topo_info
692 : {
693 : public:
694 : /* Array with obtained topological order of cgraph nodes. */
695 : struct cgraph_node **order;
696 : /* Stack of cgraph nodes used during propagation within SCC until all values
697 : in the SCC stabilize. */
698 : struct cgraph_node **stack;
 : /* Number of entries in ORDER and the current top of STACK. */
699 : int nnodes, stack_top;
700 :
701 : value_topo_info<tree> constants;
702 : value_topo_info<ipa_polymorphic_call_context> contexts;
703 :
704 127757 : ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
705 127757 : constants ()
706 : {}
707 : };
708 :
709 : /* Skip edges from and to nodes without ipa_cp enabled.
710 : Ignore not available symbols. */
711 :
712 : static bool
713 5234521 : ignore_edge_p (cgraph_edge *e)
714 : {
715 5234521 : enum availability avail;
716 5234521 : cgraph_node *ultimate_target
717 5234521 : = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
718 :
 : /* Interposable targets cannot be relied upon; neither can callees
 : compiled without optimization or with IPA-CP disabled. */
719 5234521 : return (avail <= AVAIL_INTERPOSABLE
720 1843006 : || !opt_for_fn (ultimate_target->decl, optimize)
721 7068824 : || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
722 : }
723 :
724 : /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
725 :
726 : static void
727 127757 : build_toporder_info (class ipa_topo_info *topo)
728 : {
729 127757 : topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
730 127757 : topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
731 :
732 127757 : gcc_checking_assert (topo->stack_top == 0);
 : /* Edges rejected by ignore_edge_p do not constrain the order. */
733 127757 : topo->nnodes = ipa_reduced_postorder (topo->order, true,
734 : ignore_edge_p);
735 127757 : }
736 :
737 : /* Free information about strongly connected components and the arrays in
738 : TOPO. */
739 :
740 : static void
741 127757 : free_toporder_info (class ipa_topo_info *topo)
742 : {
 : /* Releases the per-node aux data set up by ipa_reduced_postorder. */
743 127757 : ipa_free_postorder_info ();
744 127757 : free (topo->order);
745 127757 : free (topo->stack);
746 127757 : }
747 :
748 : /* Add NODE to the stack in TOPO, unless it is already there. */
749 :
750 : static inline void
751 1258840 : push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
752 : {
753 1258840 : ipa_node_params *info = ipa_node_params_sum->get (node);
 : /* The node_enqueued flag guarantees each node is on the stack at most
 : once. */
754 1258840 : if (info->node_enqueued)
755 : return;
756 1257871 : info->node_enqueued = 1;
757 1257871 : topo->stack[topo->stack_top++] = node;
758 : }
759 :
760 : /* Pop a node from the stack in TOPO and return it or return NULL if the stack
761 : is empty. */
762 :
763 : static struct cgraph_node *
764 2591924 : pop_node_from_stack (class ipa_topo_info *topo)
765 : {
766 2591924 : if (topo->stack_top)
767 : {
768 1257871 : struct cgraph_node *node;
769 1257871 : topo->stack_top--;
770 1257871 : node = topo->stack[topo->stack_top];
 : /* Clear the flag so the node can be re-enqueued later. */
771 1257871 : ipa_node_params_sum->get (node)->node_enqueued = 0;
772 1257871 : return node;
773 : }
774 : else
775 : return NULL;
776 : }
777 :
778 : /* Set lattice LAT to bottom and return true if it previously was not set as
779 : such. */
780 :
781 : template <typename valtype>
782 : inline bool
783 2090014 : ipcp_lattice<valtype>::set_to_bottom ()
784 : {
 : /* The return value tells callers whether anything changed. */
785 2090014 : bool ret = !bottom;
786 2090014 : bottom = true;
787 : return ret;
788 : }
789 :
790 : /* Mark lattice as containing an unknown value and return true if it previously
791 : was not marked as such. */
792 :
793 : template <typename valtype>
794 : inline bool
795 1531923 : ipcp_lattice<valtype>::set_contains_variable ()
796 : {
 : /* Returns true exactly when the VARIABLE flag was newly set. */
797 1531923 : bool ret = !contains_variable;
798 1531923 : contains_variable = true;
799 : return ret;
800 : }
801 :
802 : /* Set all aggregate lattices in PLATS to bottom and return true if they were
803 : not previously set as such. */
804 :
805 : static inline bool
806 2089716 : set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
807 : {
 : /* A single flag covers all aggregate lattices of the parameter. */
808 2089716 : bool ret = !plats->aggs_bottom;
809 2089716 : plats->aggs_bottom = true;
810 2089716 : return ret;
811 : }
812 :
813 : /* Mark all aggregate lattices in PLATS as containing an unknown value and
814 : return true if they were not previously marked as such. */
815 :
816 : static inline bool
817 1039258 : set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
818 : {
 : /* Returns true exactly when the flag was newly set. */
819 1039258 : bool ret = !plats->aggs_contain_variable;
820 1039258 : plats->aggs_contain_variable = true;
821 1039258 : return ret;
822 : }
823 :
/* Meet the current value of the lattice with lattice OTHER, returning true
 if this lattice changed. */
824 : bool
825 0 : ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
826 : {
827 0 : return meet_with_1 (other.m_vr);
828 : }
829 :
830 : /* Meet the current value of the lattice with the range described by
831 : P_VR. */
832 :
833 : bool
834 494276 : ipcp_vr_lattice::meet_with (const vrange &p_vr)
835 : {
 : /* Returns true if the lattice changed; see meet_with_1. */
836 494276 : return meet_with_1 (p_vr);
837 : }
838 :
839 : /* Meet the current value of the lattice with the range described by
840 : OTHER_VR. Return TRUE if anything changed. */
841 :
842 : bool
843 494276 : ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
844 : {
 : /* BOTTOM absorbs everything. */
845 494276 : if (bottom_p ())
846 : return false;
847 :
848 494276 : if (other_vr.varying_p ())
849 0 : return set_to_bottom ();
850 :
851 494276 : bool res;
 : /* With checking enabled, verify that union_'s change indication is
 : consistent with an actual before/after comparison. */
852 494276 : if (flag_checking)
853 : {
854 494276 : value_range save (m_vr);
855 494276 : res = m_vr.union_ (other_vr);
856 494276 : gcc_assert (res == (m_vr != save));
857 494276 : }
858 : else
859 0 : res = m_vr.union_ (other_vr);
860 : return res;
861 : }
862 :
863 : /* Return true if value range information in the lattice is yet unknown. */
864 :
865 : bool
866 : ipcp_vr_lattice::top_p () const
867 : {
 : /* TOP is represented by an undefined value range. */
868 172769 : return m_vr.undefined_p ();
869 : }
870 :
871 : /* Return true if value range information in the lattice is known to be
872 : unusable. */
873 :
874 : bool
875 4827513 : ipcp_vr_lattice::bottom_p () const
876 : {
 : /* BOTTOM is represented by a varying value range. */
877 494276 : return m_vr.varying_p ();
878 : }
879 :
880 : /* Set value range information in the lattice to bottom. Return true if it
881 : previously was in a different state. */
882 :
883 : bool
884 2359880 : ipcp_vr_lattice::set_to_bottom ()
885 : {
 : /* Already BOTTOM - no change. */
886 2359880 : if (m_vr.varying_p ())
887 : return false;
888 :
889 : /* Setting an unsupported type here forces the temporary to default
890 : to unsupported_range, which can handle VARYING/DEFINED ranges,
891 : but nothing else (union, intersect, etc). This allows us to set
892 : bottoms on any ranges, and is safe as all users of the lattice
893 : check for bottom first. */
894 2219353 : m_vr.set_type (void_type_node);
895 2219353 : m_vr.set_varying (void_type_node);
896 :
897 2219353 : return true;
898 : }
899 :
900 : /* Set the flag that this lattice is a recipient only, return true if it was
901 : not set before. */
902 :
903 : bool
904 28070 : ipcp_vr_lattice::set_recipient_only ()
905 : {
906 28070 : if (m_recipient_only)
907 : return false;
908 28070 : m_recipient_only = true;
909 28070 : return true;
910 : }
911 :
912 : /* Set lattice value to bottom, if it already isn't the case. */
913 :
914 : bool
915 2378998 : ipcp_bits_lattice::set_to_bottom ()
916 : {
917 2378998 : if (bottom_p ())
918 : return false;
919 2238999 : m_lattice_val = IPA_BITS_VARYING;
920 2238999 : m_value = 0;
921 2238999 : m_mask = -1;
922 2238999 : return true;
923 : }
924 :
925 : /* Set to constant if it isn't already. Only meant to be called
926 : when switching state from TOP. */
927 :
928 : bool
929 77045 : ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
930 : {
931 77045 : gcc_assert (top_p ());
932 77045 : m_lattice_val = IPA_BITS_CONSTANT;
933 77045 : m_value = wi::bit_and (wi::bit_not (mask), value);
934 77045 : m_mask = mask;
935 77045 : return true;
936 : }
937 :
938 : /* Return true if any of the known bits are non-zero. */
939 :
940 : bool
941 460 : ipcp_bits_lattice::known_nonzero_p () const
942 : {
943 460 : if (!constant_p ())
944 : return false;
945 460 : return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
946 : }
947 :
948 : /* Set the flag that this lattice is a recipient only, return true if it was not
949 : set before. */
950 :
951 : bool
952 28070 : ipcp_bits_lattice::set_recipient_only ()
953 : {
954 28070 : if (m_recipient_only)
955 : return false;
956 28070 : m_recipient_only = true;
957 28070 : return true;
958 : }
959 :
960 : /* Convert operand to value, mask form. */
961 :
962 : void
963 2043 : ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
964 : {
965 2043 : wide_int get_nonzero_bits (const_tree);
966 :
967 2043 : if (TREE_CODE (operand) == INTEGER_CST)
968 : {
969 2043 : *valuep = wi::to_widest (operand);
970 2043 : *maskp = 0;
971 : }
972 : else
973 : {
974 0 : *valuep = 0;
975 0 : *maskp = -1;
976 : }
977 2043 : }
978 :
979 : /* Meet operation, similar to ccp_lattice_meet, we xor values
980 : if this->value, value have different values at same bit positions, we want
981 : to drop that bit to varying. Return true if mask is changed.
982 : This function assumes that the lattice value is in CONSTANT state. If
983 : DROP_ALL_ONES, mask out any known bits with value one afterwards. */
984 :
bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
				unsigned precision, bool drop_all_ones)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  /* A bit becomes unknown when it is unknown on either side, or when the
     two sides disagree on its value.  */
  m_mask = (m_mask | mask) | (m_value ^ value);
  if (drop_all_ones)
    m_mask |= m_value;

  /* Bits at and above PRECISION carry no information; force them to
     unknown so they do not leak into the result.  */
  widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true);
  m_mask |= cap_mask;
  /* If no bit inside the precision remains known, degrade to BOTTOM.  */
  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  m_value &= ~m_mask;
  return m_mask != old_mask;
}
1004 :
1005 : /* Meet the bits lattice with operand
1006 : described by <value, mask, sgn, precision. */
1007 :
1008 : bool
1009 408798 : ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
1010 : unsigned precision)
1011 : {
1012 408798 : if (bottom_p ())
1013 : return false;
1014 :
1015 408798 : if (top_p ())
1016 : {
1017 119177 : if (wi::sext (mask, precision) == -1)
1018 47348 : return set_to_bottom ();
1019 71829 : return set_to_constant (value, mask);
1020 : }
1021 :
1022 289621 : return meet_with_1 (value, mask, precision, false);
1023 : }
1024 :
1025 : /* Meet bits lattice with the result of bit_value_binop (other, operand)
1026 : if code is binary operation or bit_value_unop (other) if code is unary op.
1027 : In the case when code is nop_expr, no adjustment is required. If
1028 : DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1029 :
bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
			      signop sgn, enum tree_code code, tree operand,
			      bool drop_all_ones)
{
  /* A BOTTOM source forces this lattice to BOTTOM as well.  */
  if (other.bottom_p ())
    return set_to_bottom ();

  /* Nothing to learn if we are already BOTTOM or the source is TOP.  */
  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      /* Binary op: fold OTHER's bits with OPERAND's bits through the
	 CCP-style bit_value_binop.  */
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
		       sgn, precision, other.get_value (), other.get_mask (),
		       TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);

      /* All bits unknown within the precision -> BOTTOM.  */
      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
		      &adjusted_mask, sgn, precision, other.get_value (),
		      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else
    /* Any other operation cannot be modelled on bits; give up.  */
    return set_to_bottom ();

  if (top_p ())
    {
      /* First value seen: initialize the CONSTANT state directly.  */
      if (drop_all_ones)
	{
	  adjusted_mask |= adjusted_value;
	  adjusted_value &= ~adjusted_mask;
	}
      /* Bits above PRECISION carry no information.  */
      widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true);
      adjusted_mask |= cap_mask;
      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision,
			drop_all_ones);
}
1087 :
1088 : /* Dump the contents of the list to FILE. */
1089 :
1090 : void
1091 115 : ipa_argagg_value_list::dump (FILE *f)
1092 : {
1093 115 : bool comma = false;
1094 319 : for (const ipa_argagg_value &av : m_elts)
1095 : {
1096 204 : fprintf (f, "%s %i[%u]=", comma ? "," : "",
1097 204 : av.index, av.unit_offset);
1098 204 : print_generic_expr (f, av.value);
1099 204 : if (av.by_ref)
1100 178 : fprintf (f, "(by_ref)");
1101 204 : if (av.killed)
1102 1 : fprintf (f, "(killed)");
1103 204 : comma = true;
1104 : }
1105 115 : fprintf (f, "\n");
1106 115 : }
1107 :
1108 : /* Dump the contents of the list to stderr. */
1109 :
void
ipa_argagg_value_list::debug ()
{
  /* Convenience wrapper intended to be called from a debugger.  */
  dump (stderr);
}
1115 :
1116 : /* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
1117 : NULL if there is no such constant. */
1118 :
const ipa_argagg_value *
ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
{
  /* The list is kept sorted by (index, unit_offset), so a binary search
     with a lexicographic comparator finds the candidate slot.  */
  ipa_argagg_value key;
  key.index = index;
  key.unit_offset = unit_offset;
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), key,
			[] (const ipa_argagg_value &elt,
			    const ipa_argagg_value &val)
			{
			  if (elt.index < val.index)
			    return true;
			  if (elt.index > val.index)
			    return false;
			  if (elt.unit_offset < val.unit_offset)
			    return true;
			  return false;
			});

  /* lower_bound only returns a position; verify it is an exact match.  */
  if (res == m_elts.end ()
      || res->index != index
      || res->unit_offset != unit_offset)
    res = nullptr;

  /* TODO: perhaps remove the check (that the underlying array is indeed
     sorted) if it turns out it can be too slow? */
  if (!flag_checking)
    return res;

  /* Checking mode: re-do the lookup with a linear scan, asserting the
     sort invariant along the way, and compare with the fast result.  */
  const ipa_argagg_value *slow_res = NULL;
  int prev_index = -1;
  unsigned prev_unit_offset = 0;
  for (const ipa_argagg_value &av : m_elts)
    {
      gcc_assert (prev_index < 0
		  || prev_index < av.index
		  || prev_unit_offset < av.unit_offset);
      prev_index = av.index;
      prev_unit_offset = av.unit_offset;
      if (av.index == index
	  && av.unit_offset == unit_offset)
	slow_res = &av;
    }
  gcc_assert (res == slow_res);

  return res;
}
1167 :
1168 : /* Return the first item describing a constant stored for parameter with INDEX,
1169 : regardless of offset or reference, or NULL if there is no such constant. */
1170 :
1171 : const ipa_argagg_value *
1172 226994 : ipa_argagg_value_list::get_elt_for_index (int index) const
1173 : {
1174 226994 : const ipa_argagg_value *res
1175 226994 : = std::lower_bound (m_elts.begin (), m_elts.end (), index,
1176 19279 : [] (const ipa_argagg_value &elt, unsigned idx)
1177 : {
1178 19279 : return elt.index < idx;
1179 : });
1180 226994 : if (res == m_elts.end ()
1181 226994 : || res->index != index)
1182 : res = nullptr;
1183 226994 : return res;
1184 : }
1185 :
1186 : /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
1187 : performing any check of whether value is passed by reference, or NULL_TREE
1188 : if there is no such constant. */
1189 :
1190 : tree
1191 39037 : ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
1192 : {
1193 39037 : const ipa_argagg_value *av = get_elt (index, unit_offset);
1194 39037 : return av ? av->value : NULL_TREE;
1195 : }
1196 :
1197 : /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
1198 : passed by reference or not according to BY_REF, or NULL_TREE if there is
1199 : no such constant. */
1200 :
1201 : tree
1202 28732835 : ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
1203 : bool by_ref) const
1204 : {
1205 28732835 : const ipa_argagg_value *av = get_elt (index, unit_offset);
1206 28732835 : if (av && av->by_ref == by_ref)
1207 1788535 : return av->value;
1208 : return NULL_TREE;
1209 : }
1210 :
1211 : /* Return true if all elements present in OTHER are also present in this
1212 : list. */
1213 :
bool
ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
{
  unsigned j = 0;
  for (unsigned i = 0; i < other.m_elts.size (); i++)
    {
      unsigned other_index = other.m_elts[i].index;
      unsigned other_offset = other.m_elts[i].unit_offset;

      /* Both lists are sorted by (index, unit_offset); advance J to the
	 first element of this list that is not smaller than the current
	 element of OTHER.  J never rewinds across iterations.  */
      while (j < m_elts.size ()
	     && (m_elts[j].index < other_index
		 || (m_elts[j].index == other_index
		     && m_elts[j].unit_offset < other_offset)))
	j++;

      /* The element must exist here with the same position, the same
	 by-reference flag and an IPA-CP-equal value.  */
      if (j >= m_elts.size ()
	  || m_elts[j].index != other_index
	  || m_elts[j].unit_offset != other_offset
	  || m_elts[j].by_ref != other.m_elts[i].by_ref
	  || !m_elts[j].value
	  || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
	return false;
    }
  return true;
}
1239 :
1240 : /* Push all items in this list that describe parameter SRC_INDEX into RES as
1241 : ones describing DST_INDEX while subtracting UNIT_DELTA from their unit
1242 : offsets but skip those which would end up with a negative offset. */
1243 :
void
ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
					     unsigned dest_index,
					     unsigned unit_delta,
					     vec<ipa_argagg_value> *res) const
{
  /* Start at the first element for SRC_INDEX; the list is sorted so all
     elements for this parameter are contiguous from here.  */
  const ipa_argagg_value *av = get_elt_for_index (src_index);
  if (!av)
    return;
  unsigned prev_unit_offset = 0;
  bool first = true;
  for (; av < m_elts.end (); ++av)
    {
      /* Past the run of SRC_INDEX elements; done.  */
      if (av->index > src_index)
	return;
      /* Skip elements whose adjusted offset would be negative.  */
      if (av->index == src_index
	  && (av->unit_offset >= unit_delta)
	  && av->value)
	{
	  ipa_argagg_value new_av;
	  gcc_checking_assert (av->value);
	  new_av.value = av->value;
	  new_av.unit_offset = av->unit_offset - unit_delta;
	  new_av.index = dest_index;
	  new_av.by_ref = av->by_ref;
	  gcc_assert (!av->killed);
	  new_av.killed = false;

	  /* Quick check that the offsets we push are indeed increasing.  */
	  gcc_assert (first
		      || new_av.unit_offset > prev_unit_offset);
	  prev_unit_offset = new_av.unit_offset;
	  first = false;

	  res->safe_push (new_av);
	}
    }
}
1282 :
1283 : /* Push to RES information about single lattices describing aggregate values in
1284 : PLATS as those describing parameter DEST_INDEX and the original offset minus
1285 : UNIT_DELTA. Return true if any item has been pushed to RES. */
1286 :
static bool
push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
			    unsigned unit_delta,
			    vec<ipa_argagg_value> *res)
{
  /* Nothing known for sure if the aggregate lattices contain a variable.  */
  if (plats->aggs_contain_variable)
    return false;

  bool pushed_sth = false;
  bool first = true;
  unsigned prev_unit_offset = 0;
  /* Walk the per-offset aggregate lattices, keeping only those that have
     collapsed to a single constant and whose adjusted offset stays
     non-negative.  */
  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
    if (aglat->is_single_const ()
	&& (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
      {
	ipa_argagg_value iav;
	iav.value = aglat->values->value;
	iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
	iav.index = dest_index;
	iav.by_ref = plats->aggs_by_ref;
	iav.killed = false;

	/* Offsets pushed must be strictly increasing.  */
	gcc_assert (first
		    || iav.unit_offset > prev_unit_offset);
	prev_unit_offset = iav.unit_offset;
	first = false;

	pushed_sth = true;
	res->safe_push (iav);
      }
  return pushed_sth;
}
1319 :
1320 : /* Turn all values in LIST that are not present in OTHER into NULL_TREEs.
1321 : Return the number of remaining valid entries. */
1322 :
static unsigned
intersect_argaggs_with (vec<ipa_argagg_value> &elts,
			const vec<ipa_argagg_value> &other)
{
  unsigned valid_entries = 0;
  unsigned j = 0;
  for (unsigned i = 0; i < elts.length (); i++)
    {
      /* Entries already invalidated in a previous intersection stay so.  */
      if (!elts[i].value)
	continue;

      unsigned this_index = elts[i].index;
      unsigned this_offset = elts[i].unit_offset;

      /* Both vectors are sorted by (index, unit_offset); advance J to the
	 first element of OTHER not smaller than the current entry.  */
      while (j < other.length ()
	     && (other[j].index < this_index
		 || (other[j].index == this_index
		     && other[j].unit_offset < this_offset)))
	j++;

      /* OTHER exhausted: nothing matching this or any later entry.  */
      if (j >= other.length ())
	{
	  elts[i].value = NULL_TREE;
	  continue;
	}

      /* Keep the entry only on an exact positional match with an
	 IPA-CP-equal value and matching by-reference flag.  */
      if (other[j].index == this_index
	  && other[j].unit_offset == this_offset
	  && other[j].by_ref == elts[i].by_ref
	  && other[j].value
	  && values_equal_for_ipcp_p (other[j].value, elts[i].value))
	valid_entries++;
      else
	elts[i].value = NULL_TREE;
    }
  return valid_entries;
}
1360 :
/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  If
   MAKE_SIMPLE_RECIPIENTS is true, set the lattices that can only hold one
   value to being recipients only, otherwise also set them to bottom.  */
1365 :
1366 : static inline bool
1367 168273 : set_all_contains_variable (class ipcp_param_lattices *plats,
1368 : bool make_simple_recipients = false)
1369 : {
1370 168273 : bool ret;
1371 168273 : ret = plats->itself.set_contains_variable ();
1372 168273 : ret |= plats->ctxlat.set_contains_variable ();
1373 168273 : ret |= set_agg_lats_contain_variable (plats);
1374 168273 : if (make_simple_recipients)
1375 : {
1376 28070 : ret |= plats->bits_lattice.set_recipient_only ();
1377 28070 : ret |= plats->m_value_range.set_recipient_only ();
1378 : }
1379 : else
1380 : {
1381 140203 : ret |= plats->bits_lattice.set_to_bottom ();
1382 140203 : ret |= plats->m_value_range.set_to_bottom ();
1383 : }
1384 168273 : return ret;
1385 : }
1386 :
1387 : /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1388 : points to by the number of callers to NODE. */
1389 :
1390 : static bool
1391 97387 : count_callers (cgraph_node *node, void *data)
1392 : {
1393 97387 : int *caller_count = (int *) data;
1394 :
1395 400774 : for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1396 : /* Local thunks can be handled transparently, but if the thunk cannot
1397 : be optimized out, count it as a real use. */
1398 303387 : if (!cs->caller->thunk || !cs->caller->local)
1399 303387 : ++*caller_count;
1400 97387 : return false;
1401 : }
1402 :
1403 : /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1404 : the one caller of some other node. Set the caller's corresponding flag. */
1405 :
1406 : static bool
1407 54379 : set_single_call_flag (cgraph_node *node, void *)
1408 : {
1409 54379 : cgraph_edge *cs = node->callers;
1410 : /* Local thunks can be handled transparently, skip them. */
1411 54379 : while (cs && cs->caller->thunk && cs->caller->local)
1412 0 : cs = cs->next_caller;
1413 54379 : if (cs)
1414 53829 : if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
1415 : {
1416 53828 : info->node_calling_single_call = true;
1417 53828 : return true;
1418 : }
1419 : return false;
1420 : }
1421 :
1422 : /* Initialize ipcp_lattices. */
1423 :
static void
initialize_node_lattices (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  struct cgraph_edge *ie;
  /* DISABLE -> all lattices go to BOTTOM; VARIABLE -> all lattices are
     marked as containing an unknown value.  */
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
						true);
      /* With a single caller, remember that fact on the caller side too.  */
      if (caller_count == 1)
	node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
						  NULL, true);
      else if (caller_count == 0)
	{
	  gcc_checking_assert (!opt_for_fn (node->decl, flag_toplevel_reorder));
	  variable = true;
	}
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
	 functions are not called.  We will compensate this by cloning
	 later.  */
      if (ipcp_versionable_function_p (node)
	  && ipcp_cloning_candidate_p (node))
	variable = true;
      else
	disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
	       node->dump_name ());
      if (disable || variable)
	fprintf (dump_file, "  Marking all lattices as %s\n",
		 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;

  clone_info *cinfo = clone_info::get (node);

  /* If this node is already a clone with removed parameters, lattices of
     the dropped parameters must go straight to BOTTOM below.  */
  if (!disable && cinfo && cinfo->param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert ((cinfo->param_adjustments->m_always_copy_start
		   == ipa_get_param_count (info))
		  || cinfo->param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
	  && !node->alias && !node->thunk)
	{
	  bool first = true;
	  for (int j = 0; j < ipa_get_param_count (info); j++)
	    {
	      if (j < (int) surviving_params.length ()
		  && surviving_params[j])
		continue;
	      if (first)
		{
		  fprintf (dump_file,
			   "  The following parameters are dead on arrival:");
		  first = false;
		}
	      fprintf (dump_file, " %u", j);
	    }
	  if (!first)
	    fprintf (dump_file, "\n");
	}
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      tree type = ipa_get_type (info, i);
      /* Parameters that are disabled, untyped, or did not survive a
	 previous clone adjustment get all their lattices set to BOTTOM.  */
      if (disable
	  || !ipa_get_type (info, i)
	  || (pre_modified && (surviving_params.length () <= (unsigned) i
			       || !surviving_params[i])))
	{
	  plats->itself.set_to_bottom ();
	  plats->ctxlat.set_to_bottom ();
	  set_agg_lats_to_bottom (plats);
	  plats->bits_lattice.set_to_bottom ();
	  plats->m_value_range.init (type);
	  plats->m_value_range.set_to_bottom ();
	}
      else
	{
	  plats->m_value_range.init (type);
	  if (variable)
	    set_all_contains_variable (plats, true);
	}
    }

  /* Note parameters that feed polymorphic indirect calls so that
     devirtualization opportunities are tracked for them.  */
  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->param_index >= 0
	&& is_a <cgraph_polymorphic_indirect_info *> (ie->indirect_info))
      ipa_get_parm_lattices (info,
			     ie->indirect_info->param_index)->virt_call = 1;
}
1542 :
1543 : /* Return VALUE if it is NULL_TREE or if it can be directly safely IPA-CP
1544 : propagated to a parameter of type PARAM_TYPE, or return a fold-converted
1545 : VALUE to PARAM_TYPE if that is possible. Return NULL_TREE otherwise. */
1546 :
1547 : static tree
1548 5109014 : ipacp_value_safe_for_type (tree param_type, tree value)
1549 : {
1550 5109014 : if (!value)
1551 : return NULL_TREE;
1552 5108680 : tree val_type = TREE_TYPE (value);
1553 5108680 : if (param_type == val_type
1554 5108680 : || useless_type_conversion_p (param_type, val_type))
1555 5105325 : return value;
1556 3355 : if (fold_convertible_p (param_type, value))
1557 3150 : return fold_convert (param_type, value);
1558 : else
1559 : return NULL_TREE;
1560 : }
1561 :
1562 : /* Return the result of a (possibly arithmetic) operation determined by OPCODE
1563 : on the constant value INPUT. OPERAND is 2nd operand for binary operation
1564 : and is required for binary operations. RES_TYPE, required when opcode is
1565 : not NOP_EXPR, is the type in which any operation is to be performed. Return
1566 : NULL_TREE if that cannot be determined or be considered an interprocedural
1567 : invariant. */
1568 :
1569 : static tree
1570 70688 : ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1571 : tree res_type)
1572 : {
1573 70688 : tree res;
1574 :
1575 70688 : if (opcode == NOP_EXPR)
1576 : return input;
1577 7118 : if (!is_gimple_ip_invariant (input))
1578 : return NULL_TREE;
1579 :
1580 7118 : if (opcode == ASSERT_EXPR)
1581 : {
1582 3934 : if (values_equal_for_ipcp_p (input, operand))
1583 : return input;
1584 : else
1585 : return NULL_TREE;
1586 : }
1587 :
1588 3184 : if (TREE_CODE_CLASS (opcode) == tcc_unary)
1589 113 : res = fold_unary (opcode, res_type, input);
1590 : else
1591 3071 : res = fold_binary (opcode, res_type, input, operand);
1592 :
1593 3184 : if (res && !is_gimple_ip_invariant (res))
1594 : return NULL_TREE;
1595 :
1596 : return res;
1597 : }
1598 :
1599 : /* Return the result of an ancestor jump function JFUNC on the constant value
1600 : INPUT. Return NULL_TREE if that cannot be determined. */
1601 :
static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
	return input;
      /* Ancestor offsets are in bits; build &MEM_REF[input + bytes].  */
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
		     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
				  build_int_cst (ptr_type_node, byte_offset)));
    }
  /* A null pointer stays null if the jump function guarantees so.  */
  else if (ipa_get_jf_ancestor_keep_null (jfunc)
	   && zerop (input))
    return input;
  else
    return NULL_TREE;
}
1623 :
1624 : /* Determine whether JFUNC evaluates to a single known constant value and if
1625 : so, return it. Otherwise return NULL. INFO describes the caller node or
1626 : the one it is inlined to, so that pass-through jump functions can be
1627 : evaluated. PARM_TYPE is the type of the parameter to which the result is
1628 : passed. */
1629 :
tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
		      tree parm_type)
{
  if (!parm_type)
    return NULL_TREE;
  if (jfunc->type == IPA_JF_CONST)
    return ipacp_value_safe_for_type (parm_type, ipa_get_jf_constant (jfunc));
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      /* IDX is the caller-side formal parameter the jump function reads.  */
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
	/* In a specialized clone, known constants are recorded directly.  */
	input = info->known_csts[idx];
      else
	{
	  /* Otherwise consult the propagation lattice of the caller.  */
	  ipcp_lattice<tree> *lat;

	  if (info->lattices.is_empty ()
	      || idx >= ipa_get_param_count (info))
	    return NULL_TREE;
	  lat = ipa_get_scalar_lat (info, idx);
	  if (!lat->is_single_const ())
	    return NULL_TREE;
	  input = lat->values->value;
	}

      if (!input)
	return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  /* Apply the pass-through's arithmetic operation, if any.  */
	  enum tree_code opcode = ipa_get_jf_pass_through_operation (jfunc);
	  tree op2 = ipa_get_jf_pass_through_operand (jfunc);
	  tree op_type
	    = (opcode == NOP_EXPR) ? NULL_TREE
	    : ipa_get_jf_pass_through_op_type (jfunc);
	  tree cstval = ipa_get_jf_arith_result (opcode, input, op2, op_type);
	  return ipacp_value_safe_for_type (parm_type, cstval);
	}
      else
	return ipacp_value_safe_for_type (parm_type,
					  ipa_get_jf_ancestor_result (jfunc,
								      input));
    }
  else
    return NULL_TREE;
}
1685 :
1686 : /* Determine whether JFUNC evaluates to single known polymorphic context, given
1687 : that INFO describes the caller node or the one it is inlined to, CS is the
1688 : call graph edge corresponding to JFUNC and CSIDX index of the described
1689 : parameter. */
1690 :
ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
			ipa_jump_func *jfunc)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  /* Start from any context already recorded on the edge itself.  */
  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  /* Arithmetic pass-throughs cannot carry a polymorphic context.  */
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    return ctx;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
	}
      if (info->ipcp_orig_node)
	{
	  /* In a specialized clone, known contexts are recorded directly.  */
	  if (info->known_contexts.exists ())
	    srcctx = info->known_contexts[srcidx];
	}
      else
	{
	  /* Otherwise consult the propagation lattice of the caller.  */
	  if (info->lattices.is_empty ()
	      || srcidx >= ipa_get_param_count (info))
	    return ctx;
	  ipcp_lattice<ipa_polymorphic_call_context> *lat;
	  lat = ipa_get_poly_ctx_lat (info, srcidx);
	  if (!lat->is_single_const ())
	    return ctx;
	  srcctx = lat->values->value;
	}
      if (srcctx.useless_p ())
	return ctx;
      /* Ancestor jumps shift the context by the recorded offset.  */
      if (jfunc->type == IPA_JF_ANCESTOR)
	srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
	srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}
1749 :
1750 : /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1751 : DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1752 : the result is a range that is not VARYING nor UNDEFINED. */
1753 :
1754 : bool
1755 8887532 : ipa_vr_operation_and_type_effects (vrange &dst_vr,
1756 : const vrange &src_vr,
1757 : enum tree_code operation,
1758 : tree dst_type, tree src_type)
1759 : {
1760 16750761 : if (!ipa_vr_supported_type_p (dst_type)
1761 0 : || !ipa_vr_supported_type_p (src_type))
1762 : return false;
1763 :
1764 8887532 : range_op_handler handler (operation);
1765 8887532 : if (!handler)
1766 : return false;
1767 :
1768 8887532 : value_range varying (dst_type);
1769 8887532 : varying.set_varying (dst_type);
1770 :
1771 8887532 : return (handler.operand_check_p (dst_type, src_type, dst_type)
1772 8887532 : && handler.fold_range (dst_vr, dst_type, src_vr, varying)
1773 8887530 : && !dst_vr.varying_p ()
1774 17775002 : && !dst_vr.undefined_p ());
1775 8887532 : }
1776 :
1777 : /* Same as above, but the SRC_VR argument is an IPA_VR which must
1778 : first be extracted onto a vrange. */
1779 :
bool
ipa_vr_operation_and_type_effects (vrange &dst_vr,
				   const ipa_vr &src_vr,
				   enum tree_code operation,
				   tree dst_type, tree src_type)
{
  /* Materialize the compact ipa_vr into a full value_range first, then
     defer to the vrange overload above.  */
  value_range tmp;
  src_vr.get_vrange (tmp);
  return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
					    dst_type, src_type);
}
1791 :
1792 : /* Given a PASS_THROUGH jump function JFUNC that takes as its source SRC_VR of
1793 : SRC_TYPE and the result needs to be DST_TYPE, if any value range information
1794 : can be deduced at all, intersect VR with it. CONTEXT_NODE is the call graph
1795 : node representing the function for which optimization flags should be
1796 : evaluated. */
1797 :
static void
ipa_vr_intersect_with_arith_jfunc (vrange &vr,
				   ipa_jump_func *jfunc,
				   cgraph_node *context_node,
				   const value_range &src_vr,
				   tree src_type,
				   tree dst_type)
{
  /* Without usable source information there is nothing to refine.  */
  if (src_vr.undefined_p () || src_vr.varying_p ())
    return;

  enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
  if (TREE_CODE_CLASS (operation) == tcc_unary)
    {
      value_range op_res;
      const value_range *inter_vr;
      if (operation != NOP_EXPR)
	{
	  /* First apply the unary operation in its own type...  */
	  tree operation_type = ipa_get_jf_pass_through_op_type (jfunc);
	  op_res.set_varying (operation_type);
	  if (!ipa_vr_operation_and_type_effects (op_res, src_vr, operation,
						  operation_type, src_type))
	    return;
	  if (src_type == dst_type)
	    {
	      vr.intersect (op_res);
	      return;
	    }
	  inter_vr = &op_res;
	  src_type = operation_type;
	}
      else
	inter_vr = &src_vr;

      /* ...then convert the intermediate result to the destination type.  */
      value_range tmp_res (dst_type);
      if (ipa_vr_operation_and_type_effects (tmp_res, *inter_vr, NOP_EXPR,
					     dst_type, src_type))
	vr.intersect (tmp_res);
      return;
    }

  /* Binary operation: fold the source range with the range of the
     constant second operand.  */
  tree operand = ipa_get_jf_pass_through_operand (jfunc);
  range_op_handler handler (operation);
  if (!handler)
    return;
  value_range op_vr (TREE_TYPE (operand));
  ipa_get_range_from_ip_invariant (op_vr, operand, context_node);

  tree operation_type = ipa_get_jf_pass_through_op_type (jfunc);
  value_range op_res (operation_type);
  if (!ipa_vr_supported_type_p (operation_type)
      || !handler.operand_check_p (operation_type, src_type, op_vr.type ())
      || !handler.fold_range (op_res, operation_type, src_vr, op_vr))
    return;

  /* Finally convert to the destination type and intersect.  */
  value_range tmp_res (dst_type);
  if (ipa_vr_operation_and_type_effects (tmp_res, op_res, NOP_EXPR, dst_type,
					 operation_type))
    vr.intersect (tmp_res);
}
1857 1340 : }
1858 :
1859 : /* Determine range of JFUNC given that INFO describes the caller node or
1860 : the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1861 : and PARM_TYPE of the parameter. */
1862 :
1863 : void
1864 11518871 : ipa_value_range_from_jfunc (vrange &vr,
1865 : ipa_node_params *info, cgraph_edge *cs,
1866 : ipa_jump_func *jfunc, tree parm_type)
1867 : {
1868 11518871 : vr.set_varying (parm_type);
1869 :
 : /* First use any range recorded directly in the jump function. */
1870 11518871 : if (jfunc->m_vr && jfunc->m_vr->known_p ())
1871 8007872 : ipa_vr_operation_and_type_effects (vr,
1872 : *jfunc->m_vr,
1873 : NOP_EXPR, parm_type,
1874 8007872 : jfunc->m_vr->type ());
 : /* A singleton range cannot be narrowed any further. */
1875 11518871 : if (vr.singleton_p ())
1876 : return;
1877 :
1878 11518731 : if (jfunc->type == IPA_JF_PASS_THROUGH)
1879 : {
 : /* Look up the transformation summary of the function actually
 : containing the call, which is the inlined-to node if CS has been
 : inlined. */
1880 2201612 : ipcp_transformation *sum
1881 2201612 : = ipcp_get_transformation_summary (cs->caller->inlined_to
1882 : ? cs->caller->inlined_to
1883 : : cs->caller);
1884 2201612 : if (!sum || !sum->m_vr)
1885 2123883 : return;
1886 :
1887 118934 : int idx = ipa_get_jf_pass_through_formal_id (jfunc);
1888 :
1889 118934 : if (!(*sum->m_vr)[idx].known_p ())
1890 : return;
1891 77729 : tree src_type = ipa_get_type (info, idx);
1892 77729 : value_range srcvr;
1893 77729 : (*sum->m_vr)[idx].get_vrange (srcvr);
1894 :
1895 77729 : ipa_vr_intersect_with_arith_jfunc (vr, jfunc, cs->caller, srcvr, src_type,
1896 : parm_type);
1897 77729 : }
1898 : }
1899 :
1900 : /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1901 : single known constant value and if so, return it. Otherwise return NULL.
1902 : NODE and INFO describes the caller node or the one it is inlined to, and
1903 : its related info. */
1904 :
1905 : tree
1906 3018326 : ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
1907 : const ipa_agg_jf_item *item)
1908 : {
1909 3018326 : tree value = NULL_TREE;
1910 3018326 : int src_idx;
1911 :
 : /* Reject unknown items and offsets that are negative or would not fit
 : into the unsigned unit-offset representation used later. */
1912 3018326 : if (item->offset < 0
1913 2969305 : || item->jftype == IPA_JF_UNKNOWN
1914 2832821 : || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
1915 : return NULL_TREE;
1916 :
1917 2832821 : if (item->jftype == IPA_JF_CONST)
1918 2497279 : return item->value.constant;
1919 :
1920 335542 : gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1921 : || item->jftype == IPA_JF_LOAD_AGG);
1922 :
1923 335542 : src_idx = item->value.pass_through.formal_id;
1924 :
 : /* In an IPA-CP clone, consult the clone's known constants or its
 : transformation summary; otherwise fall back to propagation lattices. */
1925 335542 : if (info->ipcp_orig_node)
1926 : {
1927 16469 : if (item->jftype == IPA_JF_PASS_THROUGH)
1928 3796 : value = info->known_csts[src_idx];
1929 12673 : else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
1930 : {
1931 12673 : ipa_argagg_value_list avl (ts);
1932 12673 : value = avl.get_value (src_idx,
1933 12673 : item->value.load_agg.offset / BITS_PER_UNIT,
1934 12673 : item->value.load_agg.by_ref);
1935 : }
1936 : }
1937 319073 : else if (!info->lattices.is_empty ())
1938 : {
1939 223900 : class ipcp_param_lattices *src_plats
1940 223900 : = ipa_get_parm_lattices (info, src_idx);
1941 :
1942 223900 : if (item->jftype == IPA_JF_PASS_THROUGH)
1943 : {
1944 133583 : struct ipcp_lattice<tree> *lat = &src_plats->itself;
1945 :
1946 519088 : if (!lat->is_single_const ())
1947 : return NULL_TREE;
1948 :
1949 0 : value = lat->values->value;
1950 : }
1951 90317 : else if (src_plats->aggs
1952 9658 : && !src_plats->aggs_bottom
1953 9658 : && !src_plats->aggs_contain_variable
1954 1499 : && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
1955 : {
1956 : struct ipcp_agg_lattice *aglat;
1957 :
 : /* Aggregate lattices are sorted by offset; stop once past it. */
1958 2366 : for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
1959 : {
1960 2366 : if (aglat->offset > item->value.load_agg.offset)
1961 : break;
1962 :
1963 2334 : if (aglat->offset == item->value.load_agg.offset)
1964 : {
1965 1467 : if (aglat->is_single_const ())
1966 7 : value = aglat->values->value;
1967 : break;
1968 : }
1969 : }
1970 : }
1971 : }
1972 :
1973 16508 : if (!value)
1974 191550 : return NULL_TREE;
1975 :
1976 10409 : if (item->jftype == IPA_JF_LOAD_AGG)
1977 : {
1978 8098 : tree load_type = item->value.load_agg.type;
1979 8098 : tree value_type = TREE_TYPE (value);
1980 :
1981 : /* Ensure value type is compatible with load type. */
1982 8098 : if (!useless_type_conversion_p (load_type, value_type))
1983 : return NULL_TREE;
1984 : }
1985 :
 : /* Apply any arithmetic encoded in the jump function and verify the
 : result is safe for the item's type. */
1986 20818 : tree cstval = ipa_get_jf_arith_result (item->value.pass_through.operation,
1987 : value,
1988 10409 : item->value.pass_through.operand,
1989 10409 : item->value.pass_through.op_type);
1990 10409 : return ipacp_value_safe_for_type (item->type, cstval);
1991 : }
1992 :
1993 : /* Process all items in AGG_JFUNC relative to caller (or the node the original
1994 : caller is inlined to) NODE which described by INFO and push the results to
1995 : RES as describing values passed in parameter DST_INDEX. */
1996 :
1997 : void
1998 14029983 : ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
1999 : ipa_agg_jump_function *agg_jfunc,
2000 : unsigned dst_index,
2001 : vec<ipa_argagg_value> *res)
2002 : {
2003 14029983 : unsigned prev_unit_offset = 0;
2004 14029983 : bool first = true;
2005 :
2006 18414956 : for (const ipa_agg_jf_item &item : agg_jfunc->items)
2007 : {
 : /* Skip items that do not evaluate to a known constant. */
2008 2167063 : tree value = ipa_agg_value_from_jfunc (info, node, &item)
2009 2167063 : if (!value)
2010 489118 : continue;
2011 :
2012 1677945 : ipa_argagg_value iav;
2013 1677945 : iav.value = value;
2014 1677945 : iav.unit_offset = item.offset / BITS_PER_UNIT;
2015 1677945 : iav.index = dst_index;
2016 1677945 : iav.by_ref = agg_jfunc->by_ref;
2017 1677945 : iav.killed = 0;
2018 :
 : /* Items must arrive in strictly increasing unit-offset order. */
2019 1677945 : gcc_assert (first
2020 : || iav.unit_offset > prev_unit_offset);
2021 1677945 : prev_unit_offset = iav.unit_offset;
2022 1677945 : first = false;
2023 :
2024 1677945 : res->safe_push (iav);
2025 : }
2026 14029983 : }
2027 :
2028 : /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
2029 : bottom, not containing a variable component and without any known value at
2030 : the same time. */
2031 :
2032 : DEBUG_FUNCTION void
2033 127749 : ipcp_verify_propagated_values (void)
2034 : {
2035 127749 : struct cgraph_node *node;
2036 :
2037 1391255 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2038 : {
2039 1263506 : ipa_node_params *info = ipa_node_params_sum->get (node);
 : /* Functions not optimized or with IPA-CP disabled have no lattices
 : to verify. */
2040 1263506 : if (!opt_for_fn (node->decl, flag_ipa_cp)
2041 1263506 : || !opt_for_fn (node->decl, optimize))
2042 8826 : continue;
2043 1254680 : int i, count = ipa_get_param_count (info);
2044 :
2045 3564121 : for (i = 0; i < count; i++)
2046 : {
2047 2309441 : ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
2048 :
 : /* A lattice that is neither bottom nor variable must hold at
 : least one value, otherwise it is stuck in TOP. */
2049 2309441 : if (!lat->bottom
2050 219801 : && !lat->contains_variable
2051 32015 : && lat->values_count == 0)
2052 : {
2053 0 : if (dump_file)
2054 : {
2055 0 : symtab->dump (dump_file);
2056 0 : fprintf (dump_file, "\nIPA lattices after constant "
2057 : "propagation, before gcc_unreachable:\n");
2058 0 : print_all_lattices (dump_file, true, false);
2059 : }
2060 :
2061 0 : gcc_unreachable ();
2062 : }
2063 : }
2064 : }
2065 127749 : }
2066 :
2067 : /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
2068 :
2069 : static bool
2070 2690 : values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
2071 : ipa_polymorphic_call_context y)
2072 : {
 : /* Polymorphic contexts compare via their own equality predicate. */
2073 2164 : return x.equal_to (y);
2074 : }
2075 :
2076 :
2077 : /* Add a new value source to the value represented by THIS, marking that a
2078 : value comes from edge CS and (if the underlying jump function is a
2079 : pass-through or an ancestor one) from a caller value SRC_VAL of a caller
2080 : parameter described by SRC_INDEX. OFFSET is negative if the source was the
2081 : scalar value of the parameter itself or the offset within an aggregate. */
2082 :
2083 : template <typename valtype>
2084 : void
2085 338586 : ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
2086 : int src_idx, HOST_WIDE_INT offset)
2087 : {
2088 : ipcp_value_source<valtype> *src;
2089 :
 : /* Sources come from a dedicated object pool; placement-new there. */
2090 486192 : src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
2091 486192 : src->offset = offset;
2092 486192 : src->cs = cs;
2093 486192 : src->val = src_val;
2094 486192 : src->index = src_idx;
2095 :
 : /* Push onto the head of this value's singly-linked source list. */
2096 486192 : src->next = sources;
2097 486192 : sources = src;
2098 : }
2099 :
2100 : /* Allocate a new ipcp_value holding a tree constant, initialize its value to
2101 : SOURCE and clear all other fields. */
2102 :
2103 : static ipcp_value<tree> *
2104 140012 : allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
2105 : {
2106 140012 : ipcp_value<tree> *val;
2107 :
 : /* Constant values live in their own pool; default-construct in place. */
2108 280024 : val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
2109 140012 : val->value = cst;
2110 140012 : val->self_recursion_generated_level = same_lat_gen_level;
2111 140012 : return val;
2112 : }
2113 :
2114 : /* Allocate a new ipcp_value holding a polymorphic context, initialize its
2115 : value to SOURCE and clear all other fields. */
2116 :
2117 : static ipcp_value<ipa_polymorphic_call_context> *
2118 7594 : allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
2119 : unsigned same_lat_gen_level)
2120 : {
2121 7594 : ipcp_value<ipa_polymorphic_call_context> *val;
2122 :
 : /* Polymorphic-context values have their own pool, mirroring the tree
 : overload above. */
2123 7594 : val = new (ipcp_poly_ctx_values_pool.allocate ())
2124 7594 : ipcp_value<ipa_polymorphic_call_context>();
2125 7594 : val->value = ctx;
2126 7594 : val->self_recursion_generated_level = same_lat_gen_level;
2127 7594 : return val;
2128 : }
2129 :
2130 : /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
2131 : SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
2132 : meaning. OFFSET -1 means the source is scalar and not a part of an
2133 : aggregate. If non-NULL, VAL_P records address of existing or newly added
2134 : ipcp_value.
2135 :
2136 : If the value is generated for a self-recursive call as a result of an
2137 : arithmetic pass-through jump-function acting on a value in the same lattice,
2138 : SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
2139 : zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
2140 :
2141 : template <typename valtype>
2142 : bool
2143 498544 : ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
2144 : ipcp_value<valtype> *src_val,
2145 : int src_idx, HOST_WIDE_INT offset,
2146 : ipcp_value<valtype> **val_p,
2147 : unsigned same_lat_gen_level)
2148 : {
2149 498544 : ipcp_value<valtype> *val, *last_val = NULL;
2150 :
2151 498544 : if (val_p)
2152 1257 : *val_p = NULL;
2153 :
2154 498544 : if (bottom)
2155 : return false;
2156 :
 : /* If an equal value is already present, merely record the new source;
 : the lattice itself does not change (hence return false). */
2157 967412 : for (val = values; val; last_val = val, val = val->next)
2158 818442 : if (values_equal_for_ipcp_p (val->value, newval))
2159 : {
2160 346218 : if (val_p)
2161 416 : *val_p = val;
2162 :
2163 346218 : if (val->self_recursion_generated_level < same_lat_gen_level)
2164 179 : val->self_recursion_generated_level = same_lat_gen_level;
2165 :
 : /* Within an SCC avoid adding the same (edge, source) pair twice,
 : which would make the source list grow without bound. */
2166 346218 : if (ipa_edge_within_scc (cs))
2167 : {
2168 : ipcp_value_source<valtype> *s;
2169 48493 : for (s = val->sources; s; s = s->next)
2170 44327 : if (s->cs == cs && s->val == src_val)
2171 : break;
2172 11798 : if (s)
2173 : return false;
2174 : }
2175 :
2176 338586 : val->add_source (cs, src_val, src_idx, offset);
2177 338586 : return false;
2178 : }
2179 :
 : /* Cap the number of distinct values per lattice (unless the value was
 : generated for self-recursion); overflow collapses to bottom. */
2180 148970 : if (!same_lat_gen_level && values_count >= opt_for_fn (cs->callee->decl,
2181 : param_ipa_cp_value_list_size))
2182 : {
2183 : /* We can only free sources, not the values themselves, because sources
2184 : of other values in this SCC might point to them. */
2185 12258 : for (val = values; val; val = val->next)
2186 : {
2187 40407 : while (val->sources)
2188 : {
2189 29513 : ipcp_value_source<valtype> *src = val->sources;
2190 29513 : val->sources = src->next;
2191 29513 : ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2192 : }
2193 : }
2194 1364 : values = NULL;
2195 1364 : return set_to_bottom ();
2196 : }
2197 :
2198 147606 : values_count++;
2199 147606 : val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2200 147606 : val->add_source (cs, src_val, src_idx, offset);
2201 147606 : val->next = NULL;
2202 :
2203 : /* Add the new value to end of value list, which can reduce iterations
2204 : of propagation stage for recursive function. */
2205 147606 : if (last_val)
2206 45211 : last_val->next = val;
2207 : else
2208 102395 : values = val;
2209 :
2210 147606 : if (val_p)
2211 841 : *val_p = val;
2212 :
2213 : return true;
2214 : }
2215 :
2216 : /* A helper function that returns result of operation specified by OPCODE on
2217 : the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2218 : value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2219 : acting as its second operand. OP_TYPE is the type in which the operation is
2220 : performed. */
2221 :
2222 : static tree
2223 21345 : get_val_across_arith_op (enum tree_code opcode,
2224 : tree opnd1_type,
2225 : tree opnd2,
2226 : ipcp_value<tree> *src_val,
2227 : tree op_type)
2228 : {
2229 21345 : tree opnd1 = src_val->value;
2230 :
2231 : /* Skip source values that is incompatible with specified type. */
2232 21345 : if (opnd1_type
2233 21345 : && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2234 : return NULL_TREE;
2235 :
 : /* Fold the (unary or binary) operation on the constant operands. */
2236 21345 : return ipa_get_jf_arith_result (opcode, opnd1, opnd2, op_type);
2237 : }
2238 :
2239 : /* Propagate values through an arithmetic transformation described by a jump
2240 : function associated with edge CS, taking values from SRC_LAT and putting
2241 : them into DEST_LAT. OPND1_TYPE, if non-NULL, is the expected type for the
2242 : values in SRC_LAT. OPND2 is a constant value if transformation is a binary
2243 : operation. SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes
2244 : lattice of a part of an aggregate, otherwise it should be -1. SRC_IDX is
2245 : the index of the source parameter. OP_TYPE is the type in which the
2246 : operation is performed and can be NULL when OPCODE is NOP_EXPR. RES_TYPE is
2247 : the value type of result being propagated into. Return true if DEST_LAT
2248 : changed. */
2249 :
2250 : static bool
2251 76330 : propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2252 : enum tree_code opcode,
2253 : tree opnd1_type,
2254 : tree opnd2,
2255 : ipcp_lattice<tree> *src_lat,
2256 : ipcp_lattice<tree> *dest_lat,
2257 : HOST_WIDE_INT src_offset,
2258 : int src_idx,
2259 : tree op_type,
2260 : tree res_type)
2261 : {
2262 76330 : ipcp_value<tree> *src_val;
2263 76330 : bool ret = false;
2264 :
2265 : /* Due to circular dependencies, propagating within an SCC through arithmetic
2266 : transformation would create infinite number of values. But for
2267 : self-feeding recursive function, we could allow propagation in a limited
2268 : count, and this can enable a simple kind of recursive function versioning.
2269 : For other scenario, we would just make lattices bottom. */
2270 76330 : if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2271 : {
2272 2184 : int i;
2273 :
2274 2184 : int max_recursive_depth = opt_for_fn(cs->caller->decl,
2275 : param_ipa_cp_max_recursive_depth);
 : /* Only the self-feeding case (same lattice on both sides) with a
 : positive depth budget is versioned; everything else degrades. */
2276 2184 : if (src_lat != dest_lat || max_recursive_depth < 1)
2277 1666 : return dest_lat->set_contains_variable ();
2278 :
2279 : /* No benefit if recursive execution is in low probability. */
2280 1300 : if (cs->sreal_frequency () * 100
2281 2600 : <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2282 : param_ipa_cp_min_recursive_probability)
2283 89 : return dest_lat->set_contains_variable ();
2284 :
2285 1211 : auto_vec<ipcp_value<tree> *, 8> val_seeds;
2286 :
2287 2258 : for (src_val = src_lat->values; src_val; src_val = src_val->next)
2288 : {
2289 : /* Now we do not use self-recursively generated value as propagation
2290 : source, this is absolutely conservative, but could avoid explosion
2291 : of lattice's value space, especially when one recursive function
2292 : calls another recursive. */
2293 1740 : if (src_val->self_recursion_generated_p ())
2294 : {
2295 909 : ipcp_value_source<tree> *s;
2296 :
2297 : /* If the lattice has already been propagated for the call site,
2298 : no need to do that again. */
2299 1422 : for (s = src_val->sources; s; s = s->next)
2300 1206 : if (s->cs == cs)
2301 693 : return dest_lat->set_contains_variable ();
2302 : }
2303 : else
2304 831 : val_seeds.safe_push (src_val);
2305 : }
2306 :
2307 1036 : gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2308 :
2309 : /* Recursively generate lattice values with a limited count. */
2310 836 : FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2311 : {
 : /* Each iteration feeds the previously added value back through
 : the arithmetic op, chaining up to MAX_RECURSIVE_DEPTH values. */
2312 1416 : for (int j = 1; j < max_recursive_depth; j++)
2313 : {
2314 1261 : tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2315 : src_val, op_type);
2316 1261 : cstval = ipacp_value_safe_for_type (res_type, cstval);
2317 1261 : if (!cstval)
2318 : break;
2319 :
2320 1257 : ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2321 : src_offset, &src_val, j);
2322 1257 : gcc_checking_assert (src_val);
2323 : }
2324 : }
2325 518 : ret |= dest_lat->set_contains_variable ();
2326 1211 : }
2327 : else
2328 94355 : for (src_val = src_lat->values; src_val; src_val = src_val->next)
2329 : {
2330 : /* Now we do not use self-recursively generated value as propagation
2331 : source, otherwise it is easy to make value space of normal lattice
2332 : overflow. */
2333 20209 : if (src_val->self_recursion_generated_p ())
2334 : {
2335 125 : ret |= dest_lat->set_contains_variable ();
2336 125 : continue;
2337 : }
2338 :
2339 20084 : tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2340 : src_val, op_type);
2341 20084 : cstval = ipacp_value_safe_for_type (res_type, cstval);
2342 20084 : if (cstval)
2343 19883 : ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2344 : src_offset);
2345 : else
2346 201 : ret |= dest_lat->set_contains_variable ();
2347 : }
2348 :
2349 : return ret;
2350 : }
2351 :
2352 : /* Propagate values through a pass-through jump function JFUNC associated with
2353 : edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2354 : is the index of the source parameter. PARM_TYPE is the type of the
2355 : parameter to which the result is passed. */
2356 :
2357 : static bool
2358 71706 : propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2359 : ipcp_lattice<tree> *src_lat,
2360 : ipcp_lattice<tree> *dest_lat, int src_idx,
2361 : tree parm_type)
2362 : {
2363 71706 : gcc_checking_assert (parm_type);
2364 71706 : enum tree_code opcode = ipa_get_jf_pass_through_operation (jfunc);
 : /* Plain pass-throughs (NOP_EXPR) carry no operation type. */
2365 71706 : tree op_type = (opcode == NOP_EXPR) ? NULL_TREE
2366 2415 : : ipa_get_jf_pass_through_op_type (jfunc);
 : /* SRC_OFFSET of -1 marks a scalar (non-aggregate) source. */
2367 71706 : return propagate_vals_across_arith_jfunc (cs, opcode, NULL_TREE,
2368 : ipa_get_jf_pass_through_operand (jfunc),
2369 : src_lat, dest_lat, -1, src_idx, op_type,
2370 71706 : parm_type);
2371 : }
2372 :
2373 : /* Propagate values through an ancestor jump function JFUNC associated with
2374 : edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2375 : is the index of the source parameter. */
2376 :
2377 : static bool
2378 2159 : propagate_vals_across_ancestor (struct cgraph_edge *cs,
2379 : struct ipa_jump_func *jfunc,
2380 : ipcp_lattice<tree> *src_lat,
2381 : ipcp_lattice<tree> *dest_lat, int src_idx,
2382 : tree param_type)
2383 : {
2384 2159 : ipcp_value<tree> *src_val;
2385 2159 : bool ret = false;
2386 :
 : /* Ancestor jumps within an SCC are not propagated at all. */
2387 2159 : if (ipa_edge_within_scc (cs))
2388 14 : return dest_lat->set_contains_variable ();
2389 :
2390 2456 : for (src_val = src_lat->values; src_val; src_val = src_val->next)
2391 : {
2392 311 : tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2393 311 : t = ipacp_value_safe_for_type (param_type, t);
2394 311 : if (t)
2395 253 : ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2396 : else
2397 58 : ret |= dest_lat->set_contains_variable ();
2398 : }
2399 :
2400 : return ret;
2401 : }
2402 :
2403 : /* Propagate scalar values across jump function JFUNC that is associated with
2404 : edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2405 : parameter to which the result is passed. */
2406 :
2407 : static bool
2408 3835028 : propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2409 : struct ipa_jump_func *jfunc,
2410 : ipcp_lattice<tree> *dest_lat,
2411 : tree param_type)
2412 : {
 : /* Bottom destinations can never change; bail out early. */
2413 3835028 : if (dest_lat->bottom)
2414 : return false;
2415 :
2416 816354 : if (jfunc->type == IPA_JF_CONST)
2417 : {
2418 369932 : tree val = ipa_get_jf_constant (jfunc);
2419 369932 : val = ipacp_value_safe_for_type (param_type, val);
2420 369932 : if (val)
2421 369915 : return dest_lat->add_value (val, cs, NULL, 0);
2422 : else
2423 17 : return dest_lat->set_contains_variable ();
2424 : }
2425 446422 : else if (jfunc->type == IPA_JF_PASS_THROUGH
2426 269709 : || jfunc->type == IPA_JF_ANCESTOR)
2427 : {
2428 181207 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2429 181207 : ipcp_lattice<tree> *src_lat;
2430 181207 : int src_idx;
2431 181207 : bool ret;
2432 :
2433 181207 : if (jfunc->type == IPA_JF_PASS_THROUGH)
2434 176713 : src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2435 : else
2436 4494 : src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2437 :
2438 181207 : src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2439 181207 : if (src_lat->bottom)
2440 107194 : return dest_lat->set_contains_variable ();
2441 :
2442 : /* If we would need to clone the caller and cannot, do not propagate. */
2443 74013 : if (!ipcp_versionable_function_p (cs->caller)
2444 74013 : && (src_lat->contains_variable
2445 132 : || (src_lat->values_count > 1)))
2446 148 : return dest_lat->set_contains_variable ();
2447 :
2448 73865 : if (jfunc->type == IPA_JF_PASS_THROUGH)
2449 71706 : ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2450 : dest_lat, src_idx,
2451 : param_type);
2452 : else
2453 2159 : ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2454 : src_idx, param_type);
2455 :
 : /* Variability in the source must also be reflected downstream. */
2456 73865 : if (src_lat->contains_variable)
2457 64351 : ret |= dest_lat->set_contains_variable ();
2458 :
2459 73865 : return ret;
2460 : }
2461 :
2462 : /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2463 : use it for indirect inlining), we should propagate them too. */
2464 265215 : return dest_lat->set_contains_variable ();
2465 : }
2466 :
2467 : /* Propagate scalar values across jump function JFUNC that is associated with
2468 : edge CS and describes argument IDX and put the values into DEST_LAT. */
2469 :
2470 : static bool
2471 3835028 : propagate_context_across_jump_function (cgraph_edge *cs,
2472 : ipa_jump_func *jfunc, int idx,
2473 : ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2474 : {
2475 3835028 : if (dest_lat->bottom)
2476 : return false;
2477 912786 : ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2478 912786 : bool ret = false;
2479 912786 : bool added_sth = false;
2480 912786 : bool type_preserved = true;
2481 :
 : /* Context recorded directly on the call edge, if any, is combined with
 : whatever is propagated from the caller's lattice below. */
2482 912786 : ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2483 926448 : = ipa_get_ith_polymorhic_call_context (args, idx);
2484 :
2485 13662 : if (edge_ctx_ptr)
2486 13662 : edge_ctx = *edge_ctx_ptr;
2487 :
2488 912786 : if (jfunc->type == IPA_JF_PASS_THROUGH
2489 735582 : || jfunc->type == IPA_JF_ANCESTOR)
2490 : {
2491 181794 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2492 181794 : int src_idx;
2493 181794 : ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2494 :
2495 : /* TODO: Once we figure out how to propagate speculations, it will
2496 : probably be a good idea to switch to speculation if type_preserved is
2497 : not set instead of punting. */
2498 181794 : if (jfunc->type == IPA_JF_PASS_THROUGH)
2499 : {
2500 177204 : if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2501 6896 : goto prop_fail;
2502 170308 : type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2503 170308 : src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2504 : }
2505 : else
2506 : {
2507 4590 : type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2508 4590 : src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2509 : }
2510 :
2511 174898 : src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2512 : /* If we would need to clone the caller and cannot, do not propagate. */
2513 174898 : if (!ipcp_versionable_function_p (cs->caller)
2514 174898 : && (src_lat->contains_variable
2515 14003 : || (src_lat->values_count > 1)))
2516 2481 : goto prop_fail;
2517 :
2518 172417 : ipcp_value<ipa_polymorphic_call_context> *src_val;
2519 173686 : for (src_val = src_lat->values; src_val; src_val = src_val->next)
2520 : {
2521 1269 : ipa_polymorphic_call_context cur = src_val->value;
2522 :
2523 1269 : if (!type_preserved)
2524 854 : cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2525 1269 : if (jfunc->type == IPA_JF_ANCESTOR)
2526 318 : cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2527 : /* TODO: In cases we know how the context is going to be used,
2528 : we can improve the result by passing proper OTR_TYPE. */
2529 1269 : cur.combine_with (edge_ctx);
2530 2538 : if (!cur.useless_p ())
2531 : {
2532 809 : if (src_lat->contains_variable
2533 809 : && !edge_ctx.equal_to (cur))
2534 245 : ret |= dest_lat->set_contains_variable ();
2535 809 : ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2536 809 : added_sth = true;
2537 : }
2538 : }
2539 : }
2540 :
 : /* Fallback: if nothing was propagated above, use the edge context alone
 : or mark the destination as variable. */
2541 730992 : prop_fail:
2542 181794 : if (!added_sth)
2543 : {
2544 912036 : if (!edge_ctx.useless_p ())
2545 8365 : ret |= dest_lat->add_value (edge_ctx, cs);
2546 : else
2547 903671 : ret |= dest_lat->set_contains_variable ();
2548 : }
2549 :
2550 : return ret;
2551 : }
2552 :
2553 : /* Propagate bits across jfunc that is associated with
2554 : edge cs and update dest_lattice accordingly. */
2555 :
2556 : bool
2557 3835028 : propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2558 : ipa_jump_func *jfunc,
2559 : ipcp_bits_lattice *dest_lattice)
2560 : {
2561 3835028 : if (dest_lattice->bottom_p ())
2562 : return false;
2563 :
2564 529480 : enum availability availability;
2565 529480 : cgraph_node *callee = cs->callee->function_symbol (&availability);
2566 529480 : ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2567 529480 : tree parm_type = ipa_get_type (callee_info, idx);
2568 :
2569 : /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2570 : transform for these cases. Similarly, we can have bad type mismatches
2571 : with LTO, avoid doing anything with those too. */
2572 529480 : if (!parm_type
2573 529480 : || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2574 : {
2575 29184 : if (dump_file && (dump_flags & TDF_DETAILS))
2576 11 : fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2577 : "param %i of %s is NULL or unsuitable for bits propagation\n",
2578 11 : idx, cs->callee->dump_name ());
2579 :
2580 29184 : return dest_lattice->set_to_bottom ();
2581 : }
2582 :
2583 500296 : if (jfunc->type == IPA_JF_PASS_THROUGH
2584 405150 : || jfunc->type == IPA_JF_ANCESTOR)
2585 : {
2586 97615 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2587 97615 : tree operand = NULL_TREE;
2588 97615 : tree op_type = NULL_TREE;
2589 97615 : enum tree_code code;
2590 97615 : unsigned src_idx;
2591 97615 : bool keep_null = false;
2592 :
2593 97615 : if (jfunc->type == IPA_JF_PASS_THROUGH)
2594 : {
2595 95146 : code = ipa_get_jf_pass_through_operation (jfunc);
2596 95146 : src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2597 95146 : if (code != NOP_EXPR)
2598 : {
2599 1860 : operand = ipa_get_jf_pass_through_operand (jfunc);
2600 1860 : op_type = ipa_get_jf_pass_through_op_type (jfunc);
2601 : }
2602 : }
2603 : else
2604 : {
 : /* Ancestor jumps are modeled as a POINTER_PLUS_EXPR by the
 : ancestor's byte offset. */
2605 2469 : code = POINTER_PLUS_EXPR;
2606 2469 : src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2607 2469 : unsigned HOST_WIDE_INT offset
2608 2469 : = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2609 2469 : keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2610 2469 : operand = build_int_cstu (size_type_node, offset);
2611 : }
2612 :
2613 97615 : class ipcp_param_lattices *src_lats
2614 97615 : = ipa_get_parm_lattices (caller_info, src_idx);
2615 :
2616 : /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2617 : for eg consider:
2618 : int f(int x)
2619 : {
2620 : g (x & 0xff);
2621 : }
2622 : Assume lattice for x is bottom, however we can still propagate
2623 : result of x & 0xff == 0xff, which gets computed during ccp1 pass
2624 : and we store it in jump function during analysis stage. */
2625 :
2626 97615 : if (!src_lats->bits_lattice.bottom_p ()
2627 97615 : && !src_lats->bits_lattice.recipient_only_p ())
2628 : {
2629 21524 : if (!op_type)
2630 20357 : op_type = ipa_get_type (caller_info, src_idx);
2631 :
2632 21524 : unsigned precision = TYPE_PRECISION (op_type);
2633 21524 : signop sgn = TYPE_SIGN (op_type);
2634 21524 : bool drop_all_ones
2635 21524 : = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2636 :
2637 21524 : return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2638 21524 : sgn, code, operand, drop_all_ones);
2639 : }
2640 : }
2641 :
 : /* Otherwise derive known bits from the value range stored in the jump
 : function, if there is one. */
2642 478772 : value_range vr (parm_type);
2643 478772 : if (jfunc->m_vr)
2644 : {
2645 408798 : jfunc->m_vr->get_vrange (vr);
2646 408798 : if (!vr.undefined_p () && !vr.varying_p ())
2647 : {
2648 408798 : irange_bitmask bm = vr.get_bitmask ();
2649 408798 : widest_int mask
2650 408798 : = widest_int::from (bm.mask (), TYPE_SIGN (parm_type));
2651 408798 : widest_int value
2652 408798 : = widest_int::from (bm.value (), TYPE_SIGN (parm_type));
2653 408798 : return dest_lattice->meet_with (value, mask,
2654 408798 : TYPE_PRECISION (parm_type));
2655 408798 : }
2656 : }
2657 69974 : return dest_lattice->set_to_bottom ();
2658 478772 : }
2659 :
2660 : /* Propagate value range across jump function JFUNC that is associated with
2661 : edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2662 : accordingly. */
2663 :
static bool
propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
				   class ipcp_param_lattices *dest_plats,
				   tree param_type)
{
  ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;

  /* Bottom cannot get any lower; nothing to do.  */
  if (dest_lat->bottom_p ())
    return false;

  /* Value ranges can only be propagated for parameter types we support.  */
  if (!param_type
      || !ipa_vr_supported_type_p (param_type))
    return dest_lat->set_to_bottom ();

  /* Start from the range recorded in the jump function (converted to the
     callee parameter type), or VARYING if there is none.  */
  value_range vr (param_type);
  vr.set_varying (param_type);
  if (jfunc->m_vr)
    ipa_vr_operation_and_type_effects (vr, *jfunc->m_vr, NOP_EXPR,
				       param_type,
				       jfunc->m_vr->type ());

  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);
      tree operand_type = ipa_get_type (caller_info, src_idx);

      if (src_lats->m_value_range.bottom_p ()
	  || src_lats->m_value_range.recipient_only_p ())
	return dest_lat->set_to_bottom ();

      /* Refine VR with the caller's lattice, but avoid applying non-trivial
	 arithmetic within an SCC where it could cycle indefinitely.  */
      if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR
	  || !ipa_edge_within_scc (cs))
	ipa_vr_intersect_with_arith_jfunc (vr, jfunc, cs->caller,
					   src_lats->m_value_range.m_vr,
					   operand_type, param_type);
    }

  /* Only meet with a range that actually carries information.  */
  if (!vr.undefined_p () && !vr.varying_p ())
    return dest_lat->meet_with (vr);
  else
    return dest_lat->set_to_bottom ();
}
2709 :
2710 : /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2711 : NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2712 : other cases, return false). If there are no aggregate items, set
2713 : aggs_by_ref to NEW_AGGS_BY_REF. */
2714 :
2715 : static bool
2716 41826 : set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2717 : bool new_aggs_by_ref)
2718 : {
2719 0 : if (dest_plats->aggs)
2720 : {
2721 22145 : if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2722 : {
2723 0 : set_agg_lats_to_bottom (dest_plats);
2724 0 : return true;
2725 : }
2726 : }
2727 : else
2728 19681 : dest_plats->aggs_by_ref = new_aggs_by_ref;
2729 : return false;
2730 : }
2731 :
2732 : /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2733 : already existing lattice for the given OFFSET and SIZE, marking all skipped
2734 : lattices as containing variable and checking for overlaps. If there is no
2735 : already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2736 : it with offset, size and contains_variable to PRE_EXISTING, and return true,
   unless there are too many already.  If there are too many, return false.  If
2738 : there are overlaps turn whole DEST_PLATS to bottom and return false. If any
2739 : skipped lattices were newly marked as containing variable, set *CHANGE to
2740 : true. MAX_AGG_ITEMS is the maximum number of lattices. */
2741 :
static bool
merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
		     HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
		     struct ipcp_agg_lattice ***aglat,
		     bool pre_existing, bool *change, int max_agg_items)
{
  gcc_checking_assert (offset >= 0);

  /* Skip lattices lying entirely before OFFSET, marking them as containing
     variable data.  A lattice straddling OFFSET is an overlap.  */
  while (**aglat && (**aglat)->offset < offset)
    {
      if ((**aglat)->offset + (**aglat)->size > offset)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      *change |= (**aglat)->set_contains_variable ();
      *aglat = &(**aglat)->next;
    }

  if (**aglat && (**aglat)->offset == offset)
    {
      /* An existing lattice at this offset must describe exactly the same
	 number of bits, otherwise drop all aggregates to bottom.  */
      if ((**aglat)->size != val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      gcc_assert (!(**aglat)->next
		  || (**aglat)->next->offset >= offset + val_size);
      return true;
    }
  else
    {
      struct ipcp_agg_lattice *new_al;

      /* A new lattice here would overlap the next existing one.  */
      if (**aglat && (**aglat)->offset < offset + val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      /* Respect the cap on the number of tracked aggregate lattices.  */
      if (dest_plats->aggs_count == max_agg_items)
	return false;
      dest_plats->aggs_count++;
      new_al = ipcp_agg_lattice_pool.allocate ();

      new_al->offset = offset;
      new_al->size = val_size;
      /* Lattices created during a re-scan of pre-existing chains start out
	 as containing variable data.  */
      new_al->contains_variable = pre_existing;

      /* Splice the new lattice into the offset-sorted chain.  */
      new_al->next = **aglat;
      **aglat = new_al;
      return true;
    }
}
2795 :
2796 : /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2797 : containing an unknown value. */
2798 :
2799 : static bool
2800 41808 : set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2801 : {
2802 41808 : bool ret = false;
2803 44254 : while (aglat)
2804 : {
2805 2446 : ret |= aglat->set_contains_variable ();
2806 2446 : aglat = aglat->next;
2807 : }
2808 41808 : return ret;
2809 : }
2810 :
2811 : /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2812 : DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2813 : parameter used for lattice value sources. Return true if DEST_PLATS changed
2814 : in any way. */
2815 :
static bool
merge_aggregate_lattices (struct cgraph_edge *cs,
			  class ipcp_param_lattices *dest_plats,
			  class ipcp_param_lattices *src_plats,
			  int src_idx, HOST_WIDE_INT offset_delta)
{
  bool pre_existing = dest_plats->aggs != NULL;
  struct ipcp_agg_lattice **dst_aglat;
  bool ret = false;

  /* The by-reference flavor of both sides must agree.  */
  if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
    return true;
  if (src_plats->aggs_bottom)
    return set_agg_lats_contain_variable (dest_plats);
  if (src_plats->aggs_contain_variable)
    ret |= set_agg_lats_contain_variable (dest_plats);
  dst_aglat = &dest_plats->aggs;

  int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				  param_ipa_max_agg_items);
  for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
       src_aglat;
       src_aglat = src_aglat->next)
    {
      HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;

      /* Parts of the aggregate before OFFSET_DELTA are not visible in the
	 callee.  */
      if (new_offset < 0)
	continue;
      if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
			       &dst_aglat, pre_existing, &ret, max_agg_items))
	{
	  struct ipcp_agg_lattice *new_al = *dst_aglat;

	  dst_aglat = &(*dst_aglat)->next;
	  if (src_aglat->bottom)
	    {
	      ret |= new_al->set_contains_variable ();
	      continue;
	    }
	  if (src_aglat->contains_variable)
	    ret |= new_al->set_contains_variable ();
	  /* Add all source values, remembering their origin so that the
	     propagation graph can be traversed later.  */
	  for (ipcp_value<tree> *val = src_aglat->values;
	       val;
	       val = val->next)
	    ret |= new_al->add_value (val->value, cs, val, src_idx,
				      src_aglat->offset);
	}
      else if (dest_plats->aggs_bottom)
	return true;
    }
  /* Destination lattices past the last merged one have no source
     counterpart and thus contain variable data.  */
  ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
  return ret;
}
2869 :
2870 : /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2871 : pass-through JFUNC and if so, whether it has conform and conforms to the
2872 : rules about propagating values passed by reference. */
2873 :
2874 : static bool
2875 170129 : agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2876 : struct ipa_jump_func *jfunc)
2877 : {
2878 170129 : return src_plats->aggs
2879 170129 : && (!src_plats->aggs_by_ref
2880 4963 : || ipa_get_jf_pass_through_agg_preserved (jfunc));
2881 : }
2882 :
2883 : /* Propagate values through ITEM, jump function for a part of an aggregate,
2884 : into corresponding aggregate lattice AGLAT. CS is the call graph edge
2885 : associated with the jump function. Return true if AGLAT changed in any
2886 : way. */
2887 :
static bool
propagate_aggregate_lattice (struct cgraph_edge *cs,
			     struct ipa_agg_jf_item *item,
			     struct ipcp_agg_lattice *aglat)
{
  class ipa_node_params *caller_info;
  class ipcp_param_lattices *src_plats;
  struct ipcp_lattice<tree> *src_lat;
  HOST_WIDE_INT src_offset;
  int src_idx;
  tree load_type;
  bool ret;

  /* A constant item goes straight into the lattice.  */
  if (item->jftype == IPA_JF_CONST)
    {
      tree value = item->value.constant;

      gcc_checking_assert (is_gimple_ip_invariant (value));
      return aglat->add_value (value, cs, NULL, 0);
    }

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
		       || item->jftype == IPA_JF_LOAD_AGG);

  caller_info = ipa_node_params_sum->get (cs->caller);
  src_idx = item->value.pass_through.formal_id;
  src_plats = ipa_get_parm_lattices (caller_info, src_idx);

  if (item->jftype == IPA_JF_PASS_THROUGH)
    {
      /* Source is the scalar lattice of the caller parameter itself.  */
      load_type = NULL_TREE;
      src_lat = &src_plats->itself;
      src_offset = -1;
    }
  else
    {
      /* IPA_JF_LOAD_AGG: source is an aggregate lattice of the caller
	 parameter at the load offset; find a matching one.  */
      HOST_WIDE_INT load_offset = item->value.load_agg.offset;
      struct ipcp_agg_lattice *src_aglat;

      for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
	if (src_aglat->offset >= load_offset)
	  break;

      load_type = item->value.load_agg.type;
      /* The source lattice must match exactly in offset, size and
	 by-reference flavor, otherwise nothing useful can be said.  */
      if (!src_aglat
	  || src_aglat->offset > load_offset
	  || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
	  || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
	return aglat->set_contains_variable ();

      src_lat = src_aglat;
      src_offset = load_offset;
    }

  /* Only propagate through non-versionable functions when the source is a
     single constant (so no clone would be needed).  */
  if (src_lat->bottom
      || (!ipcp_versionable_function_p (cs->caller)
	  && !src_lat->is_single_const ()))
    return aglat->set_contains_variable ();

  ret = propagate_vals_across_arith_jfunc (cs,
					   item->value.pass_through.operation,
					   load_type,
					   item->value.pass_through.operand,
					   src_lat, aglat,
					   src_offset,
					   src_idx,
					   item->value.pass_through.op_type,
					   item->type);

  if (src_lat->contains_variable)
    ret |= aglat->set_contains_variable ();

  return ret;
}
2962 :
/* Propagate aggregate values across jump function JFUNC that is associated
   with edge CS and update the aggregate lattices in DEST_PLATS.  */
2965 :
static bool
propagate_aggs_across_jump_function (struct cgraph_edge *cs,
				     struct ipa_jump_func *jfunc,
				     class ipcp_param_lattices *dest_plats)
{
  bool ret = false;

  /* Bottom cannot get any lower; nothing to do.  */
  if (dest_plats->aggs_bottom)
    return false;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      /* Simple pass-through: merge the caller parameter's aggregate
	 lattices wholesale if the rules for by-reference data permit.  */
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (agg_pass_through_permissible_p (src_plats, jfunc))
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
					   src_idx, 0);
	  return ret;
	}
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      /* Ancestor jump function: merge the caller's lattices shifted by the
	 ancestor offset; only meaningful for by-reference aggregates.  */
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (src_plats->aggs && src_plats->aggs_by_ref)
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
					   ipa_get_jf_ancestor_offset (jfunc));
	}
      else if (!src_plats->aggs_by_ref)
	ret |= set_agg_lats_to_bottom (dest_plats);
      else
	ret |= set_agg_lats_contain_variable (dest_plats);
      return ret;
    }

  if (jfunc->agg.items)
    {
      /* Propagate individual aggregate jump function items into matching
	 destination lattices, creating them as needed.  */
      bool pre_existing = dest_plats->aggs != NULL;
      struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
      struct ipa_agg_jf_item *item;
      int i;

      if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
	return true;

      int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				      param_ipa_max_agg_items);
      FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
	{
	  HOST_WIDE_INT val_size;

	  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
	    continue;
	  val_size = tree_to_shwi (TYPE_SIZE (item->type));

	  if (merge_agg_lats_step (dest_plats, item->offset, val_size,
				   &aglat, pre_existing, &ret, max_agg_items))
	    {
	      ret |= propagate_aggregate_lattice (cs, item, *aglat);
	      aglat = &(*aglat)->next;
	    }
	  else if (dest_plats->aggs_bottom)
	    return true;
	}

      /* Lattices beyond the last item processed got no information.  */
      ret |= set_chain_of_aglats_contains_variable (*aglat);
    }
  else
    ret |= set_agg_lats_contain_variable (dest_plats);

  return ret;
}
3054 :
3055 : /* Return true if on the way cfrom CS->caller to the final (non-alias and
3056 : non-thunk) destination, the call passes through a thunk. */
3057 :
3058 : static bool
3059 1894878 : call_passes_through_thunk (cgraph_edge *cs)
3060 : {
3061 1894878 : cgraph_node *alias_or_thunk = cs->callee;
3062 2026850 : while (alias_or_thunk->alias)
3063 131972 : alias_or_thunk = alias_or_thunk->get_alias_target ();
3064 1894878 : return alias_or_thunk->thunk;
3065 : }
3066 :
3067 : /* Propagate constants from the caller to the callee of CS. INFO describes the
3068 : caller. */
3069 :
static bool
propagate_constants_across_call (struct cgraph_edge *cs)
{
  class ipa_node_params *callee_info;
  enum availability availability;
  cgraph_node *callee;
  class ipa_edge_args *args;
  bool ret = false;
  int i, args_count, parms_count;

  /* Only propagate into callees with a body we can analyze.  */
  callee = cs->callee->function_symbol (&availability);
  if (!callee->definition)
    return false;
  gcc_checking_assert (callee->has_gimple_body_p ());
  callee_info = ipa_node_params_sum->get (callee);
  if (!callee_info)
    return false;

  args = ipa_edge_args_sum->get (cs);
  parms_count = ipa_get_param_count (callee_info);
  if (parms_count == 0)
    return false;
  /* Without jump functions or with IPA-CP/optimization disabled in the
     caller, all we can do is mark everything as unknown.  */
  if (!args
      || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
      || !opt_for_fn (cs->caller->decl, optimize))
    {
      for (i = 0; i < parms_count; i++)
	ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
								 i));
      return ret;
    }
  args_count = ipa_get_cs_argument_count (args);

  /* If this call goes through a thunk we must not propagate to the first (0th)
     parameter.  However, we might need to uncover a thunk from below a series
     of aliases first.  */
  if (call_passes_through_thunk (cs))
    {
      ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
							       0));
      i = 1;
    }
  else
    i = 0;

  /* Propagate every flavor of information for each argument that has a
     corresponding formal parameter.  */
  for (; (i < args_count) && (i < parms_count); i++)
    {
      struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
      class ipcp_param_lattices *dest_plats;
      tree param_type = ipa_get_type (callee_info, i);

      dest_plats = ipa_get_parm_lattices (callee_info, i);
      /* An interposable callee may be replaced at link time, so assume
	 nothing about its parameters.  */
      if (availability == AVAIL_INTERPOSABLE)
	ret |= set_all_contains_variable (dest_plats);
      else
	{
	  ret |= propagate_scalar_across_jump_function (cs, jump_func,
							&dest_plats->itself,
							param_type);
	  ret |= propagate_context_across_jump_function (cs, jump_func, i,
							 &dest_plats->ctxlat);
	  ret
	    |= propagate_bits_across_jump_function (cs, i, jump_func,
						    &dest_plats->bits_lattice);
	  ret |= propagate_aggs_across_jump_function (cs, jump_func,
						      dest_plats);
	  if (opt_for_fn (callee->decl, flag_ipa_vrp))
	    ret |= propagate_vr_across_jump_function (cs, jump_func,
						      dest_plats, param_type);
	  else
	    ret |= dest_plats->m_value_range.set_to_bottom ();
	}
    }
  /* Parameters with no corresponding actual argument are unknown.  */
  for (; i < parms_count; i++)
    ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));

  return ret;
}
3148 :
3149 : /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
3150 : KNOWN_CONTEXTS, and known aggregates either in AVS or KNOWN_AGGS return
3151 : the destination. The latter three can be NULL. If AGG_REPS is not NULL,
3152 : KNOWN_AGGS is ignored. */
3153 :
static tree
ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
				const vec<tree> &known_csts,
				const vec<ipa_polymorphic_call_context> &known_contexts,
				const ipa_argagg_value_list &avs,
				bool *speculative)
{
  int param_index = ie->indirect_info->param_index;
  *speculative = false;

  /* The call target does not depend on a formal parameter we track.  */
  if (param_index == -1)
    return NULL_TREE;

  if (cgraph_simple_indirect_info *sii
      = dyn_cast <cgraph_simple_indirect_info *> (ie->indirect_info))
    {
      /* Simple (non-polymorphic) indirect call: look for a known function
	 pointer, either loaded from an aggregate or passed directly.  */
      tree t = NULL;

      if (sii->agg_contents)
	{
	  t = NULL;
	  if ((unsigned) param_index < known_csts.length ()
	      && known_csts[param_index])
	    t = ipa_find_agg_cst_from_init (known_csts[param_index],
					    sii->offset,
					    sii->by_ref);

	  if (!t && sii->guaranteed_unmodified)
	    t = avs.get_value (param_index, sii->offset / BITS_PER_UNIT,
			       sii->by_ref);
	}
      else if ((unsigned) param_index < known_csts.length ())
	t = known_csts[param_index];

      if (t
	  && TREE_CODE (t) == ADDR_EXPR
	  && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
	return TREE_OPERAND (t, 0);
      else
	return NULL_TREE;
    }

  if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
    return NULL_TREE;

  /* From here on we deal with a polymorphic (virtual) call.  */
  cgraph_polymorphic_indirect_info *pii
    = as_a <cgraph_polymorphic_indirect_info *> (ie->indirect_info);
  if (!pii->usable_p ())
    return NULL_TREE;

  HOST_WIDE_INT anc_offset = pii->offset;
  tree t = NULL;
  tree target = NULL;
  if ((unsigned) param_index < known_csts.length ()
      && known_csts[param_index])
    t = ipa_find_agg_cst_from_init (known_csts[param_index], anc_offset, true);

  /* Try to work out value of virtual table pointer value in replacements
     or known aggregate values.  */
  if (!t)
    t = avs.get_value (param_index, anc_offset / BITS_PER_UNIT, true);

  /* If we found the virtual table pointer, lookup the target.  */
  if (t)
    {
      tree vtable;
      unsigned HOST_WIDE_INT offset;
      if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
	{
	  bool can_refer;
	  target = gimple_get_virt_method_for_vtable (pii->otr_token, vtable,
						      offset, &can_refer);
	  if (can_refer)
	    {
	      if (!target
		  || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
		  || !possible_polymorphic_call_target_p
		       (ie, cgraph_node::get (target)))
		{
		  /* Do not speculate builtin_unreachable, it is stupid!  */
		  if (pii->vptr_changed)
		    return NULL;
		  target = ipa_impossible_devirt_target (ie, target);
		}
	      *speculative = pii->vptr_changed;
	      if (!*speculative)
		return target;
	    }
	}
    }

  /* Do we know the constant value of pointer?  */
  if (!t && (unsigned) param_index < known_csts.length ())
    t = known_csts[param_index];

  /* Build the polymorphic call context from whatever we know, combining
     the known context with type information derived from T.  */
  ipa_polymorphic_call_context context;
  if (known_contexts.length () > (unsigned int) param_index)
    {
      context = known_contexts[param_index];
      context.offset_by (anc_offset);
      if (pii->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      pii->otr_type);
      if (t)
	{
	  ipa_polymorphic_call_context ctx2
	    = ipa_polymorphic_call_context (t, pii->otr_type, anc_offset);
	  if (!ctx2.useless_p ())
	    context.combine_with (ctx2, pii->otr_type);
	}
    }
  else if (t)
    {
      context = ipa_polymorphic_call_context (t, pii->otr_type, anc_offset);
      if (pii->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      pii->otr_type);
    }
  else
    return NULL_TREE;

  vec <cgraph_node *>targets;
  bool final;

  targets = possible_polymorphic_call_targets (pii->otr_type, pii->otr_token,
					       context, &final);
  if (!final || targets.length () > 1)
    {
      /* More than one possible target; at best we can devirtualize
	 speculatively.  */
      struct cgraph_node *node;
      if (*speculative)
	return target;
      if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
	  || ie->speculative || !ie->maybe_hot_p ())
	return NULL;
      node = try_speculative_devirtualization (pii->otr_type, pii->otr_token,
					       context);
      if (node)
	{
	  *speculative = true;
	  target = node->decl;
	}
      else
	return NULL;
    }
  else
    {
      *speculative = false;
      if (targets.length () == 1)
	target = targets[0]->decl;
      else
	/* An empty final target list means the call is unreachable.  */
	target = ipa_impossible_devirt_target (ie, NULL_TREE);
    }

  if (target && !possible_polymorphic_call_target_p (ie,
						     cgraph_node::get (target)))
    {
      if (*speculative)
	return NULL;
      target = ipa_impossible_devirt_target (ie, target);
    }

  return target;
}
3317 :
3318 : /* If an indirect edge IE can be turned into a direct one based on data in
   AVALS, return the destination.  Store into *SPECULATIVE a boolean determining
3320 : whether the discovered target is only speculative guess. */
3321 :
3322 : tree
3323 1422249 : ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3324 : ipa_call_arg_values *avals,
3325 : bool *speculative)
3326 : {
3327 1422249 : ipa_argagg_value_list avl (avals);
3328 1422249 : return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3329 1422249 : avals->m_known_contexts,
3330 1422249 : avl, speculative);
3331 : }
3332 :
3333 : /* Calculate devirtualization time bonus for NODE, assuming we know information
3334 : about arguments stored in AVALS.
3335 :
3336 : FIXME: This function will also consider devirtualization of calls that are
3337 : known to be dead in the clone. */
3338 :
static sreal
devirtualization_time_bonus (struct cgraph_node *node,
			     ipa_auto_call_arg_values *avals)
{
  struct cgraph_edge *ie;
  sreal res = 0;

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    {
      struct cgraph_node *callee;
      class ipa_fn_summary *isummary;
      enum availability avail;
      tree target;
      bool speculative;

      ipa_argagg_value_list avl (avals);
      target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
					       avals->m_known_contexts,
					       avl, &speculative);
      if (!target)
	continue;

      /* Only bare minimum benefit for clearly un-inlineable targets.  */
      res = res + ie->combined_sreal_frequency ();
      callee = cgraph_node::get (target);
      if (!callee || !callee->definition)
	continue;
      callee = callee->function_symbol (&avail);
      if (avail < AVAIL_AVAILABLE)
	continue;
      isummary = ipa_fn_summaries->get (callee);
      if (!isummary || !isummary->inlinable)
	continue;

      int savings = 0;
      int size = ipa_size_summaries->get (callee)->size;
      /* FIXME: The values below need re-considering and perhaps also
	 integrating into the cost metrics, at least in some very basic way.  */
      /* Smaller targets are more likely to be inlined after
	 devirtualization, so reward them more; speculative targets get
	 half the bonus.  */
      int max_inline_insns_auto
	= opt_for_fn (callee->decl, param_max_inline_insns_auto);
      if (size <= max_inline_insns_auto / 4)
	savings = 31 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto / 2)
	savings = 15 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto
	       || DECL_DECLARED_INLINE_P (callee->decl))
	savings = 7 / ((int)speculative + 1);
      else
	continue;
      res = res + ie->combined_sreal_frequency () * (sreal) savings;
    }

  return res;
}
3393 :
3394 : /* Return time bonus incurred because of hints stored in ESTIMATES. */
3395 :
3396 : static sreal
3397 279891 : hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3398 : {
3399 279891 : sreal result = 0;
3400 279891 : ipa_hints hints = estimates.hints;
3401 279891 : if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3402 26751 : result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3403 :
3404 279891 : sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3405 :
3406 279891 : if (hints & INLINE_HINT_loop_iterations)
3407 18732 : result += estimates.loops_with_known_iterations * bonus_for_one;
3408 :
3409 279891 : if (hints & INLINE_HINT_loop_stride)
3410 10610 : result += estimates.loops_with_known_strides * bonus_for_one;
3411 :
3412 279891 : return result;
3413 : }
3414 :
3415 : /* If there is a reason to penalize the function described by INFO in the
3416 : cloning goodness evaluation, do so. */
3417 :
3418 : static inline sreal
3419 94124 : incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3420 : sreal evaluation)
3421 : {
3422 94124 : if (info->node_within_scc && !info->node_is_self_scc)
3423 1988 : evaluation = (evaluation
3424 1988 : * (100 - opt_for_fn (node->decl,
3425 3976 : param_ipa_cp_recursion_penalty))) / 100;
3426 :
3427 94124 : if (info->node_calling_single_call)
3428 5771 : evaluation = (evaluation
3429 5771 : * (100 - opt_for_fn (node->decl,
3430 5771 : param_ipa_cp_single_call_penalty)))
3431 11542 : / 100;
3432 :
3433 94124 : return evaluation;
3434 : }
3435 :
3436 : /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3437 : and SIZE_COST and with the sum of frequencies of incoming edges to the
3438 : potential new clone in FREQUENCIES. CUR_SWEEP is the number of the current
3439 : sweep of IPA-CP over the call-graph in the decision stage. */
3440 :
static bool
good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
			    sreal freq_sum, profile_count count_sum,
			    int size_cost, bool called_without_ipa_profile,
			    int cur_sweep)
{
  gcc_assert (count_sum.ipa () == count_sum);
  if (count_sum.quality () == AFDO)
    count_sum = count_sum.force_nonzero ();
  if (time_benefit == 0
      || !opt_for_fn (node->decl, flag_ipa_cp_clone)
      /* If there is no call which was executed in profiling or where
	 profile is missing, we do not want to clone.  */
      || (!called_without_ipa_profile && !count_sum.nonzero_p ()))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  good_cloning_opportunity_p (time: %g, "
		 "size: %i): Definitely not good or prohibited.\n",
		 time_benefit.to_double (), size_cost);
      return false;
    }

  gcc_assert (size_cost > 0);

  ipa_node_params *info = ipa_node_params_sum->get (node);
  /* Spread the evaluation threshold over the configured number of decision
     sweeps, getting progressively more permissive.  */
  int num_sweeps = opt_for_fn (node->decl, param_ipa_cp_sweeps);
  int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
  eval_threshold = (eval_threshold * num_sweeps) / cur_sweep;
  /* If we know the execution IPA execution counts, we can estimate overall
     speedup of the program.  */
  if (count_sum.nonzero_p ())
    {
      profile_count saved_time = count_sum * time_benefit;
      sreal evaluation = saved_time.to_sreal_scale (profile_count::one ())
	/ size_cost;
      evaluation = incorporate_penalties (node, info, evaluation);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  good_cloning_opportunity_p (time: %g, "
		   "size: %i, count_sum: ", time_benefit.to_double (),
		   size_cost);
	  count_sum.dump (dump_file);
	  fprintf (dump_file, ", overall time saved: ");
	  saved_time.dump (dump_file);
	  fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
		   info->node_within_scc
		   ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		   info->node_calling_single_call ? ", single_call" : "",
		   evaluation.to_double (), eval_threshold);
	}
      gcc_checking_assert (saved_time == saved_time.ipa ());
      if (!maybe_hot_count_p (NULL, saved_time))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  not cloning: time saved is not hot\n");
	}
      /* Evaluation approximately corresponds to time saved per instruction
	 introduced.  This is likely almost always going to be true, since we
	 already checked that time saved is large enough to be considered
	 hot.  */
      else if (evaluation >= (sreal)eval_threshold)
	return true;
      /* If all call sites have profile known, we know we do not want to
	 clone.  If there are calls with unknown profile, try local
	 heuristics.  */
      if (!called_without_ipa_profile)
	return false;
    }
  /* Fall back to the frequency-based local heuristic.  */
  sreal evaluation = (time_benefit * freq_sum) / size_cost;
  evaluation = incorporate_penalties (node, info, evaluation);
  evaluation *= 1000;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  good_cloning_opportunity_p (time: %g, "
	     "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
	     "threshold: %i\n",
	     time_benefit.to_double (), size_cost, freq_sum.to_double (),
	     info->node_within_scc
	     ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
	     info->node_calling_single_call ? ", single_call" : "",
	     evaluation.to_double (), eval_threshold);

  return evaluation >= eval_threshold;
}
3525 :
3526 : /* Grow vectors in AVALS and fill them with information about values of
3527 : parameters that are known to be independent of the context. INFO describes
3528 : the function. If REMOVABLE_PARAMS_COST is non-NULL, the movement cost of
3529 : all removable parameters will be stored in it.
3530 :
3531 : TODO: Also grow context independent value range vectors. */
3532 :
3533 : static bool
3534 2135813 : gather_context_independent_values (class ipa_node_params *info,
3535 : ipa_auto_call_arg_values *avals,
3536 : int *removable_params_cost)
3537 : {
3538 2135813 : int i, count = ipa_get_param_count (info);
3539 2135813 : bool ret = false;
3540 :
3541 2135813 : avals->m_known_vals.safe_grow_cleared (count, true);
3542 2135813 : avals->m_known_contexts.safe_grow_cleared (count, true);
3543 :
3544 2135813 : if (removable_params_cost)
3545 2135813 : *removable_params_cost = 0;
3546 :
3547 7096701 : for (i = 0; i < count; i++)
3548 : {
3549 4960888 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3550 4960888 : ipcp_lattice<tree> *lat = &plats->itself;
3551 :
3552 4960888 : if (lat->is_single_const ())
3553 : {
3554 34883 : ipcp_value<tree> *val = lat->values;
3555 34883 : gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3556 34883 : avals->m_known_vals[i] = val->value;
3557 34883 : if (removable_params_cost)
3558 69766 : *removable_params_cost
3559 34883 : += estimate_move_cost (TREE_TYPE (val->value), false);
3560 : ret = true;
3561 : }
3562 4926005 : else if (removable_params_cost
3563 4926005 : && !ipa_is_param_used (info, i))
3564 946296 : *removable_params_cost
3565 473148 : += ipa_get_param_move_cost (info, i);
3566 :
3567 4960888 : if (!ipa_is_param_used (info, i))
3568 478295 : continue;
3569 :
3570 4482593 : ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3571 : /* Do not account known context as reason for cloning. We can see
3572 : if it permits devirtualization. */
3573 4482593 : if (ctxlat->is_single_const ())
3574 22899 : avals->m_known_contexts[i] = ctxlat->values->value;
3575 :
3576 4482593 : ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
3577 : }
3578 :
3579 2135813 : return ret;
3580 : }
3581 :
3582 : /* Perform time and size measurement of NODE with the context given in AVALS,
3583 : calculate the benefit compared to the node without specialization and store
3584 : it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3585 : context-independent or unused removable parameters and EST_MOVE_COST, the
3586 : estimated movement of the considered parameter. */
3587 :
3588 : static void
3589 76130 : perform_estimation_of_a_value (cgraph_node *node,
3590 : ipa_auto_call_arg_values *avals,
3591 : int removable_params_cost, int est_move_cost,
3592 : ipcp_value_base *val)
3593 : {
3594 76130 : sreal time_benefit;
3595 76130 : ipa_call_estimates estimates;
3596 :
3597 76130 : estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3598 :
3599 : /* Extern inline functions have no cloning local time benefits because they
3600 : will be inlined anyway. The only reason to clone them is if it enables
3601 : optimization in any of the functions they call. */
3602 76130 : if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3603 52 : time_benefit = 0;
3604 : else
3605 76078 : time_benefit = (estimates.nonspecialized_time - estimates.time)
3606 152156 : + hint_time_bonus (node, estimates)
3607 152156 : + (devirtualization_time_bonus (node, avals)
3608 152156 : + removable_params_cost + est_move_cost);
3609 :
3610 76130 : int size = estimates.size;
3611 76130 : gcc_checking_assert (size >=0);
3612 : /* The inliner-heuristics based estimates may think that in certain
3613 : contexts some functions do not have any size at all but we want
3614 : all specializations to have at least a tiny cost, not least not to
3615 : divide by zero. */
3616 76130 : if (size == 0)
3617 0 : size = 1;
3618 :
3619 76130 : val->local_time_benefit = time_benefit;
3620 76130 : val->local_size_cost = size;
3621 76130 : }
3622 :
3623 : /* Get the overall limit of growth based on parameters extracted from growth,
3624 : and CUR_SWEEP, which is the number of the current sweep of IPA-CP over the
3625 : call-graph in the decision stage. It does not really make sense to mix
3626 : functions with different overall growth limits or even number of sweeps but
3627 : it is possible and if it happens, we do not want to select one limit at
3628 : random, so get the limits from NODE. */
3629 :
3630 : static long
3631 206529 : get_max_overall_size (cgraph_node *node, int cur_sweep)
3632 : {
3633 206529 : long max_new_size = orig_overall_size;
3634 206529 : long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3635 206529 : if (max_new_size < large_unit)
3636 : max_new_size = large_unit;
3637 206529 : int num_sweeps = opt_for_fn (node->decl, param_ipa_cp_sweeps);
3638 206529 : gcc_assert (cur_sweep <= num_sweeps);
3639 206529 : int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3640 206529 : max_new_size += ((max_new_size * unit_growth * cur_sweep)
3641 206529 : / num_sweeps) / 100 + 1;
3642 206529 : return max_new_size;
3643 : }
3644 :
3645 : /* Return true if NODE should be cloned just for a parameter removal, possibly
3646 : dumping a reason if not. */
3647 :
3648 : static bool
3649 186992 : clone_for_param_removal_p (cgraph_node *node)
3650 : {
3651 186992 : if (!node->can_change_signature)
3652 : {
3653 4945 : if (dump_file && (dump_flags & TDF_DETAILS))
3654 0 : fprintf (dump_file, " Not considering cloning to remove parameters, "
3655 : "function cannot change signature.\n");
3656 4945 : return false;
3657 : }
3658 182047 : if (node->can_be_local_p ())
3659 : {
3660 133052 : if (dump_file && (dump_flags & TDF_DETAILS))
3661 0 : fprintf (dump_file, " Not considering cloning to remove parameters, "
3662 : "IPA-SRA can do it potentially better.\n");
3663 133052 : return false;
3664 : }
3665 : return true;
3666 : }
3667 :
/* Iterate over known values of parameters of NODE and estimate the local
   effects in terms of time and size they have.  The results are stored into
   the local_time_benefit and local_size_cost fields of the individual
   ipcp_value entries.  */

static void
estimate_local_effects (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int count = ipa_get_param_count (info);
  int removable_params_cost;

  /* Nothing to do for functions without parameters or ones that cannot be
     specialized at all.  */
  if (!count || !ipcp_versionable_function_p (node))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());

  /* Start from the values that hold in every context; individual candidate
     values are then temporarily plugged into AVALS one at a time below.  */
  ipa_auto_call_arg_values avals;
  gather_context_independent_values (info, &avals, &removable_params_cost);

  /* First, estimate each candidate scalar constant of each parameter.  */
  for (int i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_value<tree> *val;

      /* Skip parameters with no candidate values or whose single value is
	 already part of the context-independent set in AVALS.  */
      if (lat->bottom
	  || !lat->values
	  || avals.m_known_vals[i])
	continue;

      for (val = lat->values; val; val = val->next)
	{
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  avals.m_known_vals[i] = val->value;

	  int emc = estimate_move_cost (TREE_TYPE (val->value), true);
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 emc, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for value ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %g, size: %i\n",
		       val->local_time_benefit.to_double (),
		       val->local_size_cost);
	    }
	}
      /* Restore AVALS for the next parameter.  */
      avals.m_known_vals[i] = NULL_TREE;
    }

  /* Second, estimate candidate polymorphic call contexts of parameters that
     actually feed a virtual call.  */
  for (int i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);

      if (!plats->virt_call)
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      ipcp_value<ipa_polymorphic_call_context> *val;

      /* Skip if there are no candidates or a useful context is already part
	 of the context-independent information.  */
      if (ctxlat->bottom
	  || !ctxlat->values
	  || !avals.m_known_contexts[i].useless_p ())
	continue;

      for (val = ctxlat->values; val; val = val->next)
	{
	  avals.m_known_contexts[i] = val->value;
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 0, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for polymorphic context ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %g, size: %i\n",
		       val->local_time_benefit.to_double (),
		       val->local_size_cost);
	    }
	}
      /* Restore AVALS for the next parameter.  */
      avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
    }

  /* Third, estimate candidate aggregate constants.  Save the
     context-independent aggregate values aside and rebuild
     avals.m_known_aggs for each candidate, keeping the vector sorted by
     (index, unit_offset) with room for exactly one extra entry — the
     candidate being evaluated.  */
  unsigned all_ctx_len = avals.m_known_aggs.length ();
  auto_vec<ipa_argagg_value, 32> all_ctx;
  all_ctx.reserve_exact (all_ctx_len);
  all_ctx.splice (avals.m_known_aggs);
  avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);

  /* J is the number of saved entries that precede the current candidate's
     position; it only grows because lattices are visited in sorted order.  */
  unsigned j = 0;
  for (int index = 0; index < count; index++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);

      if (plats->aggs_bottom || !plats->aggs)
	continue;

      for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
	{
	  ipcp_value<tree> *val;
	  if (aglat->bottom || !aglat->values
	      /* If the following is true, the one value is already part of all
		 context estimations.  */
	      || (!plats->aggs_contain_variable
		  && aglat->is_single_const ()))
	    continue;

	  unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
	  /* Copy saved entries that sort before the candidate's slot.  */
	  while (j < all_ctx_len
		 && (all_ctx[j].index < index
		     || (all_ctx[j].index == index
			 && all_ctx[j].unit_offset < unit_offset)))
	    {
	      avals.m_known_aggs[j] = all_ctx[j];
	      j++;
	    }

	  /* Copy the remaining saved entries one slot further down, leaving
	     slot J free for the candidate value.  */
	  for (unsigned k = j; k < all_ctx_len; k++)
	    avals.m_known_aggs[k+1] = all_ctx[k];

	  for (val = aglat->values; val; val = val->next)
	    {
	      avals.m_known_aggs[j].value = val->value;
	      avals.m_known_aggs[j].unit_offset = unit_offset;
	      avals.m_known_aggs[j].index = index;
	      avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
	      avals.m_known_aggs[j].killed = false;

	      perform_estimation_of_a_value (node, &avals,
					     removable_params_cost, 0, val);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " - estimates for value ");
		  print_ipcp_constant_value (dump_file, val->value);
		  fprintf (dump_file, " for ");
		  ipa_dump_param (dump_file, info, index);
		  fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
			   "]: time_benefit: %g, size: %i\n",
			   plats->aggs_by_ref ? "ref " : "",
			   aglat->offset,
			   val->local_time_benefit.to_double (),
			   val->local_size_cost);
		}
	    }
	}
    }
}
3821 :
3822 :
/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
   topological sort of values.

   This is one step of Tarjan's strongly connected component algorithm:
   values are the vertices and source links are the edges.  Members of each
   discovered SCC are chained through their scc_next fields and each finished
   SCC is prepended to the values_topo list for later processing by
   propagate_effects.  */

template <typename valtype>
void
value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
{
  ipcp_value_source<valtype> *src;

  /* A non-zero DFS number means the value has already been visited.  */
  if (cur_val->dfs)
    return;

  dfs_counter++;
  cur_val->dfs = dfs_counter;
  cur_val->low_link = dfs_counter;

  /* Push CUR_VAL onto the Tarjan stack.  */
  cur_val->topo_next = stack;
  stack = cur_val;
  cur_val->on_stack = true;

  for (src = cur_val->sources; src; src = src->next)
    if (src->val)
      {
	if (src->val->dfs == 0)
	  {
	    /* Tree edge: recurse and propagate the low-link upwards.  */
	    add_val (src->val);
	    if (src->val->low_link < cur_val->low_link)
	      cur_val->low_link = src->val->low_link;
	  }
	else if (src->val->on_stack
		 && src->val->dfs < cur_val->low_link)
	  /* Back or cross edge into the SCC currently on the stack.  */
	  cur_val->low_link = src->val->dfs;
      }

  if (cur_val->dfs == cur_val->low_link)
    {
      /* CUR_VAL is the root of an SCC; pop all its members off the stack
	 and chain them together through their scc_next fields.  */
      ipcp_value<valtype> *v, *scc_list = NULL;

      do
	{
	  v = stack;
	  stack = v->topo_next;
	  v->on_stack = false;
	  v->scc_no = cur_val->dfs;

	  v->scc_next = scc_list;
	  scc_list = v;
	}
      while (v != cur_val);

      /* Prepend the finished SCC to the list of all sorted values.  */
      cur_val->topo_next = values_topo;
      values_topo = cur_val;
    }
}
3877 :
3878 : /* Add all values in lattices associated with NODE to the topological sort if
3879 : they are not there yet. */
3880 :
3881 : static void
3882 1254697 : add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3883 : {
3884 1254697 : ipa_node_params *info = ipa_node_params_sum->get (node);
3885 1254697 : int i, count = ipa_get_param_count (info);
3886 :
3887 3564152 : for (i = 0; i < count; i++)
3888 : {
3889 2309455 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3890 2309455 : ipcp_lattice<tree> *lat = &plats->itself;
3891 2309455 : struct ipcp_agg_lattice *aglat;
3892 :
3893 2309455 : if (!lat->bottom)
3894 : {
3895 219801 : ipcp_value<tree> *val;
3896 291673 : for (val = lat->values; val; val = val->next)
3897 71872 : topo->constants.add_val (val);
3898 : }
3899 :
3900 2309455 : if (!plats->aggs_bottom)
3901 276990 : for (aglat = plats->aggs; aglat; aglat = aglat->next)
3902 57251 : if (!aglat->bottom)
3903 : {
3904 56895 : ipcp_value<tree> *val;
3905 114140 : for (val = aglat->values; val; val = val->next)
3906 57245 : topo->constants.add_val (val);
3907 : }
3908 :
3909 2309455 : ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3910 2309455 : if (!ctxlat->bottom)
3911 : {
3912 220801 : ipcp_value<ipa_polymorphic_call_context> *ctxval;
3913 228363 : for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3914 7562 : topo->contexts.add_val (ctxval);
3915 : }
3916 : }
3917 1254697 : }
3918 :
/* One pass of constants propagation along the call graph edges, from callers
   to callees (requires topological ordering in TOPO), iterate over strongly
   connected components.  */

static void
propagate_constants_topo (class ipa_topo_info *topo)
{
  int i;

  /* TOPO->order is walked backwards so that callers are processed before
     their callees.  */
  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      unsigned j;
      struct cgraph_node *v, *node = topo->order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);

      /* First, iteratively propagate within the strongly connected component
	 until all lattices stabilize.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ())
	  {
	    if (opt_for_fn (v->decl, flag_ipa_cp)
		&& opt_for_fn (v->decl, optimize))
	      push_node_to_stack (topo, v);
	    /* When V is not optimized, we can not push it to stack, but
	       still we need to set all its callees lattices to bottom.  */
	    else
	      {
		for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
		  propagate_constants_across_call (cs);
	      }
	  }

      /* Work-list loop: keep propagating across intra-SCC edges, re-queueing
	 any callee whose lattices changed, until a fixed point is reached.  */
      v = pop_node_from_stack (topo);
      while (v)
	{
	  struct cgraph_edge *cs;
	  class ipa_node_params *info = NULL;
	  bool self_scc = true;

	  for (cs = v->callees; cs; cs = cs->next_callee)
	    if (ipa_edge_within_scc (cs))
	      {
		cgraph_node *callee = cs->callee->function_symbol ();

		/* An intra-SCC edge to a different node means the SCC is not
		   just a single self-recursive function.  */
		if (v != callee)
		  self_scc = false;

		if (!info)
		  {
		    info = ipa_node_params_sum->get (v);
		    info->node_within_scc = true;
		  }

		/* Re-queue the callee if its lattices changed.  */
		if (propagate_constants_across_call (cs))
		  push_node_to_stack (topo, callee);
	      }

	  if (info)
	    info->node_is_self_scc = self_scc;

	  v = pop_node_from_stack (topo);
	}

      /* Afterwards, propagate along edges leading out of the SCC, calculates
	 the local effects of the discovered constants and all valid values to
	 their topological sort.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ()
	    && opt_for_fn (v->decl, flag_ipa_cp)
	    && opt_for_fn (v->decl, optimize))
	  {
	    struct cgraph_edge *cs;

	    estimate_local_effects (v);
	    add_all_node_vals_to_toposort (v, topo);
	    for (cs = v->callees; cs; cs = cs->next_callee)
	      if (!ipa_edge_within_scc (cs))
		propagate_constants_across_call (cs);
	  }
      cycle_nodes.release ();
    }
}
4001 :
/* Propagate the estimated effects of individual values along the topological
   order, from the dependent values to those they depend on.  Each value's
   prop_time_benefit and prop_size_cost accumulate the effects of the values
   whose materialization it enables.  */

template <typename valtype>
void
value_topo_info<valtype>::propagate_effects ()
{
  ipcp_value<valtype> *base;
  hash_set<ipcp_value<valtype> *> processed_srcvals;

  for (base = values_topo; base; base = base->topo_next)
    {
      ipcp_value_source<valtype> *src;
      ipcp_value<valtype> *val;
      sreal time = 0;
      HOST_WIDE_INT size = 0;

      /* All members of an SCC (chained through scc_next) enable one another,
	 so sum their local and already-propagated effects together.  */
      for (val = base; val; val = val->scc_next)
	{
	  time = time + val->local_time_benefit + val->prop_time_benefit;
	  size = size + val->local_size_cost + val->prop_size_cost;
	}

      /* Push the summed effects to the source values the SCC depends on.  */
      for (val = base; val; val = val->scc_next)
	{
	  processed_srcvals.empty ();
	  for (src = val->sources; src; src = src->next)
	    if (src->val
		&& cs_interesting_for_ipcp_p (src->cs))
	      {
		/* Add the size only once per source value (several edges may
		   lead to the same one) and guard against overflowing the
		   int-sized prop_size_cost.  */
		if (!processed_srcvals.add (src->val))
		  {
		    HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
		    if (prop_size < INT_MAX)
		      src->val->prop_size_cost = prop_size;
		    else
		      continue;
		  }

		/* Scale the time benefit up for recursion: either the source
		   is in the same SCC, or this value was generated by cloning
		   for self-recursion.  */
		int special_factor = 1;
		if (val->same_scc (src->val))
		  special_factor
		    = opt_for_fn(src->cs->caller->decl,
				 param_ipa_cp_recursive_freq_factor);
		else if (val->self_recursion_generated_p ()
			 && (src->cs->callee->function_symbol ()
			     == src->cs->caller))
		  {
		    int max_recur_gen_depth
		      = opt_for_fn(src->cs->caller->decl,
				   param_ipa_cp_max_recursive_depth);
		    special_factor = max_recur_gen_depth
		      - val->self_recursion_generated_level + 1;
		  }

		src->val->prop_time_benefit
		  += time * special_factor * src->cs->sreal_frequency ();
	      }

	  if (size < INT_MAX)
	    {
	      val->prop_time_benefit = time;
	      val->prop_size_cost = size;
	    }
	  else
	    {
	      /* On overflow, record nothing rather than a bogus value.  */
	      val->prop_time_benefit = 0;
	      val->prop_size_cost = 0;
	    }
	}
    }
}
4074 :
4075 :
/* Propagate constants, polymorphic contexts and their effects from the
   summaries interprocedurally.  */

static void
ipcp_propagate_stage (class ipa_topo_info *topo)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\n Propagating constants:\n\n");

  /* Initialize per-node lattices and accumulate the total size of the unit,
     which later serves as the base for the growth limits.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      if (node->has_gimple_body_p ()
	  && opt_for_fn (node->decl, flag_ipa_cp)
	  && opt_for_fn (node->decl, optimize))
	{
	  ipa_node_params *info = ipa_node_params_sum->get (node);
	  determine_versionability (node, info);

	  unsigned nlattices = ipa_get_param_count (info);
	  info->lattices.safe_grow_cleared (nlattices, true);
	  initialize_node_lattices (node);

	  /* Remember the largest requested number of sweeps; functions may
	     disagree on the parameter value.  */
	  int num_sweeps = opt_for_fn (node->decl, param_ipa_cp_sweeps);
	  if (max_number_sweeps < num_sweeps)
	    max_number_sweeps = num_sweeps;
	}
      ipa_size_summary *s = ipa_size_summaries->get (node);
      if (node->definition && !node->alias && s != NULL)
	overall_size += s->self_size;
    }

  orig_overall_size = overall_size;

  if (dump_file)
    fprintf (dump_file, "\noverall_size: %li\n", overall_size);

  /* Do the actual interprocedural propagation and then compute the
     transitive effects of the discovered values.  */
  propagate_constants_topo (topo);
  if (flag_checking)
    ipcp_verify_propagated_values ();
  topo->constants.propagate_effects ();
  topo->contexts.propagate_effects ();

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA lattices after all propagation:\n");
      print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
    }
}
4126 :
/* Discover newly direct outgoing edges from NODE which is a new clone with
   known KNOWN_CSTS and make them direct.  KNOWN_CONTEXTS and AGGVALS describe
   the polymorphic contexts and aggregate constants known for the clone.  */

static void
ipcp_discover_new_direct_edges (struct cgraph_node *node,
				vec<tree> known_csts,
				vec<ipa_polymorphic_call_context>
				known_contexts,
				vec<ipa_argagg_value, va_gc> *aggvals)
{
  struct cgraph_edge *ie, *next_ie;
  bool found = false;

  for (ie = node->indirect_calls; ie; ie = next_ie)
    {
      tree target;
      bool speculative;

      /* Making the edge direct removes it from the indirect list, so fetch
	 the successor up front.  */
      next_ie = ie->next_callee;
      ipa_argagg_value_list avs (aggvals);
      target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
					       avs, &speculative);
      if (target)
	{
	  cgraph_polymorphic_indirect_info *pii
	    = dyn_cast <cgraph_polymorphic_indirect_info *> (ie->indirect_info);
	  cgraph_simple_indirect_info *sii
	    = dyn_cast <cgraph_simple_indirect_info *> (ie->indirect_info);
	  bool agg_contents = sii && sii->agg_contents;
	  bool polymorphic = !!pii;
	  int param_index = ie->indirect_info->param_index;
	  struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
								   speculative);
	  found = true;

	  /* For a simple call through a parameter value, the resolved
	     indirect call no longer counts as a use of the parameter, so
	     update the controlled-uses bookkeeping.  */
	  if (cs && !agg_contents && !polymorphic)
	    {
	      ipa_node_params *info = ipa_node_params_sum->get (node);
	      int c = ipa_get_controlled_uses (info, param_index);
	      if (c != IPA_UNDESCRIBED_USE
		  && !ipa_get_param_load_dereferenced (info, param_index))
		{
		  struct ipa_ref *to_del;

		  c--;
		  ipa_set_controlled_uses (info, param_index, c);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "     controlled uses count of param "
			     "%i bumped down to %i\n", param_index, c);
		  /* With no controlled uses left, the address reference
		     created during cloning can go too.  */
		  if (c == 0
		      && (to_del = node->find_reference (cs->callee, NULL, 0,
							 IPA_REF_ADDR)))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, "       and even removing its "
				 "cloning-created reference\n");
		      to_del->remove_reference ();
		    }
		}
	    }
	}
    }
  /* Turning calls to direct calls will improve overall summary.  */
  if (found)
    ipa_update_overall_fn_summary (node);
}
4193 :
4194 : class edge_clone_summary;
4195 : static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4196 :
/* Edge clone summary.  Edges created by duplicating another edge are kept on
   a doubly linked list through the prev_clone/next_clone fields below so that
   all clones of an edge can be enumerated (see
   get_next_cgraph_edge_clone).  */

class edge_clone_summary
{
public:
  /* Default constructor.  A fresh summary is not linked to any clone.  */
  edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}

  /* Default destructor.  Unlink this edge from its clone list by
     reconnecting its neighbors to each other.  */
  ~edge_clone_summary ()
  {
    if (prev_clone)
      edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
    if (next_clone)
      edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
  }

  /* Neighboring entries in the list of clones of the same edge.  */
  cgraph_edge *prev_clone;
  cgraph_edge *next_clone;
};
4217 :
/* Call summary class managing edge_clone_summary instances; its duplicate
   hook links newly cloned edges into the clone list of their originals.  */

class edge_clone_summary_t:
  public call_summary <edge_clone_summary *>
{
public:
  edge_clone_summary_t (symbol_table *symtab):
    call_summary <edge_clone_summary *> (symtab)
  {
    /* Create a summary for edges being cloned even when the original edge
       did not have one yet.  */
    m_initialize_when_cloning = true;
  }

  void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
		  edge_clone_summary *src_data,
		  edge_clone_summary *dst_data) final override;
};
4232 :
4233 : /* Edge duplication hook. */
4234 :
4235 : void
4236 199713 : edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4237 : edge_clone_summary *src_data,
4238 : edge_clone_summary *dst_data)
4239 : {
4240 199713 : if (src_data->next_clone)
4241 6670 : edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4242 199713 : dst_data->prev_clone = src_edge;
4243 199713 : dst_data->next_clone = src_data->next_clone;
4244 199713 : src_data->next_clone = dst_edge;
4245 199713 : }
4246 :
4247 : /* Return true is CS calls DEST or its clone for all contexts. When
4248 : ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4249 : edges from/to an all-context clone. */
4250 :
4251 : static bool
4252 1801981 : calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4253 : bool allow_recursion_to_clone)
4254 : {
4255 1801981 : enum availability availability;
4256 1801981 : cgraph_node *callee = cs->callee->function_symbol (&availability);
4257 :
4258 1801981 : if (availability <= AVAIL_INTERPOSABLE)
4259 : return false;
4260 1796041 : if (callee == dest)
4261 : return true;
4262 624321 : if (!allow_recursion_to_clone && cs->caller == callee)
4263 : return false;
4264 :
4265 624137 : ipa_node_params *info = ipa_node_params_sum->get (callee);
4266 624137 : return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4267 : }
4268 :
/* Return true if edge CS does bring about the value described by SRC to
   DEST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
			    cgraph_node *dest, ipcp_value<tree> *dest_val)
{
  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

  /* Only edges from live callers that actually reach DEST (or its
     all-context clone) are relevant at all.  */
  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
      || caller_info->node_dead)
    return false;

  /* A source without a particular value is brought by any suitable edge.  */
  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    {
      /* The caller is itself a specialized clone; look the value up among
	 the constants known for the clone — scalars in known_csts,
	 aggregate parts in the transformation summary.  */
      tree t = NULL_TREE;
      if (src->offset == -1)
	t = caller_info->known_csts[src->index];
      else if (ipcp_transformation *ts
	       = ipcp_get_transformation_summary (cs->caller))
	{
	  ipa_argagg_value_list avl (ts);
	  t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
	}
      return (t != NULL_TREE
	      && values_equal_for_ipcp_p (src->val->value, t));
    }
  else
    {
      if (src->val == dest_val)
	return true;

      /* Otherwise consult the caller's lattices: the value is only brought
	 if the relevant scalar or aggregate lattice has settled on exactly
	 this single constant.  */
      struct ipcp_agg_lattice *aglat;
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
								src->index);
      if (src->offset == -1)
	return (plats->itself.is_single_const ()
		&& values_equal_for_ipcp_p (src->val->value,
					    plats->itself.values->value));
      else
	{
	  if (plats->aggs_bottom || plats->aggs_contain_variable)
	    return false;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (aglat->offset == src->offset)
	      return (aglat->is_single_const ()
		      && values_equal_for_ipcp_p (src->val->value,
						  aglat->values->value));
	}
      return false;
    }
}
4324 :
4325 : /* Return true if edge CS does bring about the value described by SRC to
4326 : DST_VAL of node DEST or its clone for all contexts. */
4327 :
4328 : static bool
4329 10030 : cgraph_edge_brings_value_p (cgraph_edge *cs,
4330 : ipcp_value_source<ipa_polymorphic_call_context> *src,
4331 : cgraph_node *dest,
4332 : ipcp_value<ipa_polymorphic_call_context> *)
4333 : {
4334 10030 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4335 :
4336 10030 : if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4337 10030 : || caller_info->node_dead)
4338 : return false;
4339 8836 : if (!src->val)
4340 : return true;
4341 :
4342 1700 : if (caller_info->ipcp_orig_node)
4343 2648 : return (caller_info->known_contexts.length () > (unsigned) src->index)
4344 524 : && values_equal_for_ipcp_p (src->val->value,
4345 262 : caller_info->known_contexts[src->index]);
4346 :
4347 1423 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4348 : src->index);
4349 1423 : return plats->ctxlat.is_single_const ()
4350 264 : && values_equal_for_ipcp_p (src->val->value,
4351 264 : plats->ctxlat.values->value);
4352 : }
4353 :
4354 : /* Get the next clone in the linked list of clones of an edge. */
4355 :
4356 : static inline struct cgraph_edge *
4357 1802290 : get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4358 : {
4359 1802290 : edge_clone_summary *s = edge_clone_summaries->get (cs);
4360 1802290 : return s != NULL ? s->next_clone : NULL;
4361 : }
4362 :
4363 : /* Given VAL that is intended for DEST, iterate over all its sources and if any
4364 : of them is viable and hot, return true. In that case, for those that still
4365 : hold, add their edge frequency and their number and cumulative profile
4366 : counts of self-ecursive and other edges into *FREQUENCY, *CALLER_COUNT,
4367 : REC_COUNT_SUM and NONREC_COUNT_SUM respectively. */
4368 :
4369 : template <typename valtype>
4370 : static bool
4371 205589 : get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4372 : sreal *freq_sum, int *caller_count,
4373 : profile_count *rec_count_sum,
4374 : profile_count *nonrec_count_sum,
4375 : bool *called_without_ipa_profile)
4376 : {
4377 : ipcp_value_source<valtype> *src;
4378 205589 : sreal freq = 0;
4379 205589 : int count = 0;
4380 205589 : profile_count rec_cnt = profile_count::zero ();
4381 205589 : profile_count nonrec_cnt = profile_count::zero ();
4382 205589 : bool interesting = false;
4383 205589 : bool non_self_recursive = false;
4384 205589 : *called_without_ipa_profile = false;
4385 :
4386 952907 : for (src = val->sources; src; src = src->next)
4387 : {
4388 747318 : struct cgraph_edge *cs = src->cs;
4389 1860923 : while (cs)
4390 : {
4391 1113605 : if (cgraph_edge_brings_value_p (cs, src, dest, val))
4392 : {
4393 341944 : count++;
4394 341944 : freq += cs->sreal_frequency ();
4395 341944 : interesting |= cs_interesting_for_ipcp_p (cs);
4396 341944 : if (cs->caller != dest)
4397 : {
4398 335185 : non_self_recursive = true;
4399 335185 : if (cs->count.ipa ().initialized_p ())
4400 932 : rec_cnt += cs->count.ipa ();
4401 : else
4402 334253 : *called_without_ipa_profile = true;
4403 : }
4404 6759 : else if (cs->count.ipa ().initialized_p ())
4405 0 : nonrec_cnt += cs->count.ipa ();
4406 : else
4407 6759 : *called_without_ipa_profile = true;
4408 : }
4409 1113605 : cs = get_next_cgraph_edge_clone (cs);
4410 : }
4411 : }
4412 :
4413 : /* If the only edges bringing a value are self-recursive ones, do not bother
4414 : evaluating it. */
4415 205589 : if (!non_self_recursive)
4416 : return false;
4417 :
4418 144668 : *freq_sum = freq;
4419 144668 : *caller_count = count;
4420 144668 : *rec_count_sum = rec_cnt;
4421 144668 : *nonrec_count_sum = nonrec_cnt;
4422 :
4423 144668 : return interesting;
4424 : }
4425 :
4426 : /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4427 : to let a non-self-recursive caller be the first element. Thus, we can
4428 : simplify intersecting operations on values that arrive from all of these
4429 : callers, especially when there exists self-recursive call. Return true if
4430 : this kind of adjustment is possible. */
4431 :
4432 : static bool
4433 56681 : adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4434 : cgraph_node *node)
4435 : {
4436 60904 : for (unsigned i = 0; i < callers.length (); i++)
4437 : {
4438 60808 : cgraph_edge *cs = callers[i];
4439 :
4440 60808 : if (cs->caller != node)
4441 : {
4442 56585 : if (i > 0)
4443 : {
4444 1977 : callers[i] = callers[0];
4445 1977 : callers[0] = cs;
4446 : }
4447 56585 : return true;
4448 : }
4449 : }
4450 : return false;
4451 : }
4452 :
4453 : /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4454 : is assumed their number is known and equal to CALLER_COUNT. */
4455 :
4456 : template <typename valtype>
4457 : static auto_vec<cgraph_edge *>
4458 144312 : gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4459 : int caller_count)
4460 : {
4461 : ipcp_value_source<valtype> *src;
4462 144312 : auto_vec<cgraph_edge *> ret (caller_count);
4463 :
4464 496470 : for (src = val->sources; src; src = src->next)
4465 : {
4466 352158 : struct cgraph_edge *cs = src->cs;
4467 792880 : while (cs)
4468 : {
4469 440722 : if (cgraph_edge_brings_value_p (cs, src, dest, val))
4470 338909 : ret.quick_push (cs);
4471 440722 : cs = get_next_cgraph_edge_clone (cs);
4472 : }
4473 : }
4474 :
4475 144312 : if (caller_count > 1)
4476 39227 : adjust_callers_for_value_intersection (ret, dest);
4477 :
4478 144312 : return ret;
4479 : }
4480 :
/* Construct a replacement map for a known VALUE for a formal parameter PARAM.
4482 : Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4483 : should be set to true when the reference created for the constant should be
4484 : a load one and not an address one because the corresponding parameter p is
4485 : only used as *p. */
4486 :
4487 : static struct ipa_replace_map *
4488 25654 : get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4489 : bool force_load_ref)
4490 : {
4491 25654 : struct ipa_replace_map *replace_map;
4492 :
4493 25654 : replace_map = ggc_alloc<ipa_replace_map> ();
4494 25654 : if (dump_file)
4495 : {
4496 181 : fprintf (dump_file, " replacing ");
4497 181 : ipa_dump_param (dump_file, info, parm_num);
4498 :
4499 181 : fprintf (dump_file, " with const ");
4500 181 : print_generic_expr (dump_file, value);
4501 :
4502 181 : if (force_load_ref)
4503 11 : fprintf (dump_file, " - forcing load reference\n");
4504 : else
4505 170 : fprintf (dump_file, "\n");
4506 : }
4507 25654 : replace_map->parm_num = parm_num;
4508 25654 : replace_map->new_tree = value;
4509 25654 : replace_map->force_load_ref = force_load_ref;
4510 25654 : return replace_map;
4511 : }
4512 :
/* Dump new profiling counts of NODE.  SPEC is true when NODE is a specialized
4514 : one, otherwise it will be referred to as the original node. */
4515 :
4516 : static void
4517 4 : dump_profile_updates (cgraph_node *node, bool spec)
4518 : {
4519 4 : if (spec)
4520 2 : fprintf (dump_file, " setting count of the specialized node %s to ",
4521 : node->dump_name ());
4522 : else
4523 2 : fprintf (dump_file, " setting count of the original node %s to ",
4524 : node->dump_name ());
4525 :
4526 4 : node->count.dump (dump_file);
4527 4 : fprintf (dump_file, "\n");
4528 6 : for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4529 : {
4530 2 : fprintf (dump_file, " edge to %s has count ",
4531 2 : cs->callee->dump_name ());
4532 2 : cs->count.dump (dump_file);
4533 2 : fprintf (dump_file, "\n");
4534 : }
4535 4 : }
4536 :
/* With partial train run we do not want to assume that original's count is
   zero whenever we redirect all executed edges to clone.  Simply drop profile
   to local one in this case.  In any case, return the new value.  ORIG_NODE
   is the original node and its count has not been updated yet.  */
4541 :
4542 : profile_count
4543 20 : lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4544 : {
4545 40 : if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4546 30 : && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4547 5 : && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4548 0 : remainder = orig_node->count.guessed_local ();
4549 :
4550 20 : return remainder;
4551 : }
4552 :
4553 : /* Structure to sum counts coming from nodes other than the original node and
4554 : its clones. */
4555 :
struct gather_other_count_struct
{
  /* The original node; callers that are this node or its clones are
     excluded from the sum.  */
  cgraph_node *orig;
  /* Accumulated IPA counts of incoming edges from unrelated callers.  */
  profile_count other_count;
};
4561 :
4562 : /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
   counts that come from non-self-recursive calls.  */
4564 :
4565 : static bool
4566 10 : gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4567 : {
4568 10 : gather_other_count_struct *desc = (gather_other_count_struct *) data;
4569 24 : for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4570 14 : if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4571 0 : if (cs->count.ipa ().initialized_p ())
4572 0 : desc->other_count += cs->count.ipa ();
4573 10 : return false;
4574 : }
4575 :
4576 : /* Structure to help analyze if we need to boost counts of some clones of some
4577 : non-recursive edges to match the new callee count. */
4578 :
struct desc_incoming_count_struct
{
  /* The original node the analyzed clones were created from.  */
  cgraph_node *orig;
  /* Edges already accounted for during profile redistribution.  */
  hash_set <cgraph_edge *> *processed_edges;
  /* Accumulated IPA counts of processed incoming edges (later re-used as
     the per-edge adjustment amount).  */
  profile_count count;
  /* Number of not-yet-processed incoming edges from clones of ORIG.  */
  unsigned unproc_orig_rec_edges;
};
4586 :
4587 : /* Go over edges calling NODE and its thunks and gather information about
4588 : incoming counts so that we know if we need to make any adjustments. */
4589 :
4590 : static void
4591 10 : analyze_clone_icoming_counts (cgraph_node *node,
4592 : desc_incoming_count_struct *desc)
4593 : {
4594 24 : for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4595 14 : if (cs->caller->thunk)
4596 : {
4597 0 : analyze_clone_icoming_counts (cs->caller, desc);
4598 0 : continue;
4599 : }
4600 : else
4601 : {
4602 14 : if (cs->count.initialized_p ())
4603 14 : desc->count += cs->count.ipa ();
4604 14 : if (!desc->processed_edges->contains (cs)
4605 14 : && cs->caller->clone_of == desc->orig)
4606 4 : desc->unproc_orig_rec_edges++;
4607 : }
4608 10 : }
4609 :
4610 : /* If caller edge counts of a clone created for a self-recursive arithmetic
   jump function must be adjusted because it is coming from the "seed" clone
4612 : for the first value and so has been excessively scaled back as if it was not
4613 : a recursive call, adjust it so that the incoming counts of NODE match its
4614 : count. NODE is the node or its thunk. */
4615 :
static void
adjust_clone_incoming_counts (cgraph_node *node,
			      desc_incoming_count_struct *desc)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk)
      {
	/* Fix up edges above the thunk first, then recompute the thunk's
	   own incoming count as the sum of its callers' IPA counts.  */
	adjust_clone_incoming_counts (cs->caller, desc);
	profile_count sum = profile_count::zero ();
	for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
	  if (e->count.initialized_p ())
	    sum += e->count.ipa ();
	cs->count = cs->count.combine_with_ipa_count (sum);
      }
    else if (!desc->processed_edges->contains (cs)
	     && cs->caller->clone_of == desc->orig
	     && cs->count.compatible_p (desc->count))
      {
	/* Boost the count of an unprocessed edge from a clone of the
	   original node by the per-edge share computed by the caller.  */
	cs->count += desc->count;
	if (dump_file)
	  {
	    fprintf (dump_file, " Adjusted count of an incoming edge of "
		     "a clone %s -> %s to ", cs->caller->dump_name (),
		     cs->callee->dump_name ());
	    cs->count.dump (dump_file);
	    fprintf (dump_file, "\n");
	  }
      }
}
4645 :
/* When ORIG_NODE has been cloned for values which have been generated for a
4647 : self-recursive call as a result of an arithmetic pass-through
4648 : jump-functions, adjust its count together with counts of all such clones in
4649 : SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4650 :
4651 : The function sums the counts of the original node and all its clones that
4652 : cannot be attributed to a specific clone because it comes from a
4653 : non-recursive edge. This sum is then evenly divided between the clones and
4654 : on top of that each one gets all the counts which can be attributed directly
4655 : to it. */
4656 :
4657 : static void
4658 41 : update_counts_for_self_gen_clones (cgraph_node *orig_node,
4659 : const vec<cgraph_node *> &self_gen_clones)
4660 : {
4661 41 : profile_count redist_sum = orig_node->count.ipa ();
4662 41 : if (!redist_sum.nonzero_p ())
4663 : return;
4664 :
4665 4 : if (dump_file)
4666 0 : fprintf (dump_file, " Updating profile of self recursive clone "
4667 : "series\n");
4668 :
4669 4 : gather_other_count_struct gocs;
4670 4 : gocs.orig = orig_node;
4671 4 : gocs.other_count = profile_count::zero ();
4672 :
4673 4 : auto_vec <profile_count, 8> other_edges_count;
4674 22 : for (cgraph_node *n : self_gen_clones)
4675 : {
4676 10 : gocs.other_count = profile_count::zero ();
4677 10 : n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4678 : &gocs, false);
4679 10 : other_edges_count.safe_push (gocs.other_count);
4680 10 : redist_sum -= gocs.other_count;
4681 : }
4682 :
4683 4 : hash_set<cgraph_edge *> processed_edges;
4684 4 : unsigned i = 0;
4685 22 : for (cgraph_node *n : self_gen_clones)
4686 : {
4687 10 : profile_count new_count
4688 20 : = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4689 10 : new_count = lenient_count_portion_handling (new_count, orig_node);
4690 10 : n->scale_profile_to (new_count);
4691 20 : for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4692 10 : processed_edges.add (cs);
4693 :
4694 10 : i++;
4695 : }
4696 :
4697 : /* There are still going to be edges to ORIG_NODE that have one or more
4698 : clones coming from another node clone in SELF_GEN_CLONES and which we
4699 : scaled by the same amount, which means that the total incoming sum of
4700 : counts to ORIG_NODE will be too high, scale such edges back. */
4701 8 : for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4702 : {
4703 4 : if (cs->callee->ultimate_alias_target () == orig_node)
4704 : {
4705 4 : unsigned den = 0;
4706 22 : for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4707 18 : if (e->callee->ultimate_alias_target () == orig_node
4708 18 : && processed_edges.contains (e))
4709 8 : den++;
4710 4 : if (den > 0)
4711 22 : for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4712 18 : if (e->callee->ultimate_alias_target () == orig_node
4713 8 : && processed_edges.contains (e)
4714 : /* If count is not IPA, this adjustment makes verifier
4715 : unhappy, since we expect bb->count to match e->count.
4716 : We may add a flag to mark edge conts that has been
4717 : modified by IPA code, but so far it does not seem
4718 : to be worth the effort. With local counts the profile
4719 : will not propagate at IPA level. */
4720 34 : && e->count.ipa_p ())
4721 8 : e->count /= den;
4722 : }
4723 : }
4724 :
4725 : /* Edges from the seeds of the valus generated for arithmetic jump-functions
4726 : along self-recursive edges are likely to have fairly low count and so
4727 : edges from them to nodes in the self_gen_clones do not correspond to the
4728 : artificially distributed count of the nodes, the total sum of incoming
4729 : edges to some clones might be too low. Detect this situation and correct
4730 : it. */
4731 22 : for (cgraph_node *n : self_gen_clones)
4732 : {
4733 10 : if (!n->count.ipa ().nonzero_p ())
4734 0 : continue;
4735 :
4736 10 : desc_incoming_count_struct desc;
4737 10 : desc.orig = orig_node;
4738 10 : desc.processed_edges = &processed_edges;
4739 10 : desc.count = profile_count::zero ();
4740 10 : desc.unproc_orig_rec_edges = 0;
4741 10 : analyze_clone_icoming_counts (n, &desc);
4742 :
4743 10 : if (n->count.differs_from_p (desc.count))
4744 : {
4745 0 : if (n->count > desc.count
4746 0 : && desc.unproc_orig_rec_edges > 0)
4747 : {
4748 0 : desc.count = n->count - desc.count;
4749 0 : desc.count = desc.count /= desc.unproc_orig_rec_edges;
4750 0 : adjust_clone_incoming_counts (n, &desc);
4751 : }
4752 0 : else if (dump_file)
4753 0 : fprintf (dump_file,
4754 : " Unable to fix up incoming counts for %s.\n",
4755 : n->dump_name ());
4756 : }
4757 : }
4758 :
4759 4 : if (dump_file)
4760 0 : for (cgraph_node *n : self_gen_clones)
4761 0 : dump_profile_updates (n, n != orig_node);
4762 4 : return;
4763 4 : }
4764 :
4765 : /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4766 : their profile information to reflect this. This function should not be used
4767 : for clones generated for arithmetic pass-through jump functions on a
4768 : self-recursive call graph edge, that situation is handled by
4769 : update_counts_for_self_gen_clones. */
4770 :
static void
update_profiling_info (struct cgraph_node *orig_node,
		       struct cgraph_node *new_node)
{
  struct caller_statistics stats;
  profile_count new_sum;
  profile_count remainder, orig_node_count = orig_node->count.ipa ();

  /* Nothing to do if the original carries no IPA profile.  */
  if (!orig_node_count.nonzero_p ())
    return;

  if (dump_file)
    {
      fprintf (dump_file, " Updating profile from original count: ");
      orig_node_count.dump (dump_file);
      fprintf (dump_file, "\n");
    }

  /* Sum the counts of the edges that now enter the new (specialized)
     node.  */
  init_caller_stats (&stats, new_node);
  new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
  new_sum = stats.count_sum;

  bool orig_edges_processed = false;
  if (new_sum > orig_node_count)
    {
      /* Profile has already gone astray, keep what we have but lower it
	 to global0 adjusted or to local if we have partial training.  */
      if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
	orig_node->make_profile_local ();
      if (new_sum.quality () == AFDO)
	orig_node->make_profile_global0 (GUESSED_GLOBAL0_AFDO);
      else
	orig_node->make_profile_global0 (GUESSED_GLOBAL0_ADJUSTED);
      orig_edges_processed = true;
    }
  else if (stats.rec_count_sum.nonzero_p ())
    {
      int new_nonrec_calls = stats.n_nonrec_calls;
      /* There are self-recursive edges which are likely to bring in the
	 majority of calls but which we must divide in between the original and
	 new node.  */
      init_caller_stats (&stats, orig_node);
      orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
						     &stats, false);
      int orig_nonrec_calls = stats.n_nonrec_calls;
      profile_count orig_nonrec_call_count = stats.count_sum;

      if (orig_node->local)
	{
	  if (!orig_nonrec_call_count.nonzero_p ())
	    {
	      if (dump_file)
		fprintf (dump_file, " The original is local and the only "
			 "incoming edges from non-dead callers with nonzero "
			 "counts are self-recursive, assuming it is cold.\n");
	      /* The NEW_NODE count and counts of all its outgoing edges
		 are still unmodified copies of ORIG_NODE's.  Just clear
		 the latter and bail out.  */
	      if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
		orig_node->make_profile_local ();
	      else if (orig_nonrec_call_count.quality () == AFDO)
		orig_node->make_profile_global0 (GUESSED_GLOBAL0_AFDO);
	      else
		orig_node->make_profile_global0 (GUESSED_GLOBAL0_ADJUSTED);
	      return;
	    }
	}
      else
	{
	  /* Let's behave as if there was another caller that accounts for all
	     the calls that were either indirect or from other compilation
	     units.  */
	  orig_nonrec_calls++;
	  profile_count pretend_caller_count
	    = (orig_node_count - new_sum - orig_nonrec_call_count
	       - stats.rec_count_sum);
	  orig_nonrec_call_count += pretend_caller_count;
	}

      /* Divide all "unexplained" counts roughly proportionally to sums of
	 counts of non-recursive calls.

	 We put rather arbitrary limits on how many counts we claim because the
	 number of non-self-recursive incoming count is only a rough guideline
	 and there are cases (such as mcf) where using it blindly just takes
	 too many.  And if lattices are considered in the opposite order we
	 could also take too few.  */
      profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;

      /* NEW_PART is clamped between a fraction proportional to the new
	 node's non-recursive calls and (limit_den - 1) / limit_den of the
	 unexplained count.  */
      int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
      profile_count new_part = unexp.apply_scale (limit_den - 1, limit_den);
      profile_count den = new_sum + orig_nonrec_call_count;
      if (den.nonzero_p ())
	new_part = MIN (unexp.apply_scale (new_sum, den), new_part);
      new_part = MAX (new_part,
		      unexp.apply_scale (new_nonrec_calls, limit_den));
      if (dump_file)
	{
	  fprintf (dump_file, " Claiming ");
	  new_part.dump (dump_file);
	  fprintf (dump_file, " of unexplained ");
	  unexp.dump (dump_file);
	  fprintf (dump_file, " counts because of self-recursive "
		   "calls\n");
	}
      new_sum += new_part;
      remainder = lenient_count_portion_handling (orig_node_count - new_sum,
						  orig_node);
    }
  else
    remainder = lenient_count_portion_handling (orig_node_count - new_sum,
						orig_node);

  new_node->scale_profile_to (new_sum);

  if (!orig_edges_processed)
    orig_node->scale_profile_to (remainder);

  if (dump_file)
    {
      dump_profile_updates (new_node, true);
      dump_profile_updates (orig_node, false);
    }
}
4896 :
4897 : /* Update the respective profile of specialized NEW_NODE and the original
4898 : ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4899 : have been redirected to the specialized version. */
4900 :
4901 : static void
4902 0 : update_specialized_profile (struct cgraph_node *new_node,
4903 : struct cgraph_node *orig_node,
4904 : profile_count redirected_sum)
4905 : {
4906 0 : if (dump_file)
4907 : {
4908 0 : fprintf (dump_file, " the sum of counts of redirected edges is ");
4909 0 : redirected_sum.dump (dump_file);
4910 0 : fprintf (dump_file, "\n old ipa count of the original node is ");
4911 0 : orig_node->count.dump (dump_file);
4912 0 : fprintf (dump_file, "\n");
4913 : }
4914 0 : if (!orig_node->count.ipa ().nonzero_p ()
4915 0 : || !redirected_sum.nonzero_p ())
4916 0 : return;
4917 :
4918 0 : orig_node->scale_profile_to
4919 0 : (lenient_count_portion_handling (orig_node->count.ipa () - redirected_sum,
4920 : orig_node));
4921 :
4922 0 : new_node->scale_profile_to (new_node->count.ipa () + redirected_sum);
4923 :
4924 0 : if (dump_file)
4925 : {
4926 0 : dump_profile_updates (new_node, true);
4927 0 : dump_profile_updates (orig_node, false);
4928 : }
4929 : }
4930 :
4931 : static void adjust_references_in_caller (cgraph_edge *cs,
4932 : symtab_node *symbol, int index);
4933 :
4934 : /* Simple structure to pass a symbol and index (with same meaning as parameters
4935 : of adjust_references_in_caller) through a void* parameter of a
4936 : call_for_symbol_thunks_and_aliases callback. */
struct symbol_and_index_together
{
  /* The symbol whose ADDR references are being adjusted.  */
  symtab_node *symbol;
  /* Index of the formal parameter through which it is passed.  */
  int index;
};
4942 :
4943 : /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4944 : adjust_references_in_caller on edges up in the call-graph, if necessary. */
4945 : static bool
4946 8 : adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4947 : {
4948 8 : symbol_and_index_together *pack = (symbol_and_index_together *) data;
4949 38 : for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4950 30 : if (!cs->caller->thunk)
4951 30 : adjust_references_in_caller (cs, pack->symbol, pack->index);
4952 8 : return false;
4953 : }
4954 :
4955 : /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4956 : variable which is only dereferenced and which is represented by SYMBOL. See
   if we can remove ADDR reference in callers associated with the call.  */
4958 :
static void
adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
  if (jfunc->type == IPA_JF_CONST)
    {
      /* The address is passed directly; drop the corresponding ADDR
	 reference of this call statement, if it is still there.  */
      ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
						    cs->lto_stmt_uid,
						    IPA_REF_ADDR);
      if (!to_del)
	return;
      to_del->remove_reference ();
      ipa_zap_jf_refdesc (jfunc);
      if (dump_file)
	fprintf (dump_file, " Removed a reference from %s to %s.\n",
		 cs->caller->dump_name (), symbol->dump_name ());
      return;
    }

  /* Only simple pass-throughs whose refdesc has not already been
     decremented can be handled below.  */
  if (jfunc->type != IPA_JF_PASS_THROUGH
      || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
      || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
    return;

  int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
  cgraph_node *caller = cs->caller;
  ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
  /* TODO: This consistency check may be too big and not really
     that useful.  Consider removing it.  */
  tree cst;
  if (caller_info->ipcp_orig_node)
    cst = caller_info->known_csts[fidx];
  else
    {
      ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
      gcc_assert (lat->is_single_const ());
      cst = lat->values->value;
    }
  gcc_assert (TREE_CODE (cst) == ADDR_EXPR
	      && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
		  == symbol));

  /* Decrement the number of controlled uses of the caller's parameter the
     address is passed through.  */
  int cuses = ipa_get_controlled_uses (caller_info, fidx);
  if (cuses == IPA_UNDESCRIBED_USE)
    return;
  gcc_assert (cuses > 0);
  cuses--;
  ipa_set_controlled_uses (caller_info, fidx, cuses);
  ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, " Controlled uses of parameter %i of %s dropped "
	     "to %i.\n", fidx, caller->dump_name (), cuses);
  /* If some controlled uses remain, we are done for now.  */
  if (cuses)
    return;

  if (caller_info->ipcp_orig_node)
    {
      /* Cloning machinery has created a reference here, we need to either
	 remove it or change it to a read one.  */
      ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
      if (to_del)
	{
	  to_del->remove_reference ();
	  if (dump_file)
	    fprintf (dump_file, " Removed a reference from %s to %s.\n",
		     cs->caller->dump_name (), symbol->dump_name ());
	  if (ipa_get_param_load_dereferenced (caller_info, fidx))
	    {
	      caller->create_reference (symbol, IPA_REF_LOAD, NULL);
	      if (dump_file)
		fprintf (dump_file,
			 " ...and replaced it with LOAD one.\n");
	    }
	}
    }

  /* All controlled uses were dropped; recurse into the callers of this
     caller to do the same there.  */
  symbol_and_index_together pack;
  pack.symbol = symbol;
  pack.index = fidx;
  if (caller->can_change_signature)
    caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
						&pack, true);
}
5043 :
5044 :
5045 : /* Return true if we would like to remove a parameter from NODE when cloning it
5046 : with KNOWN_CSTS scalar constants. */
5047 :
5048 : static bool
5049 20931 : want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5050 : {
5051 20931 : auto_vec<bool, 16> surviving;
5052 20931 : bool filled_vec = false;
5053 20931 : ipa_node_params *info = ipa_node_params_sum->get (node);
5054 20931 : int i, count = ipa_get_param_count (info);
5055 :
5056 41580 : for (i = 0; i < count; i++)
5057 : {
5058 36744 : if (!known_csts[i] && ipa_is_param_used (info, i))
5059 20649 : continue;
5060 :
5061 16095 : if (!filled_vec)
5062 : {
5063 16095 : clone_info *info = clone_info::get (node);
5064 16095 : if (!info || !info->param_adjustments)
5065 : return true;
5066 0 : info->param_adjustments->get_surviving_params (&surviving);
5067 0 : filled_vec = true;
5068 : }
5069 0 : if (surviving.length() < (unsigned) i && surviving[i])
5070 : return true;
5071 : }
5072 : return false;
5073 20931 : }
5074 :
5075 : /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5076 : known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5077 : redirect all edges in CALLERS to it. */
5078 :
static struct cgraph_node *
create_specialized_node (struct cgraph_node *node,
			 vec<tree> known_csts,
			 vec<ipa_polymorphic_call_context> known_contexts,
			 vec<ipa_argagg_value, va_gc> *aggvals,
			 vec<cgraph_edge *> &callers)
{
  ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
  vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  struct cgraph_node *new_node;
  int i, count = ipa_get_param_count (info);
  clone_info *cinfo = clone_info::get (node);
  ipa_param_adjustments *old_adjustments = cinfo
					   ? cinfo->param_adjustments : NULL;
  ipa_param_adjustments *new_adjustments;
  gcc_assert (!info->ipcp_orig_node);
  gcc_assert (node->can_change_signature
	      || !old_adjustments);

  /* First compute parameter adjustments for the clone, either by extending
     adjustments made by a previous pass or by deciding afresh which
     parameters can be dropped.  */
  if (old_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert (old_adjustments->m_always_copy_start == count
		  || old_adjustments->m_always_copy_start < 0);
      int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
      for (i = 0; i < old_adj_count; i++)
	{
	  /* Keep an old adjustment unless it copies a parameter which has
	     now become a known constant or is unused.  */
	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
	  if (!node->can_change_signature
	      || old_adj->op != IPA_PARAM_OP_COPY
	      || (!known_csts[old_adj->base_index]
		  && ipa_is_param_used (info, old_adj->base_index)))
	    {
	      ipa_adjusted_param new_adj = *old_adj;

	      new_adj.prev_clone_adjustment = true;
	      new_adj.prev_clone_index = i;
	      vec_safe_push (new_params, new_adj);
	    }
	}
      bool skip_return = old_adjustments->m_skip_return;
      new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count,
						skip_return));
    }
  else if (node->can_change_signature
	   && want_remove_some_param_p (node, known_csts))
    {
      /* Copy only those parameters that are neither known constants nor
	 unused; the rest are dropped from the clone's signature.  */
      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_COPY;
      for (i = 0; i < count; i++)
	if (!known_csts[i] && ipa_is_param_used (info, i))
	  {
	    adj.base_index = i;
	    adj.prev_clone_index = i;
	    vec_safe_push (new_params, adj);
	  }
      new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count, false));
    }
  else
    new_adjustments = NULL;

  /* Self-recursive calls are handled separately from other callers; pull
     them out of CALLERS.  */
  auto_vec<cgraph_edge *, 2> self_recursive_calls;
  for (i = callers.length () - 1; i >= 0; i--)
    {
      cgraph_edge *cs = callers[i];
      if (cs->caller == node)
	{
	  self_recursive_calls.safe_push (cs);
	  callers.unordered_remove (i);
	}
    }
  replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
  /* Build replacement maps for all parameters with known constants.  */
  for (i = 0; i < count; i++)
    {
      tree t = known_csts[i];
      if (!t)
	continue;

      gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);

      bool load_ref = false;
      symtab_node *ref_symbol;
      if (TREE_CODE (t) == ADDR_EXPR)
	{
	  /* If the address of a variable is only ever dereferenced and has
	     no other controlled uses, replace the ADDR references in the
	     callers with LOAD ones.  */
	  tree base = get_base_address (TREE_OPERAND (t, 0));
	  if (TREE_CODE (base) == VAR_DECL
	      && ipa_get_controlled_uses (info, i) == 0
	      && ipa_get_param_load_dereferenced (info, i)
	      && (ref_symbol = symtab_node::get (base)))
	    {
	      load_ref = true;
	      if (node->can_change_signature)
		for (cgraph_edge *caller : callers)
		  adjust_references_in_caller (caller, ref_symbol, i);
	    }
	}

      ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
      if (replace_map)
	vec_safe_push (replace_trees, replace_map);
    }

  /* Create the clone itself with a per-assembler-name "constprop" suffix
     counter.  */
  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
				 node->decl)));
  new_node = node->create_virtual_clone (callers, replace_trees,
					 new_adjustments, "constprop",
					 suffix_counter);
  suffix_counter++;

  /* Redirect the clones of self-recursive edges so that they call the new
     node, turning them into true self-recursion again.  */
  bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
  for (unsigned j = 0; j < self_recursive_calls.length (); j++)
    {
      cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
      /* Cloned edges can disappear during cloning as speculation can be
	 resolved, check that we have one and that it comes from the last
	 cloning.  */
      if (cs && cs->caller == new_node)
	cs->redirect_callee_duplicating_thunks (new_node);
      /* Any future code that would make more than one clone of an outgoing
	 edge would confuse this mechanism, so let's check that does not
	 happen.  */
      gcc_checking_assert (!cs
			   || !get_next_cgraph_edge_clone (cs)
			   || get_next_cgraph_edge_clone (cs)->caller != new_node);
    }
  if (have_self_recursive_calls)
    new_node->expand_all_artificial_thunks ();

  /* Attach aggregate replacement values and create references for them.  */
  ipa_set_node_agg_value_chain (new_node, aggvals);
  for (const ipa_argagg_value &av : aggvals)
    new_node->maybe_create_reference (av.value, NULL);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
      if (known_contexts.exists ())
	{
	  for (i = 0; i < count; i++)
	    if (!known_contexts[i].useless_p ())
	      {
		fprintf (dump_file, " known ctx %i is ", i);
		known_contexts[i].dump (dump_file);
	      }
	}
      if (aggvals)
	{
	  fprintf (dump_file, " Aggregate replacements:");
	  ipa_argagg_value_list avs (aggvals);
	  avs.dump (dump_file);
	}
    }

  /* Record in the summaries that the new node is an IPA-CP clone of NODE
     with the given known values.  */
  new_info = ipa_node_params_sum->get (new_node);
  new_info->ipcp_orig_node = node;
  new_node->ipcp_clone = true;
  new_info->known_csts = known_csts;
  new_info->known_contexts = known_contexts;

  ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
				  aggvals);

  return new_node;
}
5250 :
/* Return true if JFUNC, which describes the i-th parameter of call CS, is a
   pass-through function to itself when the cgraph_node involved is not an
   IPA-CP clone.  When SIMPLE is true, further check whether JFUNC is a simple
   no-operation pass-through.  */
5255 :
5256 : static bool
5257 733405 : self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5258 : bool simple = true)
5259 : {
5260 733405 : enum availability availability;
5261 733405 : if (jfunc->type == IPA_JF_PASS_THROUGH
5262 80746 : && cs->caller == cs->callee->function_symbol (&availability)
5263 19334 : && availability > AVAIL_INTERPOSABLE
5264 19334 : && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5265 19334 : && ipa_get_jf_pass_through_formal_id (jfunc) == i
5266 19334 : && ipa_node_params_sum->get (cs->caller)
5267 752739 : && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5268 : return true;
5269 : return false;
5270 : }
5271 :
5272 : /* Return true if JFUNC, which describes the i-th parameter of call CS, is an
5273 : ancestor function with zero offset to itself when the cgraph_node involved
5274 : is not an IPA-CP clone. */
5275 :
5276 : static bool
5277 714083 : self_recursive_ancestor_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i)
5278 : {
5279 714083 : enum availability availability;
5280 714083 : if (jfunc->type == IPA_JF_ANCESTOR
5281 3257 : && cs->caller == cs->callee->function_symbol (&availability)
5282 1 : && availability > AVAIL_INTERPOSABLE
5283 1 : && ipa_get_jf_ancestor_offset (jfunc) == 0
5284 1 : && ipa_get_jf_ancestor_formal_id (jfunc) == i
5285 1 : && ipa_node_params_sum->get (cs->caller)
5286 714084 : && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5287 : return true;
5288 : return false;
5289 : }
5290 :
/* Return true if JFUNC, which describes a part of an aggregate represented or
   pointed to by the i-th parameter of call CS, is a pass-through function to
   itself when the cgraph_node involved is not an IPA-CP clone.  When SIMPLE
   is true, further check whether JFUNC is a simple no-operation
   pass-through.  */
5296 :
5297 : static bool
5298 316033 : self_recursive_agg_pass_through_p (const cgraph_edge *cs,
5299 : const ipa_agg_jf_item *jfunc,
5300 : int i, bool simple = true)
5301 : {
5302 316033 : enum availability availability;
5303 316033 : if (cs->caller == cs->callee->function_symbol (&availability)
5304 3758 : && availability > AVAIL_INTERPOSABLE
5305 3758 : && jfunc->jftype == IPA_JF_LOAD_AGG
5306 599 : && jfunc->offset == jfunc->value.load_agg.offset
5307 599 : && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5308 599 : && jfunc->value.pass_through.formal_id == i
5309 593 : && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5310 593 : && ipa_node_params_sum->get (cs->caller)
5311 316626 : && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5312 : return true;
5313 : return false;
5314 : }
5315 :
5316 : /* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
5317 : KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5318 :
static void
find_scalar_values_for_callers_subset (vec<tree> &known_csts,
				       ipa_node_params *info,
				       const vec<cgraph_edge *> &callers)
{
  int i, count = ipa_get_param_count (info);

  /* Examine each formal parameter independently.  */
  for (i = 0; i < count; i++)
    {
      ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
      /* A bottom lattice means no constant can possibly be usable.  */
      if (lat->bottom)
	continue;
      /* If propagation already established a single constant for all
	 contexts, it also holds for this subset of callers.  */
      if (lat->is_single_const ())
	{
	  known_csts[i] = lat->values->value;
	  continue;
	}

      struct cgraph_edge *cs;
      tree newval = NULL_TREE;
      int j;
      bool first = true;
      tree type = ipa_get_type (info, i);

      /* All callers in the subset must agree on one value, otherwise the
	 slot stays empty.  */
      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  struct ipa_jump_func *jump_func;
	  tree t;

	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  /* Give up if the edge carries no argument information, passes
	     fewer arguments than i, or goes through a thunk which modifies
	     the first (this) argument.  */
	  if (!args
	      || i >= ipa_get_cs_argument_count (args)
	      || (i == 0
		  && call_passes_through_thunk (cs)))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  jump_func = ipa_get_ith_jump_func (args, i);

	  /* Besides simple pass-through jump function, arithmetic jump
	     function could also introduce argument-direct-pass-through for
	     self-feeding recursive call.  For example,

	     fn (int i)
	     {
	       fn (i & 1);
	     }

	     Given that i is 0, recursive propagation via (i & 1) also gets
	     0.  */
	  if (self_recursive_pass_through_p (cs, jump_func, i, false))
	    {
	      /* A self-recursive edge cannot come first, so a value gathered
		 from the other callers must already be available.  */
	      gcc_assert (newval);
	      enum tree_code opcode
		= ipa_get_jf_pass_through_operation (jump_func);
	      tree op_type = (opcode == NOP_EXPR) ? NULL_TREE
		: ipa_get_jf_pass_through_op_type (jump_func);
	      t = ipa_get_jf_arith_result (opcode, newval,
					   ipa_get_jf_pass_through_operand (jump_func),
					   op_type);
	      t = ipacp_value_safe_for_type (type, t);
	    }
	  else if (self_recursive_ancestor_p (cs, jump_func, i))
	    /* A zero-offset self-recursive ancestor brings whatever the
	       other callers bring; it cannot invalidate the value.  */
	    continue;
	  else
	    t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
				      jump_func, type);
	  /* Any unknown value or a disagreement between callers kills the
	     candidate for this parameter.  */
	  if (!t
	      || (newval
		  && !values_equal_for_ipcp_p (t, newval))
	      || (!first && !newval))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  else
	    newval = t;
	  first = false;
	}

      /* All callers agreed on NEWVAL, record it.  */
      if (newval)
	known_csts[i] = newval;
    }
}
5404 :
/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
   CALLERS.  */
5408 :
static void
find_contexts_for_caller_subset (vec<ipa_polymorphic_call_context>
				 &known_contexts,
				 ipa_node_params *info,
				 const vec<cgraph_edge *> &callers)
{
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      /* Unused parameters cannot be devirtualization sources.  */
      if (!ipa_is_param_used (info, i))
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat
	= ipa_get_poly_ctx_lat (info, i);
      /* Bottom means no single context can ever hold.  */
      if (ctxlat->bottom)
	continue;
      /* If propagation already proved a single context for all callers,
	 take it directly.  The vector is grown lazily only when there is
	 actually something to store.  */
      if (ctxlat->is_single_const ())
	{
	  if (!ctxlat->values->value.useless_p ())
	    {
	      if (known_contexts.is_empty ())
		known_contexts.safe_grow_cleared (count, true);
	      known_contexts[i] = ctxlat->values->value;
	    }
	  continue;
	}

      cgraph_edge *cs;
      ipa_polymorphic_call_context newval;
      bool first = true;
      int j;

      /* Compute the meet of the contexts brought by all callers; the result
	 is usable only if it stays non-useless.  */
      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  /* NOTE(review): this bails out of the whole function, not just
	     this parameter, when an edge lacks argument info — presumably
	     because later parameters would hit the same edge anyway.  */
	  if (!args
	      || i >= ipa_get_cs_argument_count (args))
	    return;
	  ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  ipa_polymorphic_call_context ctx;
	  ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
					cs, i, jfunc);
	  if (first)
	    {
	      newval = ctx;
	      first = false;
	    }
	  else
	    newval.meet_with (ctx);
	  /* Once the meet degenerates there is no point in looking
	     further.  */
	  if (newval.useless_p ())
	    break;
	}

      /* Record the common context, growing the output vector on first
	 use.  */
      if (!newval.useless_p ())
	{
	  if (known_contexts.is_empty ())
	    known_contexts.safe_grow_cleared (count, true);
	  known_contexts[i] = newval;
	}

    }
}
5472 :
5473 : /* Push all aggregate values coming along edge CS for parameter number INDEX to
5474 : RES. If INTERIM is non-NULL, it contains the current interim state of
5475 : collected aggregate values which can be used to compute values passed over
5476 : self-recursive edges.
5477 :
   This is basically one iteration of push_agg_values_from_edge over one
5479 : parameter, which allows for simpler early returns. */
5480 :
static void
push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
				     vec<ipa_argagg_value> *res,
				     const ipa_argagg_value_list *interim)
{
  bool agg_values_from_caller = false;
  bool agg_jf_preserved = false;
  unsigned unit_delta = UINT_MAX;
  int src_idx = -1;
  ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
						index);

  /* Determine whether the aggregate contents simply flow from a caller
     parameter (a no-op pass-through or an aggregate-preserving ancestor),
     and if so, from which slot (SRC_IDX) and at what offset adjustment
     (UNIT_DELTA).  */
  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      agg_values_from_caller = true;
      agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
      src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      unit_delta = 0;
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      agg_values_from_caller = true;
      agg_jf_preserved = true;
      src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
    }

  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
  if (agg_values_from_caller)
    {
      if (caller_info->ipcp_orig_node)
	{
	  /* The caller is itself an IPA-CP clone; take the values it was
	     specialized for from its transformation summary.  */
	  struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
	  ipcp_transformation *ts
	    = ipcp_get_transformation_summary (cs->caller);
	  ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
	  ipcp_param_lattices *orig_plats
	    = ipa_get_parm_lattices (orig_info, src_idx);
	  if (ts
	      && orig_plats->aggs
	      && (agg_jf_preserved || !orig_plats->aggs_by_ref))
	    {
	      ipa_argagg_value_list src (ts);
	      src.push_adjusted_values (src_idx, index, unit_delta, res);
	      return;
	    }
	}
      else
	{
	  /* The caller is not a clone; consult its aggregate lattices
	     directly.  */
	  ipcp_param_lattices *src_plats
	    = ipa_get_parm_lattices (caller_info, src_idx);
	  if (src_plats->aggs
	      && !src_plats->aggs_bottom
	      && (agg_jf_preserved || !src_plats->aggs_by_ref))
	    {
	      /* For a self-recursive edge, the values passed along it are
		 exactly those collected so far in INTERIM.  */
	      if (interim && (self_recursive_pass_through_p (cs, jfunc, index)
			      || self_recursive_ancestor_p (cs, jfunc, index)))
		{
		  interim->push_adjusted_values (src_idx, index, unit_delta,
						 res);
		  return;
		}
	      if (!src_plats->aggs_contain_variable)
		{
		  push_agg_values_from_plats (src_plats, index, unit_delta,
					      res);
		  return;
		}
	    }
	}
    }

  /* Otherwise, take the explicitly known items of the aggregate jump
     function, if any.  */
  if (!jfunc->agg.items)
    return;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
    {
      tree value, srcvalue;
      /* Besides simple pass-through aggregate jump function, arithmetic
	 aggregate jump function could also bring same aggregate value as
	 parameter passed-in for self-feeding recursive call.  For example,

	 fn (int *i)
	 {
	   int j = *i & 1;
	   fn (&j);
	 }

	 Given that *i is 0, recursive propagation via (*i & 1) also gets 0.  */
      if (interim
	  && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
	  && (srcvalue = interim->get_value(index,
					    agg_jf.offset / BITS_PER_UNIT)))
	{
	  value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
					   srcvalue,
					   agg_jf.value.pass_through.operand,
					   agg_jf.value.pass_through.op_type);
	  value = ipacp_value_safe_for_type (agg_jf.type, value);
	}
      else
	value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
					  &agg_jf);
      if (value)
	{
	  struct ipa_argagg_value iav;
	  iav.value = value;
	  iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
	  iav.index = index;
	  iav.by_ref = jfunc->agg.by_ref;
	  iav.killed = false;

	  /* Items of one jump function must be sorted by increasing
	     offset; consumers rely on this ordering.  */
	  gcc_assert (first
		      || iav.unit_offset > prev_unit_offset);
	  prev_unit_offset = iav.unit_offset;
	  first = false;

	  res->safe_push (iav);
	}
    }
  return;
}
5606 :
5607 : /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5608 : description of ultimate callee of CS or the one it was cloned from (the
5609 : summary where lattices are). If INTERIM is non-NULL, it contains the
5610 : current interim state of collected aggregate values which can be used to
5611 : compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5612 : is true) and to skip values which clearly will not be part of intersection
5613 : with INTERIM. */
5614 :
static void
push_agg_values_from_edge (struct cgraph_edge *cs,
			   ipa_node_params *dest_info,
			   vec<ipa_argagg_value> *res,
			   const ipa_argagg_value_list *interim,
			   bool optimize_self_recursion)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  /* Without argument information there is nothing to collect.  */
  if (!args)
    return;

  /* Only consider parameter slots that both the callee declares and the
     call site actually passes.  */
  int count = MIN (ipa_get_param_count (dest_info),
		   ipa_get_cs_argument_count (args));

  unsigned interim_index = 0;
  for (int index = 0; index < count; index++)
    {
      if (interim)
	{
	  /* INTERIM is sorted by index; advance the cursor past entries for
	     lower indices and skip this parameter entirely when INTERIM has
	     no (live) entry for it — the intersection would drop it
	     anyway.  */
	  while (interim_index < interim->m_elts.size ()
		 && interim->m_elts[interim_index].value
		 && interim->m_elts[interim_index].index < index)
	    interim_index++;
	  if (interim_index >= interim->m_elts.size ()
	      || interim->m_elts[interim_index].index > index)
	    continue;
	}

      /* Skip parameters that are unused or whose aggregate lattice has
	 already fallen to bottom.  */
      ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
      if (!ipa_is_param_used (dest_info, index)
	  || plats->aggs_bottom)
	continue;
      /* INTERIM is passed down only when the caller wants self-recursive
	 edges to be evaluated against the values gathered so far.  */
      push_agg_values_for_index_from_edge (cs, index, res,
					   optimize_self_recursion ? interim
					   : NULL);
    }
}
5652 :
5653 :
5654 : /* Look at edges in CALLERS and collect all known aggregate values that arrive
5655 : from all of them into INTERIM. Return how many there are. */
5656 :
static unsigned int
find_aggregate_values_for_callers_subset_1 (vec<ipa_argagg_value> &interim,
					    struct cgraph_node *node,
					    const vec<cgraph_edge *> &callers)
{
  /* Lattices live on the original node, not on a clone.  */
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  if (dest_info->ipcp_orig_node)
    dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);

  /* gather_edges_for_value puts a non-recursive call into the first element of
     callers if it can.  */
  push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);

  unsigned valid_entries = interim.length ();
  if (!valid_entries)
    return 0;

  /* Intersect the values from each remaining caller with what has been
     gathered so far; an empty intersection means no common values.  */
  unsigned caller_count = callers.length();
  for (unsigned i = 1; i < caller_count; i++)
    {
      auto_vec<ipa_argagg_value, 32> last;
      ipa_argagg_value_list avs (&interim);
      push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);

      valid_entries = intersect_argaggs_with (interim, last);
      if (!valid_entries)
	return 0;
    }

  return valid_entries;
}
5688 :
/* Look at edges in CALLERS and collect all known aggregate values that arrive
   from all of them and push those common to all of them into RES.  */
5692 :
static void
find_aggregate_values_for_callers_subset (vec<ipa_argagg_value> &res,
					  struct cgraph_node *node,
					  const vec<cgraph_edge *> &callers)
{
  auto_vec<ipa_argagg_value, 32> interim;
  unsigned valid_entries
    = find_aggregate_values_for_callers_subset_1 (interim, node, callers);
  if (!valid_entries)
    return;

  /* Copy out only the entries that survived the intersection; entries with
     a NULL value are dead placeholders.  */
  for (const ipa_argagg_value &av : interim)
    if (av.value)
      res.safe_push(av);
  return;
}
5709 :
5710 : /* Look at edges in CALLERS and collect all known aggregate values that arrive
5711 : from all of them and return them in a garbage-collected vector. Return
5712 : nullptr if there are none. */
5713 :
static struct vec<ipa_argagg_value, va_gc> *
find_aggregate_values_for_callers_subset_gc (struct cgraph_node *node,
					     const vec<cgraph_edge *> &callers)
{
  auto_vec<ipa_argagg_value, 32> interim;
  unsigned valid_entries
    = find_aggregate_values_for_callers_subset_1 (interim, node, callers);
  if (!valid_entries)
    return nullptr;

  /* Allocate the GC vector to the exact number of surviving entries and copy
     them over; entries with a NULL value are dead placeholders.  */
  vec<ipa_argagg_value, va_gc> *res = NULL;
  vec_safe_reserve_exact (res, valid_entries);
  for (const ipa_argagg_value &av : interim)
    if (av.value)
      res->quick_push(av);
  /* VALID_ENTRIES as returned by the helper must match what was actually
     live.  */
  gcc_checking_assert (res->length () == valid_entries);
  return res;
}
5732 :
5733 : /* Determine whether CS also brings all scalar values that the NODE is
5734 : specialized for. */
5735 :
5736 : static bool
5737 85 : cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5738 : struct cgraph_node *node)
5739 : {
5740 85 : ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5741 85 : int count = ipa_get_param_count (dest_info);
5742 85 : class ipa_node_params *caller_info;
5743 85 : class ipa_edge_args *args;
5744 85 : int i;
5745 :
5746 85 : caller_info = ipa_node_params_sum->get (cs->caller);
5747 85 : args = ipa_edge_args_sum->get (cs);
5748 209 : for (i = 0; i < count; i++)
5749 : {
5750 151 : struct ipa_jump_func *jump_func;
5751 151 : tree val, t;
5752 :
5753 151 : val = dest_info->known_csts[i];
5754 151 : if (!val)
5755 94 : continue;
5756 :
5757 114 : if (i >= ipa_get_cs_argument_count (args))
5758 : return false;
5759 57 : jump_func = ipa_get_ith_jump_func (args, i);
5760 57 : t = ipa_value_from_jfunc (caller_info, jump_func,
5761 : ipa_get_type (dest_info, i));
5762 57 : if (!t || !values_equal_for_ipcp_p (val, t))
5763 27 : return false;
5764 : }
5765 : return true;
5766 : }
5767 :
5768 : /* Determine whether CS also brings all aggregate values that NODE is
5769 : specialized for. */
5770 :
5771 : static bool
5772 58 : cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5773 : struct cgraph_node *node)
5774 : {
5775 58 : ipcp_transformation *ts = ipcp_get_transformation_summary (node);
5776 58 : if (!ts || vec_safe_is_empty (ts->m_agg_values))
5777 : return true;
5778 :
5779 48 : const ipa_argagg_value_list existing (ts->m_agg_values);
5780 48 : auto_vec<ipa_argagg_value, 32> edge_values;
5781 48 : ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5782 48 : gcc_checking_assert (dest_info->ipcp_orig_node);
5783 48 : dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5784 48 : push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
5785 48 : const ipa_argagg_value_list avl (&edge_values);
5786 48 : return avl.superset_of_p (existing);
5787 48 : }
5788 :
5789 : /* Given an original NODE and a VAL for which we have already created a
5790 : specialized clone, look whether there are incoming edges that still lead
5791 : into the old node but now also bring the requested value and also conform to
5792 : all other criteria such that they can be redirected the special node.
5793 : This function can therefore redirect the final edge in a SCC. */
5794 :
template <typename valtype>
static void
perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
{
  ipcp_value_source<valtype> *src;
  profile_count redirected_sum = profile_count::zero ();

  /* Walk every source edge of VAL and every clone of each such edge.  */
  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  /* The edge can be redirected only if it brings this value and all
	     scalar and aggregate values the specialized node expects.  */
	  if (cgraph_edge_brings_value_p (cs, src, node, val)
	      && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
	      && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
	    {
	      if (dump_file)
		fprintf (dump_file, " - adding an extra caller %s of %s\n",
			 cs->caller->dump_name (),
			 val->spec_node->dump_name ());

	      cs->redirect_callee_duplicating_thunks (val->spec_node);
	      val->spec_node->expand_all_artificial_thunks ();
	      /* Accumulate the IPA counts moved over so profiles can be
		 adjusted afterwards.  */
	      if (cs->count.ipa ().initialized_p ())
		redirected_sum = redirected_sum + cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (redirected_sum.nonzero_p ())
    update_specialized_profile (val->spec_node, node, redirected_sum);
}
5828 :
5829 : /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5830 :
5831 : static bool
5832 5128 : known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5833 : {
5834 5128 : ipa_polymorphic_call_context *ctx;
5835 5128 : int i;
5836 :
5837 5128 : FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5838 115 : if (!ctx->useless_p ())
5839 : return true;
5840 : return false;
5841 : }
5842 :
/* Return a copy of KNOWN_CONTEXTS if it contains at least one useful context,
   otherwise return vNULL.  */
5844 :
5845 : static vec<ipa_polymorphic_call_context>
5846 5128 : copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5847 : {
5848 5128 : if (known_contexts_useful_p (known_contexts))
5849 115 : return known_contexts.copy ();
5850 : else
5851 5013 : return vNULL;
5852 : }
5853 :
5854 : /* Return true if the VALUE is represented in KNOWN_CSTS at INDEX if OFFSET is
5855 : minus one or in AGGVALS for INDEX and OFFSET otherwise. */
5856 :
5857 : DEBUG_FUNCTION bool
5858 5077 : ipcp_val_replacement_ok_p (vec<tree> &known_csts,
5859 : vec<ipa_polymorphic_call_context> &,
5860 : vec<ipa_argagg_value, va_gc> *aggvals,
5861 : int index, HOST_WIDE_INT offset, tree value)
5862 : {
5863 5077 : tree v;
5864 5077 : if (offset == -1)
5865 3618 : v = known_csts[index];
5866 : else
5867 : {
5868 1459 : const ipa_argagg_value_list avl (aggvals);
5869 1459 : v = avl.get_value (index, offset / BITS_PER_UNIT);
5870 : }
5871 :
5872 5077 : return v && values_equal_for_ipcp_p (v, value);
5873 : }
5874 :
/* Dump to F all the values in AVALS for which we are re-evaluating the effects
   on the function represented by INFO.  */
5877 :
DEBUG_FUNCTION void
dump_reestimation_message (FILE *f, ipa_node_params *info,
			   const ipa_auto_call_arg_values &avals)
{
  /* Scalar constants are printed one "index:value" pair per known slot.  */
  fprintf (f, "   Re-estimating effects with\n"
	   "     Scalar constants:");
  int param_count = ipa_get_param_count (info);
  for (int i = 0; i < param_count; i++)
    if (avals.m_known_vals[i])
      {
	fprintf (f, " %i:", i);
	print_ipcp_constant_value (f, avals.m_known_vals[i]);
      }
  fprintf (f, "\n");
  /* Polymorphic contexts and aggregate replacements are only dumped when
     there is at least one.  */
  if (!avals.m_known_contexts.is_empty ())
    {
      fprintf (f, "     Pol. contexts:");
      for (int i = 0; i < param_count; i++)
	if (!avals.m_known_contexts[i].useless_p ())
	  {
	    fprintf (f, " %i:", i);
	    avals.m_known_contexts[i].dump (f);
	  }
      fprintf (f, "\n");
    }
  if (!avals.m_known_aggs.is_empty ())
    {
      fprintf (f, "     Aggregate replacements:");
      ipa_argagg_value_list avs (&avals);
      avs.dump (f);
    }
}
5910 :
/* Return true if the VALUE is represented in KNOWN_CONTEXTS at INDEX, which
   requires that OFFSET is equal to minus one (because the source of a
   polymorphic context cannot be an aggregate value).  */
5914 :
5915 : DEBUG_FUNCTION bool
5916 51 : ipcp_val_replacement_ok_p (vec<tree> &,
5917 : vec<ipa_polymorphic_call_context> &known_contexts,
5918 : vec<ipa_argagg_value, va_gc> *,
5919 : int index, HOST_WIDE_INT offset,
5920 : ipa_polymorphic_call_context value)
5921 : {
5922 51 : if (offset != -1
5923 51 : || known_contexts.length () <= (unsigned) index
5924 102 : || known_contexts[index].useless_p ())
5925 : return false;
5926 :
5927 51 : if (known_contexts[index].equal_to (value))
5928 : return true;
5929 :
5930 : /* In some corner cases, the final gathering of contexts can figure out that
5931 : the available context is actually more precise than what we wanted to
5932 : clone for. Allow it. */
5933 0 : value.combine_with (known_contexts[index]);
5934 0 : return known_contexts[index].equal_to (value);
5935 : }
5936 :
5937 : /* Decide whether to create a special version of NODE for value VAL of
5938 : parameter at the given INDEX. If OFFSET is -1, the value is for the
5939 : parameter itself, otherwise it is stored at the given OFFSET of the
5940 : parameter. AVALS describes the other already known values. SELF_GEN_CLONES
5941 : is a vector which contains clones created for self-recursive calls with an
5942 : arithmetic pass-through jump function. CUR_SWEEP is the number of the
5943 : current sweep of the call-graph during the decision stage. */
5944 :
template <typename valtype>
static bool
decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
		    ipcp_value<valtype> *val,
		    vec<cgraph_node *> *self_gen_clones, int cur_sweep)
{
  int caller_count;
  sreal freq_sum;
  profile_count count_sum, rec_count_sum;
  bool called_without_ipa_profile;

  /* If a clone for this value already exists, only try to redirect further
     incoming edges to it.  */
  if (val->spec_node)
    {
      perhaps_add_new_callers (node, val);
      return false;
    }
  /* Respect the unit growth limit before doing any expensive analysis.  */
  else if (val->local_size_cost + overall_size
	   > get_max_overall_size (node, cur_sweep))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " Ignoring candidate value because "
		 "maximum unit size would be reached with %li.\n",
		 val->local_size_cost + overall_size);
      return false;
    }
  /* Gather frequency/count information about the edges that would be
     redirected; bail out if there are none.  */
  else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
					    &rec_count_sum, &count_sum,
					    &called_without_ipa_profile))
    return false;

  if (!dbg_cnt (ipa_cp_values))
    return false;

  if (val->self_recursion_generated_p ())
    {
      /* The edge counts in this case might not have been adjusted yet.
	 Nevertheless, even if they were it would be only a guesswork which we
	 can do now.  The recursive part of the counts can be derived from the
	 count of the original node anyway.  */
      if (node->count.ipa ().nonzero_p ())
	{
	  unsigned dem = self_gen_clones->length () + 1;
	  rec_count_sum = node->count.ipa () / dem;
	}
      else
	rec_count_sum = profile_count::zero ();
    }

  /* get_info_about_necessary_edges only sums up ipa counts.  */
  count_sum += rec_count_sum;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - considering value ");
      print_ipcp_constant_value (dump_file, val->value);
      fprintf (dump_file, " for ");
      ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
      if (offset != -1)
	fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
      fprintf (dump_file, " (caller_count: %i)\n", caller_count);
    }

  /* Work out everything (scalars, contexts, aggregates) that all the
     redirected callers would agree on, not just this one value.  */
  auto_vec<cgraph_edge *> callers
    = gather_edges_for_value (val, node, caller_count);
  ipa_node_params *info = ipa_node_params_sum->get (node);
  ipa_auto_call_arg_values avals;
  avals.m_known_vals.safe_grow_cleared (ipa_get_param_count (info), true);
  find_scalar_values_for_callers_subset (avals.m_known_vals, info, callers);
  find_contexts_for_caller_subset (avals.m_known_contexts, info, callers);
  find_aggregate_values_for_callers_subset (avals.m_known_aggs, node, callers);


  /* First try the cheap estimate computed during propagation; if that is not
     good enough, re-estimate with the full set of known values.  */
  if (good_cloning_opportunity_p (node, val->prop_time_benefit,
				  freq_sum, count_sum, val->prop_size_cost,
				  called_without_ipa_profile, cur_sweep))
    ;
  else
    {
      /* Extern inline functions are only meaningful to clone to propagate
	 values to their callees.  */
      if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " Skipping extern inline.\n");
	  return false;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_reestimation_message (dump_file, info, avals);

      ipa_call_estimates estimates;
      estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
      /* Parameters that become compile-time constants can be removed from
	 the clone; their passing cost is subtracted below.  */
      int removable_params_cost = 0;
      for (tree t : avals.m_known_vals)
	if (t)
	  removable_params_cost += estimate_move_cost (TREE_TYPE (t), true);

      int size = estimates.size - caller_count * removable_params_cost;

      if (size <= 0)
	{
	  if (dump_file)
	    fprintf (dump_file, " Code not going to grow.\n");
	}
      else
	{
	  sreal time_benefit
	    = ((estimates.nonspecialized_time - estimates.time)
	       + hint_time_bonus (node, estimates)
	       + (devirtualization_time_bonus (node, &avals)
		  + removable_params_cost));

	  if (!good_cloning_opportunity_p (node, time_benefit, freq_sum,
					   count_sum, size,
					   called_without_ipa_profile,
					   cur_sweep))
	    return false;
	}
    }

  if (dump_file)
    fprintf (dump_file, "  Creating a specialized node of %s.\n",
	     node->dump_name ());

  /* The cloning machinery takes ownership of these vectors, hence the
     copies.  */
  vec<tree> known_csts = avals.m_known_vals.copy ();
  vec<ipa_polymorphic_call_context> known_contexts
    = copy_useful_known_contexts (avals.m_known_contexts);

  vec<ipa_argagg_value, va_gc> *aggvals = NULL;
  vec_safe_reserve_exact (aggvals, avals.m_known_aggs.length ());
  for (const ipa_argagg_value &av : avals.m_known_aggs)
    aggvals->quick_push (av);
  /* Sanity-check that the value we are cloning for is actually among those
     the subset of callers brings.  */
  gcc_checking_assert (ipcp_val_replacement_ok_p (known_csts, known_contexts,
						  aggvals, index,
						  offset, val->value));
  val->spec_node = create_specialized_node (node, known_csts, known_contexts,
					    aggvals, callers);

  /* Profile adjustment for self-recursion-generated clones happens later,
     once all such clones exist.  */
  if (val->self_recursion_generated_p ())
    self_gen_clones->safe_push (val->spec_node);
  else
    update_profiling_info (node, val->spec_node);

  overall_size += val->local_size_cost;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, " overall size reached %li\n",
	     overall_size);

  /* TODO: If for some lattice there is only one other known value
     left, make a special node for it too.  */

  return true;
}
6097 :
6098 : /* Like irange::contains_p(), but convert VAL to the range of R if
6099 : necessary. */
6100 :
6101 : static inline bool
6102 47807 : ipa_range_contains_p (const vrange &r, tree val)
6103 : {
6104 47807 : if (r.undefined_p ())
6105 : return false;
6106 :
6107 47807 : tree type = r.type ();
6108 47807 : if (!wi::fits_to_tree_p (wi::to_wide (val), type))
6109 : return false;
6110 :
6111 47807 : val = fold_convert (type, val);
6112 47807 : return r.contains_p (val);
6113 : }
6114 :
6115 : /* Structure holding opportunitties so that they can be pre-sorted. */
6116 :
6117 216546 : struct cloning_opportunity_ranking
6118 : {
6119 : /* A very rough evaluation of likely benefit. */
6120 : sreal eval;
6121 : /* In the case of aggregate constants, a non-negative offset within their
6122 : aggregates. -1 for scalar constants, -2 for polymorphic contexts. */
6123 : HOST_WIDE_INT offset;
6124 : /* The value being considered for evaluation for cloning. */
6125 : ipcp_value_base *val;
6126 : /* Index of the formal parameter the value is coming in. */
6127 : int index;
6128 : };
6129 :
6130 : /* Helper function to qsort a vecotr of cloning opportunities. */
6131 :
6132 : static int
6133 2024173 : compare_cloning_opportunities (const void *a, const void *b)
6134 : {
6135 2024173 : const cloning_opportunity_ranking *o1
6136 : = (const cloning_opportunity_ranking *) a;
6137 2024173 : const cloning_opportunity_ranking *o2
6138 : = (const cloning_opportunity_ranking *) b;
6139 2024173 : if (o1->eval < o2->eval)
6140 : return 1;
6141 1586297 : if (o1->eval > o2->eval)
6142 511672 : return -1;
6143 : return 0;
6144 : }
6145 :
6146 : /* Use the estimations in VAL to determine how good a candidate it represents
6147 : for the purposes of ordering real evaluation of opportunities (which
6148 : includes information about incoming edges, among other things). */
6149 :
6150 : static sreal
6151 216546 : cloning_opportunity_ranking_evaluation (const ipcp_value_base *val)
6152 : {
6153 216546 : sreal e1 = (val->local_time_benefit * 1000) / MAX (val->local_size_cost, 1);
6154 216546 : sreal e2 = (val->prop_time_benefit * 1000) / MAX (val->prop_size_cost, 1);
6155 216546 : if (e2 > e1)
6156 15128 : return e2;
6157 : else
6158 201418 : return e1;
6159 : }
6160 :
6161 : /* Decide whether and what specialized clones of NODE should be created.
6162 : CUR_SWEEP is the number of the current sweep of the call-graph during the
6163 : decision stage. */
6164 :
6165 : static bool
6166 3185881 : decide_whether_version_node (struct cgraph_node *node, int cur_sweep)
6167 : {
6168 3185881 : ipa_node_params *info = ipa_node_params_sum->get (node);
6169 3185881 : int count = ipa_get_param_count (info);
6170 3185881 : bool ret = false;
6171 :
6172 3185881 : if (info->node_dead || count == 0)
6173 : return false;
6174 :
6175 2573451 : if (dump_file && (dump_flags & TDF_DETAILS))
6176 345 : fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6177 : node->dump_name ());
6178 :
6179 2573451 : auto_vec <cloning_opportunity_ranking, 32> opp_ranking;
6180 8598885 : for (int i = 0; i < count;i++)
6181 : {
6182 6025434 : if (!ipa_is_param_used (info, i))
6183 677718 : continue;
6184 :
6185 5347716 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6186 5347716 : ipcp_lattice<tree> *lat = &plats->itself;
6187 5347716 : ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6188 :
6189 5347716 : if (!lat->bottom
6190 5347716 : && !lat->is_single_const ())
6191 : {
6192 526727 : ipcp_value<tree> *val;
6193 642838 : for (val = lat->values; val; val = val->next)
6194 : {
6195 : /* If some values generated for self-recursive calls with
6196 : arithmetic jump functions fall outside of the known
6197 : range for the parameter, we can skip them. */
6198 116173 : if (TREE_CODE (val->value) == INTEGER_CST
6199 70278 : && !plats->m_value_range.bottom_p ()
6200 163918 : && !ipa_range_contains_p (plats->m_value_range.m_vr,
6201 : val->value))
6202 : {
6203 : /* This can happen also if a constant present in the source
6204 : code falls outside of the range of parameter's type, so we
6205 : cannot assert. */
6206 62 : if (dump_file && (dump_flags & TDF_DETAILS))
6207 : {
6208 0 : fprintf (dump_file, " - skipping%s value ",
6209 0 : val->self_recursion_generated_p ()
6210 : ? " self_recursion_generated" : "");
6211 0 : print_ipcp_constant_value (dump_file, val->value);
6212 0 : fprintf (dump_file, " because it is outside known "
6213 : "value range.\n");
6214 : }
6215 62 : continue;
6216 : }
6217 116049 : cloning_opportunity_ranking opp;
6218 116049 : opp.eval = cloning_opportunity_ranking_evaluation (val);
6219 116049 : opp.offset = -1;
6220 116049 : opp.val = val;
6221 116049 : opp.index = i;
6222 116049 : opp_ranking.safe_push (opp);
6223 : }
6224 : }
6225 :
6226 5347716 : if (!plats->aggs_bottom)
6227 : {
6228 556325 : struct ipcp_agg_lattice *aglat;
6229 556325 : ipcp_value<tree> *val;
6230 697499 : for (aglat = plats->aggs; aglat; aglat = aglat->next)
6231 140032 : if (!aglat->bottom && aglat->values
6232 : /* If the following is false, the one value has been considered
6233 : for cloning for all contexts. */
6234 260273 : && (plats->aggs_contain_variable
6235 198384 : || !aglat->is_single_const ()))
6236 169527 : for (val = aglat->values; val; val = val->next)
6237 : {
6238 96740 : cloning_opportunity_ranking opp;
6239 96740 : opp.eval = cloning_opportunity_ranking_evaluation (val);
6240 96740 : opp.offset = aglat->offset;
6241 96740 : opp.val = val;
6242 96740 : opp.index = i;
6243 96740 : opp_ranking.safe_push (opp);
6244 : }
6245 : }
6246 :
6247 5347716 : if (!ctxlat->bottom
6248 6584678 : && !ctxlat->is_single_const ())
6249 : {
6250 541822 : ipcp_value<ipa_polymorphic_call_context> *val;
6251 545579 : for (val = ctxlat->values; val; val = val->next)
6252 7514 : if (!val->value.useless_p ())
6253 : {
6254 3757 : cloning_opportunity_ranking opp;
6255 3757 : opp.eval = cloning_opportunity_ranking_evaluation (val);
6256 3757 : opp.offset = -2;
6257 3757 : opp.val = val;
6258 3757 : opp.index = i;
6259 3757 : opp_ranking.safe_push (opp);
6260 : }
6261 : }
6262 : }
6263 :
6264 2573451 : if (!opp_ranking.is_empty ())
6265 : {
6266 51397 : opp_ranking.qsort (compare_cloning_opportunities);
6267 51397 : auto_vec <cgraph_node *, 9> self_gen_clones;
6268 370737 : for (const cloning_opportunity_ranking &opp : opp_ranking)
6269 216546 : if (opp.offset == -2)
6270 : {
6271 3757 : ipcp_value<ipa_polymorphic_call_context> *val
6272 : = static_cast <ipcp_value<ipa_polymorphic_call_context> *>
6273 : (opp.val);
6274 3757 : ret |= decide_about_value (node, opp.index, -1, val,
6275 : &self_gen_clones, cur_sweep);
6276 : }
6277 : else
6278 : {
6279 212789 : ipcp_value<tree> *val = static_cast<ipcp_value<tree> *> (opp.val);
6280 212789 : ret |= decide_about_value (node, opp.index, opp.offset, val,
6281 : &self_gen_clones, cur_sweep);
6282 : }
6283 :
6284 102794 : if (!self_gen_clones.is_empty ())
6285 : {
6286 41 : self_gen_clones.safe_push (node);
6287 41 : update_counts_for_self_gen_clones (node, self_gen_clones);
6288 : }
6289 51397 : }
6290 :
6291 2573451 : struct caller_statistics stats;
6292 2573451 : init_caller_stats (&stats);
6293 2573451 : node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
6294 : false);
6295 2573451 : if (!stats.n_calls)
6296 : {
6297 1298488 : if (dump_file)
6298 986 : fprintf (dump_file, " Not cloning for all contexts because "
6299 : "there are no callers of the original node (any more).\n");
6300 1298488 : return ret;
6301 : }
6302 :
6303 1274963 : bool do_clone_for_all_contexts = false;
6304 1274963 : ipa_auto_call_arg_values avals;
6305 1274963 : int removable_params_cost;
6306 1274963 : bool ctx_independent_const
6307 1274963 : = gather_context_independent_values (info, &avals, &removable_params_cost);
6308 1274963 : sreal devirt_bonus = devirtualization_time_bonus (node, &avals);
6309 1260248 : if (ctx_independent_const || devirt_bonus > 0
6310 2535205 : || (removable_params_cost && clone_for_param_removal_p (node)))
6311 : {
6312 63716 : ipa_call_estimates estimates;
6313 :
6314 63716 : estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
6315 63716 : sreal time = estimates.nonspecialized_time - estimates.time;
6316 63716 : time += devirt_bonus;
6317 63716 : time += hint_time_bonus (node, estimates);
6318 63716 : time += removable_params_cost;
6319 63716 : int size = estimates.size - stats.n_calls * removable_params_cost;
6320 :
6321 63716 : if (dump_file && (dump_flags & TDF_DETAILS))
6322 26 : fprintf (dump_file, " - context independent values, size: %i, "
6323 : "time_benefit: %f\n", size, (time).to_double ());
6324 :
6325 63716 : if (size <= 0 || node->local)
6326 : {
6327 17106 : if (!dbg_cnt (ipa_cp_values))
6328 0 : return ret;
6329 :
6330 17106 : do_clone_for_all_contexts = true;
6331 17106 : if (dump_file)
6332 106 : fprintf (dump_file, " Decided to specialize for all "
6333 : "known contexts, code not going to grow.\n");
6334 : }
6335 46610 : else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
6336 : stats.count_sum, size,
6337 46610 : stats.called_without_ipa_profile,
6338 : cur_sweep))
6339 : {
6340 348 : if (size + overall_size <= get_max_overall_size (node, cur_sweep))
6341 : {
6342 348 : if (!dbg_cnt (ipa_cp_values))
6343 : return ret;
6344 :
6345 348 : do_clone_for_all_contexts = true;
6346 348 : overall_size += size;
6347 348 : if (dump_file)
6348 14 : fprintf (dump_file, " Decided to specialize for all "
6349 : "known contexts, growth (to %li) deemed "
6350 : "beneficial.\n", overall_size);
6351 : }
6352 0 : else if (dump_file && (dump_flags & TDF_DETAILS))
6353 0 : fprintf (dump_file, " Not cloning for all contexts because "
6354 : "maximum unit size would be reached with %li.\n",
6355 : size + overall_size);
6356 : }
6357 46262 : else if (dump_file && (dump_flags & TDF_DETAILS))
6358 2 : fprintf (dump_file, " Not cloning for all contexts because "
6359 : "!good_cloning_opportunity_p.\n");
6360 : }
6361 :
6362 1274963 : if (do_clone_for_all_contexts)
6363 : {
6364 17454 : auto_vec<cgraph_edge *> callers = node->collect_callers ();
6365 :
6366 85990 : for (int i = callers.length () - 1; i >= 0; i--)
6367 : {
6368 51082 : cgraph_edge *cs = callers[i];
6369 51082 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6370 :
6371 51082 : if (caller_info && caller_info->node_dead)
6372 2660 : callers.unordered_remove (i);
6373 : }
6374 :
6375 17454 : if (!adjust_callers_for_value_intersection (callers, node))
6376 : /* If node is not called by anyone, or all its caller edges are
6377 : self-recursive, the node is not really in use, no need to do
6378 : cloning. */
6379 96 : return ret;
6380 :
6381 17358 : if (dump_file)
6382 118 : fprintf (dump_file, " Creating a specialized node of %s "
6383 : "for all known contexts.\n", node->dump_name ());
6384 :
6385 17358 : vec<tree> known_csts = vNULL;
6386 17358 : known_csts.safe_grow_cleared (count, true);
6387 17358 : find_scalar_values_for_callers_subset (known_csts, info, callers);
6388 17358 : vec<ipa_polymorphic_call_context> known_contexts = vNULL;
6389 17358 : find_contexts_for_caller_subset (known_contexts, info, callers);
6390 17358 : vec<ipa_argagg_value, va_gc> *aggvals
6391 17358 : = find_aggregate_values_for_callers_subset_gc (node, callers);
6392 :
6393 17358 : struct cgraph_node *clone = create_specialized_node (node, known_csts,
6394 : known_contexts,
6395 : aggvals, callers);
6396 17358 : ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6397 17358 : ret = true;
6398 17454 : }
6399 :
6400 : return ret;
6401 3848414 : }
6402 :
6403 : /* Transitively mark all callees of NODE within the same SCC as not dead. */
6404 :
6405 : static void
6406 5459 : spread_undeadness (struct cgraph_node *node)
6407 : {
6408 5459 : struct cgraph_edge *cs;
6409 :
6410 18817 : for (cs = node->callees; cs; cs = cs->next_callee)
6411 13358 : if (ipa_edge_within_scc (cs))
6412 : {
6413 966 : struct cgraph_node *callee;
6414 966 : class ipa_node_params *info;
6415 :
6416 966 : callee = cs->callee->function_symbol (NULL);
6417 966 : info = ipa_node_params_sum->get (callee);
6418 :
6419 966 : if (info && info->node_dead)
6420 : {
6421 68 : info->node_dead = 0;
6422 68 : spread_undeadness (callee);
6423 : }
6424 : }
6425 5459 : }
6426 :
6427 : /* Return true if NODE has a caller from outside of its SCC that is not
6428 : dead. Worker callback for cgraph_for_node_and_aliases. */
6429 :
6430 : static bool
6431 16242 : has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6432 : void *data ATTRIBUTE_UNUSED)
6433 : {
6434 16242 : struct cgraph_edge *cs;
6435 :
6436 81557 : for (cs = node->callers; cs; cs = cs->next_caller)
6437 65953 : if (cs->caller->thunk
6438 65953 : && cs->caller->call_for_symbol_thunks_and_aliases
6439 0 : (has_undead_caller_from_outside_scc_p, NULL, true))
6440 : return true;
6441 65953 : else if (!ipa_edge_within_scc (cs))
6442 : {
6443 65698 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6444 65698 : if (!caller_info /* Unoptimized caller are like dead ones. */
6445 65696 : || !caller_info->node_dead)
6446 : return true;
6447 : }
6448 : return false;
6449 : }
6450 :
6451 :
6452 : /* Identify nodes within the same SCC as NODE which are no longer needed
6453 : because of new clones and will be removed as unreachable. */
6454 :
6455 : static void
6456 20356 : identify_dead_nodes (struct cgraph_node *node)
6457 : {
6458 20356 : struct cgraph_node *v;
6459 41052 : for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6460 20696 : if (v->local)
6461 : {
6462 16002 : ipa_node_params *info = ipa_node_params_sum->get (v);
6463 16002 : if (info
6464 32004 : && !v->call_for_symbol_thunks_and_aliases
6465 16002 : (has_undead_caller_from_outside_scc_p, NULL, true))
6466 15364 : info->node_dead = 1;
6467 : }
6468 :
6469 41052 : for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6470 : {
6471 20696 : ipa_node_params *info = ipa_node_params_sum->get (v);
6472 20696 : if (info && !info->node_dead)
6473 5391 : spread_undeadness (v);
6474 : }
6475 :
6476 20356 : if (dump_file && (dump_flags & TDF_DETAILS))
6477 : {
6478 107 : for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6479 55 : if (ipa_node_params_sum->get (v)
6480 55 : && ipa_node_params_sum->get (v)->node_dead)
6481 32 : fprintf (dump_file, " Marking node as dead: %s.\n",
6482 : v->dump_name ());
6483 : }
6484 20356 : }
6485 :
6486 : /* Removes all useless callback edges from the callgraph. Useless callback
6487 : edges might mess up the callgraph, because they might be impossible to
6488 : redirect and so on, leading to crashes. Their usefulness is evaluated
6489 : through callback_edge_useful_p. */
6490 :
6491 : static void
6492 127757 : purge_useless_callback_edges ()
6493 : {
6494 127757 : if (dump_file)
6495 161 : fprintf (dump_file, "\nPurging useless callback edges:\n");
6496 :
6497 127757 : cgraph_edge *e;
6498 127757 : cgraph_node *node;
6499 1413766 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6500 : {
6501 6717440 : for (e = node->callees; e; e = e->next_callee)
6502 : {
6503 5431431 : if (e->has_callback)
6504 : {
6505 13684 : if (dump_file)
6506 3 : fprintf (dump_file, "\tExamining callbacks of edge %s -> %s:\n",
6507 3 : e->caller->dump_name (), e->callee->dump_name ());
6508 13684 : if (!lookup_attribute (CALLBACK_ATTR_IDENT,
6509 13684 : DECL_ATTRIBUTES (e->callee->decl))
6510 13684 : && !callback_is_special_cased (e->callee->decl, e->call_stmt))
6511 : {
6512 1 : if (dump_file)
6513 0 : fprintf (
6514 : dump_file,
6515 : "\t\tPurging callbacks, because the callback-dispatching"
6516 : "function no longer has any callback attributes.\n");
6517 1 : e->purge_callback_edges ();
6518 1 : continue;
6519 : }
6520 13683 : cgraph_edge *cbe, *next;
6521 27366 : for (cbe = e->first_callback_edge (); cbe; cbe = next)
6522 : {
6523 13683 : next = cbe->next_callback_edge ();
6524 13683 : if (!callback_edge_useful_p (cbe))
6525 : {
6526 13309 : if (dump_file)
6527 1 : fprintf (dump_file,
6528 : "\t\tCallback edge %s -> %s not deemed "
6529 : "useful, removing.\n",
6530 1 : cbe->caller->dump_name (),
6531 1 : cbe->callee->dump_name ());
6532 13309 : cgraph_edge::remove (cbe);
6533 : }
6534 : else
6535 : {
6536 374 : if (dump_file)
6537 2 : fprintf (dump_file,
6538 : "\t\tKept callback edge %s -> %s "
6539 : "because it looks useful.\n",
6540 2 : cbe->caller->dump_name (),
6541 2 : cbe->callee->dump_name ());
6542 : }
6543 : }
6544 : }
6545 : }
6546 : }
6547 :
6548 127757 : if (dump_file)
6549 161 : fprintf (dump_file, "\n");
6550 127757 : }
6551 :
6552 : /* The decision stage. Iterate over the topological order of call graph nodes
6553 : TOPO and make specialized clones if deemed beneficial. */
6554 :
6555 : static void
6556 127757 : ipcp_decision_stage (class ipa_topo_info *topo)
6557 : {
6558 127757 : int i;
6559 :
6560 127757 : if (dump_file)
6561 161 : fprintf (dump_file, "\nIPA decision stage (%i sweeps):\n",
6562 : max_number_sweeps);
6563 :
6564 490207 : for (int cur_sweep = 1; cur_sweep <= max_number_sweeps; cur_sweep++)
6565 : {
6566 362450 : if (dump_file && (dump_flags & TDF_DETAILS))
6567 144 : fprintf (dump_file, "\nIPA decision sweep number %i (out of %i):\n",
6568 : cur_sweep, max_number_sweeps);
6569 :
6570 4336554 : for (i = topo->nnodes - 1; i >= 0; i--)
6571 : {
6572 3974104 : struct cgraph_node *node = topo->order[i];
6573 3974104 : bool change = false, iterate = true;
6574 :
6575 7968570 : while (iterate)
6576 : {
6577 : struct cgraph_node *v;
6578 : iterate = false;
6579 4009258 : for (v = node;
6580 8003724 : v;
6581 4009258 : v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6582 4009258 : if (v->has_gimple_body_p ()
6583 3786481 : && ipcp_versionable_function_p (v)
6584 4009258 : && (cur_sweep
6585 3185881 : <= opt_for_fn (node->decl, param_ipa_cp_sweeps)))
6586 3185881 : iterate |= decide_whether_version_node (v, cur_sweep);
6587 :
6588 3994466 : change |= iterate;
6589 : }
6590 3974104 : if (change)
6591 20356 : identify_dead_nodes (node);
6592 : }
6593 : }
6594 :
6595 : /* Currently, the primary use of callback edges is constant propagation.
6596 : Constant propagation is now over, so we have to remove unused callback
6597 : edges. */
6598 127757 : purge_useless_callback_edges ();
6599 127757 : }
6600 :
6601 : /* Look up all VR and bits information that we have discovered and copy it
6602 : over to the transformation summary. */
6603 :
6604 : static void
6605 127757 : ipcp_store_vr_results (void)
6606 : {
6607 127757 : cgraph_node *node;
6608 :
6609 1413766 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6610 : {
6611 1286009 : ipa_node_params *info = ipa_node_params_sum->get (node);
6612 1286009 : bool dumped_sth = false;
6613 1286009 : bool found_useful_result = false;
6614 1286009 : bool do_vr = true;
6615 1286009 : bool do_bits = true;
6616 :
6617 : /* If the function is not local, the gathered information is only useful
6618 : for clones. */
6619 1286009 : if (!node->local)
6620 1122315 : continue;
6621 :
6622 163694 : if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6623 : {
6624 4829 : if (dump_file)
6625 9 : fprintf (dump_file, "Not considering %s for VR discovery "
6626 : "and propagate; -fipa-ipa-vrp: disabled.\n",
6627 : node->dump_name ());
6628 : do_vr = false;
6629 : }
6630 163694 : if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp))
6631 : {
6632 4797 : if (dump_file)
6633 2 : fprintf (dump_file, "Not considering %s for ipa bitwise "
6634 : "propagation ; -fipa-bit-cp: disabled.\n",
6635 : node->dump_name ());
6636 4797 : do_bits = false;
6637 : }
6638 4797 : if (!do_bits && !do_vr)
6639 4791 : continue;
6640 :
6641 158903 : if (info->ipcp_orig_node)
6642 22248 : info = ipa_node_params_sum->get (info->ipcp_orig_node);
6643 158903 : if (info->lattices.is_empty ())
6644 : /* Newly expanded artificial thunks do not have lattices. */
6645 50465 : continue;
6646 :
6647 108438 : unsigned count = ipa_get_param_count (info);
6648 223506 : for (unsigned i = 0; i < count; i++)
6649 : {
6650 176213 : ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6651 176213 : if (do_vr
6652 176181 : && !plats->m_value_range.bottom_p ()
6653 235104 : && !plats->m_value_range.top_p ())
6654 : {
6655 : found_useful_result = true;
6656 : break;
6657 : }
6658 117323 : if (do_bits && plats->bits_lattice.constant_p ())
6659 : {
6660 : found_useful_result = true;
6661 : break;
6662 : }
6663 : }
6664 108438 : if (!found_useful_result)
6665 47293 : continue;
6666 :
6667 61145 : ipcp_transformation_initialize ();
6668 61145 : ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6669 61145 : vec_safe_reserve_exact (ts->m_vr, count);
6670 :
6671 224263 : for (unsigned i = 0; i < count; i++)
6672 : {
6673 163118 : ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6674 163118 : ipcp_bits_lattice *bits = NULL;
6675 :
6676 163118 : if (do_bits
6677 163114 : && plats->bits_lattice.constant_p ()
6678 258356 : && dbg_cnt (ipa_cp_bits))
6679 95238 : bits = &plats->bits_lattice;
6680 :
6681 163118 : if (do_vr
6682 163094 : && !plats->m_value_range.bottom_p ()
6683 113878 : && !plats->m_value_range.top_p ()
6684 276996 : && dbg_cnt (ipa_cp_vr))
6685 : {
6686 113878 : if (bits)
6687 : {
6688 89814 : value_range tmp = plats->m_value_range.m_vr;
6689 89814 : tree type = ipa_get_type (info, i);
6690 179628 : irange_bitmask bm (wide_int::from (bits->get_value (),
6691 89814 : TYPE_PRECISION (type),
6692 89814 : TYPE_SIGN (type)),
6693 179628 : wide_int::from (bits->get_mask (),
6694 89814 : TYPE_PRECISION (type),
6695 179628 : TYPE_SIGN (type)));
6696 89814 : tmp.update_bitmask (bm);
6697 : // Reflecting the bitmask on the ranges can sometime
6698 : // produce an UNDEFINED value if the the bitmask update
6699 : // was previously deferred. See PR 120048.
6700 89814 : if (tmp.undefined_p ())
6701 0 : tmp.set_varying (type);
6702 89814 : ipa_vr vr (tmp);
6703 89814 : ts->m_vr->quick_push (vr);
6704 89814 : }
6705 : else
6706 : {
6707 24064 : ipa_vr vr (plats->m_value_range.m_vr);
6708 24064 : ts->m_vr->quick_push (vr);
6709 : }
6710 : }
6711 49240 : else if (bits)
6712 : {
6713 5424 : tree type = ipa_get_type (info, i);
6714 5424 : value_range tmp;
6715 5424 : tmp.set_varying (type);
6716 10848 : irange_bitmask bm (wide_int::from (bits->get_value (),
6717 5424 : TYPE_PRECISION (type),
6718 5424 : TYPE_SIGN (type)),
6719 10848 : wide_int::from (bits->get_mask (),
6720 5424 : TYPE_PRECISION (type),
6721 10848 : TYPE_SIGN (type)));
6722 5424 : tmp.update_bitmask (bm);
6723 : // Reflecting the bitmask on the ranges can sometime
6724 : // produce an UNDEFINED value if the the bitmask update
6725 : // was previously deferred. See PR 120048.
6726 5424 : if (tmp.undefined_p ())
6727 0 : tmp.set_varying (type);
6728 5424 : ipa_vr vr (tmp);
6729 5424 : ts->m_vr->quick_push (vr);
6730 5424 : }
6731 : else
6732 : {
6733 43816 : ipa_vr vr;
6734 43816 : ts->m_vr->quick_push (vr);
6735 : }
6736 :
6737 163118 : if (!dump_file || !bits)
6738 162690 : continue;
6739 :
6740 428 : if (!dumped_sth)
6741 : {
6742 306 : fprintf (dump_file, "Propagated bits info for function %s:\n",
6743 : node->dump_name ());
6744 306 : dumped_sth = true;
6745 : }
6746 428 : fprintf (dump_file, " param %i: value = ", i);
6747 428 : ipcp_print_widest_int (dump_file, bits->get_value ());
6748 428 : fprintf (dump_file, ", mask = ");
6749 428 : ipcp_print_widest_int (dump_file, bits->get_mask ());
6750 428 : fprintf (dump_file, "\n");
6751 : }
6752 : }
6753 127757 : }
6754 :
6755 : /* The IPCP driver. */
6756 :
6757 : static unsigned int
6758 127757 : ipcp_driver (void)
6759 : {
6760 127757 : class ipa_topo_info topo;
6761 :
6762 127757 : if (edge_clone_summaries == NULL)
6763 127757 : edge_clone_summaries = new edge_clone_summary_t (symtab);
6764 :
6765 127757 : ipa_check_create_node_params ();
6766 127757 : ipa_check_create_edge_args ();
6767 127757 : clone_num_suffixes = new hash_map<const char *, unsigned>;
6768 :
6769 127757 : if (dump_file)
6770 : {
6771 161 : fprintf (dump_file, "\nIPA structures before propagation:\n");
6772 161 : if (dump_flags & TDF_DETAILS)
6773 48 : ipa_print_all_params (dump_file);
6774 161 : ipa_print_all_jump_functions (dump_file);
6775 : }
6776 :
6777 : /* Topological sort. */
6778 127757 : build_toporder_info (&topo);
6779 : /* Do the interprocedural propagation. */
6780 127757 : ipcp_propagate_stage (&topo);
6781 : /* Decide what constant propagation and cloning should be performed. */
6782 127757 : ipcp_decision_stage (&topo);
6783 : /* Store results of value range and bits propagation. */
6784 127757 : ipcp_store_vr_results ();
6785 :
6786 : /* Free all IPCP structures. */
6787 255514 : delete clone_num_suffixes;
6788 127757 : free_toporder_info (&topo);
6789 127757 : delete edge_clone_summaries;
6790 127757 : edge_clone_summaries = NULL;
6791 127757 : ipa_free_all_structures_after_ipa_cp ();
6792 127757 : if (dump_file)
6793 161 : fprintf (dump_file, "\nIPA constant propagation end\n");
6794 127757 : return 0;
6795 : }
6796 :
6797 : /* Initialization and computation of IPCP data structures. This is the initial
6798 : intraprocedural analysis of functions, which gathers information to be
6799 : propagated later on. */
6800 :
6801 : static void
6802 124585 : ipcp_generate_summary (void)
6803 : {
6804 124585 : struct cgraph_node *node;
6805 :
6806 124585 : if (dump_file)
6807 163 : fprintf (dump_file, "\nIPA constant propagation start:\n");
6808 124585 : ipa_register_cgraph_hooks ();
6809 :
6810 1364618 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6811 1240033 : ipa_analyze_node (node);
6812 :
6813 124585 : varpool_node *vnode;
6814 1792052 : FOR_EACH_STATIC_INITIALIZER (vnode)
6815 1667467 : ipa_analyze_var_static_initializer (vnode);
6816 124585 : }
6817 :
6818 : namespace {
6819 :
6820 : const pass_data pass_data_ipa_cp =
6821 : {
6822 : IPA_PASS, /* type */
6823 : "cp", /* name */
6824 : OPTGROUP_NONE, /* optinfo_flags */
6825 : TV_IPA_CONSTANT_PROP, /* tv_id */
6826 : 0, /* properties_required */
6827 : 0, /* properties_provided */
6828 : 0, /* properties_destroyed */
6829 : 0, /* todo_flags_start */
6830 : ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6831 : };
6832 :
6833 : class pass_ipa_cp : public ipa_opt_pass_d
6834 : {
6835 : public:
6836 287872 : pass_ipa_cp (gcc::context *ctxt)
6837 : : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6838 : ipcp_generate_summary, /* generate_summary */
6839 : NULL, /* write_summary */
6840 : NULL, /* read_summary */
6841 : ipcp_write_transformation_summaries, /*
6842 : write_optimization_summary */
6843 : ipcp_read_transformation_summaries, /*
6844 : read_optimization_summary */
6845 : NULL, /* stmt_fixup */
6846 : 0, /* function_transform_todo_flags_start */
6847 : ipcp_transform_function, /* function_transform */
6848 287872 : NULL) /* variable_transform */
6849 287872 : {}
6850 :
6851 : /* opt_pass methods: */
6852 576416 : bool gate (function *) final override
6853 : {
6854 : /* FIXME: We should remove the optimize check after we ensure we never run
6855 : IPA passes when not optimizing. */
6856 576416 : return (flag_ipa_cp && optimize) || in_lto_p;
6857 : }
6858 :
6859 127757 : unsigned int execute (function *) final override { return ipcp_driver (); }
6860 :
6861 : }; // class pass_ipa_cp
6862 :
6863 : } // anon namespace
6864 :
6865 : ipa_opt_pass_d *
6866 287872 : make_pass_ipa_cp (gcc::context *ctxt)
6867 : {
6868 287872 : return new pass_ipa_cp (ctxt);
6869 : }
6870 :
6871 : /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6872 : within the same process. For use by toplev::finalize. */
6873 :
6874 : void
6875 258766 : ipa_cp_cc_finalize (void)
6876 : {
6877 258766 : overall_size = 0;
6878 258766 : orig_overall_size = 0;
6879 258766 : ipcp_free_transformation_sum ();
6880 258766 : }
6881 :
6882 : /* Given PARAM which must be a parameter of function FNDECL described by THIS,
6883 : return its index in the DECL_ARGUMENTS chain, using a pre-computed
6884 : DECL_UID-sorted vector if available (which is pre-computed only if there are
6885 : many parameters). Can return -1 if param is static chain not represented
6886 : among DECL_ARGUMENTS. */
6887 :
6888 : int
6889 126079 : ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
6890 : {
6891 126079 : gcc_assert (TREE_CODE (param) == PARM_DECL);
6892 126079 : if (m_uid_to_idx)
6893 : {
6894 0 : unsigned puid = DECL_UID (param);
6895 0 : const ipa_uid_to_idx_map_elt *res
6896 0 : = std::lower_bound (m_uid_to_idx->begin(), m_uid_to_idx->end (), puid,
6897 0 : [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
6898 : {
6899 0 : return elt.uid < uid;
6900 : });
6901 0 : if (res == m_uid_to_idx->end ()
6902 0 : || res->uid != puid)
6903 : {
6904 0 : gcc_assert (DECL_STATIC_CHAIN (fndecl));
6905 : return -1;
6906 : }
6907 0 : return res->index;
6908 : }
6909 :
6910 126079 : unsigned index = 0;
6911 286858 : for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6912 285410 : if (p == param)
6913 124631 : return (int) index;
6914 :
6915 1448 : gcc_assert (DECL_STATIC_CHAIN (fndecl));
6916 : return -1;
6917 : }
6918 :
6919 : /* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
6920 : according to the uid. */
6921 :
6922 : static int
6923 0 : compare_uids (const void *a, const void *b)
6924 : {
6925 0 : const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
6926 0 : const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
6927 0 : if (e1->uid < e2->uid)
6928 : return -1;
6929 0 : if (e1->uid > e2->uid)
6930 : return 1;
6931 0 : gcc_unreachable ();
6932 : }
6933 :
6934 : /* Assuming THIS describes FNDECL and it has sufficiently many parameters to
6935 : justify the overhead, create a DECL_UID-sorted vector to speed up mapping
6936 : from parameters to their indices in DECL_ARGUMENTS chain. */
6937 :
6938 : void
6939 23203 : ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
6940 : {
6941 23203 : int c = count_formal_params (fndecl);
6942 23203 : if (c < 32)
6943 : return;
6944 :
6945 0 : m_uid_to_idx = NULL;
6946 0 : vec_safe_reserve (m_uid_to_idx, c, true);
6947 0 : unsigned index = 0;
6948 0 : for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6949 : {
6950 0 : ipa_uid_to_idx_map_elt elt;
6951 0 : elt.uid = DECL_UID (p);
6952 0 : elt.index = index;
6953 0 : m_uid_to_idx->quick_push (elt);
6954 : }
6955 0 : m_uid_to_idx->qsort (compare_uids);
6956 : }
|