Line data Source code
1 : /* Interprocedural constant propagation
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 :
4 : Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
5 : <mjambor@suse.cz>
6 :
7 : This file is part of GCC.
8 :
9 : GCC is free software; you can redistribute it and/or modify it under
10 : the terms of the GNU General Public License as published by the Free
11 : Software Foundation; either version 3, or (at your option) any later
12 : version.
13 :
14 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 : for more details.
18 :
19 : You should have received a copy of the GNU General Public License
20 : along with GCC; see the file COPYING3. If not see
21 : <http://www.gnu.org/licenses/>. */
22 :
23 : /* Interprocedural constant propagation (IPA-CP).
24 :
25 : The goal of this transformation is to
26 :
27 : 1) discover functions which are always invoked with some arguments with the
28 : same known constant values and modify the functions so that the
29 : subsequent optimizations can take advantage of the knowledge, and
30 :
31 : 2) partial specialization - create specialized versions of functions
32 : transformed in this way if some parameters are known constants only in
33 : certain contexts but the estimated tradeoff between speedup and cost size
34 : is deemed good.
35 :
36 : The algorithm also propagates types and attempts to perform type based
37 : devirtualization. Types are propagated much like constants.
38 :
39 : The algorithm basically consists of three stages. In the first, functions
40 : are analyzed one at a time and jump functions are constructed for all known
41 : call-sites. In the second phase, the pass propagates information from the
42 : jump functions across the call to reveal what values are available at what
43 : call sites, performs estimations of effects of known values on functions and
44 : their callees, and finally decides what specialized extra versions should be
45 : created. In the third, the special versions materialize and appropriate
46 : calls are redirected.
47 :
48 : The algorithm used is to a certain extent based on "Interprocedural Constant
49 : Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 : Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 : Cooper, Mary W. Hall, and Ken Kennedy.
52 :
53 :
54 : First stage - intraprocedural analysis
55 : =======================================
56 :
57 : This phase computes jump_function and modification flags.
58 :
59 : A jump function for a call-site represents the values passed as actual
60 : arguments of a given call-site. In principle, there are three types of
61 : values:
62 :
63 : Pass through - the caller's formal parameter is passed as an actual
64 : argument, plus an operation on it can be performed.
65 : Constant - a constant is passed as an actual argument.
66 : Unknown - neither of the above.
67 :
68 : All jump function types are described in detail in ipa-prop.h, together with
69 : the data structures that represent them and methods of accessing them.
70 :
71 : ipcp_generate_summary() is the main function of the first stage.
72 :
73 : Second stage - interprocedural analysis
74 : ========================================
75 :
76 : This stage is itself divided into two phases. In the first, we propagate
77 : known values over the call graph, in the second, we make cloning decisions.
78 : It uses a different algorithm than the original Callahan's paper.
79 :
80 : First, we traverse the functions topologically from callers to callees and,
81 : for each strongly connected component (SCC), we propagate constants
82 : according to previously computed jump functions. We also record what known
83 : values depend on other known values and estimate local effects. Finally, we
84 : propagate cumulative information about these effects from dependent values
85 : to those on which they depend.
86 :
87 : Second, we again traverse the call graph in the same topological order and
88 : make clones for functions which we know are called with the same values in
89 : all contexts and decide about extra specialized clones of functions just for
90 : some contexts - these decisions are based on both local estimates and
91 : cumulative estimates propagated from callees.
92 :
93 : ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
94 : second stage.
95 :
96 : Third stage - materialization of clones, call statement updates.
97 : =================================================================
98 :
99 : This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 : and tree-inline.cc) according to instructions inserted to the call graph by
101 : the second stage. */
102 :
103 : #define INCLUDE_ALGORITHM
104 : #include "config.h"
105 : #include "system.h"
106 : #include "coretypes.h"
107 : #include "backend.h"
108 : #include "tree.h"
109 : #include "gimple-expr.h"
110 : #include "gimple.h"
111 : #include "predict.h"
112 : #include "sreal.h"
113 : #include "alloc-pool.h"
114 : #include "tree-pass.h"
115 : #include "cgraph.h"
116 : #include "diagnostic.h"
117 : #include "fold-const.h"
118 : #include "gimple-iterator.h"
119 : #include "gimple-fold.h"
120 : #include "symbol-summary.h"
121 : #include "tree-vrp.h"
122 : #include "ipa-cp.h"
123 : #include "ipa-prop.h"
124 : #include "tree-pretty-print.h"
125 : #include "tree-inline.h"
126 : #include "ipa-fnsummary.h"
127 : #include "ipa-utils.h"
128 : #include "tree-ssa-ccp.h"
129 : #include "stringpool.h"
130 : #include "attribs.h"
131 : #include "dbgcnt.h"
132 : #include "symtab-clones.h"
133 : #include "gimple-range.h"
134 : #include "attr-callback.h"
135 :
136 : /* Allocation pools for values and their sources in ipa-cp. */
137 :
138 : object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
139 : ("IPA-CP constant values");
140 :
141 : object_allocator<ipcp_value<ipa_polymorphic_call_context> >
142 : ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
143 :
144 : object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
145 : ("IPA-CP value sources");
146 :
147 : object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
148 : ("IPA_CP aggregate lattices");
149 :
150 : /* Original overall size of the program. */
151 :
152 : static long overall_size, orig_overall_size;
153 :
154 : /* The maximum number of IPA-CP decision sweeps that any node requested in its
155 : param. */
156 : static int max_number_sweeps;
157 :
158 : /* Node name to unique clone suffix number map. */
159 : static hash_map<const char *, unsigned> *clone_num_suffixes;
160 :
161 : /* Return the param lattices structure corresponding to the Ith formal
162 : parameter of the function described by INFO. */
163 : static inline class ipcp_param_lattices *
164 33254364 : ipa_get_parm_lattices (class ipa_node_params *info, int i)
165 : {
166 66508728 : gcc_assert (i >= 0 && i < ipa_get_param_count (info));
167 33254364 : gcc_checking_assert (!info->ipcp_orig_node);
168 33254364 : return &(info->lattices[i]);
169 : }
170 :
171 : /* Return the lattice corresponding to the scalar value of the Ith formal
172 : parameter of the function described by INFO. */
173 : static inline ipcp_lattice<tree> *
174 5870701 : ipa_get_scalar_lat (class ipa_node_params *info, int i)
175 : {
176 6054017 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
177 5870701 : return &plats->itself;
178 : }
179 :
180 : /* Return the lattice corresponding to the scalar value of the Ith formal
181 : parameter of the function described by INFO. */
182 : static inline ipcp_lattice<ipa_polymorphic_call_context> *
183 796910 : ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
184 : {
185 796910 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
186 796910 : return &plats->ctxlat;
187 : }
188 :
189 : /* Return whether LAT is a lattice with a single constant and without an
190 : undefined value. */
191 :
192 : template <typename valtype>
193 : inline bool
194 14847964 : ipcp_lattice<valtype>::is_single_const ()
195 : {
196 3212275 : if (bottom || contains_variable || values_count != 1)
197 : return false;
198 : else
199 : return true;
200 : }
201 :
202 : /* Return true iff X and Y should be considered equal values by IPA-CP. */
203 :
204 : bool
205 1394847 : values_equal_for_ipcp_p (tree x, tree y)
206 : {
207 1394847 : gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
208 :
209 1394847 : if (x == y)
210 : return true;
211 :
212 619538 : if (TREE_CODE (x) == ADDR_EXPR
213 219213 : && TREE_CODE (y) == ADDR_EXPR
214 218581 : && (TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
215 171208 : || (TREE_CODE (TREE_OPERAND (x, 0)) == VAR_DECL
216 92226 : && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x, 0))))
217 666911 : && (TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL
218 13 : || (TREE_CODE (TREE_OPERAND (y, 0)) == VAR_DECL
219 8 : && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y, 0)))))
220 47360 : return TREE_OPERAND (x, 0) == TREE_OPERAND (y, 0)
221 94464 : || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
222 47104 : DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
223 : else
224 572178 : return operand_equal_p (x, y, 0);
225 : }
226 :
227 : /* Print V which is extracted from a value in a lattice to F. This overloaded
228 : function is used to print tree constants. */
229 :
230 : static void
231 715 : print_ipcp_constant_value (FILE * f, tree v)
232 : {
233 0 : ipa_print_constant_value (f, v);
234 44 : }
235 :
236 : /* Print V which is extracted from a value in a lattice to F. This overloaded
237 : function is used to print constant polymorphic call contexts. */
238 :
239 : static void
240 214 : print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
241 : {
242 214 : v.dump(f, false);
243 0 : }
244 :
245 : /* Print a lattice LAT to F. */
246 :
247 : template <typename valtype>
248 : void
249 1985 : ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
250 : {
251 : ipcp_value<valtype> *val;
252 1985 : bool prev = false;
253 :
254 1985 : if (bottom)
255 : {
256 844 : fprintf (f, "BOTTOM\n");
257 844 : return;
258 : }
259 :
260 1141 : if (!values_count && !contains_variable)
261 : {
262 0 : fprintf (f, "TOP\n");
263 0 : return;
264 : }
265 :
266 1141 : if (contains_variable)
267 : {
268 861 : fprintf (f, "VARIABLE");
269 861 : prev = true;
270 861 : if (dump_benefits)
271 861 : fprintf (f, "\n");
272 : }
273 :
274 1770 : for (val = values; val; val = val->next)
275 : {
276 629 : if (dump_benefits && prev)
277 349 : fprintf (f, " ");
278 280 : else if (!dump_benefits && prev)
279 0 : fprintf (f, ", ");
280 : else
281 : prev = true;
282 :
283 629 : print_ipcp_constant_value (f, val->value);
284 :
285 629 : if (dump_sources)
286 : {
287 : ipcp_value_source<valtype> *s;
288 :
289 174 : if (val->self_recursion_generated_p ())
290 27 : fprintf (f, " [self_gen(%i), from:",
291 : val->self_recursion_generated_level);
292 : else
293 147 : fprintf (f, " [scc: %i, from:", val->scc_no);
294 366 : for (s = val->sources; s; s = s->next)
295 192 : fprintf (f, " %i(%f)", s->cs->caller->get_uid (),
296 384 : s->cs->sreal_frequency ().to_double ());
297 174 : fprintf (f, "]");
298 : }
299 :
300 629 : if (dump_benefits)
301 629 : fprintf (f, " [loc_time: %g, loc_size: %i, "
302 : "prop_time: %g, prop_size: %i]\n",
303 : val->local_time_benefit.to_double (), val->local_size_cost,
304 : val->prop_time_benefit.to_double (), val->prop_size_cost);
305 : }
306 1141 : if (!dump_benefits)
307 0 : fprintf (f, "\n");
308 : }
309 :
310 : /* Print VALUE to F in a form which in usual cases does not take thousands of
311 : characters. */
312 :
313 : static void
314 1450 : ipcp_print_widest_int (FILE *f, const widest_int &value)
315 : {
316 1450 : if (value == -1)
317 0 : fprintf (f, "-1");
318 1450 : else if (wi::arshift (value, 128) == -1)
319 : {
320 330 : char buf[35], *p = buf + 2;
321 330 : widest_int v = wi::zext (value, 128);
322 330 : size_t len;
323 330 : print_hex (v, buf);
324 330 : len = strlen (p);
325 330 : if (len == 32)
326 : {
327 330 : fprintf (f, "0xf..f");
328 9795 : while (*p == 'f')
329 9135 : ++p;
330 : }
331 : else
332 0 : fprintf (f, "0xf..f%0*d", (int) (32 - len), 0);
333 330 : fputs (p, f);
334 330 : }
335 : else
336 1120 : print_hex (value, f);
337 1450 : }
338 :
339 : void
340 917 : ipcp_bits_lattice::print (FILE *f)
341 : {
342 917 : if (bottom_p ())
343 : {
344 607 : fprintf (f, " Bits unusable (BOTTOM)\n");
345 607 : return;
346 : }
347 :
348 310 : if (top_p ())
349 0 : fprintf (f, " Bits unknown (TOP)");
350 : else
351 : {
352 310 : fprintf (f, " Bits: value = ");
353 310 : ipcp_print_widest_int (f, get_value ());
354 310 : fprintf (f, ", mask = ");
355 310 : ipcp_print_widest_int (f, get_mask ());
356 : }
357 :
358 310 : if (m_recipient_only)
359 136 : fprintf (f, " (recipient only)");
360 310 : fprintf (f, "\n");
361 : }
362 :
363 : /* Print value range lattice to F. */
364 :
365 : void
366 917 : ipcp_vr_lattice::print (FILE * f)
367 : {
368 917 : if (m_recipient_only)
369 263 : fprintf (f, "(recipient only) ");
370 917 : m_vr.dump (f);
371 917 : }
372 :
373 : /* Print all ipcp_lattices of all functions to F. */
374 :
375 : static void
376 161 : print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
377 : {
378 161 : struct cgraph_node *node;
379 161 : int i, count;
380 :
381 161 : fprintf (f, "\nLattices:\n");
382 886 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
383 : {
384 725 : class ipa_node_params *info;
385 :
386 725 : info = ipa_node_params_sum->get (node);
387 : /* Skip unoptimized functions and constprop clones since we don't make
388 : lattices for them. */
389 725 : if (!info || info->ipcp_orig_node)
390 0 : continue;
391 725 : fprintf (f, " Node: %s:\n", node->dump_name ());
392 725 : count = ipa_get_param_count (info);
393 1642 : for (i = 0; i < count; i++)
394 : {
395 917 : struct ipcp_agg_lattice *aglat;
396 917 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
397 917 : fprintf (f, " param [%d]: ", i);
398 917 : plats->itself.print (f, dump_sources, dump_benefits);
399 917 : fprintf (f, " ctxs: ");
400 917 : plats->ctxlat.print (f, dump_sources, dump_benefits);
401 917 : plats->bits_lattice.print (f);
402 917 : fprintf (f, " ");
403 917 : plats->m_value_range.print (f);
404 917 : fprintf (f, "\n");
405 917 : if (plats->virt_call)
406 75 : fprintf (f, " virt_call flag set\n");
407 :
408 917 : if (plats->aggs_bottom)
409 : {
410 442 : fprintf (f, " AGGS BOTTOM\n");
411 442 : continue;
412 : }
413 475 : if (plats->aggs_contain_variable)
414 437 : fprintf (f, " AGGS VARIABLE\n");
415 626 : for (aglat = plats->aggs; aglat; aglat = aglat->next)
416 : {
417 151 : fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
418 151 : plats->aggs_by_ref ? "ref " : "", aglat->offset);
419 151 : aglat->print (f, dump_sources, dump_benefits);
420 : }
421 : }
422 : }
423 161 : }
424 :
425 : /* Determine whether it is at all technically possible to create clones of NODE
426 : and store this information in the ipa_node_params structure associated
427 : with NODE. */
428 :
429 : static void
430 1259818 : determine_versionability (struct cgraph_node *node,
431 : class ipa_node_params *info)
432 : {
433 1259818 : const char *reason = NULL;
434 :
435 : /* There are a number of generic reasons functions cannot be versioned. We
436 : also cannot remove parameters if there are type attributes such as fnspec
437 : present. */
438 1259818 : if (node->alias || node->thunk)
439 : reason = "alias or thunk";
440 1259818 : else if (!node->versionable)
441 : reason = "not a tree_versionable_function";
442 1130781 : else if (node->get_availability () <= AVAIL_INTERPOSABLE)
443 : reason = "insufficient body availability";
444 1063375 : else if (!opt_for_fn (node->decl, optimize)
445 1063375 : || !opt_for_fn (node->decl, flag_ipa_cp))
446 : reason = "non-optimized function";
447 1063375 : else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
448 : {
449 : /* Ideally we should clone the SIMD clones themselves and create
450 : vector copies of them, so IPA-cp and SIMD clones can happily
451 : coexist, but that may not be worth the effort. */
452 : reason = "function has SIMD clones";
453 : }
454 1063015 : else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
455 : {
456 : /* Ideally we should clone the target clones themselves and create
457 : copies of them, so IPA-cp and target clones can happily
458 : coexist, but that may not be worth the effort. */
459 : reason = "function target_clones attribute";
460 : }
461 : /* Don't clone decls local to a comdat group; it breaks and for C++
462 : decloned constructors, inlining is always better anyway. */
463 1063015 : else if (node->comdat_local_p ())
464 : reason = "comdat-local function";
465 1060860 : else if (node->calls_comdat_local)
466 : {
467 : /* TODO: call is versionable if we make sure that all
468 : callers are inside of a comdat group. */
469 2257 : reason = "calls comdat-local function";
470 : }
471 :
472 : /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
473 : work only when inlined. Cloning them may still lead to better code
474 : because ipa-cp will not give up on cloning further. If the function is
475 : external this however leads to wrong code because we may end up producing
476 : offline copy of the function. */
477 1259818 : if (DECL_EXTERNAL (node->decl))
478 166617 : for (cgraph_edge *edge = node->callees; !reason && edge;
479 122354 : edge = edge->next_callee)
480 122354 : if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
481 : {
482 34382 : if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
483 0 : reason = "external function which calls va_arg_pack";
484 34382 : if (DECL_FUNCTION_CODE (edge->callee->decl)
485 : == BUILT_IN_VA_ARG_PACK_LEN)
486 0 : reason = "external function which calls va_arg_pack_len";
487 : }
488 :
489 1259818 : if (reason && dump_file && !node->alias && !node->thunk)
490 53 : fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
491 : node->dump_name (), reason);
492 :
493 1259818 : info->versionable = (reason == NULL);
494 1259818 : }
495 :
496 : /* Return true if it is at all technically possible to create clones of a
497 : NODE. */
498 :
499 : static bool
500 6029251 : ipcp_versionable_function_p (struct cgraph_node *node)
501 : {
502 6029251 : ipa_node_params *info = ipa_node_params_sum->get (node);
503 6029251 : return info && info->versionable;
504 : }
505 :
506 : /* Structure holding accumulated information about callers of a node. */
507 :
508 3364453 : struct caller_statistics
509 : {
510 : /* If requested (see below), self-recursive call counts are summed into this
511 : field. */
512 : profile_count rec_count_sum;
513 : /* The sum of all ipa counts of all the other (non-recursive) calls. */
514 : profile_count count_sum;
515 : /* Sum of all frequencies for all calls. */
516 : sreal freq_sum;
517 : /* Number of calls and calls considered interesting respectively. */
518 : int n_calls, n_interesting_calls;
519 : /* If itself is set up, also count the number of non-self-recursive
520 : calls. */
521 : int n_nonrec_calls;
522 : /* If non-NULL, this is the node itself and calls from it should have their
523 : counts included in rec_count_sum and not count_sum. */
524 : cgraph_node *itself;
525 : /* True if there is a caller that has no IPA profile. */
526 : bool called_without_ipa_profile;
527 : };
528 :
529 : /* Initialize fields of STAT to zeroes and optionally set it up so that edges
530 : from IGNORED_CALLER are not counted. */
531 :
532 : static inline void
533 2629710 : init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
534 : {
535 2629710 : stats->rec_count_sum = profile_count::zero ();
536 2629710 : stats->count_sum = profile_count::zero ();
537 2629710 : stats->n_calls = 0;
538 2629710 : stats->n_interesting_calls = 0;
539 2629710 : stats->n_nonrec_calls = 0;
540 2629710 : stats->freq_sum = 0;
541 2629710 : stats->itself = itself;
542 2629710 : stats->called_without_ipa_profile = false;
543 2629710 : }
544 :
545 : /* We want to propagate across edges that may be executed, however
546 : we do not want to check maybe_hot, since call itself may be cold
547 : while calee contains some heavy loop which makes propagation still
548 : relevant.
549 :
550 : In particular, even edge called once may lead to significant
551 : improvement. */
552 :
553 : static bool
554 4618241 : cs_interesting_for_ipcp_p (cgraph_edge *e)
555 : {
556 : /* If profile says the edge is executed, we want to optimize. */
557 4618241 : if (e->count.ipa ().nonzero_p ())
558 899 : return true;
559 : /* If local (possibly guseed or adjusted 0 profile) claims edge is
560 : not executed, do not propagate.
561 : Do not trust AFDO since branch needs to be executed multiple
562 : time to count while we want to propagate even call called
563 : once during the train run if callee is important. */
564 4617342 : if (e->count.initialized_p () && !e->count.nonzero_p ()
565 5270365 : && e->count.quality () != AFDO)
566 : return false;
567 : /* If we have zero IPA profile, still consider edge for cloning
568 : in case we do partial training. */
569 3964319 : if (e->count.ipa ().initialized_p ()
570 3964319 : && e->count.ipa ().quality () != AFDO
571 3964334 : && !opt_for_fn (e->callee->decl,flag_profile_partial_training))
572 15 : return false;
573 : return true;
574 : }
575 :
576 : /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
577 : non-thunk incoming edges to NODE. */
578 :
579 : static bool
580 2799928 : gather_caller_stats (struct cgraph_node *node, void *data)
581 : {
582 2799928 : struct caller_statistics *stats = (struct caller_statistics *) data;
583 2799928 : struct cgraph_edge *cs;
584 :
585 7209452 : for (cs = node->callers; cs; cs = cs->next_caller)
586 4409524 : if (!cs->caller->thunk)
587 : {
588 4406023 : ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
589 4406023 : if (info && info->node_dead)
590 161826 : continue;
591 :
592 4244197 : if (cs->count.ipa ().initialized_p ())
593 : {
594 322199 : if (stats->itself && stats->itself == cs->caller)
595 0 : stats->rec_count_sum += cs->count.ipa ();
596 : else
597 322199 : stats->count_sum += cs->count.ipa ();
598 : }
599 : else
600 3921998 : stats->called_without_ipa_profile = true;
601 4244197 : stats->freq_sum += cs->sreal_frequency ();
602 4244197 : stats->n_calls++;
603 4244197 : if (stats->itself && stats->itself != cs->caller)
604 8 : stats->n_nonrec_calls++;
605 :
606 : /* If profile known to be zero, we do not want to clone for performance.
607 : However if call is cold, the called function may still contain
608 : important hot loops. */
609 4244197 : if (cs_interesting_for_ipcp_p (cs))
610 3636570 : stats->n_interesting_calls++;
611 : }
612 2799928 : return false;
613 :
614 : }
615 :
616 : /* Return true if this NODE is viable candidate for cloning. */
617 :
618 : static bool
619 781916 : ipcp_cloning_candidate_p (struct cgraph_node *node)
620 : {
621 781916 : struct caller_statistics stats;
622 :
623 781916 : gcc_checking_assert (node->has_gimple_body_p ());
624 :
625 781916 : if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
626 : {
627 730553 : if (dump_file)
628 31 : fprintf (dump_file, "Not considering %s for cloning; "
629 : "-fipa-cp-clone disabled.\n",
630 : node->dump_name ());
631 730553 : return false;
632 : }
633 :
634 : /* Do not use profile here since cold wrapper wrap
635 : hot function. */
636 51363 : if (opt_for_fn (node->decl, optimize_size))
637 : {
638 10 : if (dump_file)
639 0 : fprintf (dump_file, "Not considering %s for cloning; "
640 : "optimizing it for size.\n",
641 : node->dump_name ());
642 10 : return false;
643 : }
644 :
645 51353 : init_caller_stats (&stats);
646 51353 : node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);
647 :
648 51353 : if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
649 : {
650 303 : if (dump_file)
651 0 : fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
652 : node->dump_name ());
653 303 : return true;
654 : }
655 51050 : if (!stats.n_interesting_calls)
656 : {
657 39187 : if (dump_file)
658 201 : fprintf (dump_file, "Not considering %s for cloning; "
659 : "no calls considered interesting by profile.\n",
660 : node->dump_name ());
661 39187 : return false;
662 : }
663 11863 : if (dump_file)
664 186 : fprintf (dump_file, "Considering %s for cloning.\n",
665 : node->dump_name ());
666 : return true;
667 : }
668 :
669 : template <typename valtype>
670 : class value_topo_info
671 : {
672 : public:
673 : /* Head of the linked list of topologically sorted values. */
674 : ipcp_value<valtype> *values_topo;
675 : /* Stack for creating SCCs, represented by a linked list too. */
676 : ipcp_value<valtype> *stack;
677 : /* Counter driving the algorithm in add_val_to_toposort. */
678 : int dfs_counter;
679 :
680 130823 : value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
681 : {}
682 : void add_val (ipcp_value<valtype> *cur_val);
683 : void propagate_effects ();
684 : };
685 :
686 : /* Arrays representing a topological ordering of call graph nodes and a stack
687 : of nodes used during constant propagation and also data required to perform
688 : topological sort of values and propagation of benefits in the determined
689 : order. */
690 :
691 : class ipa_topo_info
692 : {
693 : public:
694 : /* Array with obtained topological order of cgraph nodes. */
695 : struct cgraph_node **order;
696 : /* Stack of cgraph nodes used during propagation within SCC until all values
697 : in the SCC stabilize. */
698 : struct cgraph_node **stack;
699 : int nnodes, stack_top;
700 :
701 : value_topo_info<tree> constants;
702 : value_topo_info<ipa_polymorphic_call_context> contexts;
703 :
704 130823 : ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
705 130823 : constants ()
706 : {}
707 : };
708 :
709 : /* Skip edges from and to nodes without ipa_cp enabled.
710 : Ignore not available symbols. */
711 :
712 : static bool
713 5219036 : ignore_edge_p (cgraph_edge *e)
714 : {
715 5219036 : enum availability avail;
716 5219036 : cgraph_node *ultimate_target
717 5219036 : = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
718 :
719 5219036 : return (avail <= AVAIL_INTERPOSABLE
720 1821288 : || !opt_for_fn (ultimate_target->decl, optimize)
721 7031598 : || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
722 : }
723 :
724 : /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
725 :
726 : static void
727 130823 : build_toporder_info (class ipa_topo_info *topo)
728 : {
729 130823 : topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
730 130823 : topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
731 :
732 130823 : gcc_checking_assert (topo->stack_top == 0);
733 130823 : topo->nnodes = ipa_reduced_postorder (topo->order, true,
734 : ignore_edge_p);
735 130823 : }
736 :
737 : /* Free information about strongly connected components and the arrays in
738 : TOPO. */
739 :
740 : static void
741 130823 : free_toporder_info (class ipa_topo_info *topo)
742 : {
743 130823 : ipa_free_postorder_info ();
744 130823 : free (topo->order);
745 130823 : free (topo->stack);
746 130823 : }
747 :
748 : /* Add NODE to the stack in TOPO, unless it is already there. */
749 :
750 : static inline void
751 1263977 : push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
752 : {
753 1263977 : ipa_node_params *info = ipa_node_params_sum->get (node);
754 1263977 : if (info->node_enqueued)
755 : return;
756 1263008 : info->node_enqueued = 1;
757 1263008 : topo->stack[topo->stack_top++] = node;
758 : }
759 :
760 : /* Pop a node from the stack in TOPO and return it or return NULL if the stack
761 : is empty. */
762 :
763 : static struct cgraph_node *
764 2603183 : pop_node_from_stack (class ipa_topo_info *topo)
765 : {
766 2603183 : if (topo->stack_top)
767 : {
768 1263008 : struct cgraph_node *node;
769 1263008 : topo->stack_top--;
770 1263008 : node = topo->stack[topo->stack_top];
771 1263008 : ipa_node_params_sum->get (node)->node_enqueued = 0;
772 1263008 : return node;
773 : }
774 : else
775 : return NULL;
776 : }
777 :
778 : /* Set lattice LAT to bottom and return true if it previously was not set as
779 : such. */
780 :
781 : template <typename valtype>
782 : inline bool
783 2091601 : ipcp_lattice<valtype>::set_to_bottom ()
784 : {
785 2091601 : bool ret = !bottom;
786 2091601 : bottom = true;
787 : return ret;
788 : }
789 :
790 : /* Mark lattice as containing an unknown value and return true if it previously
791 : was not marked as such. */
792 :
793 : template <typename valtype>
794 : inline bool
795 1535137 : ipcp_lattice<valtype>::set_contains_variable ()
796 : {
797 1535137 : bool ret = !contains_variable;
798 1535137 : contains_variable = true;
799 : return ret;
800 : }
801 :
802 : /* Set all aggregate lattices in PLATS to bottom and return true if they were
803 : not previously set as such. */
804 :
805 : static inline bool
806 2091308 : set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
807 : {
808 2091308 : bool ret = !plats->aggs_bottom;
809 2091308 : plats->aggs_bottom = true;
810 2091308 : return ret;
811 : }
812 :
813 : /* Mark all aggregate lattices in PLATS as containing an unknown value and
814 : return true if they were not previously marked as such. */
815 :
816 : static inline bool
817 1041626 : set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
818 : {
819 1041626 : bool ret = !plats->aggs_contain_variable;
820 1041626 : plats->aggs_contain_variable = true;
821 1041626 : return ret;
822 : }
823 :
824 : bool
825 0 : ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
826 : {
827 0 : return meet_with_1 (other.m_vr);
828 : }
829 :
830 : /* Meet the current value of the lattice with the range described by
831 : P_VR. */
832 :
833 : bool
834 491347 : ipcp_vr_lattice::meet_with (const vrange &p_vr)
835 : {
836 491347 : return meet_with_1 (p_vr);
837 : }
838 :
839 : /* Meet the current value of the lattice with the range described by
840 : OTHER_VR. Return TRUE if anything changed. */
841 :
842 : bool
843 491347 : ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
844 : {
845 491347 : if (bottom_p ())
846 : return false;
847 :
848 491347 : if (other_vr.varying_p ())
849 0 : return set_to_bottom ();
850 :
851 491347 : bool res;
852 491347 : if (flag_checking)
853 : {
854 491347 : value_range save (m_vr);
855 491347 : res = m_vr.union_ (other_vr);
856 491347 : gcc_assert (res == (m_vr != save));
857 491347 : }
858 : else
859 0 : res = m_vr.union_ (other_vr);
860 : return res;
861 : }
862 :
863 : /* Return true if value range information in the lattice is yet unknown. */
864 :
865 : bool
866 : ipcp_vr_lattice::top_p () const
867 : {
868 167499 : return m_vr.undefined_p ();
869 : }
870 :
871 : /* Return true if value range information in the lattice is known to be
872 : unusable. */
873 :
874 : bool
875 4762657 : ipcp_vr_lattice::bottom_p () const
876 : {
877 491347 : return m_vr.varying_p ();
878 : }
879 :
880 : /* Set value range information in the lattice to bottom. Return true if it
881 : previously was in a different state. */
882 :
883 : bool
884 2364194 : ipcp_vr_lattice::set_to_bottom ()
885 : {
886 2364194 : if (m_vr.varying_p ())
887 : return false;
888 :
889 : /* Setting an unsupported type here forces the temporary to default
890 : to unsupported_range, which can handle VARYING/DEFINED ranges,
891 : but nothing else (union, intersect, etc). This allows us to set
892 : bottoms on any ranges, and is safe as all users of the lattice
893 : check for bottom first. */
894 2222996 : m_vr.set_range_class (void_type_node);
895 2222996 : m_vr.set_varying (void_type_node);
896 :
897 2222996 : return true;
898 : }
899 :
900 : /* Set the flag that this lattice is a recipient only, return true if it was
901 : not set before. */
902 :
903 : bool
904 29245 : ipcp_vr_lattice::set_recipient_only ()
905 : {
906 29245 : if (m_recipient_only)
907 : return false;
908 29245 : m_recipient_only = true;
909 29245 : return true;
910 : }
911 :
912 : /* Set lattice value to bottom, if it already isn't the case. */
913 :
914 : bool
915 2383055 : ipcp_bits_lattice::set_to_bottom ()
916 : {
917 2383055 : if (bottom_p ())
918 : return false;
919 2242385 : m_lattice_val = IPA_BITS_VARYING;
920 2242385 : m_value = 0;
921 2242385 : m_mask = -1;
922 2242385 : return true;
923 : }
924 :
925 : /* Set to constant if it isn't already. Only meant to be called
926 : when switching state from TOP. */
927 :
928 : bool
929 76549 : ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
930 : {
931 76549 : gcc_assert (top_p ());
932 76549 : m_lattice_val = IPA_BITS_CONSTANT;
933 76549 : m_value = wi::bit_and (wi::bit_not (mask), value);
934 76549 : m_mask = mask;
935 76549 : return true;
936 : }
937 :
938 : /* Return true if any of the known bits are non-zero. */
939 :
940 : bool
941 472 : ipcp_bits_lattice::known_nonzero_p () const
942 : {
943 472 : if (!constant_p ())
944 : return false;
945 472 : return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
946 : }
947 :
948 : /* Set the flag that this lattice is a recipient only, return true if it was not
949 : set before. */
950 :
951 : bool
952 29245 : ipcp_bits_lattice::set_recipient_only ()
953 : {
954 29245 : if (m_recipient_only)
955 : return false;
956 29245 : m_recipient_only = true;
957 29245 : return true;
958 : }
959 :
960 : /* Convert operand to value, mask form. */
961 :
962 : void
963 2067 : ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
964 : {
965 2067 : wide_int get_nonzero_bits (const_tree);
966 :
967 2067 : if (TREE_CODE (operand) == INTEGER_CST)
968 : {
969 2067 : *valuep = wi::to_widest (operand);
970 2067 : *maskp = 0;
971 : }
972 : else
973 : {
974 0 : *valuep = 0;
975 0 : *maskp = -1;
976 : }
977 2067 : }
978 :
979 : /* Meet operation, similar to ccp_lattice_meet, we xor values
980 : if this->value, value have different values at same bit positions, we want
981 : to drop that bit to varying. Return true if mask is changed.
982 : This function assumes that the lattice value is in CONSTANT state. If
983 : DROP_ALL_ONES, mask out any known bits with value one afterwards. */
984 :
985 : bool
986 302282 : ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
987 : unsigned precision, bool drop_all_ones)
988 : {
989 302282 : gcc_assert (constant_p ());
990 :
991 302282 : widest_int old_mask = m_mask;
992 302282 : m_mask = (m_mask | mask) | (m_value ^ value);
993 302282 : if (drop_all_ones)
994 199 : m_mask |= m_value;
995 :
996 302282 : widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true);
997 302282 : m_mask |= cap_mask;
998 302282 : if (wi::sext (m_mask, precision) == -1)
999 3512 : return set_to_bottom ();
1000 :
1001 298770 : m_value &= ~m_mask;
1002 298770 : return m_mask != old_mask;
1003 302282 : }
1004 :
1005 : /* Meet the bits lattice with operand
1006 : described by <value, mask, sgn, precision. */
1007 :
1008 : bool
1009 409960 : ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
1010 : unsigned precision)
1011 : {
1012 409960 : if (bottom_p ())
1013 : return false;
1014 :
1015 409960 : if (top_p ())
1016 : {
1017 120302 : if (wi::sext (mask, precision) == -1)
1018 49022 : return set_to_bottom ();
1019 71280 : return set_to_constant (value, mask);
1020 : }
1021 :
1022 289658 : return meet_with_1 (value, mask, precision, false);
1023 : }
1024 :
1025 : /* Meet bits lattice with the result of bit_value_binop (other, operand)
1026 : if code is binary operation or bit_value_unop (other) if code is unary op.
1027 : In the case when code is nop_expr, no adjustment is required. If
1028 : DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1029 :
1030 : bool
1031 21467 : ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
1032 : signop sgn, enum tree_code code, tree operand,
1033 : bool drop_all_ones)
1034 : {
1035 21467 : if (other.bottom_p ())
1036 0 : return set_to_bottom ();
1037 :
1038 21467 : if (bottom_p () || other.top_p ())
1039 : return false;
1040 :
1041 18009 : widest_int adjusted_value, adjusted_mask;
1042 :
1043 18009 : if (TREE_CODE_CLASS (code) == tcc_binary)
1044 : {
1045 2067 : tree type = TREE_TYPE (operand);
1046 2067 : widest_int o_value, o_mask;
1047 2067 : get_value_and_mask (operand, &o_value, &o_mask);
1048 :
1049 2067 : bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
1050 4134 : sgn, precision, other.get_value (), other.get_mask (),
1051 2067 : TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
1052 :
1053 2067 : if (wi::sext (adjusted_mask, precision) == -1)
1054 87 : return set_to_bottom ();
1055 2067 : }
1056 :
1057 15942 : else if (TREE_CODE_CLASS (code) == tcc_unary)
1058 : {
1059 31834 : bit_value_unop (code, sgn, precision, &adjusted_value,
1060 31834 : &adjusted_mask, sgn, precision, other.get_value (),
1061 15917 : other.get_mask ());
1062 :
1063 15917 : if (wi::sext (adjusted_mask, precision) == -1)
1064 4 : return set_to_bottom ();
1065 : }
1066 :
1067 : else
1068 25 : return set_to_bottom ();
1069 :
1070 17893 : if (top_p ())
1071 : {
1072 5269 : if (drop_all_ones)
1073 : {
1074 273 : adjusted_mask |= adjusted_value;
1075 273 : adjusted_value &= ~adjusted_mask;
1076 : }
1077 5269 : widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true);
1078 5269 : adjusted_mask |= cap_mask;
1079 5269 : if (wi::sext (adjusted_mask, precision) == -1)
1080 0 : return set_to_bottom ();
1081 5269 : return set_to_constant (adjusted_value, adjusted_mask);
1082 5269 : }
1083 : else
1084 12624 : return meet_with_1 (adjusted_value, adjusted_mask, precision,
1085 : drop_all_ones);
1086 18009 : }
1087 :
1088 : /* Dump the contents of the list to FILE. */
1089 :
1090 : void
1091 120 : ipa_argagg_value_list::dump (FILE *f)
1092 : {
1093 120 : bool comma = false;
1094 333 : for (const ipa_argagg_value &av : m_elts)
1095 : {
1096 213 : fprintf (f, "%s %i[%u]=", comma ? "," : "",
1097 213 : av.index, av.unit_offset);
1098 213 : print_generic_expr (f, av.value);
1099 213 : if (av.by_ref)
1100 186 : fprintf (f, "(by_ref)");
1101 213 : if (av.killed)
1102 1 : fprintf (f, "(killed)");
1103 213 : comma = true;
1104 : }
1105 120 : fprintf (f, "\n");
1106 120 : }
1107 :
1108 : /* Dump the contents of the list to stderr. */
1109 :
1110 : void
1111 0 : ipa_argagg_value_list::debug ()
1112 : {
1113 0 : dump (stderr);
1114 0 : }
1115 :
1116 : /* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
1117 : NULL if there is no such constant. */
1118 :
1119 : const ipa_argagg_value *
1120 28256012 : ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
1121 : {
1122 28256012 : ipa_argagg_value key;
1123 28256012 : key.index = index;
1124 28256012 : key.unit_offset = unit_offset;
1125 28256012 : const ipa_argagg_value *res
1126 28256012 : = std::lower_bound (m_elts.begin (), m_elts.end (), key,
1127 6687417 : [] (const ipa_argagg_value &elt,
1128 : const ipa_argagg_value &val)
1129 : {
1130 6687417 : if (elt.index < val.index)
1131 : return true;
1132 5693523 : if (elt.index > val.index)
1133 : return false;
1134 4549006 : if (elt.unit_offset < val.unit_offset)
1135 : return true;
1136 : return false;
1137 : });
1138 :
1139 28256012 : if (res == m_elts.end ()
1140 2867044 : || res->index != index
1141 30573218 : || res->unit_offset != unit_offset)
1142 : res = nullptr;
1143 :
1144 : /* TODO: perhaps remove the check (that the underlying array is indeed
1145 : sorted) if it turns out it can be too slow? */
1146 28256012 : if (!flag_checking)
1147 : return res;
1148 :
1149 : const ipa_argagg_value *slow_res = NULL;
1150 : int prev_index = -1;
1151 : unsigned prev_unit_offset = 0;
1152 44550530 : for (const ipa_argagg_value &av : m_elts)
1153 : {
1154 16294518 : gcc_assert (prev_index < 0
1155 : || prev_index < av.index
1156 : || prev_unit_offset < av.unit_offset);
1157 16294518 : prev_index = av.index;
1158 16294518 : prev_unit_offset = av.unit_offset;
1159 16294518 : if (av.index == index
1160 7447411 : && av.unit_offset == unit_offset)
1161 16294518 : slow_res = &av;
1162 : }
1163 28256012 : gcc_assert (res == slow_res);
1164 :
1165 : return res;
1166 : }
1167 :
1168 : /* Return the first item describing a constant stored for parameter with INDEX,
1169 : regardless of offset or reference, or NULL if there is no such constant. */
1170 :
1171 : const ipa_argagg_value *
1172 230728 : ipa_argagg_value_list::get_elt_for_index (int index) const
1173 : {
1174 230728 : const ipa_argagg_value *res
1175 230728 : = std::lower_bound (m_elts.begin (), m_elts.end (), index,
1176 18692 : [] (const ipa_argagg_value &elt, unsigned idx)
1177 : {
1178 18692 : return elt.index < idx;
1179 : });
1180 230728 : if (res == m_elts.end ()
1181 230728 : || res->index != index)
1182 : res = nullptr;
1183 230728 : return res;
1184 : }
1185 :
1186 : /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
1187 : performing any check of whether value is passed by reference, or NULL_TREE
1188 : if there is no such constant. */
1189 :
1190 : tree
1191 36268 : ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
1192 : {
1193 36268 : const ipa_argagg_value *av = get_elt (index, unit_offset);
1194 36268 : return av ? av->value : NULL_TREE;
1195 : }
1196 :
1197 : /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
1198 : passed by reference or not according to BY_REF, or NULL_TREE if there is
1199 : no such constant. */
1200 :
1201 : tree
1202 28209904 : ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
1203 : bool by_ref) const
1204 : {
1205 28209904 : const ipa_argagg_value *av = get_elt (index, unit_offset);
1206 28209904 : if (av && av->by_ref == by_ref)
1207 1834564 : return av->value;
1208 : return NULL_TREE;
1209 : }
1210 :
1211 : /* Return true if all elements present in OTHER are also present in this
1212 : list. */
1213 :
1214 : bool
1215 45 : ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
1216 : {
1217 45 : unsigned j = 0;
1218 200 : for (unsigned i = 0; i < other.m_elts.size (); i++)
1219 : {
1220 172 : unsigned other_index = other.m_elts[i].index;
1221 172 : unsigned other_offset = other.m_elts[i].unit_offset;
1222 :
1223 172 : while (j < m_elts.size ()
1224 327 : && (m_elts[j].index < other_index
1225 311 : || (m_elts[j].index == other_index
1226 311 : && m_elts[j].unit_offset < other_offset)))
1227 155 : j++;
1228 :
1229 172 : if (j >= m_elts.size ()
1230 159 : || m_elts[j].index != other_index
1231 159 : || m_elts[j].unit_offset != other_offset
1232 159 : || m_elts[j].by_ref != other.m_elts[i].by_ref
1233 159 : || !m_elts[j].value
1234 331 : || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
1235 17 : return false;
1236 : }
1237 : return true;
1238 : }
1239 :
1240 : /* Push all items in this list that describe parameter SRC_INDEX into RES as
1241 : ones describing DST_INDEX while subtracting UNIT_DELTA from their unit
1242 : offsets but skip those which would end up with a negative offset. */
1243 :
1244 : void
1245 3145 : ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
1246 : unsigned dest_index,
1247 : unsigned unit_delta,
1248 : vec<ipa_argagg_value> *res) const
1249 : {
1250 3145 : const ipa_argagg_value *av = get_elt_for_index (src_index);
1251 3145 : if (!av)
1252 : return;
1253 : unsigned prev_unit_offset = 0;
1254 : bool first = true;
1255 11981 : for (; av < m_elts.end (); ++av)
1256 : {
1257 9558 : if (av->index > src_index)
1258 : return;
1259 8948 : if (av->index == src_index
1260 8948 : && (av->unit_offset >= unit_delta)
1261 8804 : && av->value)
1262 : {
1263 8804 : ipa_argagg_value new_av;
1264 8804 : gcc_checking_assert (av->value);
1265 8804 : new_av.value = av->value;
1266 8804 : new_av.unit_offset = av->unit_offset - unit_delta;
1267 8804 : new_av.index = dest_index;
1268 8804 : new_av.by_ref = av->by_ref;
1269 8804 : gcc_assert (!av->killed);
1270 8804 : new_av.killed = false;
1271 :
1272 : /* Quick check that the offsets we push are indeed increasing. */
1273 8804 : gcc_assert (first
1274 : || new_av.unit_offset > prev_unit_offset);
1275 8804 : prev_unit_offset = new_av.unit_offset;
1276 8804 : first = false;
1277 :
1278 8804 : res->safe_push (new_av);
1279 : }
1280 : }
1281 : }
1282 :
1283 : /* Push to RES information about single lattices describing aggregate values in
1284 : PLATS as those describing parameter DEST_INDEX and the original offset minus
1285 : UNIT_DELTA. Return true if any item has been pushed to RES. */
1286 :
1287 : static bool
1288 4478249 : push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
1289 : unsigned unit_delta,
1290 : vec<ipa_argagg_value> *res)
1291 : {
1292 4478249 : if (plats->aggs_contain_variable)
1293 : return false;
1294 :
1295 3794727 : bool pushed_sth = false;
1296 3794727 : bool first = true;
1297 3794727 : unsigned prev_unit_offset = 0;
1298 3860093 : for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
1299 129524 : if (aglat->is_single_const ()
1300 40419 : && (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
1301 : {
1302 40419 : ipa_argagg_value iav;
1303 40419 : iav.value = aglat->values->value;
1304 40419 : iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
1305 40419 : iav.index = dest_index;
1306 40419 : iav.by_ref = plats->aggs_by_ref;
1307 40419 : iav.killed = false;
1308 :
1309 40419 : gcc_assert (first
1310 : || iav.unit_offset > prev_unit_offset);
1311 40419 : prev_unit_offset = iav.unit_offset;
1312 40419 : first = false;
1313 :
1314 40419 : pushed_sth = true;
1315 40419 : res->safe_push (iav);
1316 : }
1317 : return pushed_sth;
1318 : }
1319 :
1320 : /* Turn all values in LIST that are not present in OTHER into NULL_TREEs.
1321 : Return the number of remaining valid entries. */
1322 :
1323 : static unsigned
1324 55247 : intersect_argaggs_with (vec<ipa_argagg_value> &elts,
1325 : const vec<ipa_argagg_value> &other)
1326 : {
1327 55247 : unsigned valid_entries = 0;
1328 55247 : unsigned j = 0;
1329 404177 : for (unsigned i = 0; i < elts.length (); i++)
1330 : {
1331 348930 : if (!elts[i].value)
1332 49410 : continue;
1333 :
1334 299520 : unsigned this_index = elts[i].index;
1335 299520 : unsigned this_offset = elts[i].unit_offset;
1336 :
1337 299520 : while (j < other.length ()
1338 1123162 : && (other[j].index < this_index
1339 528762 : || (other[j].index == this_index
1340 525398 : && other[j].unit_offset < this_offset)))
1341 266283 : j++;
1342 :
1343 299520 : if (j >= other.length ())
1344 : {
1345 8444 : elts[i].value = NULL_TREE;
1346 8444 : continue;
1347 : }
1348 :
1349 291076 : if (other[j].index == this_index
1350 287712 : && other[j].unit_offset == this_offset
1351 281151 : && other[j].by_ref == elts[i].by_ref
1352 281151 : && other[j].value
1353 572227 : && values_equal_for_ipcp_p (other[j].value, elts[i].value))
1354 263125 : valid_entries++;
1355 : else
1356 27951 : elts[i].value = NULL_TREE;
1357 : }
1358 55247 : return valid_entries;
1359 : }
1360 :
1361 : /* Mark bot aggregate and scalar lattices as containing an unknown variable,
1362 : return true is any of them has not been marked as such so far. If if
1363 : MAKE_SIMPLE_RECIPIENTS is true, set the lattices that can only hold one
1364 : value to being recipients only, otherwise also set them to bottom. */
1365 :
1366 : static inline bool
1367 170170 : set_all_contains_variable (class ipcp_param_lattices *plats,
1368 : bool make_simple_recipients = false)
1369 : {
1370 170170 : bool ret;
1371 170170 : ret = plats->itself.set_contains_variable ();
1372 170170 : ret |= plats->ctxlat.set_contains_variable ();
1373 170170 : ret |= set_agg_lats_contain_variable (plats);
1374 170170 : if (make_simple_recipients)
1375 : {
1376 29245 : ret |= plats->bits_lattice.set_recipient_only ();
1377 29245 : ret |= plats->m_value_range.set_recipient_only ();
1378 : }
1379 : else
1380 : {
1381 140925 : ret |= plats->bits_lattice.set_to_bottom ();
1382 140925 : ret |= plats->m_value_range.set_to_bottom ();
1383 : }
1384 170170 : return ret;
1385 : }
1386 :
1387 : /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1388 : points to by the number of callers to NODE. */
1389 :
1390 : static bool
1391 98867 : count_callers (cgraph_node *node, void *data)
1392 : {
1393 98867 : int *caller_count = (int *) data;
1394 :
1395 403317 : for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1396 : /* Local thunks can be handled transparently, but if the thunk cannot
1397 : be optimized out, count it as a real use. */
1398 304450 : if (!cs->caller->thunk || !cs->caller->local)
1399 304450 : ++*caller_count;
1400 98867 : return false;
1401 : }
1402 :
1403 : /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1404 : the one caller of some other node. Set the caller's corresponding flag. */
1405 :
1406 : static bool
1407 56436 : set_single_call_flag (cgraph_node *node, void *)
1408 : {
1409 56436 : cgraph_edge *cs = node->callers;
1410 : /* Local thunks can be handled transparently, skip them. */
1411 56436 : while (cs && cs->caller->thunk && cs->caller->local)
1412 0 : cs = cs->next_caller;
1413 56436 : if (cs)
1414 55866 : if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
1415 : {
1416 55865 : info->node_calling_single_call = true;
1417 55865 : return true;
1418 : }
1419 : return false;
1420 : }
1421 :
1422 : /* Initialize ipcp_lattices. */
1423 :
1424 : static void
1425 1259818 : initialize_node_lattices (struct cgraph_node *node)
1426 : {
1427 1259818 : ipa_node_params *info = ipa_node_params_sum->get (node);
1428 1259818 : struct cgraph_edge *ie;
1429 1259818 : bool disable = false, variable = false;
1430 1259818 : int i;
1431 :
1432 1259818 : gcc_checking_assert (node->has_gimple_body_p ());
1433 :
1434 1259818 : if (!ipa_get_param_count (info))
1435 : disable = true;
1436 1030476 : else if (node->local)
1437 : {
1438 87995 : int caller_count = 0;
1439 87995 : node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
1440 : true);
1441 87995 : if (caller_count == 1)
1442 55866 : node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
1443 : NULL, true);
1444 32129 : else if (caller_count == 0)
1445 : {
1446 1 : gcc_checking_assert (!opt_for_fn (node->decl, flag_toplevel_reorder));
1447 : variable = true;
1448 : }
1449 : }
1450 : else
1451 : {
1452 : /* When cloning is allowed, we can assume that externally visible
1453 : functions are not called. We will compensate this by cloning
1454 : later. */
1455 942481 : if (ipcp_versionable_function_p (node)
1456 942481 : && ipcp_cloning_candidate_p (node))
1457 : variable = true;
1458 : else
1459 : disable = true;
1460 : }
1461 :
1462 725 : if (dump_file && (dump_flags & TDF_DETAILS)
1463 1259985 : && !node->alias && !node->thunk)
1464 : {
1465 167 : fprintf (dump_file, "Initializing lattices of %s\n",
1466 : node->dump_name ());
1467 167 : if (disable || variable)
1468 132 : fprintf (dump_file, " Marking all lattices as %s\n",
1469 : disable ? "BOTTOM" : "VARIABLE");
1470 : }
1471 :
1472 1259818 : auto_vec<bool, 16> surviving_params;
1473 1259818 : bool pre_modified = false;
1474 :
1475 1259818 : clone_info *cinfo = clone_info::get (node);
1476 :
1477 1259818 : if (!disable && cinfo && cinfo->param_adjustments)
1478 : {
1479 : /* At the moment all IPA optimizations should use the number of
1480 : parameters of the prevailing decl as the m_always_copy_start.
1481 : Handling any other value would complicate the code below, so for the
1482 : time bing let's only assert it is so. */
1483 0 : gcc_assert ((cinfo->param_adjustments->m_always_copy_start
1484 : == ipa_get_param_count (info))
1485 : || cinfo->param_adjustments->m_always_copy_start < 0);
1486 :
1487 0 : pre_modified = true;
1488 0 : cinfo->param_adjustments->get_surviving_params (&surviving_params);
1489 :
1490 0 : if (dump_file && (dump_flags & TDF_DETAILS)
1491 0 : && !node->alias && !node->thunk)
1492 : {
1493 : bool first = true;
1494 0 : for (int j = 0; j < ipa_get_param_count (info); j++)
1495 : {
1496 0 : if (j < (int) surviving_params.length ()
1497 0 : && surviving_params[j])
1498 0 : continue;
1499 0 : if (first)
1500 : {
1501 0 : fprintf (dump_file,
1502 : " The following parameters are dead on arrival:");
1503 0 : first = false;
1504 : }
1505 0 : fprintf (dump_file, " %u", j);
1506 : }
1507 0 : if (!first)
1508 0 : fprintf (dump_file, "\n");
1509 : }
1510 : }
1511 :
1512 6914954 : for (i = 0; i < ipa_get_param_count (info); i++)
1513 : {
1514 2312330 : ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
1515 2312330 : tree type = ipa_get_type (info, i);
1516 2312330 : if (disable
1517 222095 : || !ipa_get_type (info, i)
1518 2534425 : || (pre_modified && (surviving_params.length () <= (unsigned) i
1519 0 : || !surviving_params[i])))
1520 : {
1521 2090235 : plats->itself.set_to_bottom ();
1522 2090235 : plats->ctxlat.set_to_bottom ();
1523 2090235 : set_agg_lats_to_bottom (plats);
1524 2090235 : plats->bits_lattice.set_to_bottom ();
1525 2090235 : plats->m_value_range.init (type);
1526 2090235 : plats->m_value_range.set_to_bottom ();
1527 : }
1528 : else
1529 : {
1530 222095 : plats->m_value_range.init (type);
1531 222095 : if (variable)
1532 29245 : set_all_contains_variable (plats, true);
1533 : }
1534 : }
1535 :
1536 1393462 : for (ie = node->indirect_calls; ie; ie = ie->next_callee)
1537 133644 : if (ie->indirect_info->param_index >= 0
1538 142563 : && is_a <cgraph_polymorphic_indirect_info *> (ie->indirect_info))
1539 8919 : ipa_get_parm_lattices (info,
1540 8919 : ie->indirect_info->param_index)->virt_call = 1;
1541 1259818 : }
1542 :
1543 : /* Return VALUE if it is NULL_TREE or if it can be directly safely IPA-CP
1544 : propagated to a parameter of type PARAM_TYPE, or return a fold-converted
1545 : VALUE to PARAM_TYPE if that is possible. Return NULL_TREE otherwise. */
1546 :
1547 : tree
1548 5130208 : ipacp_value_safe_for_type (tree param_type, tree value)
1549 : {
1550 5130208 : if (!value)
1551 : return NULL_TREE;
1552 5129874 : tree val_type = TREE_TYPE (value);
1553 5129874 : if (param_type == val_type
1554 5129874 : || useless_type_conversion_p (param_type, val_type))
1555 5126501 : return value;
1556 3373 : if (fold_convertible_p (param_type, value))
1557 3164 : return fold_convert (param_type, value);
1558 : else
1559 : return NULL_TREE;
1560 : }
1561 :
1562 : /* Return the result of a (possibly arithmetic) operation determined by OPCODE
1563 : on the constant value INPUT. OPERAND is 2nd operand for binary operation
1564 : and is required for binary operations. RES_TYPE, required when opcode is
1565 : not NOP_EXPR, is the type in which any operation is to be performed. Return
1566 : NULL_TREE if that cannot be determined or be considered an interprocedural
1567 : invariant. */
1568 :
1569 : static tree
1570 68863 : ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1571 : tree res_type)
1572 : {
1573 68863 : tree res;
1574 :
1575 68863 : if (opcode == NOP_EXPR)
1576 : return input;
1577 6603 : if (!is_gimple_ip_invariant (input))
1578 : return NULL_TREE;
1579 :
1580 6603 : if (opcode == ASSERT_EXPR)
1581 : {
1582 3542 : if (values_equal_for_ipcp_p (input, operand))
1583 : return input;
1584 : else
1585 : return NULL_TREE;
1586 : }
1587 :
1588 3061 : if (TREE_CODE_CLASS (opcode) == tcc_unary)
1589 94 : res = fold_unary (opcode, res_type, input);
1590 : else
1591 2967 : res = fold_binary (opcode, res_type, input, operand);
1592 :
1593 3061 : if (res && !is_gimple_ip_invariant (res))
1594 : return NULL_TREE;
1595 :
1596 : return res;
1597 : }
1598 :
1599 : /* Return the result of an ancestor jump function JFUNC on the constant value
1600 : INPUT. Return NULL_TREE if that cannot be determined. */
1601 :
1602 : static tree
1603 1296 : ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
1604 : {
1605 1296 : gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
1606 1296 : if (TREE_CODE (input) == ADDR_EXPR)
1607 : {
1608 1214 : gcc_checking_assert (is_gimple_ip_invariant_address (input));
1609 1214 : poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
1610 1214 : if (known_eq (off, 0))
1611 : return input;
1612 1092 : poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
1613 2184 : return build1 (ADDR_EXPR, TREE_TYPE (input),
1614 1092 : fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
1615 1092 : build_int_cst (ptr_type_node, byte_offset)));
1616 : }
1617 82 : else if (ipa_get_jf_ancestor_keep_null (jfunc)
1618 82 : && zerop (input))
1619 : return input;
1620 : else
1621 78 : return NULL_TREE;
1622 : }
1623 :
1624 : /* Determine whether JFUNC evaluates to a single known constant value and if
1625 : so, return it. Otherwise return NULL. INFO describes the caller node or
1626 : the one it is inlined to, so that pass-through jump functions can be
1627 : evaluated. PARM_TYPE is the type of the parameter to which the result is
1628 : passed. */
1629 :
1630 : tree
1631 17519460 : ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
1632 : tree parm_type)
1633 : {
1634 17519460 : if (!parm_type)
1635 : return NULL_TREE;
1636 17276664 : if (jfunc->type == IPA_JF_CONST)
1637 4541329 : return ipacp_value_safe_for_type (parm_type, ipa_get_jf_constant (jfunc));
1638 12735335 : else if (jfunc->type == IPA_JF_PASS_THROUGH
1639 9964669 : || jfunc->type == IPA_JF_ANCESTOR)
1640 : {
1641 3555802 : tree input;
1642 3555802 : int idx;
1643 :
1644 3555802 : if (jfunc->type == IPA_JF_PASS_THROUGH)
1645 2770666 : idx = ipa_get_jf_pass_through_formal_id (jfunc);
1646 : else
1647 785136 : idx = ipa_get_jf_ancestor_formal_id (jfunc);
1648 :
1649 3555802 : if (info->ipcp_orig_node)
1650 55910 : input = info->known_csts[idx];
1651 : else
1652 : {
1653 3499892 : ipcp_lattice<tree> *lat;
1654 :
1655 6310384 : if (info->lattices.is_empty ()
1656 2810492 : || idx >= ipa_get_param_count (info))
1657 : return NULL_TREE;
1658 2810492 : lat = ipa_get_scalar_lat (info, idx);
1659 2810492 : if (!lat->is_single_const ())
1660 : return NULL_TREE;
1661 150 : input = lat->values->value;
1662 : }
1663 :
1664 56060 : if (!input)
1665 : return NULL_TREE;
1666 :
1667 19445 : if (jfunc->type == IPA_JF_PASS_THROUGH)
1668 : {
1669 18464 : enum tree_code opcode = ipa_get_jf_pass_through_operation (jfunc);
1670 18464 : tree op2 = ipa_get_jf_pass_through_operand (jfunc);
1671 18464 : tree op_type
1672 18464 : = (opcode == NOP_EXPR) ? NULL_TREE
1673 988 : : ipa_get_jf_pass_through_op_type (jfunc);
1674 18464 : tree cstval = ipa_get_jf_arith_result (opcode, input, op2, op_type);
1675 18464 : return ipacp_value_safe_for_type (parm_type, cstval);
1676 : }
1677 : else
1678 981 : return ipacp_value_safe_for_type (parm_type,
1679 : ipa_get_jf_ancestor_result (jfunc,
1680 981 : input));
1681 : }
1682 : else
1683 : return NULL_TREE;
1684 : }
1685 :
1686 : /* Determine whether JFUNC evaluates to single known polymorphic context, given
1687 : that INFO describes the caller node or the one it is inlined to, CS is the
1688 : call graph edge corresponding to JFUNC and CSIDX index of the described
1689 : parameter. */
1690 :
1691 : ipa_polymorphic_call_context
1692 882883 : ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1693 : ipa_jump_func *jfunc)
1694 : {
1695 882883 : ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1696 882883 : ipa_polymorphic_call_context ctx;
1697 882883 : ipa_polymorphic_call_context *edge_ctx
1698 882883 : = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1699 :
1700 355500 : if (edge_ctx && !edge_ctx->useless_p ())
1701 350244 : ctx = *edge_ctx;
1702 :
1703 882883 : if (jfunc->type == IPA_JF_PASS_THROUGH
1704 791737 : || jfunc->type == IPA_JF_ANCESTOR)
1705 : {
1706 99209 : ipa_polymorphic_call_context srcctx;
1707 99209 : int srcidx;
1708 99209 : bool type_preserved = true;
1709 99209 : if (jfunc->type == IPA_JF_PASS_THROUGH)
1710 : {
1711 91146 : if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1712 1842 : return ctx;
1713 89304 : type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1714 89304 : srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1715 : }
1716 : else
1717 : {
1718 8063 : type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1719 8063 : srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1720 : }
1721 97367 : if (info->ipcp_orig_node)
1722 : {
1723 11475 : if (info->known_contexts.exists ())
1724 1358 : srcctx = info->known_contexts[srcidx];
1725 : }
1726 : else
1727 : {
1728 170149 : if (info->lattices.is_empty ()
1729 84257 : || srcidx >= ipa_get_param_count (info))
1730 1635 : return ctx;
1731 84257 : ipcp_lattice<ipa_polymorphic_call_context> *lat;
1732 84257 : lat = ipa_get_poly_ctx_lat (info, srcidx);
1733 84257 : if (!lat->is_single_const ())
1734 80312 : return ctx;
1735 3945 : srcctx = lat->values->value;
1736 : }
1737 15420 : if (srcctx.useless_p ())
1738 10563 : return ctx;
1739 4857 : if (jfunc->type == IPA_JF_ANCESTOR)
1740 253 : srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1741 4857 : if (!type_preserved)
1742 2889 : srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1743 4857 : srcctx.combine_with (ctx);
1744 4857 : return srcctx;
1745 : }
1746 :
1747 783674 : return ctx;
1748 : }
1749 :
1750 : /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1751 : DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1752 : the result is a range that is not VARYING nor UNDEFINED. */
1753 :
1754 : bool
1755 8883578 : ipa_vr_operation_and_type_effects (vrange &dst_vr,
1756 : const vrange &src_vr,
1757 : enum tree_code operation,
1758 : tree dst_type, tree src_type)
1759 : {
1760 16638000 : if (!ipa_vr_supported_type_p (dst_type)
1761 0 : || !ipa_vr_supported_type_p (src_type))
1762 : return false;
1763 :
1764 8883578 : range_op_handler handler (operation);
1765 8883578 : if (!handler)
1766 : return false;
1767 :
1768 8883578 : value_range varying (dst_type);
1769 8883578 : varying.set_varying (dst_type);
1770 :
1771 8883578 : return (handler.operand_check_p (dst_type, src_type, dst_type)
1772 8883578 : && handler.fold_range (dst_vr, dst_type, src_vr, varying)
1773 8883576 : && !dst_vr.varying_p ()
1774 17767094 : && !dst_vr.undefined_p ());
1775 8883578 : }
1776 :
1777 : /* Same as above, but the SRC_VR argument is an IPA_VR which must
1778 : first be extracted onto a vrange. */
1779 :
1780 : bool
1781 8876125 : ipa_vr_operation_and_type_effects (vrange &dst_vr,
1782 : const ipa_vr &src_vr,
1783 : enum tree_code operation,
1784 : tree dst_type, tree src_type)
1785 : {
1786 8876125 : value_range tmp;
1787 8876125 : src_vr.get_vrange (tmp);
1788 8876125 : return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
1789 8876125 : dst_type, src_type);
1790 8876125 : }
1791 :
1792 : /* Given a PASS_THROUGH jump function JFUNC that takes as its source SRC_VR of
1793 : SRC_TYPE and the result needs to be DST_TYPE, if any value range information
1794 : can be deduced at all, intersect VR with it. CONTEXT_NODE is the call graph
1795 : node representing the function for which optimization flags should be
1796 : evaluated. VR is left untouched if SRC_VR is UNDEFINED or VARYING or if evaluating the operation or the conversion to DST_TYPE yields no usable range. */
1797 :
1798 : static void
1799 92498 : ipa_vr_intersect_with_arith_jfunc (vrange &vr,
1800 : ipa_jump_func *jfunc,
1801 : cgraph_node *context_node,
1802 : const value_range &src_vr,
1803 : tree src_type,
1804 : tree dst_type)
1805 : {
1806 92498 : if (src_vr.undefined_p () || src_vr.varying_p ())
1807 91222 : return;
1808 :
1809 92039 : enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1810 92039 : if (TREE_CODE_CLASS (operation) == tcc_unary)
1811 : {
1812 90763 : value_range op_res;
1813 90763 : const value_range *inter_vr;
1814 90763 : if (operation != NOP_EXPR)
1815 : {
1816 93 : tree operation_type = ipa_get_jf_pass_through_op_type (jfunc);
1817 93 : op_res.set_varying (operation_type);
1818 93 : if (!ipa_vr_operation_and_type_effects (op_res, src_vr, operation,
1819 : operation_type, src_type))
1820 : return;
1821 : inter_vr = &op_res;
1822 : src_type = operation_type; /* From here on the intermediate range has the type of the operation. */
1823 : }
1824 : else
1825 : inter_vr = &src_vr;
1826 :
1827 90763 : if (src_type != dst_type)
1828 : {
1829 6084 : value_range tmp_res (dst_type);
1830 6084 : if (!ipa_vr_operation_and_type_effects (tmp_res, *inter_vr, NOP_EXPR,
1831 : dst_type, src_type))
1832 0 : return;
1833 6084 : vr.intersect (tmp_res);
1834 6084 : }
1835 : else
1836 84679 : vr.intersect (*inter_vr);
1837 90763 : return;
1838 90763 : }
1839 :
1840 1276 : tree operand = ipa_get_jf_pass_through_operand (jfunc); /* Not a unary operation: fold SRC_VR with the range of the operand stored in JFUNC. */
1841 1276 : range_op_handler handler (operation);
1842 1276 : if (!handler)
1843 : return;
1844 1276 : value_range op_vr (TREE_TYPE (operand));
1845 1276 : ipa_get_range_from_ip_invariant (op_vr, operand, context_node);
1846 :
1847 1276 : tree operation_type = ipa_get_jf_pass_through_op_type (jfunc);
1848 1276 : value_range op_res (operation_type);
1849 1716 : if (!ipa_vr_supported_type_p (operation_type)
1850 1276 : || !handler.operand_check_p (operation_type, src_type, op_vr.type ())
1851 1276 : || !handler.fold_range (op_res, operation_type, src_vr, op_vr))
1852 0 : return;
1853 :
1854 1276 : value_range tmp_res (dst_type); /* Convert the result of the operation to DST_TYPE before intersecting. */
1855 1276 : if (ipa_vr_operation_and_type_effects (tmp_res, op_res, NOP_EXPR, dst_type,
1856 : operation_type))
1857 1228 : vr.intersect (tmp_res);
1858 1276 : }
1859 :
1860 : /* Store into VR the value range of JFUNC, given that INFO describes the caller
1861 : node or the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1862 : and PARM_TYPE is the type of the parameter. VR is first set to VARYING of PARM_TYPE and then refined from the range recorded in JFUNC and, for pass-through jump functions, from the range recorded for the source parameter in the transformation summary of the caller. */
1863 :
1864 : void
1865 11540354 : ipa_value_range_from_jfunc (vrange &vr,
1866 : ipa_node_params *info, cgraph_edge *cs,
1867 : ipa_jump_func *jfunc, tree parm_type)
1868 : {
1869 11540354 : vr.set_varying (parm_type);
1870 :
1871 11540354 : if (jfunc->m_vr && jfunc->m_vr->known_p ())
1872 8074794 : ipa_vr_operation_and_type_effects (vr,
1873 : *jfunc->m_vr,
1874 : NOP_EXPR, parm_type,
1875 8074794 : jfunc->m_vr->type ());
1876 11540354 : if (vr.singleton_p ()) /* Already a single value, skip the pass-through refinement below. */
1877 : return;
1878 :
1879 11540214 : if (jfunc->type == IPA_JF_PASS_THROUGH)
1880 : {
1881 2209158 : ipcp_transformation *sum
1882 2209158 : = ipcp_get_transformation_summary (cs->caller->inlined_to
1883 : ? cs->caller->inlined_to
1884 : : cs->caller);
1885 2209158 : if (!sum || !sum->m_vr)
1886 2131845 : return;
1887 :
1888 118407 : int idx = ipa_get_jf_pass_through_formal_id (jfunc);
1889 :
1890 118407 : if (!(*sum->m_vr)[idx].known_p ())
1891 : return;
1892 77313 : tree src_type = ipa_get_type (info, idx);
1893 77313 : value_range srcvr;
1894 77313 : (*sum->m_vr)[idx].get_vrange (srcvr);
1895 :
1896 77313 : ipa_vr_intersect_with_arith_jfunc (vr, jfunc, cs->caller, srcvr, src_type,
1897 : parm_type);
1898 77313 : }
1899 : }
1900 :
1901 : /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1902 : single known constant value and if so, return it. Otherwise return NULL.
1903 : NODE and INFO describe the caller node or the one it is inlined to, and
1904 : its related info. Items with a negative or too large offset and items of type IPA_JF_UNKNOWN always yield NULL. */
1905 :
1906 : tree
1907 3157267 : ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
1908 : const ipa_agg_jf_item *item)
1909 : {
1910 3157267 : tree value = NULL_TREE;
1911 3157267 : int src_idx;
1912 :
1913 3157267 : if (item->offset < 0
1914 3107054 : || item->jftype == IPA_JF_UNKNOWN
1915 2958539 : || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
1916 : return NULL_TREE;
1917 :
1918 2958539 : if (item->jftype == IPA_JF_CONST)
1919 2624581 : return item->value.constant;
1920 :
1921 333958 : gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1922 : || item->jftype == IPA_JF_LOAD_AGG);
1923 :
1924 333958 : src_idx = item->value.pass_through.formal_id; /* NOTE(review): also read for IPA_JF_LOAD_AGG items; presumably load_agg shares this leading member - confirm against the type definition. */
1925 :
1926 333958 : if (info->ipcp_orig_node) /* Presumably set only for IPA-CP clones; values then come from known_csts or the transformation summary. */
1927 : {
1928 15575 : if (item->jftype == IPA_JF_PASS_THROUGH)
1929 3790 : value = info->known_csts[src_idx];
1930 11785 : else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
1931 : {
1932 11785 : ipa_argagg_value_list avl (ts);
1933 11785 : value = avl.get_value (src_idx,
1934 11785 : item->value.load_agg.offset / BITS_PER_UNIT,
1935 11785 : item->value.load_agg.by_ref);
1936 : }
1937 : }
1938 318383 : else if (!info->lattices.is_empty ()) /* Otherwise consult the propagation lattices of the source parameter. */
1939 : {
1940 217526 : class ipcp_param_lattices *src_plats
1941 217526 : = ipa_get_parm_lattices (info, src_idx);
1942 :
1943 217526 : if (item->jftype == IPA_JF_PASS_THROUGH)
1944 : {
1945 130254 : struct ipcp_lattice<tree> *lat = &src_plats->itself;
1946 :
1947 531461 : if (!lat->is_single_const ())
1948 : return NULL_TREE;
1949 :
1950 0 : value = lat->values->value;
1951 : }
1952 87272 : else if (src_plats->aggs
1953 9829 : && !src_plats->aggs_bottom
1954 9829 : && !src_plats->aggs_contain_variable
1955 1499 : && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
1956 : {
1957 : struct ipcp_agg_lattice *aglat;
1958 :
1959 2366 : for (aglat = src_plats->aggs; aglat; aglat = aglat->next) /* The early break below relies on the list being ordered by increasing offset. */
1960 : {
1961 2366 : if (aglat->offset > item->value.load_agg.offset)
1962 : break;
1963 :
1964 2334 : if (aglat->offset == item->value.load_agg.offset)
1965 : {
1966 1467 : if (aglat->is_single_const ())
1967 7 : value = aglat->values->value;
1968 : break;
1969 : }
1970 : }
1971 : }
1972 : }
1973 :
1974 15614 : if (!value)
1975 193876 : return NULL_TREE;
1976 :
1977 9828 : if (item->jftype == IPA_JF_LOAD_AGG)
1978 : {
1979 7516 : tree load_type = item->value.load_agg.type;
1980 7516 : tree value_type = TREE_TYPE (value);
1981 :
1982 : /* Ensure value type is compatible with load type. */
1983 7516 : if (!useless_type_conversion_p (load_type, value_type))
1984 : return NULL_TREE;
1985 : }
1986 :
1987 19656 : tree cstval = ipa_get_jf_arith_result (item->value.pass_through.operation,
1988 : value,
1989 9828 : item->value.pass_through.operand,
1990 9828 : item->value.pass_through.op_type);
1991 9828 : return ipacp_value_safe_for_type (item->type, cstval);
1992 : }
1993 :
1994 : /* Process all items in AGG_JFUNC relative to caller (or the node the original
1995 : caller is inlined to) NODE which is described by INFO and push the results to
1996 : RES as describing values passed in parameter DST_INDEX. Items for which no constant is known are skipped; the pushed items are asserted to have strictly increasing unit offsets. */
1997 :
1998 : void
1999 13988786 : ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
2000 : ipa_agg_jump_function *agg_jfunc,
2001 : unsigned dst_index,
2002 : vec<ipa_argagg_value> *res)
2003 : {
2004 13988786 : unsigned prev_unit_offset = 0;
2005 13988786 : bool first = true;
2006 :
2007 18375277 : for (const ipa_agg_jf_item &item : agg_jfunc->items)
2008 : {
2009 2167557 : tree value = ipa_agg_value_from_jfunc (info, node, &item);
2010 2167557 : if (!value)
2011 500311 : continue;
2012 :
2013 1667246 : ipa_argagg_value iav;
2014 1667246 : iav.value = value;
2015 1667246 : iav.unit_offset = item.offset / BITS_PER_UNIT;
2016 1667246 : iav.index = dst_index;
2017 1667246 : iav.by_ref = agg_jfunc->by_ref;
2018 1667246 : iav.killed = 0;
2019 :
2020 1667246 : gcc_assert (first
2021 : || iav.unit_offset > prev_unit_offset);
2022 1667246 : prev_unit_offset = iav.unit_offset;
2023 1667246 : first = false;
2024 :
2025 1667246 : res->safe_push (iav);
2026 : }
2027 13988786 : }
2028 :
2029 : /* If checking is enabled, verify that no scalar lattice is in the TOP state, i.e. not
2030 : bottom, not containing a variable component and without any known value at
2031 : the same time. Functions for which flag_ipa_cp or optimize is not set are skipped. On failure the symbol table and all lattices are dumped to dump_file (if any) before gcc_unreachable is hit. */
2032 :
2033 : DEBUG_FUNCTION void
2034 130815 : ipcp_verify_propagated_values (void)
2035 : {
2036 130815 : struct cgraph_node *node;
2037 :
2038 1400868 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2039 : {
2040 1270053 : ipa_node_params *info = ipa_node_params_sum->get (node);
2041 1270053 : if (!opt_for_fn (node->decl, flag_ipa_cp)
2042 1270053 : || !opt_for_fn (node->decl, optimize))
2043 10252 : continue;
2044 1259801 : int i, count = ipa_get_param_count (info);
2045 :
2046 3572117 : for (i = 0; i < count; i++)
2047 : {
2048 2312316 : ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
2049 :
2050 2312316 : if (!lat->bottom
2051 221089 : && !lat->contains_variable
2052 31784 : && lat->values_count == 0)
2053 : {
2054 0 : if (dump_file)
2055 : {
2056 0 : symtab->dump (dump_file);
2057 0 : fprintf (dump_file, "\nIPA lattices after constant "
2058 : "propagation, before gcc_unreachable:\n");
2059 0 : print_all_lattices (dump_file, true, false);
2060 : }
2061 :
2062 0 : gcc_unreachable ();
2063 : }
2064 : }
2065 : }
2066 130815 : }
2067 :
2068 : /* Return true iff X and Y should be considered equal contexts by IPA-CP. This overload for polymorphic call contexts defers to the equal_to method of X. */
2069 :
2070 : static bool
2071 2683 : values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
2072 : ipa_polymorphic_call_context y)
2073 : {
2074 2171 : return x.equal_to (y);
2075 : }
2076 :
2077 :
2078 : /* Add a new value source to the value represented by THIS, marking that a
2079 : value comes from edge CS and (if the underlying jump function is a
2080 : pass-through or an ancestor one) from a caller value SRC_VAL of a caller
2081 : parameter described by SRC_IDX. OFFSET is negative if the source was the
2082 : scalar value of the parameter itself or the offset within an aggregate. The source is allocated from ipcp_sources_pool and put at the head of the list of sources. */
2083 :
2084 : template <typename valtype>
2085 : void
2086 486429 : ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
2087 : int src_idx, HOST_WIDE_INT offset)
2088 : {
2089 : ipcp_value_source<valtype> *src;
2090 :
2091 486429 : src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
2092 486429 : src->offset = offset;
2093 486429 : src->cs = cs;
2094 486429 : src->val = src_val;
2095 486429 : src->index = src_idx;
2096 :
2097 486429 : src->next = sources;
2098 486429 : sources = src;
2099 486429 : }
2100 :
2101 : /* Allocate a new ipcp_value holding a tree constant from ipcp_cst_values_pool, initialize its value to
2102 : CST and its self_recursion_generated_level to SAME_LAT_GEN_LEVEL, and clear all other fields. */
2103 :
2104 : static ipcp_value<tree> *
2105 139450 : allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
2106 : {
2107 139450 : ipcp_value<tree> *val;
2108 :
2109 139450 : val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
2110 139450 : val->value = cst;
2111 139450 : val->self_recursion_generated_level = same_lat_gen_level;
2112 139450 : return val;
2113 : }
2114 :
2115 : /* Allocate a new ipcp_value holding a polymorphic context from ipcp_poly_ctx_values_pool, initialize its
2116 : value to CTX and its self_recursion_generated_level to SAME_LAT_GEN_LEVEL, and clear all other fields. */
2117 :
2118 : static ipcp_value<ipa_polymorphic_call_context> *
2119 7494 : allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
2120 : unsigned same_lat_gen_level)
2121 : {
2122 7494 : ipcp_value<ipa_polymorphic_call_context> *val;
2123 :
2124 7494 : val = new (ipcp_poly_ctx_values_pool.allocate ())
2125 7494 : ipcp_value<ipa_polymorphic_call_context>();
2126 7494 : val->value = ctx;
2127 7494 : val->self_recursion_generated_level = same_lat_gen_level;
2128 7494 : return val;
2129 : }
2130 :
2131 : /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
2132 : SRC_VAL, SRC_IDX and OFFSET are meant for add_source and have the same
2133 : meaning. OFFSET -1 means the source is scalar and not a part of an
2134 : aggregate. If non-NULL, VAL_P records address of existing or newly added
2135 : ipcp_value.
2136 :
2137 : If the value is generated for a self-recursive call as a result of an
2138 : arithmetic pass-through jump-function acting on a value in the same lattice,
2139 : SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
2140 : zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. Return true if a new value was added; return false if the lattice is bottom or already contains NEWVAL (a new source may still be recorded); when the size limit is hit, return the result of set_to_bottom. */
2141 :
2142 : template <typename valtype>
2143 : bool
2144 498808 : ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
2145 : ipcp_value<valtype> *src_val,
2146 : int src_idx, HOST_WIDE_INT offset,
2147 : ipcp_value<valtype> **val_p,
2148 : unsigned same_lat_gen_level)
2149 : {
2150 498808 : ipcp_value<valtype> *val, *last_val = NULL;
2151 :
2152 498808 : if (val_p)
2153 1257 : *val_p = NULL;
2154 :
2155 498808 : if (bottom)
2156 : return false;
2157 :
2158 967989 : for (val = values; val; last_val = val, val = val->next)
2159 819679 : if (values_equal_for_ipcp_p (val->value, newval))
2160 : {
2161 347140 : if (val_p)
2162 416 : *val_p = val;
2163 :
2164 347140 : if (val->self_recursion_generated_level < same_lat_gen_level)
2165 179 : val->self_recursion_generated_level = same_lat_gen_level;
2166 :
2167 347140 : if (ipa_edge_within_scc (cs)) /* Within an SCC, do not record the same (CS, SRC_VAL) source twice. */
2168 : {
2169 : ipcp_value_source<valtype> *s;
2170 48562 : for (s = val->sources; s; s = s->next)
2171 44373 : if (s->cs == cs && s->val == src_val)
2172 : break;
2173 11844 : if (s)
2174 : return false;
2175 : }
2176 :
2177 339485 : val->add_source (cs, src_val, src_idx, offset);
2178 339485 : return false;
2179 : }
2180 :
2181 148310 : if (!same_lat_gen_level && values_count >= opt_for_fn (cs->callee->decl,
2182 : param_ipa_cp_value_list_size))
2183 : {
2184 : /* We can only free sources, not the values themselves, because sources
2185 : of other values in this SCC might point to them. */
2186 12276 : for (val = values; val; val = val->next)
2187 : {
2188 40439 : while (val->sources)
2189 : {
2190 29529 : ipcp_value_source<valtype> *src = val->sources;
2191 29529 : val->sources = src->next;
2192 29529 : ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src); /* NOTE(review): the cast suggests one pool typed for tree sources serves every valtype - confirm that all instantiations have the same layout. */
2193 : }
2194 : }
2195 1366 : values = NULL;
2196 1366 : return set_to_bottom ();
2197 : }
2198 :
2199 146944 : values_count++;
2200 146944 : val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2201 146944 : val->add_source (cs, src_val, src_idx, offset);
2202 146944 : val->next = NULL;
2203 :
2204 : /* Add the new value to end of value list, which can reduce iterations
2205 : of propagation stage for recursive function. */
2206 146944 : if (last_val)
2207 45333 : last_val->next = val;
2208 : else
2209 101611 : values = val;
2210 :
2211 146944 : if (val_p)
2212 841 : *val_p = val;
2213 :
2214 : return true;
2215 : }
2216 :
2217 : /* A helper function that returns result of operation specified by OPCODE on
2218 : the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2219 : value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2220 : acting as its second operand. OP_TYPE is the type in which the operation is
2221 : performed. Return NULL_TREE if OPND1_TYPE is given and the type of the source value is not trivially convertible to it. */
2222 :
2223 : static tree
2224 21394 : get_val_across_arith_op (enum tree_code opcode,
2225 : tree opnd1_type,
2226 : tree opnd2,
2227 : ipcp_value<tree> *src_val,
2228 : tree op_type)
2229 : {
2230 21394 : tree opnd1 = src_val->value;
2231 :
2232 : /* Skip source values that are incompatible with the specified type. */
2233 21394 : if (opnd1_type
2234 21394 : && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2235 : return NULL_TREE;
2236 :
2237 21394 : return ipa_get_jf_arith_result (opcode, opnd1, opnd2, op_type);
2238 : }
2239 :
2240 : /* Propagate values through an arithmetic transformation described by a jump
2241 : function associated with edge CS, taking values from SRC_LAT and putting
2242 : them into DEST_LAT. OPND1_TYPE, if non-NULL, is the expected type for the
2243 : values in SRC_LAT. OPND2 is a constant value if transformation is a binary
2244 : operation. SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes
2245 : lattice of a part of an aggregate, otherwise it should be -1. SRC_IDX is
2246 : the index of the source parameter. OP_TYPE is the type in which the
2247 : operation is performed and can be NULL when OPCODE is NOP_EXPR. RES_TYPE is
2248 : the value type of result being propagated into. Return true if DEST_LAT
2249 : changed. */
2250 :
2251 : static bool
2252 76638 : propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2253 : enum tree_code opcode,
2254 : tree opnd1_type,
2255 : tree opnd2,
2256 : ipcp_lattice<tree> *src_lat,
2257 : ipcp_lattice<tree> *dest_lat,
2258 : HOST_WIDE_INT src_offset,
2259 : int src_idx,
2260 : tree op_type,
2261 : tree res_type)
2262 : {
2263 76638 : ipcp_value<tree> *src_val;
2264 76638 : bool ret = false;
2265 :
2266 : /* Due to circular dependencies, propagating within an SCC through arithmetic
2267 : transformation would create infinite number of values. But for
2268 : self-feeding recursive function, we could allow propagation in a limited
2269 : count, and this can enable a simple kind of recursive function versioning.
2270 : For other scenario, we would just make lattices bottom. */
2271 76638 : if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2272 : {
2273 2184 : int i;
2274 :
2275 2184 : int max_recursive_depth = opt_for_fn(cs->caller->decl,
2276 : param_ipa_cp_max_recursive_depth);
2277 2184 : if (src_lat != dest_lat || max_recursive_depth < 1) /* Only a lattice feeding itself qualifies for limited recursive propagation. */
2278 1666 : return dest_lat->set_contains_variable ();
2279 :
2280 : /* No benefit if recursive execution is in low probability. */
2281 1300 : if (cs->sreal_frequency () * 100
2282 2600 : <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2283 : param_ipa_cp_min_recursive_probability))
2284 89 : return dest_lat->set_contains_variable ();
2285 :
2286 1211 : auto_vec<ipcp_value<tree> *, 8> val_seeds;
2287 :
2288 2258 : for (src_val = src_lat->values; src_val; src_val = src_val->next)
2289 : {
2290 : /* Now we do not use self-recursively generated value as propagation
2291 : source, this is absolutely conservative, but could avoid explosion
2292 : of lattice's value space, especially when one recursive function
2293 : calls another recursive. */
2294 1740 : if (src_val->self_recursion_generated_p ())
2295 : {
2296 909 : ipcp_value_source<tree> *s;
2297 :
2298 : /* If the lattice has already been propagated for the call site,
2299 : no need to do that again. */
2300 1422 : for (s = src_val->sources; s; s = s->next)
2301 1206 : if (s->cs == cs)
2302 693 : return dest_lat->set_contains_variable ();
2303 : }
2304 : else
2305 831 : val_seeds.safe_push (src_val);
2306 : }
2307 :
2308 1036 : gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size); /* NOTE(review): reads the global parameter while add_value uses opt_for_fn on the callee decl - confirm the two cannot disagree. */
2309 :
2310 : /* Recursively generate lattice values with a limited count. */
2311 836 : FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2312 : {
2313 1416 : for (int j = 1; j < max_recursive_depth; j++)
2314 : {
2315 1261 : tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2316 : src_val, op_type);
2317 1261 : cstval = ipacp_value_safe_for_type (res_type, cstval);
2318 1261 : if (!cstval)
2319 : break;
2320 :
2321 1257 : ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2322 : src_offset, &src_val, j); /* add_value stores the existing or new value back into SRC_VAL, so the next iteration operates on this result; J is the generation level. */
2323 1257 : gcc_checking_assert (src_val);
2324 : }
2325 : }
2326 518 : ret |= dest_lat->set_contains_variable ();
2327 1211 : }
2328 : else
2329 94712 : for (src_val = src_lat->values; src_val; src_val = src_val->next)
2330 : {
2331 : /* Now we do not use self-recursively generated value as propagation
2332 : source, otherwise it is easy to make value space of normal lattice
2333 : overflow. */
2334 20258 : if (src_val->self_recursion_generated_p ())
2335 : {
2336 125 : ret |= dest_lat->set_contains_variable ();
2337 125 : continue;
2338 : }
2339 :
2340 20133 : tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2341 : src_val, op_type);
2342 20133 : cstval = ipacp_value_safe_for_type (res_type, cstval);
2343 20133 : if (cstval)
2344 19932 : ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2345 : src_offset);
2346 : else
2347 201 : ret |= dest_lat->set_contains_variable ();
2348 : }
2349 :
2350 : return ret;
2351 : }
2352 :
2353 : /* Propagate values through a pass-through jump function JFUNC associated with
2354 : edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2355 : is the index of the source parameter. PARM_TYPE is the type of the
2356 : parameter to which the result is passed. This wraps propagate_vals_across_arith_jfunc with no expected operand type, source offset -1 and a NULL operation type for NOP_EXPR. */
2357 :
2358 : static bool
2359 72027 : propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2360 : ipcp_lattice<tree> *src_lat,
2361 : ipcp_lattice<tree> *dest_lat, int src_idx,
2362 : tree parm_type)
2363 : {
2364 72027 : gcc_checking_assert (parm_type);
2365 72027 : enum tree_code opcode = ipa_get_jf_pass_through_operation (jfunc);
2366 72027 : tree op_type = (opcode == NOP_EXPR) ? NULL_TREE
2367 2429 : : ipa_get_jf_pass_through_op_type (jfunc);
2368 72027 : return propagate_vals_across_arith_jfunc (cs, opcode, NULL_TREE,
2369 : ipa_get_jf_pass_through_operand (jfunc),
2370 : src_lat, dest_lat, -1, src_idx, op_type,
2371 72027 : parm_type);
2372 : }
2373 :
2374 : /* Propagate values through an ancestor jump function JFUNC associated with
2375 : edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2376 : is the index of the source parameter. PARAM_TYPE is the type each result is checked against with ipacp_value_safe_for_type. Nothing is propagated along edges within an SCC; DEST_LAT is then only marked as containing a variable. */
2377 :
2378 : static bool
2379 2203 : propagate_vals_across_ancestor (struct cgraph_edge *cs,
2380 : struct ipa_jump_func *jfunc,
2381 : ipcp_lattice<tree> *src_lat,
2382 : ipcp_lattice<tree> *dest_lat, int src_idx,
2383 : tree param_type)
2384 : {
2385 2203 : ipcp_value<tree> *src_val;
2386 2203 : bool ret = false;
2387 :
2388 2203 : if (ipa_edge_within_scc (cs))
2389 14 : return dest_lat->set_contains_variable ();
2390 :
2391 2504 : for (src_val = src_lat->values; src_val; src_val = src_val->next)
2392 : {
2393 315 : tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2394 315 : t = ipacp_value_safe_for_type (param_type, t);
2395 315 : if (t)
2396 257 : ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2397 : else
2398 58 : ret |= dest_lat->set_contains_variable ();
2399 : }
2400 :
2401 : return ret;
2402 : }
2403 :
2404 : /* Propagate scalar values across jump function JFUNC that is associated with
2405 : edge CS and put the values into DEST_LAT. PARAM_TYPE is the type of the
2406 : parameter to which the result is passed. Nothing is done and false is returned if DEST_LAT is already bottom. */
2407 :
2408 : static bool
2409 3776960 : propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2410 : struct ipa_jump_func *jfunc,
2411 : ipcp_lattice<tree> *dest_lat,
2412 : tree param_type)
2413 : {
2414 3776960 : if (dest_lat->bottom)
2415 : return false;
2416 :
2417 816776 : if (jfunc->type == IPA_JF_CONST)
2418 : {
2419 370094 : tree val = ipa_get_jf_constant (jfunc);
2420 370094 : val = ipacp_value_safe_for_type (param_type, val);
2421 370094 : if (val)
2422 370076 : return dest_lat->add_value (val, cs, NULL, 0);
2423 : else
2424 18 : return dest_lat->set_contains_variable ();
2425 : }
2426 446682 : else if (jfunc->type == IPA_JF_PASS_THROUGH
2427 267805 : || jfunc->type == IPA_JF_ANCESTOR)
2428 : {
2429 183316 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2430 183316 : ipcp_lattice<tree> *src_lat;
2431 183316 : int src_idx;
2432 183316 : bool ret;
2433 :
2434 183316 : if (jfunc->type == IPA_JF_PASS_THROUGH)
2435 178877 : src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2436 : else
2437 4439 : src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2438 :
2439 183316 : src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2440 183316 : if (src_lat->bottom)
2441 108939 : return dest_lat->set_contains_variable ();
2442 :
2443 : /* If we would need to clone the caller and cannot, do not propagate. */
2444 74377 : if (!ipcp_versionable_function_p (cs->caller)
2445 74377 : && (src_lat->contains_variable
2446 133 : || (src_lat->values_count > 1)))
2447 147 : return dest_lat->set_contains_variable ();
2448 :
2449 74230 : if (jfunc->type == IPA_JF_PASS_THROUGH)
2450 72027 : ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2451 : dest_lat, src_idx,
2452 : param_type);
2453 : else
2454 2203 : ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2455 : src_idx, param_type);
2456 :
2457 74230 : if (src_lat->contains_variable) /* A variable component in the source carries over to the destination. */
2458 64833 : ret |= dest_lat->set_contains_variable ();
2459 :
2460 74230 : return ret;
2461 : }
2462 :
2463 : /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2464 : use it for indirect inlining), we should propagate them too. */
2465 263366 : return dest_lat->set_contains_variable ();
2466 : }
2467 :
2468 : /* Propagate polymorphic call contexts across jump function JFUNC that is associated with
2469 : edge CS and describes argument IDX and put the values into DEST_LAT. If nothing could be taken from the caller lattice, the context recorded on the edge for argument IDX is used instead, or DEST_LAT is marked as containing a variable when that context is useless. */
2470 :
2471 : static bool
2472 3776960 : propagate_context_across_jump_function (cgraph_edge *cs,
2473 : ipa_jump_func *jfunc, int idx,
2474 : ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2475 : {
2476 3776960 : if (dest_lat->bottom)
2477 : return false;
2478 913258 : ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2479 913258 : bool ret = false;
2480 913258 : bool added_sth = false;
2481 913258 : bool type_preserved = true;
2482 :
2483 913258 : ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2484 926803 : = ipa_get_ith_polymorhic_call_context (args, idx);
2485 :
2486 13545 : if (edge_ctx_ptr)
2487 13545 : edge_ctx = *edge_ctx_ptr;
2488 :
2489 913258 : if (jfunc->type == IPA_JF_PASS_THROUGH
2490 733890 : || jfunc->type == IPA_JF_ANCESTOR)
2491 : {
2492 183903 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2493 183903 : int src_idx;
2494 183903 : ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2495 :
2496 : /* TODO: Once we figure out how to propagate speculations, it will
2497 : probably be a good idea to switch to speculation if type_preserved is
2498 : not set instead of punting. */
2499 183903 : if (jfunc->type == IPA_JF_PASS_THROUGH)
2500 : {
2501 179368 : if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR) /* Contexts are only propagated through simple pass-throughs. */
2502 7545 : goto prop_fail;
2503 171823 : type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2504 171823 : src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2505 : }
2506 : else
2507 : {
2508 4535 : type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2509 4535 : src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2510 : }
2511 :
2512 176358 : src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2513 : /* If we would need to clone the caller and cannot, do not propagate. */
2514 176358 : if (!ipcp_versionable_function_p (cs->caller)
2515 176358 : && (src_lat->contains_variable
2516 14015 : || (src_lat->values_count > 1)))
2517 2485 : goto prop_fail;
2518 :
2519 173873 : ipcp_value<ipa_polymorphic_call_context> *src_val;
2520 175170 : for (src_val = src_lat->values; src_val; src_val = src_val->next)
2521 : {
2522 1297 : ipa_polymorphic_call_context cur = src_val->value;
2523 :
2524 1297 : if (!type_preserved)
2525 871 : cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2526 1297 : if (jfunc->type == IPA_JF_ANCESTOR)
2527 325 : cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2528 : /* TODO: In cases we know how the context is going to be used,
2529 : we can improve the result by passing proper OTR_TYPE. */
2530 1297 : cur.combine_with (edge_ctx);
2531 2594 : if (!cur.useless_p ())
2532 : {
2533 838 : if (src_lat->contains_variable
2534 838 : && !edge_ctx.equal_to (cur))
2535 260 : ret |= dest_lat->set_contains_variable ();
2536 838 : ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2537 838 : added_sth = true;
2538 : }
2539 : }
2540 : }
2541 :
2542 729355 : prop_fail:
2543 183903 : if (!added_sth) /* Fall back to the context recorded on the edge itself. */
2544 : {
2545 912483 : if (!edge_ctx.useless_p ())
2546 8245 : ret |= dest_lat->add_value (edge_ctx, cs);
2547 : else
2548 904238 : ret |= dest_lat->set_contains_variable ();
2549 : }
2550 :
2551 : return ret;
2552 : }
2553 :
2554 : /* Propagate known bits across JFUNC that is associated with
2555 : edge CS and update DEST_LATTICE accordingly. IDX is the index of the callee parameter whose type is looked up. Return false without doing anything if DEST_LATTICE is already bottom. */
2556 :
2557 : bool
2558 3776960 : propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2559 : ipa_jump_func *jfunc,
2560 : ipcp_bits_lattice *dest_lattice)
2561 : {
2562 3776960 : if (dest_lattice->bottom_p ())
2563 : return false;
2564 :
2565 530672 : enum availability availability;
2566 530672 : cgraph_node *callee = cs->callee->function_symbol (&availability);
2567 530672 : ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2568 530672 : tree parm_type = ipa_get_type (callee_info, idx);
2569 :
2570 : /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2571 : transform for these cases. Similarly, we can have bad type mismatches
2572 : with LTO, avoid doing anything with those too. */
2573 530672 : if (!parm_type
2574 530672 : || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2575 : {
2576 29215 : if (dump_file && (dump_flags & TDF_DETAILS))
2577 11 : fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2578 : "param %i of %s is NULL or unsuitable for bits propagation\n",
2579 11 : idx, cs->callee->dump_name ());
2580 :
2581 29215 : return dest_lattice->set_to_bottom ();
2582 : }
2583 :
2584 501457 : if (jfunc->type == IPA_JF_PASS_THROUGH
2585 404507 : || jfunc->type == IPA_JF_ANCESTOR)
2586 : {
2587 99427 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2588 99427 : tree operand = NULL_TREE;
2589 99427 : tree op_type = NULL_TREE;
2590 99427 : enum tree_code code;
2591 99427 : unsigned src_idx;
2592 99427 : bool keep_null = false;
2593 :
2594 99427 : if (jfunc->type == IPA_JF_PASS_THROUGH)
2595 : {
2596 96950 : code = ipa_get_jf_pass_through_operation (jfunc);
2597 96950 : src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2598 96950 : if (code != NOP_EXPR)
2599 : {
2600 2070 : operand = ipa_get_jf_pass_through_operand (jfunc);
2601 2070 : op_type = ipa_get_jf_pass_through_op_type (jfunc);
2602 : }
2603 : }
2604 : else
2605 : {
2606 2477 : code = POINTER_PLUS_EXPR; /* An ancestor jump function is modelled as adding the ancestor offset in bytes. */
2607 2477 : src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2608 2477 : unsigned HOST_WIDE_INT offset
2609 2477 : = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2610 2477 : keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2611 2477 : operand = build_int_cstu (size_type_node, offset);
2612 : }
2613 :
2614 99427 : class ipcp_param_lattices *src_lats
2615 99427 : = ipa_get_parm_lattices (caller_info, src_idx);
2616 :
2617 : /* Try to propagate bits if src_lattice is bottom, but jfunc is known
2618 : (this is what the code after this block does). For example, consider:
2619 : int f(int x)
2620 : {
2621 : g (x & 0xff);
2622 : }
2623 : Assume lattice for x is bottom, however we can still propagate
2624 : result of x & 0xff == 0xff, which gets computed during ccp1 pass
2625 : and we store it in jump function during analysis stage. */
2626 :
2627 99427 : if (!src_lats->bits_lattice.bottom_p ()
2628 99427 : && !src_lats->bits_lattice.recipient_only_p ())
2629 : {
2630 21467 : if (!op_type)
2631 20292 : op_type = ipa_get_type (caller_info, src_idx);
2632 :
2633 21467 : unsigned precision = TYPE_PRECISION (op_type);
2634 21467 : signop sgn = TYPE_SIGN (op_type);
2635 21467 : bool drop_all_ones
2636 21467 : = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2637 :
2638 21467 : return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2639 21467 : sgn, code, operand, drop_all_ones);
2640 : }
2641 : }
2642 :
2643 479990 : value_range vr (parm_type); /* Fall back to the bitmask of the value range recorded in JFUNC, if any. */
2644 479990 : if (jfunc->m_vr)
2645 : {
2646 409960 : jfunc->m_vr->get_vrange (vr);
2647 409960 : if (!vr.undefined_p () && !vr.varying_p ())
2648 : {
2649 409960 : irange_bitmask bm = vr.get_bitmask ();
2650 409960 : widest_int mask
2651 409960 : = widest_int::from (bm.mask (), TYPE_SIGN (parm_type));
2652 409960 : widest_int value
2653 409960 : = widest_int::from (bm.value (), TYPE_SIGN (parm_type));
2654 409960 : return dest_lattice->meet_with (value, mask,
2655 409960 : TYPE_PRECISION (parm_type));
2656 409960 : }
2657 : }
2658 70030 : return dest_lattice->set_to_bottom ();
2659 479990 : }
2660 :
2661 : /* Propagate value range across jump function JFUNC that is associated with
2662 : edge CS, whose callee parameter has type PARAM_TYPE, into the value range
2663 : lattice of DEST_PLATS. Return false if that lattice is already bottom. */
2664 :
2665 : static bool
2666 3776115 : propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2667 : class ipcp_param_lattices *dest_plats,
2668 : tree param_type)
2669 : {
2670 3776115 : ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2671 :
2672 3776115 : if (dest_lat->bottom_p ())
2673 : return false;
2674 :
2675 623536 : if (!param_type
2676 623536 : || !ipa_vr_supported_type_p (param_type))
2677 29155 : return dest_lat->set_to_bottom ();
2678 :
2679 594381 : value_range vr (param_type);
2680 594381 : vr.set_varying (param_type);
2681 594381 : if (jfunc->m_vr)
2682 513551 : ipa_vr_operation_and_type_effects (vr, *jfunc->m_vr, NOP_EXPR,
2683 : param_type,
2684 513551 : jfunc->m_vr->type ());
2685 :
2686 594381 : if (jfunc->type == IPA_JF_PASS_THROUGH)
2687 : {
2688 91056 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2689 91056 : int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2690 91056 : class ipcp_param_lattices *src_lats
2691 91056 : = ipa_get_parm_lattices (caller_info, src_idx);
2692 91056 : tree operand_type = ipa_get_type (caller_info, src_idx);
2693 :
2694 91056 : if (src_lats->m_value_range.bottom_p ()
2695 91056 : || src_lats->m_value_range.recipient_only_p ())
2696 75376 : return dest_lat->set_to_bottom ();
2697 :
2698 15680 : if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR
2699 15680 : || !ipa_edge_within_scc (cs)) /* Non-NOP operations only outside an SCC. */
2700 15185 : ipa_vr_intersect_with_arith_jfunc (vr, jfunc, cs->caller,
2701 15185 : src_lats->m_value_range.m_vr,
2702 : operand_type, param_type);
2703 : }
2704 :
2705 519005 : if (!vr.undefined_p () && !vr.varying_p ())
2706 491347 : return dest_lat->meet_with (vr);
2707 : else
2708 27658 : return dest_lat->set_to_bottom ();
2709 594381 : }
2710 :
2711 : /* If DEST_PLATS already has aggregate items, check that its aggs_by_ref flag
2712 : matches NEW_AGGS_BY_REF and if not, set all its aggregate lattices to bottom
2713 : and return true (in all other cases, return false). If there are no
2714 : aggregate items, set aggs_by_ref to NEW_AGGS_BY_REF. */
2715 :
2716 : static bool
2717 41900 : set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2718 : bool new_aggs_by_ref)
2719 : {
2720 0 : if (dest_plats->aggs)
2721 : {
2722 22326 : if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2723 : {
2724 0 : set_agg_lats_to_bottom (dest_plats);
2725 0 : return true;
2726 : }
2727 : }
2728 : else
2729 19574 : dest_plats->aggs_by_ref = new_aggs_by_ref;
2730 : return false;
2731 : }
2732 :
2733 : /* Walk aggregate lattices in DEST_PLATS from **AGLAT on, until **AGLAT is an
2734 : already existing lattice for the given OFFSET and VAL_SIZE, marking all skipped
2735 : lattices as containing variable and checking for overlaps. If there is no
2736 : already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2737 : it with offset, size and contains_variable to PRE_EXISTING, and return true,
2738 : unless there are too many already. If there are too many, return false. If
2739 : there are overlaps or a size mismatch, set the aggregate lattices of DEST_PLATS
2740 : to bottom and return false. If any skipped lattices were newly marked as
2741 : containing variable, set *CHANGE to true. MAX_AGG_ITEMS caps their number. */
2742 :
2743 : static bool
2744 112913 : merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2745 : HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2746 : struct ipcp_agg_lattice ***aglat,
2747 : bool pre_existing, bool *change, int max_agg_items)
2748 : {
2749 112913 : gcc_checking_assert (offset >= 0);
2750 :
2751 117267 : while (**aglat && (**aglat)->offset < offset)
2752 : {
2753 4354 : if ((**aglat)->offset + (**aglat)->size > offset)
2754 : {
2755 0 : set_agg_lats_to_bottom (dest_plats);
2756 0 : return false;
2757 : }
2758 4354 : *change |= (**aglat)->set_contains_variable ();
2759 4354 : *aglat = &(**aglat)->next;
2760 : }
2761 :
2762 112913 : if (**aglat && (**aglat)->offset == offset)
2763 : {
2764 56365 : if ((**aglat)->size != val_size)
2765 : {
2766 13 : set_agg_lats_to_bottom (dest_plats);
2767 13 : return false;
2768 : }
2769 56352 : gcc_assert (!(**aglat)->next
2770 : || (**aglat)->next->offset >= offset + val_size);
2771 : return true;
2772 : }
2773 : else
2774 : {
2775 56548 : struct ipcp_agg_lattice *new_al;
2776 :
2777 56548 : if (**aglat && (**aglat)->offset < offset + val_size)
2778 : {
2779 3 : set_agg_lats_to_bottom (dest_plats);
2780 3 : return false;
2781 : }
2782 56545 : if (dest_plats->aggs_count == max_agg_items)
2783 : return false;
2784 56506 : dest_plats->aggs_count++;
2785 56506 : new_al = ipcp_agg_lattice_pool.allocate ();
2786 :
2787 56506 : new_al->offset = offset;
2788 56506 : new_al->size = val_size;
2789 56506 : new_al->contains_variable = pre_existing;
2790 :
2791 56506 : new_al->next = **aglat; /* Link the new lattice in before **AGLAT. */
2792 56506 : **aglat = new_al;
2793 56506 : return true;
2794 : }
2795 : }
2796 :
2797 : /* Mark AGLAT and all other aggregate lattices reachable by next pointers as
2798 : containing an unknown value. Return true if any set_contains_variable call did. */
2799 :
2800 : static bool
2801 41882 : set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2802 : {
2803 41882 : bool ret = false;
2804 44409 : while (aglat)
2805 : {
2806 2527 : ret |= aglat->set_contains_variable ();
2807 2527 : aglat = aglat->next;
2808 : }
2809 41882 : return ret;
2810 : }
2811 :
2812 : /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2813 : OFFSET_DELTA from their offsets and skipping those whose offset would become
2814 : negative. CS is the call graph edge and SRC_IDX the index of the source
2815 : parameter used for lattice value sources. Return true if DEST_PLATS changed in any way. */
2816 :
2817 : static bool
2818 3984 : merge_aggregate_lattices (struct cgraph_edge *cs,
2819 : class ipcp_param_lattices *dest_plats,
2820 : class ipcp_param_lattices *src_plats,
2821 : int src_idx, HOST_WIDE_INT offset_delta)
2822 : {
2823 3984 : bool pre_existing = dest_plats->aggs != NULL;
2824 3984 : struct ipcp_agg_lattice **dst_aglat;
2825 3984 : bool ret = false;
2826 :
2827 3984 : if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2828 0 : return true;
2829 3984 : if (src_plats->aggs_bottom)
2830 2 : return set_agg_lats_contain_variable (dest_plats);
2831 3982 : if (src_plats->aggs_contain_variable)
2832 2314 : ret |= set_agg_lats_contain_variable (dest_plats);
2833 3982 : dst_aglat = &dest_plats->aggs;
2834 :
2835 3982 : int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2836 : param_ipa_max_agg_items);
2837 3982 : for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2838 11663 : src_aglat;
2839 7681 : src_aglat = src_aglat->next)
2840 : {
2841 7681 : HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2842 :
2843 7681 : if (new_offset < 0)
2844 51 : continue;
2845 7630 : if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2846 : &dst_aglat, pre_existing, &ret, max_agg_items))
2847 : {
2848 7626 : struct ipcp_agg_lattice *new_al = *dst_aglat;
2849 :
2850 7626 : dst_aglat = &(*dst_aglat)->next;
2851 7626 : if (src_aglat->bottom)
2852 : {
2853 0 : ret |= new_al->set_contains_variable ();
2854 0 : continue;
2855 : }
2856 7626 : if (src_aglat->contains_variable)
2857 4444 : ret |= new_al->set_contains_variable ();
2858 7626 : for (ipcp_value<tree> *val = src_aglat->values;
2859 11827 : val;
2860 4201 : val = val->next)
2861 4201 : ret |= new_al->add_value (val->value, cs, val, src_idx,
2862 : src_aglat->offset);
2863 : }
2864 4 : else if (dest_plats->aggs_bottom)
2865 : return true;
2866 : }
2867 3982 : ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2868 3982 : return ret;
2869 : }
2870 :
2871 : /* Determine whether there is anything to propagate from SRC_PLATS through a
2872 : pass-through JFUNC and if so, whether it conforms to the rules about
2873 : propagating values passed by reference. */
2874 :
2875 : static bool
2876 171664 : agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2877 : struct ipa_jump_func *jfunc)
2878 : {
2879 171664 : return src_plats->aggs
2880 171664 : && (!src_plats->aggs_by_ref
2881 5064 : || ipa_get_jf_pass_through_agg_preserved (jfunc));
2882 : }
2883 :
2884 : /* Propagate values through ITEM, jump function for a part of an aggregate
2885 : (a constant, a pass-through or a load from an aggregate), into corresponding
2886 : aggregate lattice AGLAT. CS is the call graph edge associated with the jump
2887 : function. Return true if AGLAT changed in any way. */
2888 :
2889 : static bool
2890 105232 : propagate_aggregate_lattice (struct cgraph_edge *cs,
2891 : struct ipa_agg_jf_item *item,
2892 : struct ipcp_agg_lattice *aglat)
2893 : {
2894 105232 : class ipa_node_params *caller_info;
2895 105232 : class ipcp_param_lattices *src_plats;
2896 105232 : struct ipcp_lattice<tree> *src_lat;
2897 105232 : HOST_WIDE_INT src_offset;
2898 105232 : int src_idx;
2899 105232 : tree load_type;
2900 105232 : bool ret;
2901 :
2902 105232 : if (item->jftype == IPA_JF_CONST)
2903 : {
2904 94002 : tree value = item->value.constant;
2905 :
2906 94002 : gcc_checking_assert (is_gimple_ip_invariant (value));
2907 94002 : return aglat->add_value (value, cs, NULL, 0);
2908 : }
2909 :
2910 11230 : gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2911 : || item->jftype == IPA_JF_LOAD_AGG);
2912 :
2913 11230 : caller_info = ipa_node_params_sum->get (cs->caller);
2914 11230 : src_idx = item->value.pass_through.formal_id;
2915 11230 : src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2916 :
2917 11230 : if (item->jftype == IPA_JF_PASS_THROUGH)
2918 : {
2919 3186 : load_type = NULL_TREE;
2920 3186 : src_lat = &src_plats->itself;
2921 3186 : src_offset = -1;
2922 : }
2923 : else
2924 : {
2925 8044 : HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2926 8044 : struct ipcp_agg_lattice *src_aglat;
2927 :
2928 12448 : for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2929 8170 : if (src_aglat->offset >= load_offset)
2930 : break;
2931 :
2932 8044 : load_type = item->value.load_agg.type;
2933 8044 : if (!src_aglat
2934 3766 : || src_aglat->offset > load_offset
2935 3428 : || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2936 11472 : || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2937 4616 : return aglat->set_contains_variable ();
2938 :
2939 : src_lat = src_aglat;
2940 : src_offset = load_offset;
2941 : }
2942 :
2943 6614 : if (src_lat->bottom
2944 6614 : || (!ipcp_versionable_function_p (cs->caller)
2945 6614 : && !src_lat->is_single_const ()))
2946 2003 : return aglat->set_contains_variable ();
2947 :
2948 4611 : ret = propagate_vals_across_arith_jfunc (cs,
2949 : item->value.pass_through.operation,
2950 : load_type,
2951 : item->value.pass_through.operand,
2952 : src_lat, aglat,
2953 : src_offset,
2954 : src_idx,
2955 : item->value.pass_through.op_type,
2956 : item->type);
2957 :
2958 4611 : if (src_lat->contains_variable)
2959 2640 : ret |= aglat->set_contains_variable ();
2960 :
2961 : return ret;
2962 : }
2963 :
2964 : /* Propagate aggregate values across jump function JFUNC that is associated
2965 : with edge CS and put the values into the aggregate lattices of DEST_PLATS. */
2966 :
2967 : static bool
2968 3776960 : propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2969 : struct ipa_jump_func *jfunc,
2970 : class ipcp_param_lattices *dest_plats)
2971 : {
2972 3776960 : bool ret = false;
2973 :
2974 3776960 : if (dest_plats->aggs_bottom)
2975 : return false;
2976 :
2977 912097 : if (jfunc->type == IPA_JF_PASS_THROUGH
2978 912097 : && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2979 : {
2980 171664 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2981 171664 : int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2982 171664 : class ipcp_param_lattices *src_plats;
2983 :
2984 171664 : src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2985 171664 : if (agg_pass_through_permissible_p (src_plats, jfunc))
2986 : {
2987 : /* Currently we do not produce clobber aggregate jump
2988 : functions, replace with merging when we do. */
2989 3854 : gcc_assert (!jfunc->agg.items);
2990 3854 : ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2991 : src_idx, 0);
2992 3854 : return ret;
2993 : }
2994 : }
2995 740433 : else if (jfunc->type == IPA_JF_ANCESTOR
2996 740433 : && ipa_get_jf_ancestor_agg_preserved (jfunc))
2997 : {
2998 1191 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2999 1191 : int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
3000 1191 : class ipcp_param_lattices *src_plats;
3001 :
3002 1191 : src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3003 1191 : if (src_plats->aggs && src_plats->aggs_by_ref)
3004 : {
3005 : /* Currently we do not produce clobber aggregate jump
3006 : functions, replace with merging when we do. */
3007 130 : gcc_assert (!jfunc->agg.items);
3008 130 : ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
3009 : ipa_get_jf_ancestor_offset (jfunc));
3010 : }
3011 1061 : else if (!src_plats->aggs_by_ref)
3012 1057 : ret |= set_agg_lats_to_bottom (dest_plats);
3013 : else
3014 4 : ret |= set_agg_lats_contain_variable (dest_plats);
3015 1191 : return ret;
3016 : }
3017 :
3018 907052 : if (jfunc->agg.items)
3019 : {
3020 37916 : bool pre_existing = dest_plats->aggs != NULL;
3021 37916 : struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
3022 37916 : struct ipa_agg_jf_item *item;
3023 37916 : int i;
3024 :
3025 37916 : if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
3026 16 : return true;
3027 :
3028 37916 : int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
3029 : param_ipa_max_agg_items);
3030 143502 : FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
3031 : {
3032 105602 : HOST_WIDE_INT val_size;
3033 :
3034 105602 : if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
3035 319 : continue;
3036 105283 : val_size = tree_to_shwi (TYPE_SIZE (item->type));
3037 :
3038 105283 : if (merge_agg_lats_step (dest_plats, item->offset, val_size,
3039 : &aglat, pre_existing, &ret, max_agg_items))
3040 : {
3041 105232 : ret |= propagate_aggregate_lattice (cs, item, *aglat);
3042 105232 : aglat = &(*aglat)->next;
3043 : }
3044 51 : else if (dest_plats->aggs_bottom)
3045 : return true;
3046 : }
3047 :
3048 75800 : ret |= set_chain_of_aglats_contains_variable (*aglat);
3049 : }
3050 : else
3051 869136 : ret |= set_agg_lats_contain_variable (dest_plats);
3052 :
3053 907036 : return ret;
3054 : }
3055 :
3056 : /* Return true if on the way from CS->caller to the final (non-alias and
3057 : non-thunk) destination, the call passes through a thunk. */
3058 :
3059 : static bool
3060 1880325 : call_passes_through_thunk (cgraph_edge *cs)
3061 : {
3062 1880325 : cgraph_node *alias_or_thunk = cs->callee;
3063 2009237 : while (alias_or_thunk->alias) /* Skip over any chain of aliases first. */
3064 128912 : alias_or_thunk = alias_or_thunk->get_alias_target ();
3065 1880325 : return alias_or_thunk->thunk;
3066 : }
3067 :
3068 : /* Propagate constants from the caller to the callee of CS, updating the
3069 : lattices of the callee's parameters. Return the OR of all update results. */
3070 :
3071 : static bool
3072 5225271 : propagate_constants_across_call (struct cgraph_edge *cs)
3073 : {
3074 5225271 : class ipa_node_params *callee_info;
3075 5225271 : enum availability availability;
3076 5225271 : cgraph_node *callee;
3077 5225271 : class ipa_edge_args *args;
3078 5225271 : bool ret = false;
3079 5225271 : int i, args_count, parms_count;
3080 :
3081 5225271 : callee = cs->callee->function_symbol (&availability);
3082 5225271 : if (!callee->definition)
3083 : return false;
3084 1901911 : gcc_checking_assert (callee->has_gimple_body_p ());
3085 1901911 : callee_info = ipa_node_params_sum->get (callee);
3086 1901911 : if (!callee_info)
3087 : return false;
3088 :
3089 1893425 : args = ipa_edge_args_sum->get (cs);
3090 1893425 : parms_count = ipa_get_param_count (callee_info);
3091 1700990 : if (parms_count == 0)
3092 : return false;
3093 1700990 : if (!args
3094 1700704 : || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
3095 3401694 : || !opt_for_fn (cs->caller->decl, optimize))
3096 : {
3097 857 : for (i = 0; i < parms_count; i++)
3098 571 : ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3099 : i));
3100 : return ret;
3101 : }
3102 1700704 : args_count = ipa_get_cs_argument_count (args);
3103 :
3104 : /* If this call goes through a thunk we must not propagate to the first (0th)
3105 : parameter. However, we might need to uncover a thunk from below a series
3106 : of aliases first. */
3107 1700704 : if (call_passes_through_thunk (cs))
3108 : {
3109 227 : ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3110 : 0));
3111 227 : i = 1;
3112 : }
3113 : else
3114 : i = 0;
3115 :
3116 5617608 : for (; (i < args_count) && (i < parms_count); i++)
3117 : {
3118 3916904 : struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
3119 3916904 : class ipcp_param_lattices *dest_plats;
3120 3916904 : tree param_type = ipa_get_type (callee_info, i);
3121 :
3122 3916904 : dest_plats = ipa_get_parm_lattices (callee_info, i);
3123 3916904 : if (availability == AVAIL_INTERPOSABLE)
3124 139944 : ret |= set_all_contains_variable (dest_plats);
3125 : else
3126 : {
3127 3776960 : ret |= propagate_scalar_across_jump_function (cs, jump_func,
3128 : &dest_plats->itself,
3129 : param_type);
3130 3776960 : ret |= propagate_context_across_jump_function (cs, jump_func, i,
3131 : &dest_plats->ctxlat);
3132 3776960 : ret
3133 3776960 : |= propagate_bits_across_jump_function (cs, i, jump_func,
3134 : &dest_plats->bits_lattice);
3135 3776960 : ret |= propagate_aggs_across_jump_function (cs, jump_func,
3136 : dest_plats);
3137 3776960 : if (opt_for_fn (callee->decl, flag_ipa_vrp))
3138 3776115 : ret |= propagate_vr_across_jump_function (cs, jump_func,
3139 : dest_plats, param_type);
3140 : else
3141 845 : ret |= dest_plats->m_value_range.set_to_bottom ();
3142 : }
3143 : }
3144 1700887 : for (; i < parms_count; i++) /* Parameters with no corresponding argument. */
3145 183 : ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
3146 :
3147 : return ret;
3148 : }
3149 :
3150 : /* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
3151 : KNOWN_CONTEXTS, and known aggregate values in AVS, return the destination,
3152 : otherwise return NULL_TREE. When a target is returned, *SPECULATIVE tells
3153 : whether it is only a speculative guess. */
3154 :
3155 : static tree
3156 1573303 : ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
3157 : const vec<tree> &known_csts,
3158 : const vec<ipa_polymorphic_call_context> &known_contexts,
3159 : const ipa_argagg_value_list &avs,
3160 : bool *speculative)
3161 : {
3162 1573303 : int param_index = ie->indirect_info->param_index;
3163 1573303 : *speculative = false;
3164 :
3165 1573303 : if (param_index == -1)
3166 : return NULL_TREE;
3167 :
3168 603139 : if (cgraph_simple_indirect_info *sii
3169 603139 : = dyn_cast <cgraph_simple_indirect_info *> (ie->indirect_info))
3170 : {
3171 308845 : tree t = NULL;
3172 :
3173 308845 : if (sii->agg_contents)
3174 : {
3175 72686 : t = NULL;
3176 72686 : if ((unsigned) param_index < known_csts.length ()
3177 72686 : && known_csts[param_index])
3178 61480 : t = ipa_find_agg_cst_from_init (known_csts[param_index],
3179 : sii->offset,
3180 : sii->by_ref);
3181 :
3182 72686 : if (!t && sii->guaranteed_unmodified)
3183 65757 : t = avs.get_value (param_index, sii->offset / BITS_PER_UNIT,
3184 : sii->by_ref);
3185 : }
3186 236159 : else if ((unsigned) param_index < known_csts.length ())
3187 236159 : t = known_csts[param_index];
3188 :
3189 308775 : if (t
3190 204963 : && TREE_CODE (t) == ADDR_EXPR
3191 513519 : && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3192 204744 : return TREE_OPERAND (t, 0);
3193 : else
3194 104101 : return NULL_TREE;
3195 : }
3196 :
3197 294294 : if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3198 : return NULL_TREE;
3199 :
3200 294294 : cgraph_polymorphic_indirect_info *pii
3201 294294 : = as_a <cgraph_polymorphic_indirect_info *> (ie->indirect_info);
3202 294294 : if (!pii->usable_p ())
3203 : return NULL_TREE;
3204 :
3205 294294 : HOST_WIDE_INT anc_offset = pii->offset;
3206 294294 : tree t = NULL;
3207 294294 : tree target = NULL;
3208 294294 : if ((unsigned) param_index < known_csts.length ()
3209 294294 : && known_csts[param_index])
3210 17780 : t = ipa_find_agg_cst_from_init (known_csts[param_index], anc_offset, true);
3211 :
3212 : /* Otherwise try to work out the value of the virtual table pointer from */
3213 : /* the known aggregate values. */
3214 17780 : if (!t)
3215 294285 : t = avs.get_value (param_index, anc_offset / BITS_PER_UNIT, true);
3216 :
3217 : /* If we found the virtual table pointer, lookup the target. */
3218 294285 : if (t)
3219 : {
3220 7989 : tree vtable;
3221 7989 : unsigned HOST_WIDE_INT offset;
3222 7989 : if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3223 : {
3224 7989 : bool can_refer;
3225 7989 : target = gimple_get_virt_method_for_vtable (pii->otr_token, vtable,
3226 : offset, &can_refer);
3227 7989 : if (can_refer)
3228 : {
3229 7926 : if (!target
3230 7926 : || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3231 15732 : || !possible_polymorphic_call_target_p
3232 7806 : (ie, cgraph_node::get (target)))
3233 : {
3234 : /* Do not speculate builtin_unreachable, it is stupid! */
3235 237 : if (pii->vptr_changed)
3236 6364 : return NULL;
3237 237 : target = ipa_impossible_devirt_target (ie, target);
3238 : }
3239 7926 : *speculative = pii->vptr_changed;
3240 7926 : if (!*speculative)
3241 : return target;
3242 : }
3243 : }
3244 : }
3245 :
3246 : /* Do we know the constant value of pointer? */
3247 287930 : if (!t && (unsigned) param_index < known_csts.length ())
3248 52592 : t = known_csts[param_index];
3249 :
3250 287930 : ipa_polymorphic_call_context context;
3251 287930 : if (known_contexts.length () > (unsigned int) param_index)
3252 : {
3253 287553 : context = known_contexts[param_index];
3254 287553 : context.offset_by (anc_offset);
3255 287553 : if (pii->vptr_changed)
3256 49048 : context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3257 : pii->otr_type);
3258 287553 : if (t)
3259 : {
3260 12118 : ipa_polymorphic_call_context ctx2
3261 12118 : = ipa_polymorphic_call_context (t, pii->otr_type, anc_offset);
3262 24236 : if (!ctx2.useless_p ())
3263 10587 : context.combine_with (ctx2, pii->otr_type);
3264 : }
3265 : }
3266 377 : else if (t)
3267 : {
3268 23 : context = ipa_polymorphic_call_context (t, pii->otr_type, anc_offset);
3269 23 : if (pii->vptr_changed)
3270 8 : context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3271 : pii->otr_type);
3272 : }
3273 : else
3274 : return NULL_TREE;
3275 :
3276 287576 : vec <cgraph_node *>targets;
3277 287576 : bool final;
3278 :
3279 287576 : targets = possible_polymorphic_call_targets (pii->otr_type, pii->otr_token,
3280 : context, &final);
3281 299258 : if (!final || targets.length () > 1)
3282 : {
3283 276592 : struct cgraph_node *node;
3284 276592 : if (*speculative)
3285 : return target;
3286 276560 : if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3287 276560 : || ie->speculative || !ie->maybe_hot_p ())
3288 188980 : return NULL;
3289 87580 : node = try_speculative_devirtualization (pii->otr_type, pii->otr_token,
3290 : context);
3291 87580 : if (node)
3292 : {
3293 653 : *speculative = true;
3294 653 : target = node->decl;
3295 : }
3296 : else
3297 : return NULL;
3298 : }
3299 : else
3300 : {
3301 10984 : *speculative = false;
3302 10984 : if (targets.length () == 1)
3303 10945 : target = targets[0]->decl;
3304 : else
3305 39 : target = ipa_impossible_devirt_target (ie, NULL_TREE);
3306 : }
3307 :
3308 11637 : if (target && !possible_polymorphic_call_target_p (ie,
3309 : cgraph_node::get (target)))
3310 : {
3311 54 : if (*speculative)
3312 : return NULL;
3313 40 : target = ipa_impossible_devirt_target (ie, target);
3314 : }
3315 :
3316 : return target;
3317 : }
3318 :
3319 : /* If an indirect edge IE can be turned into a direct one based on data in
3320 : AVALS, return the destination. Store into *SPECULATIVE a boolean determining
3321 : whether the discovered target is only a speculative guess. */
3322 :
3323 : tree
3324 1403736 : ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3325 : ipa_call_arg_values *avals,
3326 : bool *speculative)
3327 : {
3328 1403736 : ipa_argagg_value_list avl (avals);
3329 1403736 : return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3330 1403736 : avals->m_known_contexts,
3331 1403736 : avl, speculative);
3332 : }
3333 :
3334 : /* Calculate devirtualization time bonus for NODE, assuming we know information
3335 : about arguments stored in AVALS. The bonus is accumulated over all indirect
3336 : calls of NODE for which a target can be determined.
3337 : FIXME: This function will also consider devirtualization of calls that are
3338 : known to be dead in the clone. */
3339 :
3340 : static sreal
3341 1501726 : devirtualization_time_bonus (struct cgraph_node *node,
3342 : ipa_auto_call_arg_values *avals)
3343 : {
3344 1501726 : struct cgraph_edge *ie;
3345 1501726 : sreal res = 0;
3346 :
3347 1669382 : for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3348 : {
3349 167656 : struct cgraph_node *callee;
3350 167656 : class ipa_fn_summary *isummary;
3351 167656 : enum availability avail;
3352 167656 : tree target;
3353 167656 : bool speculative;
3354 :
3355 167656 : ipa_argagg_value_list avl (avals);
3356 167656 : target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3357 : avals->m_known_contexts,
3358 : avl, &speculative);
3359 167656 : if (!target)
3360 166674 : continue;
3361 :
3362 : /* Only bare minimum benefit for clearly un-inlineable targets. */
3363 3232 : res = res + ie->combined_sreal_frequency ();
3364 3232 : callee = cgraph_node::get (target);
3365 3232 : if (!callee || !callee->definition)
3366 624 : continue;
3367 2608 : callee = callee->function_symbol (&avail);
3368 2608 : if (avail < AVAIL_AVAILABLE)
3369 0 : continue;
3370 2608 : isummary = ipa_fn_summaries->get (callee);
3371 2608 : if (!isummary || !isummary->inlinable)
3372 65 : continue;
3373 :
3374 2543 : int savings = 0;
3375 2543 : int size = ipa_size_summaries->get (callee)->size;
3376 : /* FIXME: The values below need re-considering and perhaps also
3377 : integrating into the cost metrics, at least in some very basic way. */
3378 2543 : int max_inline_insns_auto
3379 2543 : = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3380 2543 : if (size <= max_inline_insns_auto / 4)
3381 402 : savings = 31 / ((int)speculative + 1);
3382 2141 : else if (size <= max_inline_insns_auto / 2)
3383 390 : savings = 15 / ((int)speculative + 1);
3384 3312 : else if (size <= max_inline_insns_auto
3385 1751 : || DECL_DECLARED_INLINE_P (callee->decl))
3386 190 : savings = 7 / ((int)speculative + 1);
3387 : else
3388 1561 : continue;
3389 982 : res = res + ie->combined_sreal_frequency () * (sreal) savings;
3390 : }
3391 :
3392 1501726 : return res;
3393 : }
3394 :
3395 : /* Return time bonus for NODE incurred because of loop hints stored in ESTIMATES. */
3396 :
3397 : static sreal
3398 287154 : hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3399 : {
3400 287154 : sreal result = 0;
3401 287154 : ipa_hints hints = estimates.hints;
3402 287154 : if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3403 27394 : result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3404 :
3405 287154 : sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3406 :
3407 287154 : if (hints & INLINE_HINT_loop_iterations)
3408 19104 : result += estimates.loops_with_known_iterations * bonus_for_one;
3409 :
3410 287154 : if (hints & INLINE_HINT_loop_stride)
3411 11062 : result += estimates.loops_with_known_strides * bonus_for_one;
3412 :
3413 287154 : return result;
3414 : }
3415 :
3416 : /* If there is a reason to penalize NODE, described by INFO, in the cloning
3417 : goodness evaluation, scale EVALUATION down accordingly and return it. */
3418 :
3419 : static inline sreal
3420 103641 : incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3421 : sreal evaluation)
3422 : {
3423 103641 : if (info->node_within_scc && !info->node_is_self_scc)
3424 1744 : evaluation = (evaluation
3425 1744 : * (100 - opt_for_fn (node->decl,
3426 3488 : param_ipa_cp_recursion_penalty))) / 100;
3427 :
3428 103641 : if (info->node_calling_single_call)
3429 6883 : evaluation = (evaluation
3430 6883 : * (100 - opt_for_fn (node->decl,
3431 6883 : param_ipa_cp_single_call_penalty)))
3432 13766 : / 100;
3433 :
3434 103641 : return evaluation;
3435 : }
3436 :
3437 : /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3438 : and SIZE_COST and with the sum of frequencies of incoming edges to the
3439 : potential new clone in FREQ_SUM and their IPA profile count in COUNT_SUM.
3440 : CUR_SWEEP is the number of the current IPA-CP sweep over the call-graph in the decision stage. */
3441 :
3442 : static bool
3443 342718 : good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3444 : sreal freq_sum, profile_count count_sum,
3445 : int size_cost, bool called_without_ipa_profile,
3446 : int cur_sweep)
3447 : {
3448 342718 : gcc_assert (count_sum.ipa () == count_sum);
3449 342718 : if (count_sum.quality () == AFDO)
3450 0 : count_sum = count_sum.force_nonzero ();
3451 342718 : if (time_benefit == 0
3452 289925 : || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3453 : /* If there is no call which was executed in profiling or where
3454 : profile is missing, we do not want to clone. */
3455 103733 : || (!called_without_ipa_profile && !count_sum.nonzero_p ()))
3456 : {
3457 239077 : if (dump_file && (dump_flags & TDF_DETAILS))
3458 24 : fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3459 : "size: %i): Definitely not good or prohibited.\n",
3460 : time_benefit.to_double (), size_cost);
3461 239077 : return false;
3462 : }
3463 :
3464 103641 : gcc_assert (size_cost > 0);
3465 :
3466 103641 : ipa_node_params *info = ipa_node_params_sum->get (node);
3467 103641 : int num_sweeps = opt_for_fn (node->decl, param_ipa_cp_sweeps);
3468 103641 : int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3469 103641 : eval_threshold = (eval_threshold * num_sweeps) / cur_sweep;
3470 : /* If we know the IPA execution counts, we can estimate overall
3471 : speedup of the program. */
3472 103641 : if (count_sum.nonzero_p ())
3473 : {
3474 403 : profile_count saved_time = count_sum * time_benefit;
3475 403 : sreal evaluation = saved_time.to_sreal_scale (profile_count::one ())
3476 806 : / size_cost;
3477 403 : evaluation = incorporate_penalties (node, info, evaluation);
3478 :
3479 403 : if (dump_file && (dump_flags & TDF_DETAILS))
3480 : {
3481 0 : fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3482 : "size: %i, count_sum: ", time_benefit.to_double (),
3483 : size_cost);
3484 0 : count_sum.dump (dump_file);
3485 0 : fprintf (dump_file, ", overall time saved: ");
3486 0 : saved_time.dump (dump_file);
3487 0 : fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3488 0 : info->node_within_scc
3489 0 : ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3490 0 : info->node_calling_single_call ? ", single_call" : "",
3491 : evaluation.to_double (), eval_threshold);
3492 : }
3493 403 : gcc_checking_assert (saved_time == saved_time.ipa ());
3494 403 : if (!maybe_hot_count_p (NULL, saved_time))
3495 : {
3496 27 : if (dump_file && (dump_flags & TDF_DETAILS))
3497 0 : fprintf (dump_file, " not cloning: time saved is not hot\n");
3498 : }
3499 : /* Evaluation approximately corresponds to time saved per instruction
3500 : introduced. This is likely almost always going to be true, since we
3501 : already checked that time saved is large enough to be considered
3502 : hot. */
3503 376 : else if (evaluation >= (sreal)eval_threshold)
3504 403 : return true;
3505 : /* If all call sites have profile known; we know we do not want to clone.
3506 : If there are calls with unknown profile; try local heuristics. */
3507 377 : if (!called_without_ipa_profile)
3508 : return false;
3509 : }
3510 103238 : sreal evaluation = (time_benefit * freq_sum) / size_cost;
3511 103238 : evaluation = incorporate_penalties (node, info, evaluation);
3512 103238 : evaluation *= 1000;
3513 :
3514 103238 : if (dump_file && (dump_flags & TDF_DETAILS))
3515 356 : fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3516 : "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3517 : "threshold: %i\n",
3518 : time_benefit.to_double (), size_cost, freq_sum.to_double (),
3519 178 : info->node_within_scc
3520 26 : ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3521 178 : info->node_calling_single_call ? ", single_call" : "",
3522 : evaluation.to_double (), eval_threshold);
3523 :
3524 103238 : return evaluation >= eval_threshold;
3525 : }
3526 :
3527 : /* Grow vectors in AVALS and fill them with information about values of
3528 : parameters that are known to be independent of the context. INFO describes
3529 : the function. If REMOVABLE_PARAMS_COST is non-NULL, the movement cost of
3530 : all removable parameters will be stored in it.
3531 :
3532 : TODO: Also grow context independent value range vectors. */
3533 :
3534 : static bool
3535 2139707 : gather_context_independent_values (class ipa_node_params *info,
3536 : ipa_auto_call_arg_values *avals,
3537 : int *removable_params_cost)
3538 : {
3539 2139707 : int i, count = ipa_get_param_count (info);
3540 2139707 : bool ret = false;
3541 :
3542 2139707 : avals->m_known_vals.safe_grow_cleared (count, true);
3543 2139707 : avals->m_known_contexts.safe_grow_cleared (count, true);
3544 :
3545 2139707 : if (removable_params_cost)
3546 2139707 : *removable_params_cost = 0;
3547 :
3548 7099969 : for (i = 0; i < count; i++)
3549 : {
3550 4960262 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3551 4960262 : ipcp_lattice<tree> *lat = &plats->itself;
3552 :
3553 4960262 : if (lat->is_single_const ())
3554 : {
3555 34383 : ipcp_value<tree> *val = lat->values;
3556 34383 : gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3557 34383 : avals->m_known_vals[i] = val->value;
3558 34383 : if (removable_params_cost)
3559 68766 : *removable_params_cost
3560 34383 : += estimate_move_cost (TREE_TYPE (val->value), false);
3561 : ret = true;
3562 : }
3563 4925879 : else if (removable_params_cost
3564 4925879 : && !ipa_is_param_used (info, i))
3565 953896 : *removable_params_cost
3566 476948 : += ipa_get_param_move_cost (info, i);
3567 :
3568 4960262 : if (!ipa_is_param_used (info, i))
3569 482093 : continue;
3570 :
3571 4478169 : ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3572 : /* Do not account known context as reason for cloning. We can see
3573 : if it permits devirtualization. */
3574 4478169 : if (ctxlat->is_single_const ())
3575 22425 : avals->m_known_contexts[i] = ctxlat->values->value;
3576 :
3577 4478169 : ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
3578 : }
3579 :
3580 2139707 : return ret;
3581 : }
3582 :
3583 : /* Perform time and size measurement of NODE with the context given in AVALS,
3584 : calculate the benefit compared to the node without specialization and store
3585 : it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3586 : context-independent or unused removable parameters and EST_MOVE_COST, the
3587 : estimated movement of the considered parameter. */
3588 :
3589 : static void
3590 78271 : perform_estimation_of_a_value (cgraph_node *node,
3591 : ipa_auto_call_arg_values *avals,
3592 : int removable_params_cost, int est_move_cost,
3593 : ipcp_value_base *val)
3594 : {
3595 78271 : sreal time_benefit;
3596 78271 : ipa_call_estimates estimates;
3597 :
3598 78271 : estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3599 :
3600 : /* Extern inline functions have no cloning local time benefits because they
3601 : will be inlined anyway. The only reason to clone them is if it enables
3602 : optimization in any of the functions they call. */
3603 78271 : if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3604 114 : time_benefit = 0;
3605 : else
3606 78157 : time_benefit = (estimates.nonspecialized_time - estimates.time)
3607 156314 : + hint_time_bonus (node, estimates)
3608 156314 : + (devirtualization_time_bonus (node, avals)
3609 156314 : + removable_params_cost + est_move_cost);
3610 :
3611 78271 : int size = estimates.size;
3612 78271 : gcc_checking_assert (size >=0);
3613 : /* The inliner-heuristics based estimates may think that in certain
3614 : contexts some functions do not have any size at all but we want
3615 : all specializations to have at least a tiny cost, not least not to
3616 : divide by zero. */
3617 78271 : if (size == 0)
3618 0 : size = 1;
3619 :
3620 78271 : val->local_time_benefit = time_benefit;
3621 78271 : val->local_size_cost = size;
3622 78271 : }
3623 :
3624 : /* Get the overall limit of growth based on parameters extracted from NODE. It
3625 : does not really make sense to mix functions with different overall growth
3626 : limits or even number of sweeps but it is possible and if it happens, we do
3627 : not want to select one limit at random, so get the limits from NODE. */
3628 :
3629 : static long
3630 212037 : get_max_overall_size (cgraph_node *node)
3631 : {
3632 212037 : long max_new_size = orig_overall_size;
3633 212037 : long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3634 212037 : if (max_new_size < large_unit)
3635 : max_new_size = large_unit;
3636 212037 : int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3637 212037 : max_new_size += max_new_size * unit_growth / 100 + 1;
3638 :
3639 212037 : return max_new_size;
3640 : }
3641 :
3642 : /* Return true if NODE should be cloned just for a parameter removal, possibly
3643 : dumping a reason if not. */
3644 :
3645 : static bool
3646 188941 : clone_for_param_removal_p (cgraph_node *node)
3647 : {
3648 188941 : if (!node->can_change_signature)
3649 : {
3650 4996 : if (dump_file && (dump_flags & TDF_DETAILS))
3651 0 : fprintf (dump_file, " Not considering cloning to remove parameters, "
3652 : "function cannot change signature.\n");
3653 4996 : return false;
3654 : }
3655 183945 : if (node->can_be_local_p ())
3656 : {
3657 136086 : if (dump_file && (dump_flags & TDF_DETAILS))
3658 0 : fprintf (dump_file, " Not considering cloning to remove parameters, "
3659 : "IPA-SRA can do it potentially better.\n");
3660 136086 : return false;
3661 : }
3662 : return true;
3663 : }
3664 :
3665 : /* Iterate over known values of parameters of NODE and estimate the local
3666 : effects in terms of time and size they have. */
3667 :
3668 : static void
3669 1259818 : estimate_local_effects (struct cgraph_node *node)
3670 : {
3671 1259818 : ipa_node_params *info = ipa_node_params_sum->get (node);
3672 1259818 : int count = ipa_get_param_count (info);
3673 1030476 : int removable_params_cost;
3674 :
3675 1030476 : if (!count || !ipcp_versionable_function_p (node))
3676 397105 : return;
3677 :
3678 862713 : if (dump_file && (dump_flags & TDF_DETAILS))
3679 117 : fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3680 :
3681 862713 : ipa_auto_call_arg_values avals;
3682 862713 : gather_context_independent_values (info, &avals, &removable_params_cost);
3683 :
3684 2880235 : for (int i = 0; i < count; i++)
3685 : {
3686 2017522 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3687 2017522 : ipcp_lattice<tree> *lat = &plats->itself;
3688 2017522 : ipcp_value<tree> *val;
3689 :
3690 4013756 : if (lat->bottom
3691 210381 : || !lat->values
3692 2056078 : || avals.m_known_vals[i])
3693 1996234 : continue;
3694 :
3695 65218 : for (val = lat->values; val; val = val->next)
3696 : {
3697 43930 : gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3698 43930 : avals.m_known_vals[i] = val->value;
3699 :
3700 43930 : int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3701 43930 : perform_estimation_of_a_value (node, &avals, removable_params_cost,
3702 : emc, val);
3703 :
3704 43930 : if (dump_file && (dump_flags & TDF_DETAILS))
3705 : {
3706 44 : fprintf (dump_file, " - estimates for value ");
3707 44 : print_ipcp_constant_value (dump_file, val->value);
3708 44 : fprintf (dump_file, " for ");
3709 44 : ipa_dump_param (dump_file, info, i);
3710 44 : fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3711 : val->local_time_benefit.to_double (),
3712 : val->local_size_cost);
3713 : }
3714 : }
3715 21288 : avals.m_known_vals[i] = NULL_TREE;
3716 : }
3717 :
3718 2880235 : for (int i = 0; i < count; i++)
3719 : {
3720 2017522 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3721 :
3722 2017522 : if (!plats->virt_call)
3723 2009976 : continue;
3724 :
3725 7546 : ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3726 7546 : ipcp_value<ipa_polymorphic_call_context> *val;
3727 :
3728 14934 : if (ctxlat->bottom
3729 2735 : || !ctxlat->values
3730 10275 : || !avals.m_known_contexts[i].useless_p ())
3731 7388 : continue;
3732 :
3733 384 : for (val = ctxlat->values; val; val = val->next)
3734 : {
3735 226 : avals.m_known_contexts[i] = val->value;
3736 226 : perform_estimation_of_a_value (node, &avals, removable_params_cost,
3737 : 0, val);
3738 :
3739 226 : if (dump_file && (dump_flags & TDF_DETAILS))
3740 : {
3741 0 : fprintf (dump_file, " - estimates for polymorphic context ");
3742 0 : print_ipcp_constant_value (dump_file, val->value);
3743 0 : fprintf (dump_file, " for ");
3744 0 : ipa_dump_param (dump_file, info, i);
3745 0 : fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3746 : val->local_time_benefit.to_double (),
3747 : val->local_size_cost);
3748 : }
3749 : }
3750 158 : avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3751 : }
3752 :
3753 862713 : unsigned all_ctx_len = avals.m_known_aggs.length ();
3754 862713 : auto_vec<ipa_argagg_value, 32> all_ctx;
3755 862713 : all_ctx.reserve_exact (all_ctx_len);
3756 862713 : all_ctx.splice (avals.m_known_aggs);
3757 862713 : avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);
3758 :
3759 862713 : unsigned j = 0;
3760 2880235 : for (int index = 0; index < count; index++)
3761 : {
3762 2017522 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);
3763 :
3764 2017522 : if (plats->aggs_bottom || !plats->aggs)
3765 1998493 : continue;
3766 :
3767 72995 : for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3768 : {
3769 53966 : ipcp_value<tree> *val;
3770 53610 : if (aglat->bottom || !aglat->values
3771 : /* If the following is true, the one value is already part of all
3772 : context estimations. */
3773 100299 : || (!plats->aggs_contain_variable
3774 25101 : && aglat->is_single_const ()))
3775 28383 : continue;
3776 :
3777 25583 : unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
3778 25583 : while (j < all_ctx_len
3779 33916 : && (all_ctx[j].index < index
3780 3317 : || (all_ctx[j].index == index
3781 2333 : && all_ctx[j].unit_offset < unit_offset)))
3782 : {
3783 3209 : avals.m_known_aggs[j] = all_ctx[j];
3784 3209 : j++;
3785 : }
3786 :
3787 34647 : for (unsigned k = j; k < all_ctx_len; k++)
3788 9064 : avals.m_known_aggs[k+1] = all_ctx[k];
3789 :
3790 59698 : for (val = aglat->values; val; val = val->next)
3791 : {
3792 34115 : avals.m_known_aggs[j].value = val->value;
3793 34115 : avals.m_known_aggs[j].unit_offset = unit_offset;
3794 34115 : avals.m_known_aggs[j].index = index;
3795 34115 : avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
3796 34115 : avals.m_known_aggs[j].killed = false;
3797 :
3798 34115 : perform_estimation_of_a_value (node, &avals,
3799 : removable_params_cost, 0, val);
3800 :
3801 34115 : if (dump_file && (dump_flags & TDF_DETAILS))
3802 : {
3803 79 : fprintf (dump_file, " - estimates for value ");
3804 79 : print_ipcp_constant_value (dump_file, val->value);
3805 79 : fprintf (dump_file, " for ");
3806 79 : ipa_dump_param (dump_file, info, index);
3807 158 : fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3808 : "]: time_benefit: %g, size: %i\n",
3809 79 : plats->aggs_by_ref ? "ref " : "",
3810 : aglat->offset,
3811 : val->local_time_benefit.to_double (),
3812 : val->local_size_cost);
3813 : }
3814 : }
3815 : }
3816 : }
3817 862713 : }
3818 :
3819 :
3820 : /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3821 : topological sort of values. */
3822 :
3823 : template <typename valtype>
3824 : void
3825 136187 : value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3826 : {
3827 : ipcp_value_source<valtype> *src;
3828 :
3829 136187 : if (cur_val->dfs)
3830 : return;
3831 :
3832 136025 : dfs_counter++;
3833 136025 : cur_val->dfs = dfs_counter;
3834 136025 : cur_val->low_link = dfs_counter;
3835 :
3836 136025 : cur_val->topo_next = stack;
3837 136025 : stack = cur_val;
3838 136025 : cur_val->on_stack = true;
3839 :
3840 592892 : for (src = cur_val->sources; src; src = src->next)
3841 456867 : if (src->val)
3842 : {
3843 20944 : if (src->val->dfs == 0)
3844 : {
3845 186 : add_val (src->val);
3846 186 : if (src->val->low_link < cur_val->low_link)
3847 19 : cur_val->low_link = src->val->low_link;
3848 : }
3849 20758 : else if (src->val->on_stack
3850 1587 : && src->val->dfs < cur_val->low_link)
3851 73 : cur_val->low_link = src->val->dfs;
3852 : }
3853 :
3854 136025 : if (cur_val->dfs == cur_val->low_link)
3855 : {
3856 : ipcp_value<valtype> *v, *scc_list = NULL;
3857 :
3858 : do
3859 : {
3860 136025 : v = stack;
3861 136025 : stack = v->topo_next;
3862 136025 : v->on_stack = false;
3863 136025 : v->scc_no = cur_val->dfs;
3864 :
3865 136025 : v->scc_next = scc_list;
3866 136025 : scc_list = v;
3867 : }
3868 136025 : while (v != cur_val);
3869 :
3870 135937 : cur_val->topo_next = values_topo;
3871 135937 : values_topo = cur_val;
3872 : }
3873 : }
3874 :
3875 : /* Add all values in lattices associated with NODE to the topological sort if
3876 : they are not there yet. */
3877 :
3878 : static void
3879 1259818 : add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3880 : {
3881 1259818 : ipa_node_params *info = ipa_node_params_sum->get (node);
3882 1259818 : int i, count = ipa_get_param_count (info);
3883 :
3884 3572148 : for (i = 0; i < count; i++)
3885 : {
3886 2312330 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3887 2312330 : ipcp_lattice<tree> *lat = &plats->itself;
3888 2312330 : struct ipcp_agg_lattice *aglat;
3889 :
3890 2312330 : if (!lat->bottom)
3891 : {
3892 221089 : ipcp_value<tree> *val;
3893 293075 : for (val = lat->values; val; val = val->next)
3894 71986 : topo->constants.add_val (val);
3895 : }
3896 :
3897 2312330 : if (!plats->aggs_bottom)
3898 277493 : for (aglat = plats->aggs; aglat; aglat = aglat->next)
3899 56471 : if (!aglat->bottom)
3900 : {
3901 56115 : ipcp_value<tree> *val;
3902 112668 : for (val = aglat->values; val; val = val->next)
3903 56553 : topo->constants.add_val (val);
3904 : }
3905 :
3906 2312330 : ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3907 2312330 : if (!ctxlat->bottom)
3908 : {
3909 222091 : ipcp_value<ipa_polymorphic_call_context> *ctxval;
3910 229553 : for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3911 7462 : topo->contexts.add_val (ctxval);
3912 : }
3913 : }
3914 1259818 : }
3915 :
3916 : /* One pass of constants propagation along the call graph edges, from callers
3917 : to callees (requires topological ordering in TOPO), iterate over strongly
3918 : connected components. */
3919 :
3920 : static void
3921 130823 : propagate_constants_topo (class ipa_topo_info *topo)
3922 : {
3923 130823 : int i;
3924 :
3925 1470998 : for (i = topo->nnodes - 1; i >= 0; i--)
3926 : {
3927 1340175 : unsigned j;
3928 1340175 : struct cgraph_node *v, *node = topo->order[i];
3929 1340175 : vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3930 :
3931 : /* First, iteratively propagate within the strongly connected component
3932 : until all lattices stabilize. */
3933 2685204 : FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3934 1345029 : if (v->has_gimple_body_p ())
3935 : {
3936 1270070 : if (opt_for_fn (v->decl, flag_ipa_cp)
3937 1270070 : && opt_for_fn (v->decl, optimize))
3938 1259818 : push_node_to_stack (topo, v);
3939 : /* When V is not optimized, we can not push it to stack, but
3940 : still we need to set all its callees lattices to bottom. */
3941 : else
3942 : {
3943 23265 : for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3944 13013 : propagate_constants_across_call (cs);
3945 : }
3946 : }
3947 :
3948 1340175 : v = pop_node_from_stack (topo);
3949 3943358 : while (v)
3950 : {
3951 1263008 : struct cgraph_edge *cs;
3952 1263008 : class ipa_node_params *info = NULL;
3953 1263008 : bool self_scc = true;
3954 :
3955 6501694 : for (cs = v->callees; cs; cs = cs->next_callee)
3956 5238686 : if (ipa_edge_within_scc (cs))
3957 : {
3958 29552 : cgraph_node *callee = cs->callee->function_symbol ();
3959 :
3960 29552 : if (v != callee)
3961 17987 : self_scc = false;
3962 :
3963 29552 : if (!info)
3964 : {
3965 13836 : info = ipa_node_params_sum->get (v);
3966 13836 : info->node_within_scc = true;
3967 : }
3968 :
3969 29552 : if (propagate_constants_across_call (cs))
3970 4159 : push_node_to_stack (topo, callee);
3971 : }
3972 :
3973 1263008 : if (info)
3974 13836 : info->node_is_self_scc = self_scc;
3975 :
3976 1263008 : v = pop_node_from_stack (topo);
3977 : }
3978 :
3979 : /* Afterwards, propagate along edges leading out of the SCC, calculates
3980 : the local effects of the discovered constants and all valid values to
3981 : their topological sort. */
3982 2685204 : FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3983 1345029 : if (v->has_gimple_body_p ()
3984 1270070 : && opt_for_fn (v->decl, flag_ipa_cp)
3985 2604847 : && opt_for_fn (v->decl, optimize))
3986 : {
3987 1259818 : struct cgraph_edge *cs;
3988 :
3989 1259818 : estimate_local_effects (v);
3990 1259818 : add_all_node_vals_to_toposort (v, topo);
3991 6464694 : for (cs = v->callees; cs; cs = cs->next_callee)
3992 5204876 : if (!ipa_edge_within_scc (cs))
3993 5182706 : propagate_constants_across_call (cs);
3994 : }
3995 1340175 : cycle_nodes.release ();
3996 : }
3997 130823 : }
3998 :
3999 : /* Propagate the estimated effects of individual values along the topological
4000 : from the dependent values to those they depend on. */
4001 :
4002 : template <typename valtype>
4003 : void
4004 261646 : value_topo_info<valtype>::propagate_effects ()
4005 : {
4006 : ipcp_value<valtype> *base;
4007 261646 : hash_set<ipcp_value<valtype> *> processed_srcvals;
4008 :
4009 397583 : for (base = values_topo; base; base = base->topo_next)
4010 : {
4011 : ipcp_value_source<valtype> *src;
4012 : ipcp_value<valtype> *val;
4013 135937 : sreal time = 0;
4014 135937 : HOST_WIDE_INT size = 0;
4015 :
4016 271962 : for (val = base; val; val = val->scc_next)
4017 : {
4018 136025 : time = time + val->local_time_benefit + val->prop_time_benefit;
4019 136025 : size = size + val->local_size_cost + val->prop_size_cost;
4020 : }
4021 :
4022 271962 : for (val = base; val; val = val->scc_next)
4023 : {
4024 136025 : processed_srcvals.empty ();
4025 592892 : for (src = val->sources; src; src = src->next)
4026 456867 : if (src->val
4027 456867 : && cs_interesting_for_ipcp_p (src->cs))
4028 : {
4029 20904 : if (!processed_srcvals.add (src->val))
4030 : {
4031 16737 : HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
4032 16737 : if (prop_size < INT_MAX)
4033 16737 : src->val->prop_size_cost = prop_size;
4034 : else
4035 0 : continue;
4036 : }
4037 :
4038 20904 : int special_factor = 1;
4039 20904 : if (val->same_scc (src->val))
4040 : special_factor
4041 1675 : = opt_for_fn(src->cs->caller->decl,
4042 : param_ipa_cp_recursive_freq_factor);
4043 19229 : else if (val->self_recursion_generated_p ()
4044 19229 : && (src->cs->callee->function_symbol ()
4045 822 : == src->cs->caller))
4046 : {
4047 822 : int max_recur_gen_depth
4048 822 : = opt_for_fn(src->cs->caller->decl,
4049 : param_ipa_cp_max_recursive_depth);
4050 822 : special_factor = max_recur_gen_depth
4051 822 : - val->self_recursion_generated_level + 1;
4052 : }
4053 :
4054 20904 : src->val->prop_time_benefit
4055 41808 : += time * special_factor * src->cs->sreal_frequency ();
4056 : }
4057 :
4058 136025 : if (size < INT_MAX)
4059 : {
4060 136025 : val->prop_time_benefit = time;
4061 136025 : val->prop_size_cost = size;
4062 : }
4063 : else
4064 : {
4065 0 : val->prop_time_benefit = 0;
4066 0 : val->prop_size_cost = 0;
4067 : }
4068 : }
4069 : }
4070 261646 : }
4071 :
4072 :
4073 : /* Propagate constants, polymorphic contexts and their effects from the
4074 : summaries interprocedurally. */
4075 :
4076 : static void
4077 130823 : ipcp_propagate_stage (class ipa_topo_info *topo)
4078 : {
4079 130823 : struct cgraph_node *node;
4080 :
4081 130823 : if (dump_file)
4082 161 : fprintf (dump_file, "\n Propagating constants:\n\n");
4083 :
4084 1475856 : FOR_EACH_DEFINED_FUNCTION (node)
4085 : {
4086 1345033 : if (node->has_gimple_body_p ()
4087 1270070 : && opt_for_fn (node->decl, flag_ipa_cp)
4088 2604851 : && opt_for_fn (node->decl, optimize))
4089 : {
4090 1259818 : ipa_node_params *info = ipa_node_params_sum->get (node);
4091 1259818 : determine_versionability (node, info);
4092 :
4093 1259818 : unsigned nlattices = ipa_get_param_count (info);
4094 1259818 : info->lattices.safe_grow_cleared (nlattices, true);
4095 1259818 : initialize_node_lattices (node);
4096 :
4097 1259818 : int num_sweeps = opt_for_fn (node->decl, param_ipa_cp_sweeps);
4098 1259818 : if (max_number_sweeps < num_sweeps)
4099 121583 : max_number_sweeps = num_sweeps;
4100 : }
4101 1345033 : ipa_size_summary *s = ipa_size_summaries->get (node);
4102 1345033 : if (node->definition && !node->alias && s != NULL)
4103 1271003 : overall_size += s->self_size;
4104 : }
4105 :
4106 130823 : orig_overall_size = overall_size;
4107 :
4108 130823 : if (dump_file)
4109 161 : fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4110 :
4111 130823 : propagate_constants_topo (topo);
4112 130823 : if (flag_checking)
4113 130815 : ipcp_verify_propagated_values ();
4114 130823 : topo->constants.propagate_effects ();
4115 130823 : topo->contexts.propagate_effects ();
4116 :
4117 130823 : if (dump_file)
4118 : {
4119 161 : fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4120 161 : print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4121 : }
4122 130823 : }
4123 :
4124 : /* Discover newly direct outgoing edges from NODE which is a new clone with
4125 : known KNOWN_CSTS and make them direct. */
4126 :
4127 : static void
4128 21303 : ipcp_discover_new_direct_edges (struct cgraph_node *node,
4129 : vec<tree> known_csts,
4130 : vec<ipa_polymorphic_call_context>
4131 : known_contexts,
4132 : vec<ipa_argagg_value, va_gc> *aggvals)
4133 : {
4134 21303 : struct cgraph_edge *ie, *next_ie;
4135 21303 : bool found = false;
4136 :
4137 23214 : for (ie = node->indirect_calls; ie; ie = next_ie)
4138 : {
4139 1911 : tree target;
4140 1911 : bool speculative;
4141 :
4142 1911 : next_ie = ie->next_callee;
4143 1911 : ipa_argagg_value_list avs (aggvals);
4144 1911 : target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4145 : avs, &speculative);
4146 1911 : if (target)
4147 : {
4148 561 : cgraph_polymorphic_indirect_info *pii
4149 561 : = dyn_cast <cgraph_polymorphic_indirect_info *> (ie->indirect_info);
4150 561 : cgraph_simple_indirect_info *sii
4151 1055 : = dyn_cast <cgraph_simple_indirect_info *> (ie->indirect_info);
4152 416 : bool agg_contents = sii && sii->agg_contents;
4153 561 : bool polymorphic = !!pii;
4154 561 : int param_index = ie->indirect_info->param_index;
4155 561 : struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4156 : speculative);
4157 561 : found = true;
4158 :
4159 561 : if (cs && !agg_contents && !polymorphic)
4160 : {
4161 349 : ipa_node_params *info = ipa_node_params_sum->get (node);
4162 349 : int c = ipa_get_controlled_uses (info, param_index);
4163 349 : if (c != IPA_UNDESCRIBED_USE
4164 349 : && !ipa_get_param_load_dereferenced (info, param_index))
4165 : {
4166 345 : struct ipa_ref *to_del;
4167 :
4168 345 : c--;
4169 345 : ipa_set_controlled_uses (info, param_index, c);
4170 345 : if (dump_file && (dump_flags & TDF_DETAILS))
4171 3 : fprintf (dump_file, " controlled uses count of param "
4172 : "%i bumped down to %i\n", param_index, c);
4173 345 : if (c == 0
4174 345 : && (to_del = node->find_reference (cs->callee, NULL, 0,
4175 : IPA_REF_ADDR)))
4176 : {
4177 281 : if (dump_file && (dump_flags & TDF_DETAILS))
4178 3 : fprintf (dump_file, " and even removing its "
4179 : "cloning-created reference\n");
4180 281 : to_del->remove_reference ();
4181 : }
4182 : }
4183 : }
4184 : }
4185 : }
4186 : /* Turning calls to direct calls will improve overall summary. */
4187 21303 : if (found)
4188 464 : ipa_update_overall_fn_summary (node);
4189 21303 : }
4190 :
4191 : class edge_clone_summary;
4192 : static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4193 :
4194 : /* Edge clone summary. */
4195 :
4196 : class edge_clone_summary
4197 : {
4198 : public:
4199 : /* Default constructor. */
4200 377352 : edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4201 :
4202 : /* Default destructor. */
4203 377352 : ~edge_clone_summary ()
4204 : {
4205 377352 : if (prev_clone)
4206 33642 : edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4207 377352 : if (next_clone)
4208 158273 : edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4209 377352 : }
4210 :
4211 : cgraph_edge *prev_clone;
4212 : cgraph_edge *next_clone;
4213 : };
4214 :
4215 : class edge_clone_summary_t:
4216 : public call_summary <edge_clone_summary *>
4217 : {
4218 : public:
4219 130823 : edge_clone_summary_t (symbol_table *symtab):
4220 261646 : call_summary <edge_clone_summary *> (symtab)
4221 : {
4222 130823 : m_initialize_when_cloning = true;
4223 : }
4224 :
4225 : void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4226 : edge_clone_summary *src_data,
4227 : edge_clone_summary *dst_data) final override;
4228 : };
4229 :
4230 : /* Edge duplication hook. */
4231 :
4232 : void
4233 191322 : edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4234 : edge_clone_summary *src_data,
4235 : edge_clone_summary *dst_data)
4236 : {
4237 191322 : if (src_data->next_clone)
4238 5283 : edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4239 191322 : dst_data->prev_clone = src_edge;
4240 191322 : dst_data->next_clone = src_data->next_clone;
4241 191322 : src_data->next_clone = dst_edge;
4242 191322 : }
4243 :
4244 : /* Return true is CS calls DEST or its clone for all contexts. When
4245 : ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4246 : edges from/to an all-context clone. */
4247 :
4248 : static bool
4249 1814382 : calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4250 : bool allow_recursion_to_clone)
4251 : {
4252 1814382 : enum availability availability;
4253 1814382 : cgraph_node *callee = cs->callee->function_symbol (&availability);
4254 :
4255 1814382 : if (availability <= AVAIL_INTERPOSABLE)
4256 : return false;
4257 1808515 : if (callee == dest)
4258 : return true;
4259 617907 : if (!allow_recursion_to_clone && cs->caller == callee)
4260 : return false;
4261 :
4262 617750 : ipa_node_params *info = ipa_node_params_sum->get (callee);
4263 617750 : return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4264 : }
4265 :
4266 : /* Return true if edge CS does bring about the value described by SRC to
4267 : DEST_VAL of node DEST or its clone for all contexts. */
4268 :
4269 : static bool
4270 1804379 : cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4271 : cgraph_node *dest, ipcp_value<tree> *dest_val)
4272 : {
4273 1804379 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4274 :
4275 1804379 : if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4276 1804379 : || caller_info->node_dead)
4277 : return false;
4278 :
4279 741201 : if (!src->val)
4280 : return true;
4281 :
4282 61175 : if (caller_info->ipcp_orig_node)
4283 : {
4284 19068 : tree t = NULL_TREE;
4285 19068 : if (src->offset == -1)
4286 13624 : t = caller_info->known_csts[src->index];
4287 5444 : else if (ipcp_transformation *ts
4288 5444 : = ipcp_get_transformation_summary (cs->caller))
4289 : {
4290 5444 : ipa_argagg_value_list avl (ts);
4291 5444 : t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
4292 : }
4293 19068 : return (t != NULL_TREE
4294 19068 : && values_equal_for_ipcp_p (src->val->value, t));
4295 : }
4296 : else
4297 : {
4298 42107 : if (src->val == dest_val)
4299 : return true;
4300 :
4301 36389 : struct ipcp_agg_lattice *aglat;
4302 36389 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4303 : src->index);
4304 36389 : if (src->offset == -1)
4305 26895 : return (plats->itself.is_single_const ()
4306 20 : && values_equal_for_ipcp_p (src->val->value,
4307 20 : plats->itself.values->value));
4308 : else
4309 : {
4310 9494 : if (plats->aggs_bottom || plats->aggs_contain_variable)
4311 : return false;
4312 3867 : for (aglat = plats->aggs; aglat; aglat = aglat->next)
4313 3867 : if (aglat->offset == src->offset)
4314 1739 : return (aglat->is_single_const ()
4315 8 : && values_equal_for_ipcp_p (src->val->value,
4316 8 : aglat->values->value));
4317 : }
4318 : return false;
4319 : }
4320 : }
4321 :
4322 : /* Return true if edge CS does bring about the value described by SRC to
4323 : DST_VAL of node DEST or its clone for all contexts. */
4324 :
4325 : static bool
4326 10003 : cgraph_edge_brings_value_p (cgraph_edge *cs,
4327 : ipcp_value_source<ipa_polymorphic_call_context> *src,
4328 : cgraph_node *dest,
4329 : ipcp_value<ipa_polymorphic_call_context> *)
4330 : {
4331 10003 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4332 :
4333 10003 : if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4334 10003 : || caller_info->node_dead)
4335 : return false;
4336 8988 : if (!src->val)
4337 : return true;
4338 :
4339 1717 : if (caller_info->ipcp_orig_node)
4340 2480 : return (caller_info->known_contexts.length () > (unsigned) src->index)
4341 484 : && values_equal_for_ipcp_p (src->val->value,
4342 242 : caller_info->known_contexts[src->index]);
4343 :
4344 1460 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4345 : src->index);
4346 1460 : return plats->ctxlat.is_single_const ()
4347 270 : && values_equal_for_ipcp_p (src->val->value,
4348 270 : plats->ctxlat.values->value);
4349 : }
4350 :
4351 : /* Get the next clone in the linked list of clones of an edge. */
4352 :
4353 : static inline struct cgraph_edge *
4354 1814671 : get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4355 : {
4356 1814671 : edge_clone_summary *s = edge_clone_summaries->get (cs);
4357 1814671 : return s != NULL ? s->next_clone : NULL;
4358 : }
4359 :
4360 : /* Given VAL that is intended for DEST, iterate over all its sources (and the
4361 : clones of their edges) and if any edge that brings VAL is interesting according to cs_interesting_for_ipcp_p, return true. For the edges that still
4362 : hold, add their edge frequency and their number and cumulative IPA profile
4363 : counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
4364 : *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively. These four outputs are only stored when at least one non-self-recursive edge brings VAL; otherwise false is returned and they are left untouched. *CALLED_WITHOUT_IPA_PROFILE is always written: it is set to true iff some edge bringing VAL has no initialized IPA count. */
4365 :
4366 : template <typename valtype>
4367 : static bool
4368 211396 : get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4369 : sreal *freq_sum, int *caller_count,
4370 : profile_count *rec_count_sum,
4371 : profile_count *nonrec_count_sum,
4372 : bool *called_without_ipa_profile)
4373 : {
4374 : ipcp_value_source<valtype> *src;
4375 211396 : sreal freq = 0;
4376 211396 : int count = 0;
4377 211396 : profile_count rec_cnt = profile_count::zero ();
4378 211396 : profile_count nonrec_cnt = profile_count::zero ();
4379 211396 : bool interesting = false;
4380 211396 : bool non_self_recursive = false;
4381 211396 : *called_without_ipa_profile = false;
4382 :
4383 968659 : for (src = val->sources; src; src = src->next)
4384 : {
4385 757263 : struct cgraph_edge *cs = src->cs;
4386 1878475 : while (cs)
4387 : {
4388 1121212 : if (cgraph_edge_brings_value_p (cs, src, dest, val))
4389 : {
4390 353100 : count++;
4391 353100 : freq += cs->sreal_frequency ();
4392 353100 : interesting |= cs_interesting_for_ipcp_p (cs);
4393 353100 : if (cs->caller != dest)
4394 : {
4395 346289 : non_self_recursive = true; /* The caller is a different node, so the count belongs to the non-recursive sum. */
4396 346289 : if (cs->count.ipa ().initialized_p ())
4397 972 : nonrec_cnt += cs->count.ipa ();
4398 : else
4399 345317 : *called_without_ipa_profile = true;
4400 : }
4401 6811 : else if (cs->count.ipa ().initialized_p ())
4402 0 : rec_cnt += cs->count.ipa (); /* DEST calling itself: self-recursive sum. */
4403 : else
4404 6811 : *called_without_ipa_profile = true;
4405 : }
4406 1121212 : cs = get_next_cgraph_edge_clone (cs);
4407 : }
4408 : }
4409 :
4410 : /* If the only edges bringing a value are self-recursive ones, do not bother
4411 : evaluating it. */
4412 211396 : if (!non_self_recursive)
4413 : return false;
4414 :
4415 150961 : *freq_sum = freq;
4416 150961 : *caller_count = count;
4417 150961 : *rec_count_sum = rec_cnt;
4418 150961 : *nonrec_count_sum = nonrec_cnt;
4419 :
4420 150961 : return interesting;
4421 : }
4422 :
4423 : /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4424 : to let a non-self-recursive caller be the first element. Thus, we can
4425 : simplify intersecting operations on values that arrive from all of these
4426 : callers, especially when there exists a self-recursive call. Return true if
4427 : CALLERS contains an edge whose caller is not NODE (it then sits at index 0, swapped with the previous first element if necessary); return false and leave CALLERS unchanged if every edge is self-recursive. */
4428 :
4429 : static bool
4430 57510 : adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4431 : cgraph_node *node)
4432 : {
4433 61723 : for (unsigned i = 0; i < callers.length (); i++)
4434 : {
4435 61627 : cgraph_edge *cs = callers[i];
4436 :
4437 61627 : if (cs->caller != node)
4438 : {
4439 57414 : if (i > 0) /* Not yet first: swap it with the element at index 0. */
4440 : {
4441 1967 : callers[i] = callers[0];
4442 1967 : callers[0] = cs;
4443 : }
4444 57414 : return true;
4445 : }
4446 : }
4447 : return false;
4448 : }
4449 :
4450 : /* Return a vector of incoming edges (including edge clones) that do bring value VAL to node DEST. It
4451 : is assumed their number is known and equal to CALLER_COUNT; the vector is allocated for exactly that many elements and filled with quick_push. When CALLER_COUNT is greater than one, the result is passed to adjust_callers_for_value_intersection so that a non-self-recursive edge comes first if there is one. */
4452 :
4453 : template <typename valtype>
4454 : static auto_vec<cgraph_edge *>
4455 150599 : gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4456 : int caller_count)
4457 : {
4458 : ipcp_value_source<valtype> *src;
4459 150599 : auto_vec<cgraph_edge *> ret (caller_count);
4460 :
4461 516879 : for (src = val->sources; src; src = src->next)
4462 : {
4463 366280 : struct cgraph_edge *cs = src->cs;
4464 819496 : while (cs) /* Walk the source edge and all of its clones. */
4465 : {
4466 453216 : if (cgraph_edge_brings_value_p (cs, src, dest, val))
4467 350009 : ret.quick_push (cs);
4468 453216 : cs = get_next_cgraph_edge_clone (cs);
4469 : }
4470 : }
4471 :
4472 150599 : if (caller_count > 1)
4473 40441 : adjust_callers_for_value_intersection (ret, dest);
4474 :
4475 150599 : return ret;
4476 : }
4477 :
4478 : /* Construct a replacement map for a known VALUE for the formal parameter with index PARM_NUM of the function described by INFO (INFO is only used for dumping).
4479 : Return the newly GC-allocated map. FORCE_LOAD_REF
4480 : should be set to true when the reference created for the constant should be
4481 : a load one and not an address one because the corresponding parameter p is
4482 : only used as *p. */
4483 :
4484 : static struct ipa_replace_map *
4485 24352 : get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4486 : bool force_load_ref)
4487 : {
4488 24352 : struct ipa_replace_map *replace_map;
4489 :
4490 24352 : replace_map = ggc_alloc<ipa_replace_map> ();
4491 24352 : if (dump_file)
4492 : {
4493 171 : fprintf (dump_file, " replacing ");
4494 171 : ipa_dump_param (dump_file, info, parm_num);
4495 :
4496 171 : fprintf (dump_file, " with const ");
4497 171 : print_generic_expr (dump_file, value);
4498 :
4499 171 : if (force_load_ref)
4500 11 : fprintf (dump_file, " - forcing load reference\n");
4501 : else
4502 160 : fprintf (dump_file, "\n");
4503 : }
4504 24352 : replace_map->parm_num = parm_num; /* Only these three fields are set here. */
4505 24352 : replace_map->new_tree = value;
4506 24352 : replace_map->force_load_ref = force_load_ref;
4507 24352 : return replace_map;
4508 : }
4509 :
4510 : /* Dump new profiling counts of NODE and of all its outgoing call edges to dump_file. SPEC is true when NODE is a specialized
4511 : one, otherwise it will be referred to as the original node. dump_file is used without being checked here, so it must be non-NULL when this is called. */
4512 :
4513 : static void
4514 4 : dump_profile_updates (cgraph_node *node, bool spec)
4515 : {
4516 4 : if (spec)
4517 2 : fprintf (dump_file, " setting count of the specialized node %s to ",
4518 : node->dump_name ());
4519 : else
4520 2 : fprintf (dump_file, " setting count of the original node %s to ",
4521 : node->dump_name ());
4522 :
4523 4 : node->count.dump (dump_file);
4524 4 : fprintf (dump_file, "\n");
4525 6 : for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee) /* Only the callees list is walked. */
4526 : {
4527 2 : fprintf (dump_file, " edge to %s has count ",
4528 2 : cs->callee->dump_name ());
4529 2 : cs->count.dump (dump_file);
4530 2 : fprintf (dump_file, "\n");
4531 : }
4532 4 : }
4533 :
4534 : /* With partial train run we do not want to assume that original's count is
4535 : zero whenever we redirect all executed edges to clone. Simply drop profile
4536 : to local one in this case, i.e. when REMAINDER is an IPA count that is not nonzero, ORIG_NODE has a nonzero IPA count and flag_profile_partial_training is set for it, use the guessed-local form of ORIG_NODE's count instead. In any case, return the new value. ORIG_NODE
4537 : is the original node and its count has not been updated yet. */
4538 :
4539 : profile_count
4540 16 : lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4541 : {
4542 32 : if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4543 24 : && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4544 4 : && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4545 0 : remainder = orig_node->count.guessed_local ();
4546 :
4547 16 : return remainder; /* Unchanged unless all of the conditions above held. */
4548 : }
4549 :
4550 : /* Structure to sum counts coming from nodes other than the original node and
4551 : its clones. It is passed as the data argument to gather_count_of_non_rec_edges. */
4552 :
4553 : struct gather_other_count_struct
4554 : {
4555 : cgraph_node *orig; /* The original node; edges from it or from its clones are skipped. */
4556 : profile_count other_count; /* Running sum of IPA counts of the remaining edges. */
4557 : };
4558 :
4559 : /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
4560 : counts that come from non-self-recursive calls. DATA points to a gather_other_count_struct; initialized IPA counts of edges calling NODE whose caller is neither DATA->orig nor a node with clone_of equal to DATA->orig are added to DATA->other_count. Always returns false. */
4561 :
4562 : static bool
4563 8 : gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4564 : {
4565 8 : gather_other_count_struct *desc = (gather_other_count_struct *) data;
4566 20 : for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4567 12 : if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4568 0 : if (cs->count.ipa ().initialized_p ()) /* Edges without an IPA count are ignored. */
4569 0 : desc->other_count += cs->count.ipa ();
4570 8 : return false;
4571 : }
4572 :
4573 : /* Structure to help analyze if we need to boost counts of some clones of some
4574 : non-recursive edges to match the new callee count. */
4575 :
4576 : struct desc_incoming_count_struct
4577 : {
4578 : cgraph_node *orig; /* Node whose clones (clone_of == orig) are the callers of interest. */
4579 : hash_set <cgraph_edge *> *processed_edges; /* Edges to be left alone. */
4580 : profile_count count; /* Sum of incoming counts during analysis; the per-edge increment during adjustment. */
4581 : unsigned unproc_orig_rec_edges; /* Number of incoming edges not in processed_edges whose caller is a clone of orig. */
4582 : };
4583 :
4584 : /* Go over edges calling NODE and its thunks and gather information about
4585 : incoming counts so that we know if we need to make any adjustments. Edges whose caller is a thunk are not counted themselves; the function recurses into the thunk instead. For other edges, an initialized count is added (as an IPA count) to DESC->count, and DESC->unproc_orig_rec_edges is incremented for each edge that is not in DESC->processed_edges and whose caller has clone_of equal to DESC->orig. */
4586 :
4587 : static void
4588 8 : analyze_clone_icoming_counts (cgraph_node *node,
4589 : desc_incoming_count_struct *desc)
4590 : {
4591 20 : for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4592 12 : if (cs->caller->thunk)
4593 : {
4594 0 : analyze_clone_icoming_counts (cs->caller, desc);
4595 0 : continue;
4596 : }
4597 : else
4598 : {
4599 12 : if (cs->count.initialized_p ())
4600 12 : desc->count += cs->count.ipa ();
4601 12 : if (!desc->processed_edges->contains (cs)
4602 12 : && cs->caller->clone_of == desc->orig)
4603 4 : desc->unproc_orig_rec_edges++;
4604 : }
4605 8 : }
4606 :
4607 : /* If caller edge counts of a clone created for a self-recursive arithmetic
4608 : jump function must be adjusted because it is coming from the "seed" clone
4609 : for the first value and so has been excessively scaled back as if it was not
4610 : a recursive call, adjust it so that the incoming counts of NODE match its
4611 : count. NODE is the node or its thunk. DESC->count is the amount added to each incoming edge that is not in DESC->processed_edges, whose caller has clone_of equal to DESC->orig and whose count is compatible with DESC->count. */
4612 :
4613 : static void
4614 0 : adjust_clone_incoming_counts (cgraph_node *node,
4615 : desc_incoming_count_struct *desc)
4616 : {
4617 0 : for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4618 0 : if (cs->caller->thunk)
4619 : {
4620 0 : adjust_clone_incoming_counts (cs->caller, desc); /* First fix the edges calling the thunk... */
4621 0 : profile_count sum = profile_count::zero ();
4622 0 : for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4623 0 : if (e->count.initialized_p ())
4624 0 : sum += e->count.ipa ();
4625 0 : cs->count = cs->count.combine_with_ipa_count (sum); /* ...then combine their new sum into the thunk's own edge. */
4626 : }
4627 0 : else if (!desc->processed_edges->contains (cs)
4628 0 : && cs->caller->clone_of == desc->orig
4629 0 : && cs->count.compatible_p (desc->count))
4630 : {
4631 0 : cs->count += desc->count;
4632 0 : if (dump_file)
4633 : {
4634 0 : fprintf (dump_file, " Adjusted count of an incoming edge of "
4635 0 : "a clone %s -> %s to ", cs->caller->dump_name (),
4636 0 : cs->callee->dump_name ());
4637 0 : cs->count.dump (dump_file);
4638 0 : fprintf (dump_file, "\n");
4639 : }
4640 : }
4641 0 : }
4642 :
4643 : /* When ORIG_NODE has been cloned for values which have been generated for a
4644 : self-recursive call as a result of an arithmetic pass-through
4645 : jump-functions, adjust its count together with counts of all such clones in
4646 : SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4647 :
4648 : The function sums the counts of the original node and all its clones that
4649 : cannot be attributed to a specific clone because it comes from a
4650 : non-recursive edge. This sum is then evenly divided between the clones and
4651 : on top of that each one gets all the counts which can be attributed directly
4652 : to it. Nothing is done when the IPA count of ORIG_NODE is not nonzero. */
4653 :
4654 : static void
4655 33 : update_counts_for_self_gen_clones (cgraph_node *orig_node,
4656 : const vec<cgraph_node *> &self_gen_clones)
4657 : {
4658 33 : profile_count redist_sum = orig_node->count.ipa ();
4659 33 : if (!redist_sum.nonzero_p ())
4660 : return;
4661 :
4662 4 : if (dump_file)
4663 0 : fprintf (dump_file, " Updating profile of self recursive clone "
4664 : "series\n");
4665 :
4666 4 : gather_other_count_struct gocs;
4667 4 : gocs.orig = orig_node;
4668 4 : gocs.other_count = profile_count::zero ();
4669 :
4670 4 : auto_vec <profile_count, 8> other_edges_count; /* Indexed in the same order as SELF_GEN_CLONES. */
4671 20 : for (cgraph_node *n : self_gen_clones)
4672 : {
4673 8 : gocs.other_count = profile_count::zero ();
4674 8 : n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4675 : &gocs, false);
4676 8 : other_edges_count.safe_push (gocs.other_count);
4677 8 : redist_sum -= gocs.other_count; /* What remains is shared out evenly below. */
4678 : }
4679 :
4680 4 : hash_set<cgraph_edge *> processed_edges;
4681 4 : unsigned i = 0;
4682 20 : for (cgraph_node *n : self_gen_clones)
4683 : {
4684 8 : profile_count new_count
4685 16 : = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4686 8 : new_count = lenient_count_portion_handling (new_count, orig_node);
4687 8 : n->scale_profile_to (new_count);
4688 16 : for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4689 8 : processed_edges.add (cs); /* Remember all outgoing edges of the rescaled nodes. */
4690 :
4691 8 : i++;
4692 : }
4693 :
4694 : /* There are still going to be edges to ORIG_NODE that have one or more
4695 : clones coming from another node clone in SELF_GEN_CLONES and which we
4696 : scaled by the same amount, which means that the total incoming sum of
4697 : counts to ORIG_NODE will be too high, scale such edges back. */
4698 8 : for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4699 : {
4700 4 : if (cs->callee->ultimate_alias_target () == orig_node)
4701 : {
4702 4 : unsigned den = 0; /* Number of processed edges in this clone chain that call ORIG_NODE. */
4703 18 : for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4704 14 : if (e->callee->ultimate_alias_target () == orig_node
4705 14 : && processed_edges.contains (e))
4706 8 : den++;
4707 4 : if (den > 0)
4708 18 : for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4709 14 : if (e->callee->ultimate_alias_target () == orig_node
4710 8 : && processed_edges.contains (e)
4711 : /* If count is not IPA, this adjustment makes verifier
4712 : unhappy, since we expect bb->count to match e->count.
4713 : We may add a flag to mark edge counts that have been
4714 : modified by IPA code, but so far it does not seem
4715 : to be worth the effort. With local counts the profile
4716 : will not propagate at IPA level. */
4717 30 : && e->count.ipa_p ())
4718 8 : e->count /= den;
4719 : }
4720 : }
4721 :
4722 : /* Edges from the seeds of the values generated for arithmetic jump-functions
4723 : along self-recursive edges are likely to have fairly low count and so
4724 : edges from them to nodes in the self_gen_clones do not correspond to the
4725 : artificially distributed count of the nodes, the total sum of incoming
4726 : edges to some clones might be too low. Detect this situation and correct
4727 : it. */
4728 20 : for (cgraph_node *n : self_gen_clones)
4729 : {
4730 8 : if (!n->count.ipa ().nonzero_p ())
4731 0 : continue;
4732 :
4733 8 : desc_incoming_count_struct desc;
4734 8 : desc.orig = orig_node;
4735 8 : desc.processed_edges = &processed_edges;
4736 8 : desc.count = profile_count::zero ();
4737 8 : desc.unproc_orig_rec_edges = 0;
4738 8 : analyze_clone_icoming_counts (n, &desc);
4739 :
4740 8 : if (n->count.differs_from_p (desc.count))
4741 : {
4742 0 : if (n->count > desc.count
4743 0 : && desc.unproc_orig_rec_edges > 0)
4744 : {
4745 0 : desc.count = n->count - desc.count; /* The missing incoming count... */
4746 0 : desc.count = desc.count /= desc.unproc_orig_rec_edges; /* ...split evenly among the unprocessed recursive edges. */
4747 0 : adjust_clone_incoming_counts (n, &desc);
4748 : }
4749 0 : else if (dump_file)
4750 0 : fprintf (dump_file,
4751 : " Unable to fix up incoming counts for %s.\n",
4752 : n->dump_name ());
4753 : }
4754 : }
4755 :
4756 4 : if (dump_file)
4757 0 : for (cgraph_node *n : self_gen_clones)
4758 0 : dump_profile_updates (n, n != orig_node);
4759 4 : return;
4760 4 : }
4761 :
4762 : /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4763 : their profile information to reflect this. This function should not be used
4764 : for clones generated for arithmetic pass-through jump functions on a
4765 : self-recursive call graph edge, that situation is handled by
4766 : update_counts_for_self_gen_clones. Nothing is done when the IPA count of ORIG_NODE is not nonzero. */
4767 :
4768 : static void
4769 4188 : update_profiling_info (struct cgraph_node *orig_node,
4770 : struct cgraph_node *new_node)
4771 : {
4772 4188 : struct caller_statistics stats;
4773 4188 : profile_count new_sum;
4774 4188 : profile_count remainder, orig_node_count = orig_node->count.ipa ();
4775 :
4776 4188 : if (!orig_node_count.nonzero_p ())
4777 4180 : return;
4778 :
4779 8 : if (dump_file)
4780 : {
4781 2 : fprintf (dump_file, " Updating profile from original count: ");
4782 2 : orig_node_count.dump (dump_file);
4783 2 : fprintf (dump_file, "\n");
4784 : }
4785 :
4786 8 : init_caller_stats (&stats, new_node);
4787 8 : new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4788 : false);
4789 8 : new_sum = stats.count_sum;
4790 :
4791 8 : bool orig_edges_processed = false; /* Set when ORIG_NODE's profile was already rewritten and must not be rescaled at the end. */
4792 8 : if (new_sum > orig_node_count)
4793 : {
4794 : /* Profile has already gone astray, keep what we have but lower it
4795 : to global0adjusted or to local if we have partial training. The three cases are mutually exclusive, as in the similar chain for a cold local original below. */
4796 0 : if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4797 0 : orig_node->make_profile_local ();
4798 0 : else if (new_sum.quality () == AFDO)
4799 0 : orig_node->make_profile_global0 (GUESSED_GLOBAL0_AFDO);
4800 : else
4801 0 : orig_node->make_profile_global0 (GUESSED_GLOBAL0_ADJUSTED);
4802 : orig_edges_processed = true;
4803 : }
4804 8 : else if (stats.rec_count_sum.nonzero_p ())
4805 : {
4806 0 : int new_nonrec_calls = stats.n_nonrec_calls;
4807 : /* There are self-recursive edges which are likely to bring in the
4808 : majority of calls but which we must divide in between the original and
4809 : new node. */
4810 0 : init_caller_stats (&stats, orig_node);
4811 0 : orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
4812 : &stats, false);
4813 0 : int orig_nonrec_calls = stats.n_nonrec_calls;
4814 0 : profile_count orig_nonrec_call_count = stats.count_sum;
4815 :
4816 0 : if (orig_node->local)
4817 : {
4818 0 : if (!orig_nonrec_call_count.nonzero_p ())
4819 : {
4820 0 : if (dump_file)
4821 0 : fprintf (dump_file, " The original is local and the only "
4822 : "incoming edges from non-dead callers with nonzero "
4823 : "counts are self-recursive, assuming it is cold.\n");
4824 : /* The NEW_NODE count and counts of all its outgoing edges
4825 : are still unmodified copies of ORIG_NODE's. Just clear
4826 : the latter and bail out. */
4827 0 : if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4828 0 : orig_node->make_profile_local ();
4829 0 : else if (orig_nonrec_call_count.quality () == AFDO)
4830 0 : orig_node->make_profile_global0 (GUESSED_GLOBAL0_AFDO);
4831 : else
4832 0 : orig_node->make_profile_global0 (GUESSED_GLOBAL0_ADJUSTED);
4833 0 : return;
4834 : }
4835 : }
4836 : else
4837 : {
4838 : /* Let's behave as if there was another caller that accounts for all
4839 : the calls that were either indirect or from other compilation
4840 : units. */
4841 0 : orig_nonrec_calls++;
4842 0 : profile_count pretend_caller_count
4843 0 : = (orig_node_count - new_sum - orig_nonrec_call_count
4844 0 : - stats.rec_count_sum);
4845 0 : orig_nonrec_call_count += pretend_caller_count;
4846 : }
4847 :
4848 : /* Divide all "unexplained" counts roughly proportionally to sums of
4849 : counts of non-recursive calls.
4850 :
4851 : We put rather arbitrary limits on how many counts we claim because the
4852 : number of non-self-recursive incoming count is only a rough guideline
4853 : and there are cases (such as mcf) where using it blindly just takes
4854 : too many. And if lattices are considered in the opposite order we
4855 : could also take too few. */
4856 0 : profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
4857 :
4858 0 : int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
4859 0 : profile_count new_part = unexp.apply_scale (limit_den - 1, limit_den); /* Upper limit of the claim. */
4860 0 : profile_count den = new_sum + orig_nonrec_call_count;
4861 0 : if (den.nonzero_p ())
4862 0 : new_part = MIN (unexp.apply_scale (new_sum, den), new_part);
4863 0 : new_part = MAX (new_part,
4864 : unexp.apply_scale (new_nonrec_calls, limit_den)); /* Lower limit of the claim. */
4865 0 : if (dump_file)
4866 : {
4867 0 : fprintf (dump_file, " Claiming ");
4868 0 : new_part.dump (dump_file);
4869 0 : fprintf (dump_file, " of unexplained ");
4870 0 : unexp.dump (dump_file);
4871 0 : fprintf (dump_file, " counts because of self-recursive "
4872 : "calls\n");
4873 : }
4874 0 : new_sum += new_part;
4875 0 : remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4876 : orig_node);
4877 : }
4878 : else
4879 8 : remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4880 : orig_node);
4881 :
4882 8 : new_node->scale_profile_to (new_sum);
4883 :
4884 8 : if (!orig_edges_processed) /* REMAINDER is only computed on the paths that leave this flag false. */
4885 8 : orig_node->scale_profile_to (remainder);
4886 :
4887 8 : if (dump_file)
4888 : {
4889 2 : dump_profile_updates (new_node, true);
4890 2 : dump_profile_updates (orig_node, false);
4891 : }
4892 : }
4893 :
4894 : /* Update the respective profile of specialized NEW_NODE and the original
4895 : ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4896 : have been redirected to the specialized version. Apart from the initial dump, nothing is done when the IPA count of ORIG_NODE or REDIRECTED_SUM is not nonzero. */
4897 :
4898 : static void
4899 0 : update_specialized_profile (struct cgraph_node *new_node,
4900 : struct cgraph_node *orig_node,
4901 : profile_count redirected_sum)
4902 : {
4903 0 : if (dump_file)
4904 : {
4905 0 : fprintf (dump_file, " the sum of counts of redirected edges is ");
4906 0 : redirected_sum.dump (dump_file);
4907 0 : fprintf (dump_file, "\n old ipa count of the original node is ");
4908 0 : orig_node->count.dump (dump_file);
4909 0 : fprintf (dump_file, "\n");
4910 : }
4911 0 : if (!orig_node->count.ipa ().nonzero_p ()
4912 0 : || !redirected_sum.nonzero_p ())
4913 0 : return;
4914 :
4915 0 : orig_node->scale_profile_to /* The original loses REDIRECTED_SUM... */
4916 0 : (lenient_count_portion_handling (orig_node->count.ipa () - redirected_sum,
4917 : orig_node));
4918 :
4919 0 : new_node->scale_profile_to (new_node->count.ipa () + redirected_sum); /* ...and the specialized node gains it. */
4920 :
4921 0 : if (dump_file)
4922 : {
4923 0 : dump_profile_updates (new_node, true);
4924 0 : dump_profile_updates (orig_node, false);
4925 : }
4926 : }
4927 :
4928 : static void adjust_references_in_caller (cgraph_edge *cs,
4929 : symtab_node *symbol, int index); /* Forward declaration; the definition appears below, after adjust_refs_in_act_callers which calls it. */
4930 :
4931 : /* Simple structure to pass a symbol and index (with same meaning as parameters
4932 : of adjust_references_in_caller) through a void* parameter of a
4933 : call_for_symbol_thunks_and_aliases callback. */
4934 : struct symbol_and_index_together
4935 : {
4936 : symtab_node *symbol; /* Passed on as the SYMBOL argument. */
4937 : int index; /* Passed on as the INDEX argument. */
4938 : };
4939 :
4940 : /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4941 : adjust_references_in_caller on edges up in the call-graph, if necessary. DATA points to a symbol_and_index_together. Edges whose caller is a thunk are skipped here. Always returns false. */
4942 : static bool
4943 9 : adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4944 : {
4945 9 : symbol_and_index_together *pack = (symbol_and_index_together *) data;
4946 40 : for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4947 31 : if (!cs->caller->thunk)
4948 31 : adjust_references_in_caller (cs, pack->symbol, pack->index);
4949 9 : return false;
4950 : }
4951 :
4952 : /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4953 : variable which is only dereferenced and which is represented by SYMBOL. See
4954 : if we can remove ADDR reference in callers associated with the call. For a constant jump function the reference attached to the call statement is removed directly. For a simple (NOP_EXPR) pass-through that has not been handled yet, the controlled-uses counter of the corresponding caller parameter is decremented and, once it drops to zero, the processing continues with the callers of the caller. */
4955 :
4956 : static void
4957 402 : adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4958 : {
4959 402 : ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4960 402 : ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
4961 402 : if (jfunc->type == IPA_JF_CONST)
4962 : {
4963 383 : ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
4964 : cs->lto_stmt_uid,
4965 : IPA_REF_ADDR);
4966 383 : if (!to_del)
4967 393 : return;
4968 383 : to_del->remove_reference ();
4969 383 : ipa_zap_jf_refdesc (jfunc);
4970 383 : if (dump_file)
4971 22 : fprintf (dump_file, " Removed a reference from %s to %s.\n",
4972 11 : cs->caller->dump_name (), symbol->dump_name ());
4973 383 : return;
4974 : }
4975 :
4976 19 : if (jfunc->type != IPA_JF_PASS_THROUGH
4977 19 : || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
4978 38 : || ipa_get_jf_pass_through_refdesc_decremented (jfunc)) /* Already accounted for earlier. */
4979 : return;
4980 :
4981 19 : int fidx = ipa_get_jf_pass_through_formal_id (jfunc); /* Index of the caller's formal parameter that is passed through. */
4982 19 : cgraph_node *caller = cs->caller;
4983 19 : ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
4984 : /* TODO: This consistency check may be too big and not really
4985 : that useful. Consider removing it. */
4986 19 : tree cst;
4987 19 : if (caller_info->ipcp_orig_node)
4988 17 : cst = caller_info->known_csts[fidx];
4989 : else
4990 : {
4991 2 : ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
4992 2 : gcc_assert (lat->is_single_const ());
4993 2 : cst = lat->values->value;
4994 : }
4995 19 : gcc_assert (TREE_CODE (cst) == ADDR_EXPR
4996 : && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
4997 : == symbol));
4998 :
4999 19 : int cuses = ipa_get_controlled_uses (caller_info, fidx);
5000 19 : if (cuses == IPA_UNDESCRIBED_USE)
5001 : return;
5002 19 : gcc_assert (cuses > 0);
5003 19 : cuses--;
5004 19 : ipa_set_controlled_uses (caller_info, fidx, cuses);
5005 19 : ipa_set_jf_pass_through_refdesc_decremented (jfunc, true); /* Makes the early return above fire on a repeated visit. */
5006 19 : if (dump_file && (dump_flags & TDF_DETAILS))
5007 3 : fprintf (dump_file, " Controlled uses of parameter %i of %s dropped "
5008 : "to %i.\n", fidx, caller->dump_name (), cuses);
5009 19 : if (cuses) /* Other controlled uses remain, nothing more to do. */
5010 : return;
5011 :
5012 9 : if (caller_info->ipcp_orig_node)
5013 : {
5014 : /* Cloning machinery has created a reference here, we need to either
5015 : remove it or change it to a read one. */
5016 7 : ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
5017 7 : if (to_del)
5018 : {
5019 7 : to_del->remove_reference ();
5020 7 : if (dump_file)
5021 6 : fprintf (dump_file, " Removed a reference from %s to %s.\n",
5022 3 : cs->caller->dump_name (), symbol->dump_name ());
5023 7 : if (ipa_get_param_load_dereferenced (caller_info, fidx))
5024 : {
5025 3 : caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5026 3 : if (dump_file)
5027 2 : fprintf (dump_file,
5028 : " ...and replaced it with LOAD one.\n");
5029 : }
5030 : }
5031 : }
5032 :
5033 9 : symbol_and_index_together pack;
5034 9 : pack.symbol = symbol;
5035 9 : pack.index = fidx;
5036 9 : if (caller->can_change_signature)
5037 9 : caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5038 : &pack, true);
5039 : }
5040 :
5041 :
5042 : /* Return true if we would like to remove a parameter from NODE when cloning it
5043 : with KNOWN_CSTS scalar constants, i.e. if some parameter that has a known constant or is unused has not already been removed by previous parameter adjustments of NODE. */
5044 :
5045 : static bool
5046 19888 : want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5047 : {
5048 19888 : auto_vec<bool, 16> surviving;
5049 19888 : bool filled_vec = false;
5050 19888 : ipa_node_params *info = ipa_node_params_sum->get (node);
5051 19888 : int i, count = ipa_get_param_count (info);
5052 :
5053 39472 : for (i = 0; i < count; i++)
5054 : {
5055 34981 : if (!known_csts[i] && ipa_is_param_used (info, i)) /* This parameter has to stay. */
5056 19584 : continue;
5057 :
5058 15397 : if (!filled_vec)
5059 : {
5060 15397 : clone_info *cinfo = clone_info::get (node);
5061 15397 : if (!cinfo || !cinfo->param_adjustments) /* No earlier adjustments, so the parameter is still there. */
5062 : return true;
5063 0 : cinfo->param_adjustments->get_surviving_params (&surviving);
5064 0 : filled_vec = true;
5065 : }
5066 0 : if ((unsigned) i < surviving.length () && surviving[i]) /* Only index SURVIVING within its bounds. */
5067 : return true;
5068 : }
5069 : return false;
5070 19888 : }
5071 :
5072 : /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5073 : known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5074 : redirect all edges in CALLERS to it. Self-recursive edges are removed from CALLERS before cloning and their clones in the new node are redirected to the new node afterwards. Return the new node. */
5075 :
5076 : static struct cgraph_node *
5077 21303 : create_specialized_node (struct cgraph_node *node,
5078 : vec<tree> known_csts,
5079 : vec<ipa_polymorphic_call_context> known_contexts,
5080 : vec<ipa_argagg_value, va_gc> *aggvals,
5081 : vec<cgraph_edge *> &callers)
5082 : {
5083 21303 : ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5084 21303 : vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5085 21303 : vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5086 21303 : struct cgraph_node *new_node;
5087 21303 : int i, count = ipa_get_param_count (info);
5088 21303 : clone_info *cinfo = clone_info::get (node);
5089 0 : ipa_param_adjustments *old_adjustments = cinfo
5090 21303 : ? cinfo->param_adjustments : NULL;
5091 21303 : ipa_param_adjustments *new_adjustments;
5092 21303 : gcc_assert (!info->ipcp_orig_node); /* NODE must not itself be an IPA-CP clone. */
5093 21303 : gcc_assert (node->can_change_signature
5094 : || !old_adjustments);
5095 :
5096 19888 : if (old_adjustments)
5097 : {
5098 : /* At the moment all IPA optimizations should use the number of
5099 : parameters of the prevailing decl as the m_always_copy_start.
5100 : Handling any other value would complicate the code below, so for the
5101 : time being let's only assert it is so. */
5102 0 : gcc_assert (old_adjustments->m_always_copy_start == count
5103 : || old_adjustments->m_always_copy_start < 0);
5104 0 : int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5105 0 : for (i = 0; i < old_adj_count; i++)
5106 : {
5107 0 : ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5108 0 : if (!node->can_change_signature
5109 0 : || old_adj->op != IPA_PARAM_OP_COPY
5110 0 : || (!known_csts[old_adj->base_index]
5111 0 : && ipa_is_param_used (info, old_adj->base_index)))
5112 : {
5113 0 : ipa_adjusted_param new_adj = *old_adj; /* Keep this old adjustment in the new set. */
5114 :
5115 0 : new_adj.prev_clone_adjustment = true;
5116 0 : new_adj.prev_clone_index = i;
5117 0 : vec_safe_push (new_params, new_adj);
5118 : }
5119 : }
5120 0 : bool skip_return = old_adjustments->m_skip_return;
5121 0 : new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5122 : ipa_param_adjustments (new_params, count,
5123 0 : skip_return));
5124 : }
5125 21303 : else if (node->can_change_signature
5126 21303 : && want_remove_some_param_p (node, known_csts))
5127 : {
5128 15397 : ipa_adjusted_param adj;
5129 15397 : memset (&adj, 0, sizeof (adj));
5130 15397 : adj.op = IPA_PARAM_OP_COPY;
5131 59798 : for (i = 0; i < count; i++)
5132 44401 : if (!known_csts[i] && ipa_is_param_used (info, i)) /* Copy only parameters that are used and not known constants. */
5133 : {
5134 15877 : adj.base_index = i;
5135 15877 : adj.prev_clone_index = i;
5136 15877 : vec_safe_push (new_params, adj);
5137 : }
5138 15397 : new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5139 15397 : ipa_param_adjustments (new_params, count, false));
5140 : }
5141 : else
5142 : new_adjustments = NULL;
5143 :
5144 21303 : auto_vec<cgraph_edge *, 2> self_recursive_calls;
5145 181478 : for (i = callers.length () - 1; i >= 0; i--) /* Iterate backwards because elements are removed on the way. */
5146 : {
5147 138872 : cgraph_edge *cs = callers[i];
5148 138872 : if (cs->caller == node)
5149 : {
5150 131 : self_recursive_calls.safe_push (cs);
5151 131 : callers.unordered_remove (i);
5152 : }
5153 : }
5154 21303 : replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5155 81029 : for (i = 0; i < count; i++)
5156 : {
5157 59726 : tree t = known_csts[i];
5158 59726 : if (!t)
5159 35374 : continue;
5160 :
5161 24352 : gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5162 :
5163 24352 : bool load_ref = false;
5164 24352 : symtab_node *ref_symbol;
5165 24352 : if (TREE_CODE (t) == ADDR_EXPR)
5166 : {
5167 6525 : tree base = get_base_address (TREE_OPERAND (t, 0));
5168 6525 : if (TREE_CODE (base) == VAR_DECL
5169 3128 : && ipa_get_controlled_uses (info, i) == 0
5170 931 : && ipa_get_param_load_dereferenced (info, i)
5171 6911 : && (ref_symbol = symtab_node::get (base)))
5172 : {
5173 386 : load_ref = true; /* The replacement will get a load reference instead of an address one. */
5174 386 : if (node->can_change_signature)
5175 1403 : for (cgraph_edge *caller : callers)
5176 371 : adjust_references_in_caller (caller, ref_symbol, i);
5177 : }
5178 : }
5179 :
5180 24352 : ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5181 24352 : if (replace_map)
5182 24352 : vec_safe_push (replace_trees, replace_map);
5183 : }
5184 :
5185 63909 : unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5186 21303 : IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5187 : node->decl)));
5188 21303 : new_node = node->create_virtual_clone (callers, replace_trees,
5189 : new_adjustments, "constprop",
5190 : suffix_counter);
5191 21303 : suffix_counter++; /* The counter is kept per assembler name of NODE. */
5192 :
5193 21303 : bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5194 21434 : for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5195 : {
5196 131 : cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5197 : /* Cloned edges can disappear during cloning as speculation can be
5198 : resolved, check that we have one and that it comes from the last
5199 : cloning. */
5200 131 : if (cs && cs->caller == new_node)
5201 130 : cs->redirect_callee_duplicating_thunks (new_node);
5202 : /* Any future code that would make more than one clone of an outgoing
5203 : edge would confuse this mechanism, so let's check that does not
5204 : happen. */
5205 130 : gcc_checking_assert (!cs
5206 : || !get_next_cgraph_edge_clone (cs)
5207 : || get_next_cgraph_edge_clone (cs)->caller != new_node);
5208 : }
5209 21303 : if (have_self_recursive_calls)
5210 121 : new_node->expand_all_artificial_thunks ();
5211 :
5212 21303 : ipa_set_node_agg_value_chain (new_node, aggvals);
5213 51025 : for (const ipa_argagg_value &av : aggvals)
5214 29722 : new_node->maybe_create_reference (av.value, NULL);
5215 :
5216 21303 : if (dump_file && (dump_flags & TDF_DETAILS))
5217 : {
5218 90 : fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5219 90 : if (known_contexts.exists ())
5220 : {
5221 0 : for (i = 0; i < count; i++)
5222 0 : if (!known_contexts[i].useless_p ())
5223 : {
5224 0 : fprintf (dump_file, " known ctx %i is ", i);
5225 0 : known_contexts[i].dump (dump_file);
5226 : }
5227 : }
5228 90 : if (aggvals)
5229 : {
5230 48 : fprintf (dump_file, " Aggregate replacements:");
5231 48 : ipa_argagg_value_list avs (aggvals);
5232 48 : avs.dump (dump_file);
5233 : }
5234 : }
5235 :
5236 21303 : new_info = ipa_node_params_sum->get (new_node);
5237 21303 : new_info->ipcp_orig_node = node; /* Marks NEW_NODE's summary as belonging to an IPA-CP clone of NODE. */
5238 21303 : new_node->ipcp_clone = true;
5239 21303 : new_info->known_csts = known_csts;
5240 21303 : new_info->known_contexts = known_contexts;
5241 :
5242 21303 : ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
5243 : aggvals);
5244 :
5245 21303 : return new_node;
5246 21303 : }
5247 :
5248 : /* Return true if JFUNC, which describes the I-th parameter of call CS, is a
5249 : pass-through of the caller's own I-th formal parameter along a self-recursive
5250 : edge whose caller is not an IPA-CP clone. When SIMPLE is true, further check
5251 : that JFUNC is a simple no-operation (NOP_EXPR) pass-through. */
5252 :
5253 : static bool
5254 778064 : self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5255 : bool simple = true)
5256 : {
5257 778064 : enum availability availability;
5258 778064 : if (jfunc->type == IPA_JF_PASS_THROUGH
5259 80024 : && cs->caller == cs->callee->function_symbol (&availability)
5260 19350 : && availability > AVAIL_INTERPOSABLE
5261 19350 : && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5262 19350 : && ipa_get_jf_pass_through_formal_id (jfunc) == i
5263 19350 : && ipa_node_params_sum->get (cs->caller)
5264 797414 : && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5265 : return true;
5266 : return false;
5267 : }
5268 :
5269 : /* Return true if JFUNC, which describes the I-th parameter of call CS, is an
5270 : ancestor jump function with zero offset referring to the caller's own I-th
5271 : formal parameter along a self-recursive edge whose caller is not an IPA-CP clone. */
5272 :
5273 : static bool
5274 758726 : self_recursive_ancestor_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i)
5275 : {
5276 758726 : enum availability availability;
5277 758726 : if (jfunc->type == IPA_JF_ANCESTOR
5278 3226 : && cs->caller == cs->callee->function_symbol (&availability)
5279 1 : && availability > AVAIL_INTERPOSABLE
5280 1 : && ipa_get_jf_ancestor_offset (jfunc) == 0
5281 1 : && ipa_get_jf_ancestor_formal_id (jfunc) == i
5282 1 : && ipa_node_params_sum->get (cs->caller)
5283 758727 : && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5284 : return true;
5285 : return false;
5286 : }
5287 :
5288 : /* Return true if JFUNC, which describes a part of an aggregate represented or
5289 : pointed to by the I-th parameter of call CS, is an IPA_JF_LOAD_AGG item that
5290 : loads from the same offset of the caller's own I-th formal parameter with a
5291 : compatible type, along a self-recursive edge whose caller is not an IPA-CP clone. When
5292 : SIMPLE is true, further check that JFUNC is a simple no-operation (NOP_EXPR) pass-through. */
5293 :
5294 : static bool
5295 338731 : self_recursive_agg_pass_through_p (const cgraph_edge *cs,
5296 : const ipa_agg_jf_item *jfunc,
5297 : int i, bool simple = true)
5298 : {
5299 338731 : enum availability availability;
5300 338731 : if (cs->caller == cs->callee->function_symbol (&availability)
5301 3690 : && availability > AVAIL_INTERPOSABLE
5302 3690 : && jfunc->jftype == IPA_JF_LOAD_AGG
5303 487 : && jfunc->offset == jfunc->value.load_agg.offset
5304 487 : && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5305 487 : && jfunc->value.pass_through.formal_id == i
5306 481 : && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5307 481 : && ipa_node_params_sum->get (cs->caller)
5308 339212 : && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5309 : return true;
5310 : return false;
5311 : }
5312 :
5313 : /* Given INFO describing a node and a subset of its CALLERS, try to populate slots in
5314 : KNOWN_CSTS with constants that are also known for all of the CALLERS. Slots for which no such constant is found are left as they were. */
5315 :
5316 : static void
5317 167572 : find_scalar_values_for_callers_subset (vec<tree> &known_csts,
5318 : ipa_node_params *info,
5319 : const vec<cgraph_edge *> &callers)
5320 : {
5321 167572 : int i, count = ipa_get_param_count (info);
5322 :
5323 732147 : for (i = 0; i < count; i++)
5324 : {
5325 564575 : ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
5326 564575 : if (lat->bottom)
5327 564575 : continue;
5328 551060 : if (lat->is_single_const ())
5329 : {
5330 29268 : known_csts[i] = lat->values->value;
5331 29268 : continue;
5332 : }
5333 :
5334 521792 : struct cgraph_edge *cs;
5335 521792 : tree newval = NULL_TREE;
5336 521792 : int j;
5337 521792 : bool first = true;
5338 521792 : tree type = ipa_get_type (info, i);
5339 :
5340 973164 : FOR_EACH_VEC_ELT (callers, j, cs)
5341 : {
5342 777522 : struct ipa_jump_func *jump_func;
5343 777522 : tree t;
5344 :
5345 777522 : ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5346 777522 : if (!args
5347 777522 : || i >= ipa_get_cs_argument_count (args)
5348 1555013 : || (i == 0
5349 179621 : && call_passes_through_thunk (cs)))
5350 : {
5351 : newval = NULL_TREE;
5352 : break;
5353 : }
5354 777421 : jump_func = ipa_get_ith_jump_func (args, i);
5355 :
5356 : /* Besides simple pass-through jump function, arithmetic jump
5357 : function could also introduce argument-direct-pass-through for
5358 : self-feeding recursive call. For example,
5359 :
5360 : fn (int i)
5361 : {
5362 : fn (i & 1);
5363 : }
5364 :
5365 : Given that i is 0, recursive propagation via (i & 1) also gets
5366 : 0. The value gathered from the preceding callers (NEWVAL) serves as the input of the operation. */
5367 777421 : if (self_recursive_pass_through_p (cs, jump_func, i, false))
5368 : {
5369 18702 : gcc_assert (newval);
5370 18702 : enum tree_code opcode
5371 18702 : = ipa_get_jf_pass_through_operation (jump_func);
5372 18702 : tree op_type = (opcode == NOP_EXPR) ? NULL_TREE
5373 49 : : ipa_get_jf_pass_through_op_type (jump_func);
5374 18702 : t = ipa_get_jf_arith_result (opcode, newval,
5375 : ipa_get_jf_pass_through_operand (jump_func),
5376 : op_type);
5377 18702 : t = ipacp_value_safe_for_type (type, t);
5378 : }
5379 758719 : else if (self_recursive_ancestor_p (cs, jump_func, i))
5380 0 : continue;
5381 : else
5382 758719 : t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5383 : jump_func, type);
5384 777421 : if (!t
5385 472616 : || (newval
5386 251152 : && !values_equal_for_ipcp_p (t, newval))
5387 1228793 : || (!first && !newval))
5388 : {
5389 : newval = NULL_TREE;
5390 : break;
5391 : }
5392 : else
5393 : newval = t;
5394 : first = false;
5395 : }
5396 :
5397 521792 : if (newval)
5398 195642 : known_csts[i] = newval;
5399 : }
5400 167572 : }
5401 :
5402 : /* Given INFO describing a node and a subset of its CALLERS, try to populate slots in
5403 : KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5404 : CALLERS. KNOWN_CONTEXTS, if empty, is grown to the parameter count before the first useful context is stored. */
5405 :
5406 : static void
5407 167572 : find_contexts_for_caller_subset (vec<ipa_polymorphic_call_context>
5408 : &known_contexts,
5409 : ipa_node_params *info,
5410 : const vec<cgraph_edge *> &callers)
5411 : {
5412 167572 : int i, count = ipa_get_param_count (info);
5413 :
5414 732128 : for (i = 0; i < count; i++)
5415 : {
5416 564569 : if (!ipa_is_param_used (info, i))
5417 30301 : continue;
5418 :
5419 536295 : ipcp_lattice<ipa_polymorphic_call_context> *ctxlat
5420 536295 : = ipa_get_poly_ctx_lat (info, i);
5421 536295 : if (ctxlat->bottom)
5422 910 : continue;
5423 535385 : if (ctxlat->is_single_const ())
5424 : {
5425 1117 : if (!ctxlat->values->value.useless_p ())
5426 : {
5427 1117 : if (known_contexts.is_empty ())
5428 1056 : known_contexts.safe_grow_cleared (count, true);
5429 1117 : known_contexts[i] = ctxlat->values->value;
5430 : }
5431 1117 : continue;
5432 : }
5433 :
5434 534268 : cgraph_edge *cs;
5435 534268 : ipa_polymorphic_call_context newval;
5436 534268 : bool first = true;
5437 534268 : int j;
5438 :
5439 539489 : FOR_EACH_VEC_ELT (callers, j, cs)
5440 : {
5441 535747 : ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5442 535747 : if (!args
5443 1071494 : || i >= ipa_get_cs_argument_count (args))
5444 13 : return;
5445 535734 : ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5446 535734 : ipa_polymorphic_call_context ctx;
5447 535734 : ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5448 : cs, i, jfunc);
5449 535734 : if (first)
5450 : {
5451 534255 : newval = ctx;
5452 534255 : first = false;
5453 : }
5454 : else
5455 1479 : newval.meet_with (ctx);
5456 1068730 : if (newval.useless_p ())
5457 : break;
5458 : }
5459 :
5460 1068510 : if (!newval.useless_p ())
5461 : {
5462 3742 : if (known_contexts.is_empty ())
5463 3519 : known_contexts.safe_grow_cleared (count, true);
5464 3742 : known_contexts[i] = newval;
5465 : }
5466 :
5467 : }
5468 : }
5469 :
5470 : /* Push all aggregate values coming along edge CS for parameter number INDEX to
5471 : RES. If INTERIM is non-NULL, it contains the current interim state of
5472 : collected aggregate values which can be used to compute values passed over
5473 : self-recursive edges.
5474 :
5475 : This is basically one iteration of push_agg_values_from_edge over one
5476 : parameter, which allows for simpler early returns. */
5477 :
5478 : static void
5479 620858 : push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
5480 : vec<ipa_argagg_value> *res,
5481 : const ipa_argagg_value_list *interim)
5482 : {
5483 620858 : bool agg_values_from_caller = false;
5484 620858 : bool agg_jf_preserved = false;
5485 620858 : unsigned unit_delta = UINT_MAX;
5486 620858 : int src_idx = -1;
5487 620858 : ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
5488 : index);
5489 :
5490 620858 : if (jfunc->type == IPA_JF_PASS_THROUGH
5491 620858 : && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5492 : {
5493 57030 : agg_values_from_caller = true;
5494 57030 : agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
5495 57030 : src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5496 57030 : unit_delta = 0;
5497 : }
5498 563828 : else if (jfunc->type == IPA_JF_ANCESTOR
5499 563828 : && ipa_get_jf_ancestor_agg_preserved (jfunc))
5500 : {
5501 407 : agg_values_from_caller = true;
5502 407 : agg_jf_preserved = true;
5503 407 : src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5504 407 : unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
5505 : }
5506 :
5507 620858 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5508 620858 : if (agg_values_from_caller)
5509 : {
5510 57437 : if (caller_info->ipcp_orig_node)
5511 : {
5512 11057 : struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5513 11057 : ipcp_transformation *ts
5514 11057 : = ipcp_get_transformation_summary (cs->caller);
5515 11057 : ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5516 11057 : ipcp_param_lattices *orig_plats
5517 11057 : = ipa_get_parm_lattices (orig_info, src_idx);
5518 11057 : if (ts
5519 11057 : && orig_plats->aggs
5520 2988 : && (agg_jf_preserved || !orig_plats->aggs_by_ref))
5521 : {
5522 2508 : ipa_argagg_value_list src (ts);
5523 2508 : src.push_adjusted_values (src_idx, index, unit_delta, res);
5524 2508 : return;
5525 : }
5526 : }
5527 : else
5528 : {
5529 46380 : ipcp_param_lattices *src_plats
5530 46380 : = ipa_get_parm_lattices (caller_info, src_idx);
5531 46380 : if (src_plats->aggs
5532 2440 : && !src_plats->aggs_bottom
5533 2440 : && (agg_jf_preserved || !src_plats->aggs_by_ref))
5534 : {
5535 1450 : if (interim && (self_recursive_pass_through_p (cs, jfunc, index)
5536 7 : || self_recursive_ancestor_p (cs, jfunc, index)))
5537 : {
5538 637 : interim->push_adjusted_values (src_idx, index, unit_delta,
5539 : res);
5540 637 : return;
5541 : }
5542 813 : if (!src_plats->aggs_contain_variable)
5543 : {
5544 80 : push_agg_values_from_plats (src_plats, index, unit_delta,
5545 : res);
5546 80 : return;
5547 : }
5548 : }
5549 : }
5550 : }
5551 :
5552 617633 : if (!jfunc->agg.items)
5553 : return;
5554 220442 : bool first = true;
5555 220442 : unsigned prev_unit_offset = 0;
5556 1209556 : for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
5557 : {
5558 989114 : tree value, srcvalue;
5559 : /* Besides simple pass-through aggregate jump function, arithmetic
5560 : aggregate jump function could also bring same aggregate value as
5561 : parameter passed-in for self-feeding recursive call. For example,
5562 :
5563 : fn (int *i)
5564 : {
5565 : int j = *i & 1;
5566 : fn (&j);
5567 : }
5568 :
5569 : Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5570 989114 : if (interim
5571 338731 : && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
5572 989595 : && (srcvalue = interim->get_value(index,
5573 481 : agg_jf.offset / BITS_PER_UNIT)))
5574 : {
5575 950 : value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
5576 : srcvalue,
5577 475 : agg_jf.value.pass_through.operand,
5578 475 : agg_jf.value.pass_through.op_type);
5579 475 : value = ipacp_value_safe_for_type (agg_jf.type, value);
5580 : }
5581 : else
5582 988639 : value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
5583 : &agg_jf);
5584 989114 : if (value)
5585 : {
5586 966542 : struct ipa_argagg_value iav;
5587 966542 : iav.value = value;
5588 966542 : iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
5589 966542 : iav.index = index;
5590 966542 : iav.by_ref = jfunc->agg.by_ref;
5591 966542 : iav.killed = false;
5592 :
5593 966542 : gcc_assert (first
5594 : || iav.unit_offset > prev_unit_offset);
5595 966542 : prev_unit_offset = iav.unit_offset;
5596 966542 : first = false;
5597 :
5598 966542 : res->safe_push (iav);
5599 : }
5600 : }
5601 : return;
5602 : }
5603 :
5604 : /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5605 : description of ultimate callee of CS or the one it was cloned from (the
5606 : summary where lattices are). If INTERIM is non-NULL, it contains the
5607 : current interim state of collected aggregate values which can be used to
5608 : compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5609 : is true) and to skip parameters which clearly will not be part of intersection
5610 : with INTERIM. Nothing is pushed when CS has no ipa_edge_args summary. */
5611 :
5612 : static void
5613 222864 : push_agg_values_from_edge (struct cgraph_edge *cs,
5614 : ipa_node_params *dest_info,
5615 : vec<ipa_argagg_value> *res,
5616 : const ipa_argagg_value_list *interim,
5617 : bool optimize_self_recursion)
5618 : {
5619 222864 : ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5620 222864 : if (!args)
5621 : return;
5622 :
5623 445728 : int count = MIN (ipa_get_param_count (dest_info),
5624 : ipa_get_cs_argument_count (args));
5625 :
5626 222864 : unsigned interim_index = 0;
5627 930884 : for (int index = 0; index < count; index++)
5628 : {
5629 708020 : if (interim)
5630 : {
5631 274719 : while (interim_index < interim->m_elts.size ()
5632 250989 : && interim->m_elts[interim_index].value
5633 482739 : && interim->m_elts[interim_index].index < index)
5634 131243 : interim_index++;
5635 198366 : if (interim_index >= interim->m_elts.size ()
5636 143476 : || interim->m_elts[interim_index].index > index)
5637 54890 : continue;
5638 : }
5639 :
5640 653130 : ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
5641 653130 : if (!ipa_is_param_used (dest_info, index)
5642 653130 : || plats->aggs_bottom)
5643 32272 : continue;
5644 620906 : push_agg_values_for_index_from_edge (cs, index, res,
5645 : optimize_self_recursion ? interim
5646 : : NULL);
5647 : }
5648 : }
5649 :
5650 :
5651 : /* Look at edges in CALLERS of NODE and collect all known aggregate values that arrive
5652 : from all of them into INTERIM. Return how many there are, zero if there are none. */
5653 :
5654 : static unsigned int
5655 167572 : find_aggregate_values_for_callers_subset_1 (vec<ipa_argagg_value> &interim,
5656 : struct cgraph_node *node,
5657 : const vec<cgraph_edge *> &callers)
5658 : {
5659 167572 : ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5660 167572 : if (dest_info->ipcp_orig_node)
5661 0 : dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5662 :
5663 : /* gather_edges_for_value puts a non-recursive call into the first element of
5664 : callers if it can. Its values seed INTERIM, which the remaining callers then only narrow. */
5665 167572 : push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);
5666 :
5667 252624 : unsigned valid_entries = interim.length ();
5668 167572 : if (!valid_entries)
5669 : return 0;
5670 :
5671 84199 : unsigned caller_count = callers.length();
5672 137767 : for (unsigned i = 1; i < caller_count; i++)
5673 : {
5674 55247 : auto_vec<ipa_argagg_value, 32> last;
5675 55247 : ipa_argagg_value_list avs (&interim);
5676 55247 : push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);
5677 :
5678 55247 : valid_entries = intersect_argaggs_with (interim, last);
5679 55247 : if (!valid_entries)
5680 1679 : return 0;
5681 55247 : }
5682 :
5683 : return valid_entries;
5684 : }
5685 :
5686 : /* Look at edges in CALLERS of NODE and collect all known aggregate values that arrive
5687 : from all of them and push those with a non-NULL value to RES. RES is left
5688 : untouched if there are none. */
5689 :
5690 : static void
5691 150599 : find_aggregate_values_for_callers_subset (vec<ipa_argagg_value> &res,
5692 : struct cgraph_node *node,
5693 : const vec<cgraph_edge *> &callers)
5694 : {
5695 150599 : auto_vec<ipa_argagg_value, 32> interim;
5696 150599 : unsigned valid_entries
5697 150599 : = find_aggregate_values_for_callers_subset_1 (interim, node, callers);
5698 150599 : if (!valid_entries)
5699 : return;
5700 :
5701 844057 : for (const ipa_argagg_value &av : interim)
5702 612013 : if (av.value)
5703 580664 : res.safe_push(av);
5704 : return;
5705 150599 : }
5706 :
5707 : /* Look at edges in CALLERS of NODE and collect all known aggregate values that arrive
5708 : from all of them and return those with a non-NULL value in a garbage-collected vector. Return
5709 : nullptr if there are none. */
5710 :
5711 : static struct vec<ipa_argagg_value, va_gc> *
5712 16973 : find_aggregate_values_for_callers_subset_gc (struct cgraph_node *node,
5713 : const vec<cgraph_edge *> &callers)
5714 : {
5715 16973 : auto_vec<ipa_argagg_value, 32> interim;
5716 16973 : unsigned valid_entries
5717 16973 : = find_aggregate_values_for_callers_subset_1 (interim, node, callers);
5718 16973 : if (!valid_entries)
5719 : return nullptr;
5720 :
5721 5172 : vec<ipa_argagg_value, va_gc> *res = NULL;
5722 5172 : vec_safe_reserve_exact (res, valid_entries);
5723 36534 : for (const ipa_argagg_value &av : interim)
5724 21018 : if (av.value)
5725 19617 : res->quick_push(av);
5726 5172 : gcc_checking_assert (res->length () == valid_entries);
5727 : return res;
5728 16973 : }
5729 :
5730 : /* Determine whether CS also brings all scalar values that the NODE is
5731 : specialized for, i.e. whether the jump function of CS yields an equal value for every non-NULL entry of NODE's known_csts. */
5732 :
5733 : static bool
5734 77 : cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5735 : struct cgraph_node *node)
5736 : {
5737 77 : ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5738 77 : int count = ipa_get_param_count (dest_info);
5739 77 : class ipa_node_params *caller_info;
5740 77 : class ipa_edge_args *args;
5741 77 : int i;
5742 :
5743 77 : caller_info = ipa_node_params_sum->get (cs->caller);
5744 77 : args = ipa_edge_args_sum->get (cs);
5745 177 : for (i = 0; i < count; i++)
5746 : {
5747 122 : struct ipa_jump_func *jump_func;
5748 122 : tree val, t;
5749 :
5750 122 : val = dest_info->known_csts[i];
5751 122 : if (!val)
5752 72 : continue;
5753 :
5754 100 : if (i >= ipa_get_cs_argument_count (args))
5755 : return false;
5756 50 : jump_func = ipa_get_ith_jump_func (args, i);
5757 50 : t = ipa_value_from_jfunc (caller_info, jump_func,
5758 : ipa_get_type (dest_info, i));
5759 50 : if (!t || !values_equal_for_ipcp_p (val, t))
5760 22 : return false;
5761 : }
5762 : return true;
5763 : }
5764 :
5765 : /* Determine whether CS also brings all aggregate values that NODE is
5766 : specialized for. Trivially true when NODE's transformation summary is missing or holds no aggregate values. */
5767 :
5768 : static bool
5769 55 : cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5770 : struct cgraph_node *node)
5771 : {
5772 55 : ipcp_transformation *ts = ipcp_get_transformation_summary (node);
5773 55 : if (!ts || vec_safe_is_empty (ts->m_agg_values))
5774 : return true;
5775 :
5776 45 : const ipa_argagg_value_list existing (ts->m_agg_values);
5777 45 : auto_vec<ipa_argagg_value, 32> edge_values;
5778 45 : ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5779 45 : gcc_checking_assert (dest_info->ipcp_orig_node);
5780 45 : dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5781 45 : push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
5782 45 : const ipa_argagg_value_list avl (&edge_values);
5783 45 : return avl.superset_of_p (existing);
5784 45 : }
5785 :
5786 : /* Given an original NODE and a VAL for which we have already created a
5787 : specialized clone, look whether there are incoming edges that still lead
5788 : into the old node but now also bring the requested value and also conform to
5789 : all other criteria such that they can be redirected to the specialized node.
5790 : This function can therefore redirect the final edge in a SCC. */
5791 :
5792 : template <typename valtype>
5793 : static void
5794 8780 : perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5795 : {
5796 : ipcp_value_source<valtype> *src;
5797 8780 : profile_count redirected_sum = profile_count::zero ();
5798 :
5799 122882 : for (src = val->sources; src; src = src->next)
5800 : {
5801 114102 : struct cgraph_edge *cs = src->cs;
5802 354056 : while (cs)
5803 : {
5804 239954 : if (cgraph_edge_brings_value_p (cs, src, node, val)
5805 77 : && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5806 240009 : && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5807 : {
5808 38 : if (dump_file)
5809 3 : fprintf (dump_file, " - adding an extra caller %s of %s\n",
5810 3 : cs->caller->dump_name (),
5811 3 : val->spec_node->dump_name ());
5812 :
5813 38 : cs->redirect_callee_duplicating_thunks (val->spec_node);
5814 38 : val->spec_node->expand_all_artificial_thunks ();
5815 38 : if (cs->count.ipa ().initialized_p ())
5816 0 : redirected_sum = redirected_sum + cs->count.ipa ();
5817 : }
5818 239954 : cs = get_next_cgraph_edge_clone (cs);
5819 : }
5820 : }
5821 :
5822 8780 : if (redirected_sum.nonzero_p ())
5823 0 : update_specialized_profile (val->spec_node, node, redirected_sum);
5824 8780 : }
5825 :
5826 : /* Return true if KNOWN_CONTEXTS contain at least one useful context, false otherwise (including when the vector is empty). */
5827 :
5828 : static bool
5829 4330 : known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5830 : {
5831 4330 : ipa_polymorphic_call_context *ctx;
5832 4330 : int i;
5833 :
5834 4330 : FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5835 102 : if (!ctx->useless_p ())
5836 : return true;
5837 : return false;
5838 : }
5839 :
5840 : /* Return a copy of KNOWN_CONTEXTS if it contains at least one useful context, otherwise return vNULL. */
5841 :
5842 : static vec<ipa_polymorphic_call_context>
5843 4330 : copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5844 : {
5845 4330 : if (known_contexts_useful_p (known_contexts))
5846 102 : return known_contexts.copy ();
5847 : else
5848 4228 : return vNULL;
5849 : }
5850 :
5851 : /* Return true if the VALUE is represented in KNOWN_CSTS at INDEX if OFFSET is
5852 : minus one or in AGGVALS for INDEX and OFFSET (given in bits) otherwise. The vector of polymorphic contexts is ignored. */
5853 :
5854 : DEBUG_FUNCTION bool
5855 4279 : ipcp_val_replacement_ok_p (vec<tree> &known_csts,
5856 : vec<ipa_polymorphic_call_context> &,
5857 : vec<ipa_argagg_value, va_gc> *aggvals,
5858 : int index, HOST_WIDE_INT offset, tree value)
5859 : {
5860 4279 : tree v;
5861 4279 : if (offset == -1)
5862 3129 : v = known_csts[index];
5863 : else
5864 : {
5865 1150 : const ipa_argagg_value_list avl (aggvals);
5866 1150 : v = avl.get_value (index, offset / BITS_PER_UNIT);
5867 : }
5868 :
5869 4279 : return v && values_equal_for_ipcp_p (v, value);
5870 : }
5871 :
5872 : /* Dump to F all the values in AVALS for which we are re-evaluating the effects
5873 : on the function represented by INFO. Polymorphic contexts and aggregate values are only printed when AVALS has any. */
5874 :
5875 : DEBUG_FUNCTION void
5876 68 : dump_reestimation_message (FILE *f, ipa_node_params *info,
5877 : const ipa_auto_call_arg_values &avals)
5878 : {
5879 68 : fprintf (f, " Re-estimating effects with\n"
5880 : " Scalar constants:");
5881 68 : int param_count = ipa_get_param_count (info);
5882 168 : for (int i = 0; i < param_count; i++)
5883 100 : if (avals.m_known_vals[i])
5884 : {
5885 44 : fprintf (f, " %i:", i);
5886 44 : print_ipcp_constant_value (f, avals.m_known_vals[i]);
5887 : }
5888 68 : fprintf (f, "\n");
5889 68 : if (!avals.m_known_contexts.is_empty ())
5890 : {
5891 0 : fprintf (f, " Pol. contexts:");
5892 0 : for (int i = 0; i < param_count; i++)
5893 0 : if (!avals.m_known_contexts[i].useless_p ())
5894 : {
5895 0 : fprintf (f, " %i:", i);
5896 0 : avals.m_known_contexts[i].dump (f);
5897 : }
5898 0 : fprintf (f, "\n");
5899 : }
5900 68 : if (!avals.m_known_aggs.is_empty ())
5901 : {
5902 24 : fprintf (f, " Aggregate replacements:");
5903 24 : ipa_argagg_value_list avs (&avals);
5904 24 : avs.dump (f);
5905 : }
5906 68 : }
5907 :
5908 : /* Return true if the VALUE is represented in KNOWN_CONTEXTS at INDEX and
5909 : OFFSET is equal to minus one (because source of a polymorphic context
5910 : cannot be an aggregate value). The scalar and aggregate value vectors are ignored. */
5911 :
5912 : DEBUG_FUNCTION bool
5913 51 : ipcp_val_replacement_ok_p (vec<tree> &,
5914 : vec<ipa_polymorphic_call_context> &known_contexts,
5915 : vec<ipa_argagg_value, va_gc> *,
5916 : int index, HOST_WIDE_INT offset,
5917 : ipa_polymorphic_call_context value)
5918 : {
5919 51 : if (offset != -1
5920 51 : || known_contexts.length () <= (unsigned) index
5921 102 : || known_contexts[index].useless_p ())
5922 : return false;
5923 :
5924 51 : if (known_contexts[index].equal_to (value))
5925 : return true;
5926 :
5927 : /* In some corner cases, the final gathering of contexts can figure out that
5928 : the available context is actually more precise than what we wanted to
5929 : clone for. Allow it. */
5930 0 : value.combine_with (known_contexts[index]);
5931 0 : return known_contexts[index].equal_to (value);
5932 : }
5933 :
5934 : /* Decide whether to create a special version of NODE for value VAL of
5935 : parameter at the given INDEX. If OFFSET is -1, the value is for the
5936 : parameter itself, otherwise it is stored at the given OFFSET of the
5937 : parameter. Return true if a specialized node was created. SELF_GEN_CLONES
5938 : is a vector which contains clones created for self-recursive calls with an
5939 : arithmetic pass-through jump function. CUR_SWEEP is the number of the
5940 : current sweep of the call-graph during the decision stage. */
5941 :
5942 : template <typename valtype>
5943 : static bool
5944 220626 : decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
5945 : ipcp_value<valtype> *val,
5946 : vec<cgraph_node *> *self_gen_clones, int cur_sweep)
5947 : {
5948 : int caller_count;
5949 220626 : sreal freq_sum;
5950 : profile_count count_sum, rec_count_sum;
5951 : bool called_without_ipa_profile;
5952 :
5953 220626 : if (val->spec_node)
5954 : {
5955 8780 : perhaps_add_new_callers (node, val);
5956 8780 : return false;
5957 : }
5958 211846 : else if (val->local_size_cost + overall_size > get_max_overall_size (node))
5959 : {
5960 450 : if (dump_file && (dump_flags & TDF_DETAILS))
5961 0 : fprintf (dump_file, " Ignoring candidate value because "
5962 : "maximum unit size would be reached with %li.\n",
5963 : val->local_size_cost + overall_size);
5964 450 : return false;
5965 : }
5966 211396 : else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
5967 : &rec_count_sum, &count_sum,
5968 : &called_without_ipa_profile))
5969 : return false;
5970 :
5971 150599 : if (!dbg_cnt (ipa_cp_values))
5972 : return false;
5973 :
5974 150599 : if (val->self_recursion_generated_p ())
5975 : {
5976 : /* The edge counts in this case might not have been adjusted yet.
5977 : Nevertheless, even if they were it would be only a guesswork which we
5978 : can do now. The recursive part of the counts can be derived from the
5979 : count of the original node anyway. */
5980 293 : if (node->count.ipa ().nonzero_p ())
5981 : {
5982 14 : unsigned dem = self_gen_clones->length () + 1;
5983 14 : rec_count_sum = node->count.ipa () / dem;
5984 : }
5985 : else
5986 265 : rec_count_sum = profile_count::zero ();
5987 : }
5988 :
5989 : /* get_info_about_necessary_edges only sums up ipa counts. */
5990 150599 : count_sum += rec_count_sum;
5991 :
5992 150599 : if (dump_file && (dump_flags & TDF_DETAILS))
5993 : {
5994 133 : fprintf (dump_file, " - considering value ");
5995 133 : print_ipcp_constant_value (dump_file, val->value);
5996 133 : fprintf (dump_file, " for ");
5997 133 : ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
5998 133 : if (offset != -1)
5999 61 : fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6000 133 : fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6001 : }
6002 :
6003 150599 : auto_vec<cgraph_edge *> callers
6004 : = gather_edges_for_value (val, node, caller_count);
6005 150599 : ipa_node_params *info = ipa_node_params_sum->get (node);
6006 150599 : ipa_auto_call_arg_values avals;
6007 150599 : avals.m_known_vals.safe_grow_cleared (ipa_get_param_count (info), true);
6008 150599 : find_scalar_values_for_callers_subset (avals.m_known_vals, info, callers);
6009 150599 : find_contexts_for_caller_subset (avals.m_known_contexts, info, callers);
6010 150599 : find_aggregate_values_for_callers_subset (avals.m_known_aggs, node, callers);
6011 :
6012 :
6013 150599 : if (good_cloning_opportunity_p (node, val->prop_time_benefit,
6014 : freq_sum, count_sum, val->prop_size_cost,
6015 : called_without_ipa_profile, cur_sweep))
6016 : ;
6017 : else
6018 : {
6019 : /* Extern inline functions are only meaningful to clone to propagate
6020 : values to their callees. */
6021 148719 : if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
6022 : {
6023 345 : if (dump_file && (dump_flags & TDF_DETAILS))
6024 0 : fprintf (dump_file, " Skipping extern inline.\n");
6025 146269 : return false;
6026 : }
6027 148374 : if (dump_file && (dump_flags & TDF_DETAILS))
6028 68 : dump_reestimation_message (dump_file, info, avals);
6029 :
6030 148374 : ipa_call_estimates estimates;
6031 148374 : estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
6032 148374 : int removable_params_cost = 0;
6033 957491 : for (tree t : avals.m_known_vals)
6034 512369 : if (t)
6035 205664 : removable_params_cost += estimate_move_cost (TREE_TYPE (t), true);
6036 :
6037 148374 : int size = estimates.size - caller_count * removable_params_cost;
6038 :
6039 148374 : if (size <= 0)
6040 : {
6041 1799 : if (dump_file)
6042 0 : fprintf (dump_file, " Code not going to grow.\n");
6043 : }
6044 : else
6045 : {
6046 : sreal time_benefit
6047 146575 : = ((estimates.nonspecialized_time - estimates.time)
6048 293150 : + hint_time_bonus (node, estimates)
6049 146575 : + (devirtualization_time_bonus (node, &avals)
6050 146575 : + removable_params_cost));
6051 :
6052 146575 : if (!good_cloning_opportunity_p (node, time_benefit, freq_sum,
6053 : count_sum, size,
6054 : called_without_ipa_profile,
6055 : cur_sweep))
6056 145924 : return false;
6057 : }
6058 : }
6059 :
6060 4330 : if (dump_file)
6061 137 : fprintf (dump_file, " Creating a specialized node of %s.\n",
6062 : node->dump_name ());
6063 :
6064 4330 : vec<tree> known_csts = avals.m_known_vals.copy ();
6065 : vec<ipa_polymorphic_call_context> known_contexts
6066 4330 : = copy_useful_known_contexts (avals.m_known_contexts);
6067 :
6068 4330 : vec<ipa_argagg_value, va_gc> *aggvals = NULL;
6069 4330 : vec_safe_reserve_exact (aggvals, avals.m_known_aggs.length ());
6070 23095 : for (const ipa_argagg_value &av : avals.m_known_aggs)
6071 10105 : aggvals->quick_push (av);
6072 4330 : gcc_checking_assert (ipcp_val_replacement_ok_p (known_csts, known_contexts,
6073 : aggvals, index,
6074 : offset, val->value));
6075 4330 : val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6076 : aggvals, callers);
6077 :
6078 4330 : if (val->self_recursion_generated_p ())
6079 142 : self_gen_clones->safe_push (val->spec_node);
6080 : else
6081 4188 : update_profiling_info (node, val->spec_node);
6082 :
6083 4330 : overall_size += val->local_size_cost;
6084 4330 : if (dump_file && (dump_flags & TDF_DETAILS))
6085 66 : fprintf (dump_file, " overall size reached %li\n",
6086 : overall_size);
6087 :
6088 : /* TODO: If for some lattice there is only one other known value
6089 : left, make a special node for it too. */
6090 :
6091 : return true;
6092 150599 : }
6093 :
6094 : /* Like irange::contains_p(), but convert VAL to the range of R if
6095 : necessary. */
6096 :
6097 : static inline bool
6098 47730 : ipa_range_contains_p (const vrange &r, tree val)
6099 : {
6100 47730 : if (r.undefined_p ())
6101 : return false;
6102 :
6103 47730 : tree type = r.type ();
6104 47730 : if (!wi::fits_to_tree_p (wi::to_wide (val), type))
6105 : return false;
6106 :
6107 47730 : val = fold_convert (type, val);
6108 47730 : return r.contains_p (val);
6109 : }
6110 :
6111 : /* Structure holding opportunitties so that they can be pre-sorted. */
6112 :
6113 220626 : struct cloning_opportunity_ranking
6114 : {
6115 : /* A very rough evaluation of likely benefit. */
6116 : sreal eval;
6117 : /* In the case of aggregate constants, a non-negative offset within their
6118 : aggregates. -1 for scalar constants, -2 for polymorphic contexts. */
6119 : HOST_WIDE_INT offset;
6120 : /* The value being considered for evaluation for cloning. */
6121 : ipcp_value_base *val;
6122 : /* Index of the formal parameter the value is coming in. */
6123 : int index;
6124 : };
6125 :
6126 : /* Helper function to qsort a vector of cloning opportunities. */
6127 :
6128 : static int
6129 2077975 : compare_cloning_opportunities (const void *a, const void *b)
6130 : {
6131 2077975 : const cloning_opportunity_ranking *o1
6132 : = (const cloning_opportunity_ranking *) a;
6133 2077975 : const cloning_opportunity_ranking *o2
6134 : = (const cloning_opportunity_ranking *) b;
6135 2077975 : if (o1->eval < o2->eval)
6136 : return 1;
6137 1629166 : if (o1->eval > o2->eval)
6138 525543 : return -1;
6139 : return 0;
6140 : }
6141 :
6142 : /* Use the estimations in VAL to determine how good a candidate it represents
6143 : for the purposes of ordering real evaluation of opportunities (which
6144 : includes information about incoming edges, among other things). */
6145 :
6146 : static sreal
6147 220626 : cloning_opportunity_ranking_evaluation (const ipcp_value_base *val)
6148 : {
6149 220626 : sreal e1 = (val->local_time_benefit * 1000) / MAX (val->local_size_cost, 1);
6150 220626 : sreal e2 = (val->prop_time_benefit * 1000) / MAX (val->prop_size_cost, 1);
6151 220626 : if (e2 > e1)
6152 15323 : return e2;
6153 : else
6154 205303 : return e1;
6155 : }
6156 :
6157 : /* Decide whether and what specialized clones of NODE should be created.
6158 : CUR_SWEEP is the number of the current sweep of the call-graph during the
6159 : decision stage. */
6160 :
6161 : static bool
6162 3196210 : decide_whether_version_node (struct cgraph_node *node, int cur_sweep)
6163 : {
6164 3196210 : ipa_node_params *info = ipa_node_params_sum->get (node);
6165 3196210 : int count = ipa_get_param_count (info);
6166 3196210 : bool ret = false;
6167 :
6168 3196210 : if (info->node_dead || count == 0)
6169 : return false;
6170 :
6171 2578349 : if (dump_file && (dump_flags & TDF_DETAILS))
6172 344 : fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6173 : node->dump_name ());
6174 :
6175 2578349 : auto_vec <cloning_opportunity_ranking, 32> opp_ranking;
6176 8606374 : for (int i = 0; i < count;i++)
6177 : {
6178 6028025 : if (!ipa_is_param_used (info, i))
6179 681025 : continue;
6180 :
6181 5347000 : class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6182 5347000 : ipcp_lattice<tree> *lat = &plats->itself;
6183 5347000 : ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6184 :
6185 5347000 : if (!lat->bottom
6186 5347000 : && !lat->is_single_const ())
6187 : {
6188 531221 : ipcp_value<tree> *val;
6189 646991 : for (val = lat->values; val; val = val->next)
6190 : {
6191 : /* If some values generated for self-recursive calls with
6192 : arithmetic jump functions fall outside of the known
6193 : range for the parameter, we can skip them. */
6194 115832 : if (TREE_CODE (val->value) == INTEGER_CST
6195 70155 : && !plats->m_value_range.bottom_p ()
6196 163500 : && !ipa_range_contains_p (plats->m_value_range.m_vr,
6197 : val->value))
6198 : {
6199 : /* This can happen also if a constant present in the source
6200 : code falls outside of the range of parameter's type, so we
6201 : cannot assert. */
6202 62 : if (dump_file && (dump_flags & TDF_DETAILS))
6203 : {
6204 0 : fprintf (dump_file, " - skipping%s value ",
6205 0 : val->self_recursion_generated_p ()
6206 : ? " self_recursion_generated" : "");
6207 0 : print_ipcp_constant_value (dump_file, val->value);
6208 0 : fprintf (dump_file, " because it is outside known "
6209 : "value range.\n");
6210 : }
6211 62 : continue;
6212 : }
6213 115708 : cloning_opportunity_ranking opp;
6214 115708 : opp.eval = cloning_opportunity_ranking_evaluation (val);
6215 115708 : opp.offset = -1;
6216 115708 : opp.val = val;
6217 115708 : opp.index = i;
6218 115708 : opp_ranking.safe_push (opp);
6219 : }
6220 : }
6221 :
6222 5347000 : if (!plats->aggs_bottom)
6223 : {
6224 560317 : struct ipcp_agg_lattice *aglat;
6225 560317 : ipcp_value<tree> *val;
6226 701063 : for (aglat = plats->aggs; aglat; aglat = aglat->next)
6227 139660 : if (!aglat->bottom && aglat->values
6228 : /* If the following is false, the one value has been considered
6229 : for cloning for all contexts. */
6230 259530 : && (plats->aggs_contain_variable
6231 192958 : || !aglat->is_single_const ()))
6232 178547 : for (val = aglat->values; val; val = val->next)
6233 : {
6234 101147 : cloning_opportunity_ranking opp;
6235 101147 : opp.eval = cloning_opportunity_ranking_evaluation (val);
6236 101147 : opp.offset = aglat->offset;
6237 101147 : opp.val = val;
6238 101147 : opp.index = i;
6239 101147 : opp_ranking.safe_push (opp);
6240 : }
6241 : }
6242 :
6243 5347000 : if (!ctxlat->bottom
6244 6591220 : && !ctxlat->is_single_const ())
6245 : {
6246 546124 : ipcp_value<ipa_polymorphic_call_context> *val;
6247 549895 : for (val = ctxlat->values; val; val = val->next)
6248 7542 : if (!val->value.useless_p ())
6249 : {
6250 3771 : cloning_opportunity_ranking opp;
6251 3771 : opp.eval = cloning_opportunity_ranking_evaluation (val);
6252 3771 : opp.offset = -2;
6253 3771 : opp.val = val;
6254 3771 : opp.index = i;
6255 3771 : opp_ranking.safe_push (opp);
6256 : }
6257 : }
6258 : }
6259 :
6260 2578349 : if (!opp_ranking.is_empty ())
6261 : {
6262 51441 : opp_ranking.qsort (compare_cloning_opportunities);
6263 51441 : auto_vec <cgraph_node *, 9> self_gen_clones;
6264 374949 : for (const cloning_opportunity_ranking &opp : opp_ranking)
6265 220626 : if (opp.offset == -2)
6266 : {
6267 3771 : ipcp_value<ipa_polymorphic_call_context> *val
6268 : = static_cast <ipcp_value<ipa_polymorphic_call_context> *>
6269 : (opp.val);
6270 3771 : ret |= decide_about_value (node, opp.index, -1, val,
6271 : &self_gen_clones, cur_sweep);
6272 : }
6273 : else
6274 : {
6275 216855 : ipcp_value<tree> *val = static_cast<ipcp_value<tree> *> (opp.val);
6276 216855 : ret |= decide_about_value (node, opp.index, opp.offset, val,
6277 : &self_gen_clones, cur_sweep);
6278 : }
6279 :
6280 102882 : if (!self_gen_clones.is_empty ())
6281 : {
6282 33 : self_gen_clones.safe_push (node);
6283 33 : update_counts_for_self_gen_clones (node, self_gen_clones);
6284 : }
6285 51441 : }
6286 :
6287 2578349 : struct caller_statistics stats;
6288 2578349 : init_caller_stats (&stats);
6289 2578349 : node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
6290 : false);
6291 2578349 : if (!stats.n_calls)
6292 : {
6293 1301355 : if (dump_file)
6294 942 : fprintf (dump_file, " Not cloning for all contexts because "
6295 : "there are no callers of the original node (any more).\n");
6296 1301355 : return ret;
6297 : }
6298 :
6299 1276994 : bool do_clone_for_all_contexts = false;
6300 1276994 : ipa_auto_call_arg_values avals;
6301 1276994 : int removable_params_cost;
6302 1276994 : bool ctx_independent_const
6303 1276994 : = gather_context_independent_values (info, &avals, &removable_params_cost);
6304 1276994 : sreal devirt_bonus = devirtualization_time_bonus (node, &avals);
6305 1262437 : if (ctx_independent_const || devirt_bonus > 0
6306 2539425 : || (removable_params_cost && clone_for_param_removal_p (node)))
6307 : {
6308 62422 : ipa_call_estimates estimates;
6309 :
6310 62422 : estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
6311 62422 : sreal time = estimates.nonspecialized_time - estimates.time;
6312 62422 : time += devirt_bonus;
6313 62422 : time += hint_time_bonus (node, estimates);
6314 62422 : time += removable_params_cost;
6315 62422 : int size = estimates.size - stats.n_calls * removable_params_cost;
6316 :
6317 62422 : if (dump_file && (dump_flags & TDF_DETAILS))
6318 24 : fprintf (dump_file, " - context independent values, size: %i, "
6319 : "time_benefit: %f\n", size, (time).to_double ());
6320 :
6321 62422 : if (size <= 0 || node->local)
6322 : {
6323 16878 : if (!dbg_cnt (ipa_cp_values))
6324 0 : return ret;
6325 :
6326 16878 : do_clone_for_all_contexts = true;
6327 16878 : if (dump_file)
6328 106 : fprintf (dump_file, " Decided to specialize for all "
6329 : "known contexts, code not going to grow.\n");
6330 : }
6331 45544 : else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
6332 : stats.count_sum, size,
6333 45544 : stats.called_without_ipa_profile,
6334 : cur_sweep))
6335 : {
6336 191 : if (size + overall_size <= get_max_overall_size (node))
6337 : {
6338 191 : if (!dbg_cnt (ipa_cp_values))
6339 : return ret;
6340 :
6341 191 : do_clone_for_all_contexts = true;
6342 191 : overall_size += size;
6343 191 : if (dump_file)
6344 11 : fprintf (dump_file, " Decided to specialize for all "
6345 : "known contexts, growth (to %li) deemed "
6346 : "beneficial.\n", overall_size);
6347 : }
6348 0 : else if (dump_file && (dump_flags & TDF_DETAILS))
6349 0 : fprintf (dump_file, " Not cloning for all contexts because "
6350 : "maximum unit size would be reached with %li.\n",
6351 : size + overall_size);
6352 : }
6353 45353 : else if (dump_file && (dump_flags & TDF_DETAILS))
6354 0 : fprintf (dump_file, " Not cloning for all contexts because "
6355 : "!good_cloning_opportunity_p.\n");
6356 : }
6357 :
6358 1276994 : if (do_clone_for_all_contexts)
6359 : {
6360 17069 : auto_vec<cgraph_edge *> callers = node->collect_callers ();
6361 :
6362 82192 : for (int i = callers.length () - 1; i >= 0; i--)
6363 : {
6364 48054 : cgraph_edge *cs = callers[i];
6365 48054 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6366 :
6367 48054 : if (caller_info && caller_info->node_dead)
6368 2711 : callers.unordered_remove (i);
6369 : }
6370 :
6371 17069 : if (!adjust_callers_for_value_intersection (callers, node))
6372 : /* If node is not called by anyone, or all its caller edges are
6373 : self-recursive, the node is not really in use, no need to do
6374 : cloning. */
6375 96 : return ret;
6376 :
6377 16973 : if (dump_file)
6378 115 : fprintf (dump_file, " Creating a specialized node of %s "
6379 : "for all known contexts.\n", node->dump_name ());
6380 :
6381 16973 : vec<tree> known_csts = vNULL;
6382 16973 : known_csts.safe_grow_cleared (count, true);
6383 16973 : find_scalar_values_for_callers_subset (known_csts, info, callers);
6384 16973 : vec<ipa_polymorphic_call_context> known_contexts = vNULL;
6385 16973 : find_contexts_for_caller_subset (known_contexts, info, callers);
6386 16973 : vec<ipa_argagg_value, va_gc> *aggvals
6387 16973 : = find_aggregate_values_for_callers_subset_gc (node, callers);
6388 :
6389 16973 : struct cgraph_node *clone = create_specialized_node (node, known_csts,
6390 : known_contexts,
6391 : aggvals, callers);
6392 16973 : ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6393 16973 : ret = true;
6394 17069 : }
6395 :
6396 : return ret;
6397 3855343 : }
6398 :
6399 : /* Transitively mark all callees of NODE within the same SCC as not dead. */
6400 :
6401 : static void
6402 4604 : spread_undeadness (struct cgraph_node *node)
6403 : {
6404 4604 : struct cgraph_edge *cs;
6405 :
6406 15509 : for (cs = node->callees; cs; cs = cs->next_callee)
6407 10905 : if (ipa_edge_within_scc (cs))
6408 : {
6409 832 : struct cgraph_node *callee;
6410 832 : class ipa_node_params *info;
6411 :
6412 832 : callee = cs->callee->function_symbol (NULL);
6413 832 : info = ipa_node_params_sum->get (callee);
6414 :
6415 832 : if (info && info->node_dead)
6416 : {
6417 68 : info->node_dead = 0;
6418 68 : spread_undeadness (callee);
6419 : }
6420 : }
6421 4604 : }
6422 :
6423 : /* Return true if NODE has a caller from outside of its SCC that is not
6424 : dead. Worker callback for cgraph_for_node_and_aliases. */
6425 :
6426 : static bool
6427 15848 : has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6428 : void *data ATTRIBUTE_UNUSED)
6429 : {
6430 15848 : struct cgraph_edge *cs;
6431 :
6432 81144 : for (cs = node->callers; cs; cs = cs->next_caller)
6433 65751 : if (cs->caller->thunk
6434 65751 : && cs->caller->call_for_symbol_thunks_and_aliases
6435 0 : (has_undead_caller_from_outside_scc_p, NULL, true))
6436 : return true;
6437 65751 : else if (!ipa_edge_within_scc (cs))
6438 : {
6439 65499 : ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6440 65499 : if (!caller_info /* Unoptimized caller are like dead ones. */
6441 65497 : || !caller_info->node_dead)
6442 : return true;
6443 : }
6444 : return false;
6445 : }
6446 :
6447 :
6448 : /* Identify nodes within the same SCC as NODE which are no longer needed
6449 : because of new clones and will be removed as unreachable. */
6450 :
6451 : static void
6452 19352 : identify_dead_nodes (struct cgraph_node *node)
6453 : {
6454 19352 : struct cgraph_node *v;
6455 38984 : for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6456 19632 : if (v->local)
6457 : {
6458 15610 : ipa_node_params *info = ipa_node_params_sum->get (v);
6459 15610 : if (info
6460 31220 : && !v->call_for_symbol_thunks_and_aliases
6461 15610 : (has_undead_caller_from_outside_scc_p, NULL, true))
6462 15155 : info->node_dead = 1;
6463 : }
6464 :
6465 38984 : for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6466 : {
6467 19632 : ipa_node_params *info = ipa_node_params_sum->get (v);
6468 19632 : if (info && !info->node_dead)
6469 4536 : spread_undeadness (v);
6470 : }
6471 :
6472 19352 : if (dump_file && (dump_flags & TDF_DETAILS))
6473 : {
6474 105 : for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6475 54 : if (ipa_node_params_sum->get (v)
6476 54 : && ipa_node_params_sum->get (v)->node_dead)
6477 32 : fprintf (dump_file, " Marking node as dead: %s.\n",
6478 : v->dump_name ());
6479 : }
6480 19352 : }
6481 :
6482 : /* Removes all useless callback edges from the callgraph. Useless callback
6483 : edges might mess up the callgraph, because they might be impossible to
6484 : redirect and so on, leading to crashes. Their usefulness is evaluated
6485 : through callback_edge_useful_p. */
6486 :
6487 : static void
6488 130823 : purge_useless_callback_edges ()
6489 : {
6490 130823 : if (dump_file)
6491 161 : fprintf (dump_file, "\nPurging useless callback edges:\n");
6492 :
6493 130823 : cgraph_edge *e;
6494 130823 : cgraph_node *node;
6495 1422196 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6496 : {
6497 6698925 : for (e = node->callees; e; e = e->next_callee)
6498 : {
6499 5407552 : if (e->has_callback)
6500 : {
6501 13660 : if (dump_file)
6502 3 : fprintf (dump_file, "\tExamining callbacks of edge %s -> %s:\n",
6503 3 : e->caller->dump_name (), e->callee->dump_name ());
6504 13660 : if (!lookup_attribute (CALLBACK_ATTR_IDENT,
6505 13660 : DECL_ATTRIBUTES (e->callee->decl))
6506 13660 : && !callback_is_special_cased (e->callee->decl, e->call_stmt))
6507 : {
6508 1 : if (dump_file)
6509 0 : fprintf (
6510 : dump_file,
6511 : "\t\tPurging callbacks, because the callback-dispatching"
6512 : "function no longer has any callback attributes.\n");
6513 1 : e->purge_callback_edges ();
6514 1 : continue;
6515 : }
6516 13659 : cgraph_edge *cbe, *next;
6517 27318 : for (cbe = e->first_callback_edge (); cbe; cbe = next)
6518 : {
6519 13659 : next = cbe->next_callback_edge ();
6520 13659 : if (!callback_edge_useful_p (cbe))
6521 : {
6522 13297 : if (dump_file)
6523 1 : fprintf (dump_file,
6524 : "\t\tCallback edge %s -> %s not deemed "
6525 : "useful, removing.\n",
6526 1 : cbe->caller->dump_name (),
6527 1 : cbe->callee->dump_name ());
6528 13297 : cgraph_edge::remove (cbe);
6529 : }
6530 : else
6531 : {
6532 362 : if (dump_file)
6533 2 : fprintf (dump_file,
6534 : "\t\tKept callback edge %s -> %s "
6535 : "because it looks useful.\n",
6536 2 : cbe->caller->dump_name (),
6537 2 : cbe->callee->dump_name ());
6538 : }
6539 : }
6540 : }
6541 : }
6542 : }
6543 :
6544 130823 : if (dump_file)
6545 161 : fprintf (dump_file, "\n");
6546 130823 : }
6547 :
6548 : /* The decision stage. Iterate over the topological order of call graph nodes
6549 : TOPO and make specialized clones if deemed beneficial. */
6550 :
6551 : static void
6552 130823 : ipcp_decision_stage (class ipa_topo_info *topo)
6553 : {
6554 130823 : int i;
6555 :
6556 130823 : if (dump_file)
6557 161 : fprintf (dump_file, "\nIPA decision stage (%i sweeps):\n",
6558 : max_number_sweeps);
6559 :
6560 498241 : for (int cur_sweep = 1; cur_sweep <= max_number_sweeps; cur_sweep++)
6561 : {
6562 367418 : if (dump_file && (dump_flags & TDF_DETAILS))
6563 144 : fprintf (dump_file, "\nIPA decision sweep number %i (out of %i):\n",
6564 : cur_sweep, max_number_sweeps);
6565 :
6566 4355787 : for (i = topo->nnodes - 1; i >= 0; i--)
6567 : {
6568 3988369 : struct cgraph_node *node = topo->order[i];
6569 3988369 : bool change = false, iterate = true;
6570 :
6571 7996097 : while (iterate)
6572 : {
6573 : struct cgraph_node *v;
6574 : iterate = false;
6575 4022578 : for (v = node;
6576 8030306 : v;
6577 4022578 : v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6578 4022578 : if (v->has_gimple_body_p ()
6579 3800947 : && ipcp_versionable_function_p (v)
6580 4022578 : && (cur_sweep
6581 3196210 : <= opt_for_fn (node->decl, param_ipa_cp_sweeps)))
6582 3196210 : iterate |= decide_whether_version_node (v, cur_sweep);
6583 :
6584 4007728 : change |= iterate;
6585 : }
6586 3988369 : if (change)
6587 19352 : identify_dead_nodes (node);
6588 : }
6589 : }
6590 :
6591 : /* Currently, the primary use of callback edges is constant propagation.
6592 : Constant propagation is now over, so we have to remove unused callback
6593 : edges. */
6594 130823 : purge_useless_callback_edges ();
6595 130823 : }
6596 :
6597 : /* Look up all VR and bits information that we have discovered and copy it
6598 : over to the transformation summary. */
6599 :
6600 : static void
6601 130823 : ipcp_store_vr_results (void)
6602 : {
6603 130823 : cgraph_node *node;
6604 :
6605 1422196 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6606 : {
6607 1291373 : ipa_node_params *info = ipa_node_params_sum->get (node);
6608 1291373 : bool dumped_sth = false;
6609 1291373 : bool found_useful_result = false;
6610 1291373 : bool do_vr = true;
6611 1291373 : bool do_bits = true;
6612 :
6613 : /* If the function is not local, the gathered information is only useful
6614 : for clones. */
6615 1291373 : if (!node->local)
6616 1126317 : continue;
6617 :
6618 165056 : if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6619 : {
6620 4819 : if (dump_file)
6621 6 : fprintf (dump_file, "Not considering %s for VR discovery "
6622 : "and propagate; -fipa-ipa-vrp: disabled.\n",
6623 : node->dump_name ());
6624 : do_vr = false;
6625 : }
6626 165056 : if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp))
6627 : {
6628 4790 : if (dump_file)
6629 2 : fprintf (dump_file, "Not considering %s for ipa bitwise "
6630 : "propagation ; -fipa-bit-cp: disabled.\n",
6631 : node->dump_name ());
6632 4790 : do_bits = false;
6633 : }
6634 4790 : if (!do_bits && !do_vr)
6635 4784 : continue;
6636 :
6637 160272 : if (info->ipcp_orig_node)
6638 21110 : info = ipa_node_params_sum->get (info->ipcp_orig_node);
6639 160272 : if (info->lattices.is_empty ())
6640 : /* Newly expanded artificial thunks do not have lattices. */
6641 51195 : continue;
6642 :
6643 109077 : unsigned count = ipa_get_param_count (info);
6644 225443 : for (unsigned i = 0; i < count; i++)
6645 : {
6646 176236 : ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6647 176236 : if (do_vr
6648 176210 : && !plats->m_value_range.bottom_p ()
6649 233941 : && !plats->m_value_range.top_p ())
6650 : {
6651 : found_useful_result = true;
6652 : break;
6653 : }
6654 118532 : if (do_bits && plats->bits_lattice.constant_p ())
6655 : {
6656 : found_useful_result = true;
6657 : break;
6658 : }
6659 : }
6660 109077 : if (!found_useful_result)
6661 49207 : continue;
6662 :
6663 59870 : ipcp_transformation_initialize ();
6664 59870 : ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6665 59870 : vec_safe_reserve_exact (ts->m_vr, count);
6666 :
6667 217668 : for (unsigned i = 0; i < count; i++)
6668 : {
6669 157798 : ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6670 157798 : ipcp_bits_lattice *bits = NULL;
6671 :
6672 157798 : if (do_bits
6673 157794 : && plats->bits_lattice.constant_p ()
6674 249301 : && dbg_cnt (ipa_cp_bits))
6675 91503 : bits = &plats->bits_lattice;
6676 :
6677 157798 : if (do_vr
6678 157774 : && !plats->m_value_range.bottom_p ()
6679 109794 : && !plats->m_value_range.top_p ()
6680 267592 : && dbg_cnt (ipa_cp_vr))
6681 : {
6682 109794 : if (bits)
6683 : {
6684 86305 : value_range tmp = plats->m_value_range.m_vr;
6685 86305 : tree type = ipa_get_type (info, i);
6686 172610 : irange_bitmask bm (wide_int::from (bits->get_value (),
6687 86305 : TYPE_PRECISION (type),
6688 86305 : TYPE_SIGN (type)),
6689 172610 : wide_int::from (bits->get_mask (),
6690 86305 : TYPE_PRECISION (type),
6691 172610 : TYPE_SIGN (type)));
6692 86305 : tmp.update_bitmask (bm);
6693 : // Reflecting the bitmask on the ranges can sometime
6694 : // produce an UNDEFINED value if the the bitmask update
6695 : // was previously deferred. See PR 120048.
6696 86305 : if (tmp.undefined_p ())
6697 0 : tmp.set_varying (type);
6698 86305 : ipa_vr vr (tmp);
6699 86305 : ts->m_vr->quick_push (vr);
6700 86305 : }
6701 : else
6702 : {
6703 23489 : ipa_vr vr (plats->m_value_range.m_vr);
6704 23489 : ts->m_vr->quick_push (vr);
6705 : }
6706 : }
6707 48004 : else if (bits)
6708 : {
6709 5198 : tree type = ipa_get_type (info, i);
6710 5198 : value_range tmp;
6711 5198 : tmp.set_varying (type);
6712 10396 : irange_bitmask bm (wide_int::from (bits->get_value (),
6713 5198 : TYPE_PRECISION (type),
6714 5198 : TYPE_SIGN (type)),
6715 10396 : wide_int::from (bits->get_mask (),
6716 5198 : TYPE_PRECISION (type),
6717 10396 : TYPE_SIGN (type)));
6718 5198 : tmp.update_bitmask (bm);
6719 : // Reflecting the bitmask on the ranges can sometime
6720 : // produce an UNDEFINED value if the the bitmask update
6721 : // was previously deferred. See PR 120048.
6722 5198 : if (tmp.undefined_p ())
6723 0 : tmp.set_varying (type);
6724 5198 : ipa_vr vr (tmp);
6725 5198 : ts->m_vr->quick_push (vr);
6726 5198 : }
6727 : else
6728 : {
6729 42806 : ipa_vr vr;
6730 42806 : ts->m_vr->quick_push (vr);
6731 : }
6732 :
6733 157798 : if (!dump_file || !bits)
6734 157383 : continue;
6735 :
6736 415 : if (!dumped_sth)
6737 : {
6738 296 : fprintf (dump_file, "Propagated bits info for function %s:\n",
6739 : node->dump_name ());
6740 296 : dumped_sth = true;
6741 : }
6742 415 : fprintf (dump_file, " param %i: value = ", i);
6743 415 : ipcp_print_widest_int (dump_file, bits->get_value ());
6744 415 : fprintf (dump_file, ", mask = ");
6745 415 : ipcp_print_widest_int (dump_file, bits->get_mask ());
6746 415 : fprintf (dump_file, "\n");
6747 : }
6748 : }
6749 130823 : }
6750 :
6751 : /* The IPCP driver. */
6752 :
6753 : static unsigned int
6754 130823 : ipcp_driver (void)
6755 : {
6756 130823 : class ipa_topo_info topo;
6757 :
6758 130823 : if (edge_clone_summaries == NULL)
6759 130823 : edge_clone_summaries = new edge_clone_summary_t (symtab);
6760 :
6761 130823 : ipa_check_create_node_params ();
6762 130823 : ipa_check_create_edge_args ();
6763 130823 : clone_num_suffixes = new hash_map<const char *, unsigned>;
6764 :
6765 130823 : if (dump_file)
6766 : {
6767 161 : fprintf (dump_file, "\nIPA structures before propagation:\n");
6768 161 : if (dump_flags & TDF_DETAILS)
6769 48 : ipa_print_all_params (dump_file);
6770 161 : ipa_print_all_jump_functions (dump_file);
6771 : }
6772 :
6773 : /* Topological sort. */
6774 130823 : build_toporder_info (&topo);
6775 : /* Do the interprocedural propagation. */
6776 130823 : ipcp_propagate_stage (&topo);
6777 : /* Decide what constant propagation and cloning should be performed. */
6778 130823 : ipcp_decision_stage (&topo);
6779 : /* Store results of value range and bits propagation. */
6780 130823 : ipcp_store_vr_results ();
6781 :
6782 : /* Free all IPCP structures. */
6783 261646 : delete clone_num_suffixes;
6784 130823 : free_toporder_info (&topo);
6785 130823 : delete edge_clone_summaries;
6786 130823 : edge_clone_summaries = NULL;
6787 130823 : ipa_free_all_structures_after_ipa_cp ();
6788 130823 : if (dump_file)
6789 161 : fprintf (dump_file, "\nIPA constant propagation end\n");
6790 130823 : return 0;
6791 : }
6792 :
6793 : /* Initialization and computation of IPCP data structures. This is the initial
6794 : intraprocedural analysis of functions, which gathers information to be
6795 : propagated later on. */
6796 :
6797 : static void
6798 126258 : ipcp_generate_summary (void)
6799 : {
6800 126258 : struct cgraph_node *node;
6801 :
6802 126258 : if (dump_file)
6803 163 : fprintf (dump_file, "\nIPA constant propagation start:\n");
6804 126258 : ipa_register_cgraph_hooks ();
6805 :
6806 1371028 : FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6807 1244770 : ipa_analyze_node (node);
6808 :
6809 126258 : varpool_node *vnode;
6810 1799408 : FOR_EACH_STATIC_INITIALIZER (vnode)
6811 1673150 : ipa_analyze_var_static_initializer (vnode);
6812 126258 : }
6813 :
6814 : namespace {
6815 :
6816 : const pass_data pass_data_ipa_cp =
6817 : {
6818 : IPA_PASS, /* type */
6819 : "cp", /* name */
6820 : OPTGROUP_NONE, /* optinfo_flags */
6821 : TV_IPA_CONSTANT_PROP, /* tv_id */
6822 : 0, /* properties_required */
6823 : 0, /* properties_provided */
6824 : 0, /* properties_destroyed */
6825 : 0, /* todo_flags_start */
6826 : ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6827 : };
6828 :
6829 : class pass_ipa_cp : public ipa_opt_pass_d
6830 : {
6831 : public:
6832 298828 : pass_ipa_cp (gcc::context *ctxt)
6833 : : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6834 : ipcp_generate_summary, /* generate_summary */
6835 : NULL, /* write_summary */
6836 : NULL, /* read_summary */
6837 : ipcp_write_transformation_summaries, /*
6838 : write_optimization_summary */
6839 : ipcp_read_transformation_summaries, /*
6840 : read_optimization_summary */
6841 : NULL, /* stmt_fixup */
6842 : 0, /* function_transform_todo_flags_start */
6843 : ipcp_transform_function, /* function_transform */
6844 298828 : NULL) /* variable_transform */
6845 298828 : {}
6846 :
6847 : /* opt_pass methods: */
6848 594863 : bool gate (function *) final override
6849 : {
6850 : /* FIXME: We should remove the optimize check after we ensure we never run
6851 : IPA passes when not optimizing. */
6852 594863 : return (flag_ipa_cp && optimize) || in_lto_p;
6853 : }
6854 :
6855 130823 : unsigned int execute (function *) final override { return ipcp_driver (); }
6856 :
6857 : }; // class pass_ipa_cp
6858 :
6859 : } // anon namespace
6860 :
6861 : ipa_opt_pass_d *
6862 298828 : make_pass_ipa_cp (gcc::context *ctxt)
6863 : {
6864 298828 : return new pass_ipa_cp (ctxt);
6865 : }
6866 :
6867 : /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6868 : within the same process. For use by toplev::finalize. */
6869 :
6870 : void
6871 268600 : ipa_cp_cc_finalize (void)
6872 : {
6873 268600 : overall_size = 0;
6874 268600 : orig_overall_size = 0;
6875 268600 : ipcp_free_transformation_sum ();
6876 268600 : }
6877 :
6878 : /* Given PARAM which must be a parameter of function FNDECL described by THIS,
6879 : return its index in the DECL_ARGUMENTS chain, using a pre-computed
6880 : DECL_UID-sorted vector if available (which is pre-computed only if there are
6881 : many parameters). Can return -1 if param is static chain not represented
6882 : among DECL_ARGUMENTS. */
6883 :
6884 : int
6885 125263 : ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
6886 : {
6887 125263 : gcc_assert (TREE_CODE (param) == PARM_DECL);
6888 125263 : if (m_uid_to_idx)
6889 : {
6890 0 : unsigned puid = DECL_UID (param);
6891 0 : const ipa_uid_to_idx_map_elt *res
6892 0 : = std::lower_bound (m_uid_to_idx->begin(), m_uid_to_idx->end (), puid,
6893 0 : [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
6894 : {
6895 0 : return elt.uid < uid;
6896 : });
6897 0 : if (res == m_uid_to_idx->end ()
6898 0 : || res->uid != puid)
6899 : {
6900 0 : gcc_assert (DECL_STATIC_CHAIN (fndecl));
6901 : return -1;
6902 : }
6903 0 : return res->index;
6904 : }
6905 :
6906 125263 : unsigned index = 0;
6907 285424 : for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6908 283928 : if (p == param)
6909 123767 : return (int) index;
6910 :
6911 1496 : gcc_assert (DECL_STATIC_CHAIN (fndecl));
6912 : return -1;
6913 : }
6914 :
6915 : /* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
6916 : according to the uid. */
6917 :
6918 : static int
6919 0 : compare_uids (const void *a, const void *b)
6920 : {
6921 0 : const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
6922 0 : const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
6923 0 : if (e1->uid < e2->uid)
6924 : return -1;
6925 0 : if (e1->uid > e2->uid)
6926 : return 1;
6927 0 : gcc_unreachable ();
6928 : }
6929 :
6930 : /* Assuming THIS describes FNDECL and it has sufficiently many parameters to
6931 : justify the overhead, create a DECL_UID-sorted vector to speed up mapping
6932 : from parameters to their indices in DECL_ARGUMENTS chain. */
6933 :
6934 : void
6935 22941 : ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
6936 : {
6937 22941 : int c = count_formal_params (fndecl);
6938 22941 : if (c < 32)
6939 : return;
6940 :
6941 0 : m_uid_to_idx = NULL;
6942 0 : vec_safe_reserve (m_uid_to_idx, c, true);
6943 0 : unsigned index = 0;
6944 0 : for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6945 : {
6946 0 : ipa_uid_to_idx_map_elt elt;
6947 0 : elt.uid = DECL_UID (p);
6948 0 : elt.index = index;
6949 0 : m_uid_to_idx->quick_push (elt);
6950 : }
6951 0 : m_uid_to_idx->qsort (compare_uids);
6952 : }
|