/* Code locality based function cloning.
   Copyright The GNU Toolchain Authors

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* This file implements the cloning required to improve partitioning of the
   callgraph for locality considerations.

   Partitioning for improving code locality.
   This pass aims to place frequently executed callchains closer together in
   memory to improve performance through better code locality.  If a frequent
   callchain cannot be placed together because some of its functions are
   already placed elsewhere, local function clones are created and all
   callers near the clones are redirected to use the copies.

   Locality code placement is done in two parts.
   1. An IPA pass executed after ipa-inline and before ipa-pure-const.
      Its execute stage prepares the plan to place all nodes into partitions.
   2. The WPA partition stage actually implements the plan.

   Brief overview of the IPA pass:
   1. Create and sort callchains.  If PGO is available, use real profile
      counts.  Otherwise, use a set of heuristics to sort the callchains.
   2. Create a partition plan for the callchains, processing them in the
      sorted order.
      1. If a function is unpartitioned, place it in the current partition.
      2. If a function is already placed in a partition other than the
         current partition as part of another callchain:
         Create a local clone in the current partition, if the cloning
         criteria are satisfied.
      3. Redirect any new caller to a local clone if one exists.
   Partition size is controlled by a param, to fine-tune behavior per
   program.  */
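
/* Illustrative sketch (standalone C++, not part of the pass; all names and
   the hotness metric are hypothetical simplifications): the overall shape of
   the plan described above -- sort entry points by hotness, greedily fill
   size-bounded partitions, and clone a function that is already placed
   elsewhere instead of moving it.  */
#if 0
#include <vector>
#include <algorithm>
#include <cstdio>

struct func { int id; double hotness; int partition = -1; };

static void
plan_partitions (std::vector<func> &funcs, int max_insns_per_part)
{
  /* Hotter entry points are processed first.  */
  std::vector<func *> order;
  for (func &f : funcs)
    order.push_back (&f);
  std::sort (order.begin (), order.end (),
	     [] (const func *a, const func *b)
	     { return a->hotness > b->hotness; });

  int part = 0, insns = 0;
  for (func *f : order)
    {
      if (insns > max_insns_per_part)	/* current partition is full */
	{ part++; insns = 0; }
      if (f->partition < 0)		/* unpartitioned: place it here */
	{ f->partition = part; insns++; }
      else if (f->partition != part)
	/* Already placed elsewhere: the real pass would create a local
	   clone in PART and redirect nearby callers to it.  */
	std::printf ("would clone %d into partition %d\n", f->id, part);
    }
}
#endif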

#include "config.h"
#define INCLUDE_ALGORITHM
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "function.h"
#include "tree.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "symtab-thunks.h"
#include "sreal.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "ipa-modref-tree.h"
#include "ipa-modref.h"
#include "symtab-clones.h"
#include "ipa-locality-cloning.h"

/* Locality partitions; assigns nodes to partitions.  These are used later in
   WPA partitioning.  */
vec<locality_partition> locality_partitions;

/* Map from original node to its latest clone.  Gets overwritten whenever a
   new clone is created from the same node.  */
hash_map<cgraph_node *, cgraph_node *> node_to_clone;
/* Map from clone to its original node.  */
hash_map<cgraph_node *, cgraph_node *> clone_to_node;
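
/* Illustrative sketch (standalone C++ with hypothetical integer ids, not
   GCC internals): the "latest clone wins" semantics of the two maps above.
   node_to_clone is overwritten on every new clone, while clone_to_node lets
   a lookup that starts from a clone reach the original first.  */
#if 0
#include <unordered_map>
#include <cassert>

int main ()
{
  std::unordered_map<int, int> node_to_clone, clone_to_node;
  int orig = 1, clone_a = 10, clone_b = 11;

  node_to_clone[orig] = clone_a;	/* first clone */
  clone_to_node[clone_a] = orig;
  node_to_clone[orig] = clone_b;	/* a later clone overwrites */
  clone_to_node[clone_b] = orig;

  /* Given any clone, normalize to the original, then fetch the latest.  */
  int n = clone_a;
  auto it = clone_to_node.find (n);
  if (it != clone_to_node.end ())
    n = it->second;
  assert (node_to_clone[n] == clone_b);
}
#endif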

/* Data structure to hold static heuristics and orders for cgraph_nodes.  */
struct locality_order
{
  cgraph_node *node;
  sreal order;
  locality_order (cgraph_node *node, sreal order) : node (node), order (order)
  {}
};

/* Return true if NODE is already in some partition.  */
static inline bool
node_partitioned_p (cgraph_node *node)
{
  return node->aux;
}

/* Add symbol NODE to partition PART.  */
static void
add_node_to_partition (locality_partition part, cgraph_node *node)
{
  struct cgraph_edge *e;
  if (node_partitioned_p (node))
    return;

  part->nodes.safe_push (node);
  node->aux = (void *) (uintptr_t) (part->part_id);

  if (!node->alias && node->get_partitioning_class () == SYMBOL_PARTITION)
    part->insns += ipa_size_summaries->get (node)->size;

  /* Add all inline clones and callees that are duplicated.  */
  for (e = node->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      add_node_to_partition (part, e->callee);
    /* omp declare_variant_alt or transparent_alias with definition or linker
       discardable (non-local comdat, but not forced and not
       used by non-LTO).  */
    else if (e->callee->get_partitioning_class () == SYMBOL_DUPLICATE)
      add_node_to_partition (part, e->callee);

  /* Add all thunks associated with the function.  */
  for (e = node->callers; e; e = e->next_caller)
    if (e->caller->thunk && !e->caller->inlined_to)
      add_node_to_partition (part, e->caller);

  /* Add all aliases associated with the symbol.  */
  struct ipa_ref *ref;
  FOR_EACH_ALIAS (node, ref)
    if (!ref->referring->transparent_alias)
      {
	cgraph_node *referring = dyn_cast<cgraph_node *> (ref->referring);
	/* Only add function aliases.
	   Varpool refs are added later in the LTO partitioning pass.  */
	if (referring)
	  add_node_to_partition (part, referring);
      }
    else
      {
	struct ipa_ref *ref2;
	/* We do not need to add transparent aliases if they are not used.
	   However, we must add aliases of transparent aliases if they
	   exist.  */
	FOR_EACH_ALIAS (ref->referring, ref2)
	  {
	    /* Nested transparent aliases are not permitted.  */
	    gcc_checking_assert (!ref2->referring->transparent_alias);
	    cgraph_node *referring = dyn_cast<cgraph_node *> (ref2->referring);
	    if (referring)
	      add_node_to_partition (part, referring);
	  }
      }
}

/* Return TRUE if NODE is in PARTITION.  */
static bool
node_in_partition_p (locality_partition partition, cgraph_node *node)
{
  return ((uintptr_t) (partition->part_id) == (uintptr_t) (node->aux));
}
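
/* Standalone sketch (plain C++; the fake_node type is hypothetical): how a
   partition id can be stashed in an opaque pointer field, mirroring the
   node->aux usage above.  Part ids start at 1, so a null aux means
   "unpartitioned".  */
#if 0
#include <cstdint>
#include <cassert>

struct fake_node { void *aux = nullptr; };

static bool partitioned_p (fake_node *n) { return n->aux; }

static void
add_to_partition (fake_node *n, int part_id)
{
  n->aux = (void *) (uintptr_t) part_id;   /* encode the id in the pointer */
}

static bool
in_partition_p (fake_node *n, int part_id)
{
  return (uintptr_t) n->aux == (uintptr_t) part_id;
}

int main ()
{
  fake_node n;
  assert (!partitioned_p (&n));
  add_to_partition (&n, 3);		   /* ids are 1-based */
  assert (partitioned_p (&n) && in_partition_p (&n, 3));
  assert (!in_partition_p (&n, 1));
}
#endif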

/* Helper function for qsort, used to break ties.  */
static int
compare_node_uids (cgraph_node *n1, cgraph_node *n2)
{
  int res = n1->get_uid () - n2->get_uid ();
  gcc_assert (res != 0);
  return res > 0 ? 1 : -1;
}

/* Helper function for qsort; sort nodes by order.  */
static int
static_profile_cmp (const void *pa, const void *pb)
{
  const locality_order *a = *static_cast<const locality_order *const *> (pa);
  const locality_order *b = *static_cast<const locality_order *const *> (pb);
  /* Ascending order.  */
  if (b->order < a->order)
    return 1;
  if (b->order > a->order)
    return -1;
  return compare_node_uids (a->node, b->node);
}

/* Helper function for qsort; sort nodes by profile count.  */
static int
compare_edge_profile_counts (const void *pa, const void *pb)
{
  const locality_order *a = *static_cast<const locality_order *const *> (pa);
  const locality_order *b = *static_cast<const locality_order *const *> (pb);

  profile_count cnt1 = a->node->count.ipa ();
  profile_count cnt2 = b->node->count.ipa ();
  if (!cnt1.compatible_p (cnt2))
    return static_profile_cmp (pa, pb);

  if (cnt1 < cnt2)
    return 1;
  if (cnt1 > cnt2)
    return -1;
  return static_profile_cmp (pa, pb);
}
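
/* Illustrative sketch (standalone C++ with a hypothetical item type): a
   qsort comparator in the same style as compare_edge_profile_counts above
   -- descending by a primary key, with a deterministic unique-id tiebreak
   so the resulting order is stable across runs.  */
#if 0
#include <cstdlib>
#include <cassert>

struct item { long count; int uid; };

static int
cmp_desc_count (const void *pa, const void *pb)
{
  const item *a = static_cast<const item *> (pa);
  const item *b = static_cast<const item *> (pb);
  if (a->count < b->count)
    return 1;				/* larger counts sort first */
  if (a->count > b->count)
    return -1;
  return a->uid < b->uid ? -1 : 1;	/* unique ids break ties */
}

int main ()
{
  item v[3] = { { 5, 2 }, { 9, 1 }, { 5, 0 } };
  std::qsort (v, 3, sizeof (item), cmp_desc_count);
  assert (v[0].count == 9 && v[1].uid == 0 && v[2].uid == 2);
}
#endif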

/* Create and return a new partition and increment NPARTITIONS.  */

static locality_partition
create_partition (int &npartitions)
{
  locality_partition part = XCNEW (struct locality_partition_def);
  npartitions++;
  part->part_id = npartitions;
  part->nodes.create (1);
  part->insns = 0;
  locality_partitions.safe_push (part);
  return part;
}

/* Structure for holding profile count information of callers of a node.  */
struct profile_stats
{
  /* Sum of non-recursive call counts.  */
  profile_count nonrec_count;

  /* Sum of recursive call counts.  */
  profile_count rec_count;

  /* If non-NULL, this node is the target of an alias or thunk, and calls
     from it should be counted in rec_count.  */
  cgraph_node *target;
};

/* Initialize fields of STATS.  */
static inline void
init_profile_stats (profile_stats *stats, cgraph_node *target = NULL)
{
  stats->nonrec_count = profile_count::zero ();
  stats->rec_count = profile_count::zero ();
  stats->target = target;
}

/* Helper function to accumulate call counts.  */
static bool
accumulate_profile_counts_after_cloning (cgraph_node *node, void *data)
{
  struct profile_stats *stats = (struct profile_stats *) data;
  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
    {
      if (!e->count.initialized_p ())
	continue;

      if (e->caller == stats->target)
	stats->rec_count += e->count.ipa ();
      else
	stats->nonrec_count += e->count.ipa ();
    }
  return false;
}

/* NEW_NODE is a previously created clone of ORIG_NODE already present in the
   current partition.  EDGES contains newly redirected edges to NEW_NODE.
   Adjust profile information for both nodes and scale their outgoing
   edges.  */

static void
adjust_profile_info_for_non_self_rec_edges (auto_vec<cgraph_edge *> &edges,
					    cgraph_node *new_node,
					    cgraph_node *orig_node)
{
  profile_count orig_node_count = orig_node->count.ipa ();
  profile_count edge_count = profile_count::zero ();
  profile_count final_new_count = profile_count::zero ();
  profile_count final_orig_count = profile_count::zero ();

  for (unsigned i = 0; i < edges.length (); ++i)
    if (edges[i]->count.initialized_p ())
      edge_count += edges[i]->count.ipa ();

  final_orig_count = orig_node_count - edge_count;

  /* NEW_NODE->count was adjusted for other callers when the clone was
     first created.  Just add the new edge count.  */
  final_new_count = new_node->count + edge_count;

  final_new_count = orig_node_count.combine_with_ipa_count (final_new_count);
  orig_node->count = final_orig_count;
  new_node->count = final_new_count;

  if (dump_file)
    {
      fprintf (dump_file, "Adjusting profile information for %s\n",
	       new_node->dump_asm_name ());
      fprintf (dump_file, "\tOriginal node %s\n", orig_node->dump_asm_name ());
      fprintf (dump_file, "\tOriginal count: ");
      orig_node_count.dump (dump_file);
      fprintf (dump_file, "\n\tAdjusted original count to: ");
      final_orig_count.dump (dump_file);
      fprintf (dump_file, "\n\tAdjusted clone count to: ");
      final_new_count.dump (dump_file);
      fprintf (dump_file, "\n");
    }

  /* Scale all callee edges according to adjusted counts.  */
  profile_count orig_node_count_copy = orig_node_count;
  profile_count::adjust_for_ipa_scaling (&final_new_count,
					 &orig_node_count_copy);
  for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy);
  for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy);

  profile_count::adjust_for_ipa_scaling (&final_orig_count, &orig_node_count);
  for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (final_orig_count, orig_node_count);
  for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (final_orig_count, orig_node_count);
}

/* Adjust profile counts of NEW_NODE and ORIG_NODE, where NEW_NODE is a clone
   of ORIG_NODE.
   Assumes that all eligible edges from the current partition so far have
   been redirected to NEW_NODE and that recursive edges have been
   adjusted.  */

static void
adjust_profile_info (cgraph_node *new_node, cgraph_node *orig_node)
{
  /* If all calls to NEW_NODE are non-recursive, subtract the corresponding
     count from ORIG_NODE and assign it to NEW_NODE; any unexpected remainder
     stays with ORIG_NODE.
     Recursive calls, if present, likely contribute the majority of the
     count; scale according to the redirected callers' counts.  */

  profile_count orig_node_count = orig_node->count.ipa ();
  profile_stats new_stats, orig_stats;

  init_profile_stats (&new_stats);
  init_profile_stats (&orig_stats);

  new_node->call_for_symbol_thunks_and_aliases
    (accumulate_profile_counts_after_cloning, &new_stats, false);
  orig_node->call_for_symbol_thunks_and_aliases
    (accumulate_profile_counts_after_cloning, &orig_stats, false);

  profile_count orig_nonrec_count = orig_stats.nonrec_count;
  profile_count orig_rec_count = orig_stats.rec_count;
  profile_count new_nonrec_count = new_stats.nonrec_count;
  profile_count new_rec_count = new_stats.rec_count;

  profile_count final_new_count = new_nonrec_count;
  profile_count final_orig_count = profile_count::zero ();

  /* All calls to NEW_NODE are non-recursive, or the recursive calls have
     zero count.  */
  if (!new_rec_count.nonzero_p ())
    final_orig_count = orig_node_count - new_nonrec_count;
  else
    {
      /* If ORIG_NODE is externally visible, indirect calls or calls from
	 another part of the code may contribute to the count.
	 update_profiling_info () from ipa-cp.cc pretends to have an extra
	 caller to represent the extra counts.  */
      if (!orig_node->local)
	{
	  profile_count pretend_count = (orig_node_count - new_nonrec_count -
					 orig_nonrec_count - orig_rec_count);
	  orig_nonrec_count += pretend_count;
	}

      /* The remaining rec_count is assigned in proportion to the clone's
	 non-recursive count.  */
      profile_count rec_count = orig_node_count - new_nonrec_count
				- orig_nonrec_count;
      profile_count new_rec_scaled
	= rec_count.apply_scale (new_nonrec_count,
				 new_nonrec_count + orig_nonrec_count);
      final_new_count += new_rec_scaled;
      final_orig_count = orig_node_count - final_new_count;
    }

  final_new_count = orig_node_count.combine_with_ipa_count (final_new_count);
  new_node->count = final_new_count;
  orig_node->count = final_orig_count;

  if (dump_file)
    {
      fprintf (dump_file, "Adjusting profile information for %s\n",
	       new_node->dump_asm_name ());
      fprintf (dump_file, "\tOriginal node %s\n", orig_node->dump_asm_name ());
      fprintf (dump_file, "\tOriginal count: ");
      orig_node_count.dump (dump_file);
      fprintf (dump_file, "\n\tAdjusted original count to: ");
      final_orig_count.dump (dump_file);
      fprintf (dump_file, "\n\tAdjusted clone count to: ");
      final_new_count.dump (dump_file);
      fprintf (dump_file, "\n");
    }

  /* Scale all callee edges according to adjusted counts.  */
  profile_count orig_node_count_copy = orig_node_count;
  profile_count::adjust_for_ipa_scaling (&final_new_count,
					 &orig_node_count_copy);
  for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy);
  for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy);

  profile_count::adjust_for_ipa_scaling (&final_orig_count, &orig_node_count);
  for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (final_orig_count, orig_node_count);
  for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (final_orig_count, orig_node_count);
}
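
/* Worked example (standalone C++, made-up numbers) of the split above: the
   original node has an IPA count of 1000; redirected non-recursive callers
   of the clone account for 300, remaining non-recursive callers of the
   original for 200, so the leftover 500 of recursive counts is divided
   300:200 between clone and original.  */
#if 0
#include <cassert>

int main ()
{
  long orig_count = 1000;
  long new_nonrec = 300;   /* non-recursive calls now reaching the clone */
  long orig_nonrec = 200;  /* non-recursive calls still reaching the original */

  long rec = orig_count - new_nonrec - orig_nonrec;		    /* 500 */
  long new_rec_scaled = rec * new_nonrec / (new_nonrec + orig_nonrec); /* 300 */

  long final_new = new_nonrec + new_rec_scaled;			    /* 600 */
  long final_orig = orig_count - final_new;			    /* 400 */
  assert (final_new == 600 && final_orig == 400);
}
#endif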

/* Return true if EDGE can be safely redirected to another callee.  */
static inline bool
edge_redirectable_p (cgraph_edge *edge, lto_locality_cloning_model cm)
{
  if (cm == LTO_LOCALITY_NON_INTERPOSABLE_CLONING)
    {
      /* Interposability may change on a per-edge basis.  */
      enum availability avail;
      avail = edge->callee->get_availability (edge->caller);
      if (avail <= AVAIL_INTERPOSABLE)
	return false;
    }
  return true;
}

/* Create a locality clone of CNODE and redirect all callers present in
   PARTITION.
   How the clone is created depends on whether CNODE itself is a clone
   or not.  */

static cgraph_node *
create_locality_clone (cgraph_node *cnode,
		       locality_partition partition, int &cl_num,
		       lto_locality_cloning_model cm)
{
  cgraph_node *cl_node = NULL;
  vec<cgraph_edge *> redirect_callers = vNULL;
  /* All callers of CNODE in the current partition are redirected.  */
  struct cgraph_edge *edge;
  for (edge = cnode->callers; edge; edge = edge->next_caller)
    {
      struct cgraph_node *caller = edge->caller;
      if (node_in_partition_p (partition, caller) && caller->definition
	  && caller != cnode && edge_redirectable_p (edge, cm))
	redirect_callers.safe_push (edge);
    }

  const char *suffix = "locality_clone";

  tree old_decl = cnode->decl;
  tree new_decl = copy_node (old_decl);

  /* Generate a new name for the new version.  */
  const char *name = IDENTIFIER_POINTER (DECL_NAME (old_decl));
  DECL_NAME (new_decl) = clone_function_name (name, suffix, cl_num);
  SET_DECL_ASSEMBLER_NAME (new_decl,
			   clone_function_name (old_decl, suffix, cl_num));
  cl_num++;
  if (dump_file)
    fprintf (dump_file, "\tNew name %s\n",
	     IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (new_decl)));

  cl_node = cnode->create_clone (new_decl, cnode->count /*profile_count*/,
				 false /*update_original*/, redirect_callers,
				 false /*call_duplication_hook*/,
				 NULL /*new_inlined_to*/,
				 NULL /*param_adjustments*/, suffix);

  set_new_clone_decl_and_node_flags (cl_node);

  if (cnode->ipa_transforms_to_apply.exists ())
    cl_node->ipa_transforms_to_apply
      = cnode->ipa_transforms_to_apply.copy ();

  if (dump_file)
    {
      fprintf (dump_file, "Cloned Node: %s %s\n", cnode->dump_asm_name (),
	       cl_node->dump_asm_name ());

      for (edge = cl_node->callers; edge; edge = edge->next_caller)
	fprintf (dump_file, "Redirected callers: %s\n",
		 edge->caller->dump_asm_name ());

      for (edge = cl_node->callees; edge; edge = edge->next_callee)
	fprintf (dump_file, "Callees of clone: %s %d\n",
		 edge->callee->dump_asm_name (), edge->frequency ());
    }
  return cl_node;
}

/* Redirect recursive edges of CLONE to correctly point to CLONE.  As part of
   the cloning process, all callee edges of a node are just duplicated but
   not redirected.  Therefore, these edges still call the original of CLONE.

   For a non-inlined CLONE, NEW_CALLEE == CLONE and ORIG_CALLEE is CLONE's
   original node.

   For an inlined node, self-recursion to CLONE's original node is handled
   the same way as in the non-inlined case; additionally, calls to
   CLONE->inlined_to are also recursive:
   NEW_CALLEE == CLONE->inlined_to and
   ORIG_CALLEE == the original node of CLONE->inlined_to.  */

static void
adjust_recursive_callees (cgraph_node *clone, cgraph_node *new_callee,
			  cgraph_node *orig_callee)
{
  cgraph_node *alias = NULL;
  for (cgraph_edge *e = clone->callees; e; e = e->next_callee)
    {
      if (!e->inline_failed)
	continue;

      /* Only self-cycles and local aliases are handled.  */
      cgraph_node *callee = e->callee;
      if (callee == orig_callee)
	{
	  cgraph_node **cl = node_to_clone.get (orig_callee);
	  gcc_assert (cl && *cl == new_callee);
	  e->redirect_callee_duplicating_thunks (new_callee);
	  if (dump_file)
	    fprintf (dump_file, "recursive call from %s to %s orig %s\n",
		     e->caller->dump_asm_name (), e->callee->dump_asm_name (),
		     callee->dump_asm_name ());
	}
      else if (callee->alias
	       && e->callee->ultimate_alias_target () == orig_callee)
	{
	  if (!alias)
	    {
	      alias = dyn_cast<cgraph_node *> (
		new_callee->noninterposable_alias ());
	    }
	  e->redirect_callee_duplicating_thunks (alias);
	  if (dump_file)
	    fprintf (dump_file, "recursive call from %s to %s orig %s\n",
		     e->caller->dump_asm_name (), e->callee->dump_asm_name (),
		     callee->dump_asm_name ());
	}
    }
  new_callee->expand_all_artificial_thunks ();
  if (alias)
    alias->expand_all_artificial_thunks ();
}
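
/* Illustrative sketch (standalone C++ with a hypothetical node structure):
   why cloning leaves recursive edges pointing at the original, and what the
   fix-up above does.  Duplicating a node copies its out-edges verbatim, so a
   self-call in the clone still targets the original until redirected.  */
#if 0
#include <vector>
#include <cassert>

struct node { int id; std::vector<node *> callees; };

int main ()
{
  node orig = { 1, {} };
  orig.callees.push_back (&orig);	/* self-recursive call */

  node clone = { 2, orig.callees };	/* out-edges copied verbatim */
  assert (clone.callees[0] == &orig);	/* clone still calls the original */

  for (node *&callee : clone.callees)	/* the redirection performed above */
    if (callee == &orig)
      callee = &clone;
  assert (clone.callees[0] == &clone);
}
#endif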

/* Create clones for CALLER's inlined callees; ORIG_INLINED_TO is the
   original node from clone_node_as_needed () such that NEW_INLINED_TO is a
   clone of it.  */

static void
inline_clones (cgraph_node *caller, cgraph_node *orig_inlined_to)
{
  struct cgraph_edge *edge;
  for (edge = caller->callees; edge; edge = edge->next_callee)
    {
      struct cgraph_node *callee = edge->callee;
      if (edge->inline_failed)
	continue;

      if (callee->inlined_to != orig_inlined_to)
	continue;

      struct cgraph_node *new_inlined_to, *cl;
      if (caller->inlined_to)
	new_inlined_to = caller->inlined_to;
      else
	new_inlined_to = caller;

      cl = callee->create_clone (callee->decl,
				 edge->count /*profile_count*/,
				 true /*update_original*/,
				 vNULL /*redirect_callers*/,
				 false /*call_duplication_hook*/,
				 new_inlined_to /*new_inlined_to*/,
				 NULL /*param_adjustments*/,
				 "locality_clone" /*suffix*/);
      edge->redirect_callee (cl);

      node_to_clone.put (callee, cl);
      clone_to_node.put (cl, callee);

      if (callee->thunk)
	{
	  thunk_info *info = thunk_info::get (callee);
	  *thunk_info::get_create (cl) = *info;
	}

      adjust_recursive_callees (cl, new_inlined_to, orig_inlined_to);
      adjust_recursive_callees (cl, cl, callee);
      if (dump_file)
	{
	  fprintf (dump_file, "Inline cloned\n");
	  cl->dump (dump_file);
	}

      /* Recursively inline till the end of this callchain.  */
      inline_clones (cl, orig_inlined_to);
    }
}

/* Clone EDGE->CALLEE if it, or a clone of it, is not already in PARTITION.
   Redirect all callers of EDGE->CALLEE that are in PARTITION, not just EDGE.
   If a clone is already present in PARTITION, redirect all edges from
   EDGE->CALLER to EDGE->CALLEE.  This is because we only visit one edge per
   caller-to-callee pair and redirect all the others from there.

   If cloning, also recursively clone inlined functions till the end of the
   callchain, because each inlined clone is an exclusive 1-1 copy with a
   single edge from its caller to the inlined node.

   There are 2 flows possible:
   1. Only redirect
      1.1. CNODE is already in the current partition - CNODE mustn't be a
	   locality_clone -> nothing to do
      1.2. A clone of CNODE is in the current partition - find out if it's
	   the correct clone for EDGE - it must be a locality_clone, but of
	   the exact same kind as the callee, i.e. orig or cp/sra clone; if
	   yes, redirect, else go to #2
      1.3. CNODE or a clone of CNODE is in the current partition but the
	   caller is inlined
   2. Clone and redirect
      2.1. CNODE is the original node
      2.2. CNODE itself is a clone
      Clone inlined nodes as well.
   Flavors of edges:
   1. Normal -> orig nodes, locality clones or cp/sra clones
   2. Recursive -> direct recursion
   3. Alias -> recursion via aliasing, or as a result of IPA code duplication
   4. Inline -> shouldn't be included in the callchain.  */

static cgraph_node *
clone_node_as_needed (cgraph_edge *edge, locality_partition partition,
		      int &cl_num, lto_locality_cloning_model cm)
{
  /* suitable_for_locality_cloning_p () currently prohibits cloning aliases
     due to potential versioning and materialization issues.  Could be
     enabled in the future.  suitable_for_locality_cloning_p () also checks
     interposability for CNODE, but not for edge redirection.  */
  struct cgraph_node *cnode = edge->callee;
  struct cgraph_node *caller = edge->caller;

  /* If a clone of CNODE is already in the partition:
     Get the latest clone of CNODE.  If the current partition has cloned
     CNODE, that clone should be returned.  Otherwise, the clone from a
     previous partition is returned.
     The original node and its clone shouldn't co-exist in the current
     partition.

     This is required if the callee was partitioned via another edge before
     the caller was, and we are now visiting the caller->callee edge.

     1) a -> b ==> a -> bc1; b was cloned, say via d -> bc1, a is orig
     2) ac1 -> b ==> ac1 -> bc1; b was cloned and a was just cloned
     3) a -> bc1 with bc2 present; mustn't happen, b was cloned and a was
	redirected without being partitioned first.
	Why we would do this again - multiple edges and something's wrong in
	partition_callchain ()
     4) ac1 -> bc1 ==> ac1 -> bc2; a was cloned and we already got (1) in
	some other partition
     5) ac1 -> bc1 but no clone present in this PARTITION.  Create from b,
	not from bc1?
     6) a -> b; a -> bc0; create a new clone, no clone present
     7) ac0 -> b; ac0 -> bc0; same as (6)
     8) a -> bc0 and no clone present; mustn't happen, same as (3)

     Redirect when bc1 is present and:
     a -> b or ac -> b or ac -> bc0  */

  cgraph_node *orig_cnode = cnode;
  cgraph_node **o_cnode = clone_to_node.get (cnode);
  if (o_cnode)
    orig_cnode = *o_cnode;

  cgraph_node **cnode_cl = node_to_clone.get (orig_cnode);

  if (cnode_cl && node_in_partition_p (partition, *cnode_cl))
    {
      if (node_in_partition_p (partition, caller))
	{
	  bool clone_p = false;
	  auto_vec<cgraph_edge *> redirected_edges;
	  for (cgraph_edge *ec = caller->callees; ec; ec = ec->next_callee)
	    if (ec->callee == cnode && edge_redirectable_p (ec, cm))
	      {
		ec->redirect_callee_duplicating_thunks (*cnode_cl);
		clone_p = true;
		redirected_edges.safe_push (ec);
		if (dump_file)
		  {
		    fprintf (dump_file, "clone present %s %s redirecting %s\n",
			     cnode->dump_asm_name (),
			     (*cnode_cl)->dump_asm_name (),
			     caller->dump_asm_name ());
		  }
	      }
	  if (clone_p)
	    {
	      (*cnode_cl)->expand_all_artificial_thunks ();
	      adjust_profile_info_for_non_self_rec_edges (redirected_edges,
							  *cnode_cl, cnode);
	      return NULL;
	    }
	}
    }

  /* Create a new clone for a -> b, ac -> b.
     For ac -> bc, should it be done on bc or b?
     bc could be from b_cp/b_sra or b.  */

  if (orig_cnode != cnode)
    {
      if (dump_file)
	fprintf (dump_file, "Clone of clone %s %s\n", cnode->dump_asm_name (),
		 orig_cnode->dump_asm_name ());
      return NULL;
    }

  struct cgraph_node *cloned_node
    = create_locality_clone (cnode, partition, cl_num, cm);

  gcc_assert (cloned_node);
  if (!cloned_node)
    return NULL;

  node_to_clone.put (cnode, cloned_node);
  clone_to_node.put (cloned_node, cnode);

  adjust_recursive_callees (cloned_node, cloned_node, cnode);
  symtab->call_cgraph_duplication_hooks (cnode, cloned_node);

  adjust_profile_info (cloned_node, cnode);
  /* Inline clones are created iff their inlined_to == CNODE.  */
  inline_clones (cloned_node, cnode);

  return cloned_node;
}

/* Accumulate the frequency of all edges from EDGE->caller to
   EDGE->callee.  */

static sreal
accumulate_incoming_edge_frequency (cgraph_edge *edge)
{
  sreal count = 0;
  struct cgraph_edge *e;
  for (e = edge->callee->callers; e; e = e->next_caller)
    {
      /* Make a local decision about all edges for EDGE->caller, but not the
	 other nodes already in the partition.  Their edges will be visited
	 later, or may have been visited before and not fit the
	 cut-off criteria.  */
      if (e->caller == edge->caller)
	count += e->sreal_frequency ();
    }
  return count;
}
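
/* Illustrative sketch (standalone C++, made-up values): how the accumulated
   edge frequency compares against the cut-off derived later from the
   frequency denominator param -- with a denominator of 100 the cut-off is
   1/100 = 0.01, so a caller whose edges into the callee sum below that
   frequency stops further cloning down the chain.  */
#if 0
#include <cassert>

int main ()
{
  int freq_denominator = 100;			/* param value */
  double freq_cutoff = 1.0 / freq_denominator;	/* 0.01 */

  double edge_freqs[] = { 0.004, 0.003 };	/* edges from one caller */
  double acc = edge_freqs[0] + edge_freqs[1];	/* 0.007 */
  bool clone_further_p = !(acc < freq_cutoff);
  assert (!clone_further_p);			/* below cut-off: stop */
}
#endif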

/* Determine if EDGE->CALLEE is suitable for cloning.  It is assumed that the
   callee is not an inlined node.  */

static bool
suitable_for_locality_cloning_p (cgraph_edge *edge,
				 lto_locality_cloning_model cm)
{
  cgraph_node *node = edge->callee;
  if (!node->versionable)
    return false;

  /* Out-of-line locality clones of ipcp or sra clones will be created in
     this pass after IPA inline is run.  A locality clone has the same
     function body and the same updated signature as the ipcp/sra clone.
     This fails or asserts based on how the clone is created:
     1. If param_adjustments and tree_map are not recorded for the locality
	clone: clone materialization (tree_function_versioning ()) fails when
	updating the signature and remapping calls, because clone_of (the
	ipcp/sra clone) and the locality clone differ in param information.
     2. If param_adjustments and tree_map are provided: asserts are triggered
	in fnsummary duplication because IPA inline resets some summaries.

     One inelegant solution is to provide param_adjustments and tree_map, and
     then set clone_of to the ipcp/sra clone's clone_of.  However, this
     sometimes results in a segmentation fault when the compiled program is
     run.  Disabling clones of clones altogether for now, with an aim to
     resolve this in the future.  */
  if (node->clone_of)
    return false;

  if (node->alias)
    return false;

  if (edge->recursive_p ())
    return false;

  if (!node->definition)
    return false;

  /* Don't clone NODE if the IPA count of NODE or EDGE is zero.  */
  if (!node->count.ipa ().nonzero_p () || !edge->count.ipa ().nonzero_p ())
    return false;

  if (cm == LTO_LOCALITY_NON_INTERPOSABLE_CLONING)
    {
      /* Interposability may change on a per-edge basis.  */
      enum availability avail;
      edge->callee->ultimate_alias_target (&avail, edge->caller);
      if (avail <= AVAIL_INTERPOSABLE)
	return false;
    }

  return true;
}

/* Map from caller to all callees already visited for partitioning.  */
hash_map<cgraph_node *, auto_vec<cgraph_node *> > caller_to_callees;

/* Partition EDGE->CALLEE into PARTITION, or clone it if it is already
   partitioned and satisfies the cloning criteria: CLONING_MODEL, the
   REAL_FREQ and SIZE cut-offs, and CLONE_FURTHER_P set by the previous
   caller.

   The callgraph can have multiple caller-to-callee edges for multiple
   callsites.  For the first such edge, we make the decisions about cut-offs
   and cloning, because we redirect ALL callsites to the cloned callee, not
   just one of them.  */

static void
partition_callchain (cgraph_edge *edge, locality_partition partition,
		     bool clone_further_p,
		     lto_locality_cloning_model cloning_model,
		     double freq_cutoff, int size, int &cl_num)
{
  /* Aliases are added to the same partition as their targets.
     Aliases are not cloned, and their callees are not processed
     separately.  */
  cgraph_node *node = edge->callee->ultimate_alias_target ();
  cgraph_node *caller = edge->caller;
  cgraph_node *caller_node = node, *cl_node = NULL;

  /* Return if this caller-to-callee pair has already been visited.  */
  auto_vec<cgraph_node *> &callees = caller_to_callees.get_or_insert (caller);
  if (std::find (callees.begin (), callees.end (), node) != callees.end ())
    return;

  callees.safe_push (node);

  if (node->get_partitioning_class () == SYMBOL_PARTITION)
    {
      if (!node_partitioned_p (node))
	{
	  add_node_to_partition (partition, node);
	  if (dump_file)
	    fprintf (dump_file, "Partitioned node: %s\n",
		     node->dump_asm_name ());
	}
      else if (cloning_model >= LTO_LOCALITY_NON_INTERPOSABLE_CLONING
	       && !node_in_partition_p (partition, node))
	{
	  /* A non-inlined node or alias that is already partitioned.
	     If we hit a cut-off, don't clone callees, but still partition
	     unpartitioned callees.
	     SIZE covers the node plus its inlined nodes.  */
	  if (clone_further_p)
	    {
	      if (!node->alias)
		if (ipa_size_summaries->get (node)->size >= size)
		  clone_further_p = false;

	      if (freq_cutoff != 0.0)
		{
		  sreal acc_freq = accumulate_incoming_edge_frequency (edge);
		  if (acc_freq.to_double () < freq_cutoff)
		    clone_further_p = false;
		}
	    }

	  if (!suitable_for_locality_cloning_p (edge, cloning_model))
	    clone_further_p = false;

	  if (clone_further_p)
	    {
	      /* Try to clone NODE and its inline chain.  */
	      if (dump_file)
		fprintf (dump_file, "Cloning node: %s\n",
			 node->dump_asm_name ());
	      cl_node = clone_node_as_needed (edge, partition, cl_num,
					      cloning_model);
	      if (cl_node)
		{
		  add_node_to_partition (partition, cl_node);
		  caller_node = cl_node;
		}
	      else
		caller_node = NULL;
	    }
	}
    }
  else if (!node->inlined_to)
    return;

  if (caller_node)
    for (cgraph_edge *e = caller_node->callees; e; e = e->next_callee)
      partition_callchain (e, partition, clone_further_p, cloning_model,
			   freq_cutoff, size, cl_num);
}
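
/* Illustrative sketch (standalone C++ with hypothetical integer ids): the
   caller_to_callees bookkeeping above.  Since one caller may have several
   edges to the same callee (multiple callsites), the pair is recorded on
   first visit and later edges between the same pair are skipped.  */
#if 0
#include <unordered_map>
#include <vector>
#include <algorithm>
#include <cassert>

std::unordered_map<int, std::vector<int>> caller_to_callees;

static bool
first_visit_p (int caller, int callee)
{
  std::vector<int> &callees = caller_to_callees[caller];
  if (std::find (callees.begin (), callees.end (), callee) != callees.end ())
    return false;		/* already decided for this pair */
  callees.push_back (callee);
  return true;
}

int main ()
{
  assert (first_visit_p (1, 2));    /* first edge: process */
  assert (!first_visit_p (1, 2));   /* second callsite: skip */
  assert (first_visit_p (1, 3));
}
#endif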

/* Determine whether NODE is an entry point to a callchain.  */

static bool
is_entry_node_p (cgraph_node *node)
{
  /* node->inlined_to is returned as SYMBOL_DUPLICATE.  */
  if (node->get_partitioning_class () != SYMBOL_PARTITION)
    return false;

  if (!node->callers)
    return true;

  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
    {
      if (!e->recursive_p ())
	return false;
    }
  if (node->alias
      && !is_entry_node_p (node->ultimate_alias_target ()))
    return false;
  return true;
}
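
/* Illustrative sketch (standalone C++ with a hypothetical edge type): the
   entry-point test above reduced to its core -- a function is a callchain
   entry if it has no callers at all, or every caller edge is recursive.  */
#if 0
#include <vector>
#include <cassert>

struct edge { bool recursive; };

static bool
is_entry_p (const std::vector<edge> &callers)
{
  for (const edge &e : callers)
    if (!e.recursive)
      return false;	/* some real caller exists: not an entry */
  return true;		/* no callers, or only recursive ones */
}

int main ()
{
  assert (is_entry_p ({}));
  assert (is_entry_p ({ { true } }));
  assert (!is_entry_p ({ { true }, { false } }));
}
#endif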

/* Determine the order of all external nodes when a PGO profile is available.
   Store the order in ORDER.  */

static bool
locality_determine_ipa_order (auto_vec<locality_order *> *order)
{
  struct cgraph_node *node;
  auto_vec<locality_order *> non_comparable_nodes;
  FOR_EACH_DEFINED_FUNCTION (node)
    if (node->get_partitioning_class () == SYMBOL_PARTITION)
      {
	if (node->no_reorder)
	  {
	    if (dump_file)
	      fprintf (dump_file, "no reorder %s\n", node->dump_asm_name ());
	    return false;
	  }
	else if (is_entry_node_p (node))
	  {
	    profile_count pcnt = node->count.ipa ();
	    if (!pcnt.initialized_p () || !pcnt.ipa_p ())
	      {
		sreal cnt = 0;
		locality_order *lo = new locality_order (node, cnt);
		non_comparable_nodes.safe_push (lo);
		continue;
	      }
	    sreal count = 0;
	    struct cgraph_edge *edge;
	    for (edge = node->callees; edge; edge = edge->next_callee)
	      {
		/* For PGO, the frequency is not used in
		   compare_edge_profile_counts (); it's used only as part of
		   the static profile order.  */
		sreal freq = edge->sreal_frequency ();
		count += freq;
	      }
	    locality_order *cl = new locality_order (node, count);
	    order->safe_push (cl);
	  }
      }
  order->qsort (compare_edge_profile_counts);
  for (auto el : non_comparable_nodes)
    order->safe_push (el);
  return true;
}

/* Determine the order of all external nodes when only a static profile is
   available.  Store the order in ORDER.  */

static bool
locality_determine_static_order (auto_vec<locality_order *> *order)
{
  struct cgraph_node *node;
  FOR_EACH_DEFINED_FUNCTION (node)
    if (node->get_partitioning_class () == SYMBOL_PARTITION)
      {
	if (node->no_reorder)
	  {
	    if (dump_file)
	      fprintf (dump_file, "no reorder %s\n", node->dump_asm_name ());
	    return false;
	  }
	else if (is_entry_node_p (node))
	  {
	    sreal count = 0;
	    struct cgraph_edge *edge;
	    for (edge = node->callees; edge; edge = edge->next_callee)
	      {
		sreal freq = edge->sreal_frequency ();
		count += freq;
	      }
	    locality_order *cl = new locality_order (node, count);
	    order->safe_push (cl);
	  }
      }
  order->qsort (static_profile_cmp);
  return true;
}

/* Partitioning for code locality.
   1. Create and sort callchains.  If PGO is available, use real profile
      counts.  Otherwise, use a set of heuristics to sort the callchains.
   2. Partition the external nodes and their callchains in the determined
      order.
      2.1. If a node is unpartitioned, partition it; otherwise, try to clone
	   it if it satisfies the cloning criteria.
   3. Partition all other unpartitioned nodes.  */

static void
locality_partition_and_clone (int max_locality_partition_size,
			      lto_locality_cloning_model cloning_model,
			      int freq_denominator, int size)
{
  locality_partition partition;
  int npartitions = 0;

  auto_vec<locality_order *> order;
  auto_vec<varpool_node *> varpool_order;
  struct cgraph_node *node;
  bool order_p;

  int cl_num = 0;

  double real_freq = 0.0;
  if (freq_denominator > 0)
    real_freq = 1.0 / (double) freq_denominator;

  cgraph_node *n = symtab->first_defined_function ();
  if (n && n->count.ipa_p ())
    order_p = locality_determine_ipa_order (&order);
  else
    order_p = locality_determine_static_order (&order);
  if (!order_p)
    {
      if (dump_file)
	{
	  fprintf (dump_file, "Locality partition: falling back to balanced "
		   "model\n");
	}

      return;
    }

  int64_t partition_size
    = max_locality_partition_size
      ? max_locality_partition_size : param_max_partition_size;
  partition = create_partition (npartitions);

  for (unsigned i = 0; i < order.length (); i++)
    {
      node = order[i]->node;
      if (node_partitioned_p (node))
	continue;

      if (partition->insns > partition_size)
	partition = create_partition (npartitions);
      if (dump_file)
	fprintf (dump_file, "Partition id: %d\n", partition->part_id);

      add_node_to_partition (partition, node);
      if (dump_file)
	fprintf (dump_file, "Ordered Node: %s\n", node->dump_asm_name ());

      for (cgraph_edge *edge = node->callees; edge; edge = edge->next_callee)
	{
	  /* Recursively partition the callchain of edge->callee.  */
	  partition_callchain (edge, partition, true, cloning_model,
			       real_freq, size, cl_num);
	}
    }

  for (unsigned i = 0; i < order.length (); i++)
    delete order[i];
  order = vNULL;
}
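
/* Illustrative sketch (standalone C++, made-up values; the default size
   stands in for param_max_partition_size): how the two tunables above
   interact -- the frequency denominator becomes a real-valued cut-off, and
   the partition-size override falls back to a default when zero.  */
#if 0
#include <cassert>

int main ()
{
  int freq_denominator = 20;
  double real_freq = freq_denominator > 0 ? 1.0 / freq_denominator : 0.0;
  assert (real_freq == 0.05);

  long max_locality_partition_size = 0;	    /* 0 means "not set" */
  long default_partition_size = 1000000;    /* hypothetical default */
  long partition_size = max_locality_partition_size
			? max_locality_partition_size
			: default_partition_size;
  assert (partition_size == default_partition_size);
}
#endif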

/* Entry point to the locality-clone pass.  */
static int
lc_execute (void)
{
  symtab_node *node;
  FOR_EACH_SYMBOL (node)
    node->aux = NULL;

  locality_partition_and_clone (param_max_locality_partition_size,
				flag_lto_locality_cloning,
				param_lto_locality_frequency,
				param_lto_locality_size);

  FOR_EACH_SYMBOL (node)
    node->aux = NULL;
  return 0;
}

namespace {

const pass_data pass_data_ipa_locality_clone = {
  IPA_PASS,		/* type */
  "locality-clone",	/* name */
  OPTGROUP_NONE,	/* optinfo_flags */
  TV_IPA_LC,		/* tv_id */
  0,			/* properties_required */
  0,			/* properties_provided */
  0,			/* properties_destroyed */
  0,			/* todo_flags_start */
  (TODO_dump_symtab | TODO_remove_functions), /* todo_flags_finish */
};

class pass_ipa_locality_cloning : public ipa_opt_pass_d
{
public:
  pass_ipa_locality_cloning (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_locality_clone, ctxt,
		      NULL, /* generate_summary */
		      NULL, /* write_summary */
		      NULL, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0,    /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return (flag_wpa && flag_ipa_reorder_for_locality);
  }

  virtual unsigned int execute (function *) { return lc_execute (); }

}; // class pass_ipa_locality_cloning

} // namespace

ipa_opt_pass_d *
make_pass_ipa_locality_cloning (gcc::context *ctxt)
{
  return new pass_ipa_locality_cloning (ctxt);
}