GCC Middle and Back End API Reference
ipa-locality-cloning.cc File Reference
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "function.h"
#include "tree.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "symtab-thunks.h"
#include "sreal.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "ipa-modref-tree.h"
#include "ipa-modref.h"
#include "symtab-clones.h"
#include "ipa-locality-cloning.h"

Data Structures

struct  locality_order
 
struct  profile_stats
 

Macros

#define INCLUDE_ALGORITHM
 

Functions

static bool node_partitioned_p (cgraph_node *node)
 
static void add_node_to_partition (locality_partition part, cgraph_node *node)
 
static bool node_in_partition_p (locality_partition partition, cgraph_node *node)
 
static int compare_node_uids (cgraph_node *n1, cgraph_node *n2)
 
static int static_profile_cmp (const void *pa, const void *pb)
 
static int compare_edge_profile_counts (const void *pa, const void *pb)
 
static locality_partition create_partition (int &npartitions)
 
static void init_profile_stats (profile_stats *stats, cgraph_node *target=NULL)
 
static bool accumulate_profile_counts_after_cloning (cgraph_node *node, void *data)
 
static void adjust_profile_info_for_non_self_rec_edges (auto_vec< cgraph_edge * > &edges, cgraph_node *new_node, cgraph_node *orig_node)
 
static void adjust_profile_info (cgraph_node *new_node, cgraph_node *orig_node)
 
static bool edge_redirectable_p (cgraph_edge *edge, lto_locality_cloning_model cm)
 
static cgraph_node * create_locality_clone (cgraph_node *cnode, locality_partition partition, int &cl_num, lto_locality_cloning_model cm)
 
static void adjust_recursive_callees (cgraph_node *clone, cgraph_node *new_callee, cgraph_node *orig_callee)
 
static void inline_clones (cgraph_node *caller, cgraph_node *orig_inlined_to)
 
static cgraph_node * clone_node_as_needed (cgraph_edge *edge, locality_partition partition, int &cl_num, lto_locality_cloning_model cm)
 
static sreal accumulate_incoming_edge_frequency (cgraph_edge *edge)
 
static bool suitable_for_locality_cloning_p (cgraph_edge *edge, lto_locality_cloning_model cm)
 
static void partition_callchain (cgraph_edge *edge, locality_partition partition, bool clone_further_p, lto_locality_cloning_model cloning_model, double freq_cutoff, int size, int &cl_num)
 
static bool is_entry_node_p (cgraph_node *node)
 
static bool locality_determine_ipa_order (auto_vec< locality_order * > *order)
 
static bool locality_determine_static_order (auto_vec< locality_order * > *order)
 
static void locality_partition_and_clone (int max_locality_partition_size, lto_locality_cloning_model cloning_model, int freq_denominator, int size)
 
static int lc_execute (void)
 
ipa_opt_pass_d * make_pass_ipa_locality_cloning (gcc::context *ctxt)
 

Variables

vec< locality_partition > locality_partitions
 
hash_map< cgraph_node *, cgraph_node * > node_to_clone
 
hash_map< cgraph_node *, cgraph_node * > clone_to_node
 
hash_map< cgraph_node *, auto_vec< cgraph_node * > > caller_to_callees
 

Macro Definition Documentation

◆ INCLUDE_ALGORITHM

#define INCLUDE_ALGORITHM
Code locality based function cloning. Copyright The GNU Toolchain Authors. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not, see <http://www.gnu.org/licenses/>.
This file implements the cloning required to improve partitioning of the callgraph for locality considerations.

Partitioning for improving code locality. This pass aims to place frequently executed callchains closer together in memory to improve performance through improved locality. If any frequent callchains cannot be placed together because they are already placed elsewhere, local function clones are created and all callers near the clones are redirected to use this copy.

Locality code placement is done in 2 parts:
1. An IPA pass, executed after ipa-inline and before ipa-pure-const. Its execute stage prepares the plan that places all nodes into partitions.
2. The WPA partition stage, which actually implements the plan.

Brief overview of the IPA pass:
1. Create and sort callchains. If PGO is available, use real profile counts. Otherwise, use a set of heuristics to sort the callchains.
2. Create a partition plan for the callchains, processing them in the sorted order:
   2.1. If a function is unpartitioned, place it in the current partition.
   2.2. If a function is already placed in a partition away from the current partition as part of another callchain, create a local clone in the current partition if the cloning criteria are satisfied.
   2.3. Redirect any new caller to a local clone if one exists.

Partition size is param-controlled to fine-tune per-program behavior.
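
A minimal standalone sketch of this planning loop, using hypothetical integer node ids and plain std::map/std::vector containers rather than the GCC cgraph and partition types; Plan, place_or_clone and the clone id space are all made up for illustration.

#include <cstdio>
#include <map>
#include <vector>

struct Plan
{
  std::vector<std::vector<int>> partitions;  /* node ids per partition  */
  std::map<int, int> placed_in;              /* node id -> partition index  */
  std::map<int, int> node_to_clone;          /* original id -> latest clone id  */
  int next_clone_id = 1000;                  /* hypothetical id space for clones  */
};

/* Place NODE into partition CUR, creating a local clone if NODE already
   lives in a different partition.  */
static void
place_or_clone (Plan &plan, int cur, int node)
{
  auto it = plan.placed_in.find (node);
  if (it == plan.placed_in.end ())
    {
      plan.partitions[cur].push_back (node);
      plan.placed_in[node] = cur;
    }
  else if (it->second != cur)
    {
      /* Already placed elsewhere: create a clone and remember it so later
         callers in CUR are redirected to it.  */
      int clone = plan.next_clone_id++;
      plan.node_to_clone[node] = clone;
      plan.partitions[cur].push_back (clone);
      plan.placed_in[clone] = cur;
    }
}

int
main ()
{
  Plan plan;
  plan.partitions.resize (2);
  int chain1[] = {1, 2, 3}, chain2[] = {4, 2, 5};
  for (int n : chain1) place_or_clone (plan, 0, n);
  for (int n : chain2) place_or_clone (plan, 1, n);  /* node 2 gets cloned  */
  printf ("partition 1 holds %zu nodes\n", plan.partitions[1].size ());
  return 0;
}

Here node 2 appears on both callchains, so the second partition receives a clone of it, and later callers in that partition would be redirected to the clone.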

Function Documentation

◆ accumulate_incoming_edge_frequency()

static sreal accumulate_incoming_edge_frequency ( cgraph_edge * edge)
static
Accumulate frequency of all edges from EDGE->caller to EDGE->callee.

References cgraph_edge::caller, count, cgraph_edge::next_caller, and cgraph_edge::sreal_frequency().

Referenced by partition_callchain().
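
A standalone sketch of this pattern, with hypothetical Edge and Node structs standing in for cgraph_edge and cgraph_node and a plain double in place of sreal: walk the callee's incoming-edge list and sum the frequencies of the edges that originate from EDGE's caller.

struct Node;

struct Edge
{
  Node *caller;
  Node *callee;
  double frequency;     /* stand-in for cgraph_edge::sreal_frequency ()  */
  Edge *next_caller;    /* next edge in the callee's incoming-edge list  */
};

struct Node
{
  Edge *callers;        /* head of the incoming-edge list  */
};

/* Sum the frequencies of every edge from EDGE->caller to EDGE->callee.  */
static double
accumulate_incoming_edge_frequency (Edge *edge)
{
  double sum = 0;
  for (Edge *e = edge->callee->callers; e; e = e->next_caller)
    if (e->caller == edge->caller)
      sum += e->frequency;
  return sum;
}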

◆ accumulate_profile_counts_after_cloning()

static bool accumulate_profile_counts_after_cloning ( cgraph_node * node,
void * data )
static
Helper function to accumulate call counts.

References cgraph_node::callers, and cgraph_edge::next_caller.

Referenced by adjust_profile_info().

◆ add_node_to_partition()

◆ adjust_profile_info()

◆ adjust_profile_info_for_non_self_rec_edges()

static void adjust_profile_info_for_non_self_rec_edges ( auto_vec< cgraph_edge * > & edges,
cgraph_node * new_node,
cgraph_node * orig_node )
static
NEW_NODE is a previously created clone of ORIG_NODE already present in current partition. EDGES contains newly redirected edges to NEW_NODE. Adjust profile information for both nodes and the edge.

References profile_count::adjust_for_ipa_scaling(), cgraph_node::callees, profile_count::combine_with_ipa_count(), cgraph_node::count, count, profile_count::dump(), symtab_node::dump_asm_name(), dump_file, i, cgraph_node::indirect_calls, profile_count::ipa(), cgraph_edge::next_callee, and profile_count::zero().

Referenced by clone_node_as_needed().
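
A hedged numeric sketch of the adjustment, with made-up counts and a plain long standing in for profile_count: the counts carried by the redirected edges move from the original node to the clone, and each node's outgoing edge counts are then rescaled in proportion to its new count (roughly the role adjust_for_ipa_scaling plays here). The struct and numbers are illustrative only.

#include <cstdio>
#include <vector>

struct SimpleNode
{
  long count;                        /* stand-in for cgraph_node::count  */
  std::vector<long> callee_counts;   /* counts on outgoing edges  */
};

/* Rescale the outgoing edge counts of N after its node count changed from
   OLD_COUNT to N.count.  */
static void
scale_out_edges (SimpleNode &n, long old_count)
{
  if (old_count == 0)
    return;
  for (long &c : n.callee_counts)
    c = c * n.count / old_count;
}

int
main ()
{
  SimpleNode orig = { 1000, { 600, 400 } };
  SimpleNode clone = { 0, { 600, 400 } };  /* callee edges were copied verbatim  */
  long redirected = 300;                   /* count now flowing to the clone  */

  long old_scale = orig.count;             /* both edge sets carry orig's scale  */
  orig.count -= redirected;
  clone.count += redirected;
  scale_out_edges (orig, old_scale);
  scale_out_edges (clone, old_scale);
  printf ("orig count %ld, clone count %ld\n", orig.count, clone.count);
  return 0;
}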

◆ adjust_recursive_callees()

static void adjust_recursive_callees ( cgraph_node * clone,
cgraph_node * new_callee,
cgraph_node * orig_callee )
static
Redirect recursive edges of CLONE to correctly point to CLONE. As part of the cloning process, all callee edges of a node are just duplicated but not redirected, so these edges still call the original of CLONE.

For non-inlined CLONEs, NEW_CALLEE == CLONE and ORIG_CALLEE is CLONE's original node.

For an inlined node, self recursion to CLONE's original is handled as in the non-inlined case; additionally, calls to CLONE->inlined_to are also recursive: NEW_CALLEE == CLONE->inlined_to and ORIG_CALLEE == the original node of CLONE->inlined_to.

References symtab_node::alias, cgraph_node::callees, symtab_node::dump_asm_name(), dump_file, dyn_cast(), cgraph_node::expand_all_artificial_thunks(), gcc_assert, cgraph_edge::next_callee, node_to_clone, symtab_node::noninterposable_alias(), and NULL.

Referenced by clone_node_as_needed(), and inline_clones().
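
A standalone sketch of the redirection, with hypothetical Edge and Node structs instead of the cgraph types; the real pass goes through the cgraph edge-redirection machinery, while here it is reduced to a pointer update.

struct Node;

struct Edge
{
  Node *callee;
  Edge *next_callee;
};

struct Node
{
  Edge *callees;        /* head of the outgoing-edge list  */
};

/* Walk CLONE's outgoing edges and point every edge that still targets
   ORIG_CALLEE at NEW_CALLEE instead.  */
static void
redirect_recursive_callees (Node *clone, Node *new_callee, Node *orig_callee)
{
  for (Edge *e = clone->callees; e; e = e->next_callee)
    if (e->callee == orig_callee)
      e->callee = new_callee;
}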

◆ clone_node_as_needed()

static cgraph_node * clone_node_as_needed ( cgraph_edge * edge,
locality_partition partition,
int & cl_num,
lto_locality_cloning_model cm )
static
Clone EDGE->CALLEE if it or a clone of it is not already in PARTITION. Redirect all callers of EDGE->CALLEE that are in PARTITION, not just the EDGE. If a clone is already present in PARTITION, redirect all edges from EDGE->CALLER to EDGE->CALLEE. This is because we only visit one edge per caller-to-callee pair and redirect all the others from there.

If cloning, also recursively clone inlined functions till the end of the callchain, because inlined clones have a 1-1 exclusive copy and edge from caller to inlined node.

There are 2 flows possible:
1. Only redirect
   1.1. cnode is already in the current partition - cnode mustn't be a locality_clone -> nothing to do
   1.2. A clone of cnode is in the current partition - find out if it is the correct clone for the edge: it must be a locality_clone of the exact same kind as the callee, i.e. orig or cp/sra clone; if yes, redirect, else go to #2
   1.3. cnode or a clone of cnode is in the current partition but the caller is inlined
2. Clone and redirect
   2.1. cnode is the original node
   2.2. cnode itself is a clone
   Clone inlines

Flavors of edges:
1. Normal -> orig nodes, locality clones or cp/sra clones
2. Recursive -> direct recursion
3. Alias -> recursion via aliasing or as a result of IPA code duplication
4. Inline -> shouldn't be included in the callchain.

References adjust_profile_info(), adjust_profile_info_for_non_self_rec_edges(), adjust_recursive_callees(), cgraph_node::callees, clone_to_node, create_locality_clone(), symtab_node::dump_asm_name(), dump_file, edge_redirectable_p(), gcc_assert, inline_clones(), cgraph_edge::next_callee, node_in_partition_p(), node_to_clone, NULL, and symtab.

Referenced by partition_callchain().
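
A minimal sketch of the two flows, using integer node ids and plain std::map containers as stand-ins for the node_to_clone and clone_to_node hash maps; the cloning criteria, cp/sra clone matching and inline-clone handling are omitted, and all names here are illustrative.

#include <map>
#include <set>

typedef int node_id;

struct clone_state
{
  std::set<node_id> in_partition;            /* nodes already in the current partition  */
  std::map<node_id, node_id> node_to_clone;  /* original -> latest clone  */
  std::map<node_id, node_id> clone_to_node;  /* clone -> original  */
  node_id next_id = 1000;
};

/* Return the node the caller should be redirected to for CALLEE.  */
static node_id
clone_node_as_needed (clone_state &s, node_id callee)
{
  if (s.in_partition.count (callee))
    return callee;                           /* flow 1.1: nothing to do  */

  auto it = s.node_to_clone.find (callee);
  if (it != s.node_to_clone.end () && s.in_partition.count (it->second))
    return it->second;                       /* flow 1.2: redirect to the existing clone  */

  node_id clone = s.next_id++;               /* flow 2: clone and redirect  */
  s.node_to_clone[callee] = clone;
  s.clone_to_node[clone] = callee;
  s.in_partition.insert (clone);
  return clone;
}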

◆ compare_edge_profile_counts()

static int compare_edge_profile_counts ( const void * pa,
const void * pb )
static
Helper function for qsort; sort nodes by profile count.

References a, b, profile_count::compatible_p(), and static_profile_cmp().

Referenced by locality_determine_ipa_order().

◆ compare_node_uids()

static int compare_node_uids ( cgraph_node * n1,
cgraph_node * n2 )
static
Helper function for qsort; to break ties.

References gcc_assert, and symtab_node::get_uid().

Referenced by static_profile_cmp().
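
A self-contained example of the comparator pattern these two helpers implement: order by the primary key (profile count, highest first) and break ties deterministically with the node uid. The order_entry type and the values are made up for illustration.

#include <cstdio>
#include <cstdlib>

struct order_entry
{
  int uid;
  long count;
};

/* qsort comparator over an array of order_entry pointers: highest count
   first, ties broken by uid so the order is deterministic.  */
static int
entry_cmp (const void *pa, const void *pb)
{
  const order_entry *a = *(const order_entry *const *) pa;
  const order_entry *b = *(const order_entry *const *) pb;
  if (a->count != b->count)
    return a->count > b->count ? -1 : 1;
  return a->uid < b->uid ? -1 : 1;
}

int
main ()
{
  order_entry n1 = {1, 50}, n2 = {2, 50}, n3 = {3, 90};
  order_entry *v[] = { &n3, &n2, &n1 };
  qsort (v, 3, sizeof (order_entry *), entry_cmp);
  printf ("%d %d %d\n", v[0]->uid, v[1]->uid, v[2]->uid);  /* prints 3 1 2  */
  return 0;
}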

◆ create_locality_clone()

◆ create_partition()

static locality_partition create_partition ( int & npartitions)
static
Create and return a new partition and increment NPARTITIONS.

References locality_partition_def::insns, locality_partitions, locality_partition_def::nodes, and locality_partition_def::part_id.

Referenced by locality_partition_and_clone().

◆ edge_redirectable_p()

static bool edge_redirectable_p ( cgraph_edge * edge,
lto_locality_cloning_model cm )
inline static
Return true if EDGE can be safely redirected to another callee.

References AVAIL_INTERPOSABLE, and LTO_LOCALITY_NON_INTERPOSABLE_CLONING.

Referenced by clone_node_as_needed(), and create_locality_clone().

◆ init_profile_stats()

static void init_profile_stats ( profile_stats * stats,
cgraph_node * target = NULL )
inline static
Initialize fields of STATS.

References NULL, and profile_count::zero().

Referenced by adjust_profile_info().

◆ inline_clones()

static void inline_clones ( cgraph_node * caller,
cgraph_node * orig_inlined_to )
static
Create clones for CALLER's inlined callees; ORIG_INLINED_TO is the original node from clone_node_as_needed () such that new_inlined_to is a clone of it.

References adjust_recursive_callees(), cgraph_node::callees, cgraph_edge::caller, clone_to_node, cgraph_node::create_clone(), symtab_node::decl, cgraph_node::dump(), dump_file, thunk_info::get(), thunk_info::get_create(), inline_clones(), cgraph_node::inlined_to, node_to_clone, NULL, cgraph_node::thunk, and vNULL.

Referenced by clone_node_as_needed(), and inline_clones().
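
A standalone sketch of the recursion, with a hypothetical Node struct: every callee inlined into the original gets its own copy whose inlined_to points at the new root, keeping the inline tree 1-1 with its (cloned) caller.

#include <vector>

struct Node
{
  std::vector<Node *> inlined_callees;  /* callees inlined into this node  */
  Node *inlined_to = nullptr;           /* root of the inline tree, if inlined  */
};

/* Give every callee inlined into ORIGINAL a fresh copy under CLONE, all
   belonging to NEW_ROOT, and recurse down the inline tree.  */
static void
clone_inlined_callees (Node *clone, const Node *original, Node *new_root)
{
  for (const Node *callee : original->inlined_callees)
    {
      Node *callee_clone = new Node;
      callee_clone->inlined_to = new_root;
      clone->inlined_callees.push_back (callee_clone);
      clone_inlined_callees (callee_clone, callee, new_root);
    }
}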

◆ is_entry_node_p()

◆ lc_execute()

static int lc_execute ( void )
static
Entry point to locality-clone pass.

References symtab_node::aux, FOR_EACH_SYMBOL, locality_partition_and_clone(), and NULL.

◆ locality_determine_ipa_order()

◆ locality_determine_static_order()

static bool locality_determine_static_order ( auto_vec< locality_order * > * order)
static

◆ locality_partition_and_clone()

static void locality_partition_and_clone ( int max_locality_partition_size,
lto_locality_cloning_model cloning_model,
int freq_denominator,
int size )
static
Partitioning for code locality:
1. Create and sort callchains. If PGO is available, use real profile counts. Otherwise, use a set of heuristics to sort the callchains.
2. Partition the external nodes and their callchains in the determined order.
   2.1. If the node is not yet partitioned, partition it; otherwise, try to clone it if it satisfies the cloning criteria.
3. Partition all other unpartitioned nodes.

References add_node_to_partition(), cgraph_node::callees, cgraph_node::count, create_partition(), symtab_node::dump_asm_name(), dump_file, i, profile_count::ipa_p(), locality_determine_ipa_order(), locality_determine_static_order(), node_partitioned_p(), symtab_node::order, partition_callchain(), symtab, and vNULL.

Referenced by lc_execute().
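
A minimal standalone sketch of the driver, with hypothetical types and a made-up size metric rather than the GCC API: walk the entry nodes in the computed order, open a new partition once the current one reaches the size limit, and hand each node's callchain to the partitioning step.

#include <vector>

struct OrderedNode
{
  int id;
  int insns;                       /* estimated size of the node  */
  std::vector<int> callchain;      /* ids reachable along its hot callchain  */
};

struct Partition
{
  std::vector<int> nodes;
  int insns = 0;
};

/* Build the partition plan for ORDER, limiting each partition to MAX_SIZE.  */
static std::vector<Partition>
plan_partitions (const std::vector<OrderedNode> &order, int max_size)
{
  std::vector<Partition> parts (1);
  for (const OrderedNode &n : order)
    {
      if (parts.back ().insns >= max_size)
        parts.emplace_back ();             /* current partition is full: open a new one  */
      Partition &cur = parts.back ();
      cur.nodes.push_back (n.id);
      cur.insns += n.insns;
      for (int callee : n.callchain)       /* stand-in for partition_callchain ()  */
        cur.nodes.push_back (callee);
    }
  return parts;
}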

◆ make_pass_ipa_locality_cloning()

ipa_opt_pass_d * make_pass_ipa_locality_cloning ( gcc::context * ctxt)

◆ node_in_partition_p()

static bool node_in_partition_p ( locality_partition partition,
cgraph_node * node )
static
Return TRUE if NODE is in PARTITION.

References symtab_node::aux.

Referenced by clone_node_as_needed(), create_locality_clone(), and partition_callchain().

◆ node_partitioned_p()

static bool node_partitioned_p ( cgraph_node * node)
inlinestatic
Return true if NODE is already in some partition.

References symtab_node::aux.

Referenced by add_node_to_partition(), locality_partition_and_clone(), and partition_callchain().

◆ partition_callchain()

static void partition_callchain ( cgraph_edge * edge,
locality_partition partition,
bool clone_further_p,
lto_locality_cloning_model cloning_model,
double freq_cutoff,
int size,
int & cl_num )
static
Partition EDGE->CALLEE into PARTITION, or clone it if it is already partitioned and satisfies the cloning criteria such as CLONING_MODEL, the FREQ_CUTOFF and SIZE cut-offs, and CLONE_FURTHER_P set by the previous caller.
The callgraph can have multiple caller-to-callee edges for multiple callsites. For the first such edge, we make decisions about cutoffs and cloning, because we redirect ALL callsites to the cloned callee, not just one of them.

References accumulate_incoming_edge_frequency(), add_node_to_partition(), symtab_node::alias, cgraph_node::callees, cgraph_edge::caller, caller_to_callees, clone_node_as_needed(), symtab_node::dump_asm_name(), dump_file, symtab_node::get_partitioning_class(), cgraph_node::inlined_to, ipa_size_summaries, LTO_LOCALITY_NON_INTERPOSABLE_CLONING, cgraph_edge::next_callee, node_in_partition_p(), node_partitioned_p(), NULL, partition_callchain(), suitable_for_locality_cloning_p(), SYMBOL_PARTITION, and sreal::to_double().

Referenced by locality_partition_and_clone(), and partition_callchain().
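
A minimal standalone sketch of the recursive walk, with hypothetical structs and cutoffs; the actual cloning and caller redirection (clone_node_as_needed) is reduced to a comment here.

#include <vector>

struct Node
{
  std::vector<Node *> callees;
  std::vector<double> callee_freq;   /* frequency of each outgoing edge  */
  bool partitioned = false;
};

/* Place CALLEE (reached over an edge of frequency FREQ) into PARTITION and
   recurse into its callees, disabling cloning once an edge is too cold.  */
static void
partition_callchain (Node *callee, double freq, std::vector<Node *> &partition,
                     bool clone_further_p, double freq_cutoff)
{
  if (freq < freq_cutoff)
    clone_further_p = false;              /* edge is too cold: stop cloning deeper  */
  if (!callee->partitioned)
    {
      callee->partitioned = true;         /* first placement, no clone needed  */
      partition.push_back (callee);
    }
  else if (clone_further_p)
    {
      /* The real pass would create a local clone here and redirect all
         callers already in this partition to it (clone_node_as_needed).  */
    }
  else
    return;                               /* placed elsewhere, cloning disabled  */
  for (size_t i = 0; i < callee->callees.size (); i++)
    partition_callchain (callee->callees[i], callee->callee_freq[i], partition,
                         clone_further_p, freq_cutoff);
}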

◆ static_profile_cmp()

static int static_profile_cmp ( const void * pa,
const void * pb )
static
Helper function for qsort; sort nodes by order.

References a, b, and compare_node_uids().

Referenced by compare_edge_profile_counts(), and locality_determine_static_order().

◆ suitable_for_locality_cloning_p()

static bool suitable_for_locality_cloning_p ( cgraph_edge * edge,
lto_locality_cloning_model cm )
static
Determine if EDGE->CALLEE is suitable for cloning. It is assumed that the callee is not an inlined node.

References symtab_node::alias, AVAIL_INTERPOSABLE, cgraph_node::clone_of, cgraph_node::count, symtab_node::definition, profile_count::ipa(), LTO_LOCALITY_NON_INTERPOSABLE_CLONING, profile_count::nonzero_p(), and cgraph_node::versionable.

Referenced by partition_callchain().
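
A hedged sketch of the general shape of such a predicate, with a hypothetical node_info struct and cloning_model enum; the exact checks in the real pass differ, but the referenced fields suggest a conjunction over definition, alias status, versionability and interposability.

struct node_info
{
  bool definition;     /* callee has a body available  */
  bool is_alias;       /* callee is an alias  */
  bool versionable;    /* callee may be cloned at all  */
  bool interposable;   /* callee may be replaced at link/run time  */
};

enum cloning_model { CLONE_NON_INTERPOSABLE, CLONE_MAXIMAL };

/* Conjunction of checks: reject callees we cannot or should not clone.  */
static bool
suitable_for_cloning_p (const node_info &callee, cloning_model cm)
{
  if (!callee.definition || callee.is_alias || !callee.versionable)
    return false;
  if (cm == CLONE_NON_INTERPOSABLE && callee.interposable)
    return false;
  return true;
}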

Variable Documentation

◆ caller_to_callees

hash_map<cgraph_node *, auto_vec<cgraph_node *> > caller_to_callees
Map from caller to all callees already visited for partitioning.

Referenced by partition_callchain().

◆ clone_to_node

hash_map<cgraph_node *, cgraph_node *> clone_to_node
Map from clone to its original node.

Referenced by clone_node_as_needed(), and inline_clones().

◆ locality_partitions

vec<locality_partition> locality_partitions
Locality partitions, each recording the nodes assigned to it. These are used later in WPA partitioning.

Referenced by create_partition().

◆ node_to_clone

hash_map<cgraph_node *, cgraph_node *> node_to_clone
Map from original node to its latest clone. Gets overwritten whenever a new clone is created from the same node.

Referenced by adjust_recursive_callees(), clone_node_as_needed(), and inline_clones().