GCC Middle and Back End API Reference
loop-unroll.cc File Reference
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "memmodel.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "profile.h"
#include "cfgrtl.h"
#include "cfgloop.h"
#include "dojump.h"
#include "expr.h"
#include "dumpfile.h"
Include dependency graph for loop-unroll.cc:

Data Structures

struct  iv_to_split
 
struct  var_to_expand
 
struct  iv_split_hasher
 
struct  var_expand_hasher
 
struct  opt_info
 

Functions

static void decide_unroll_stupid (class loop *, int)
 
static void decide_unroll_constant_iterations (class loop *, int)
 
static void decide_unroll_runtime_iterations (class loop *, int)
 
static void unroll_loop_stupid (class loop *)
 
static void decide_unrolling (int)
 
static void unroll_loop_constant_iterations (class loop *)
 
static void unroll_loop_runtime_iterations (class loop *)
 
static struct opt_info * analyze_insns_in_loop (class loop *)
 
static void opt_info_start_duplication (struct opt_info *)
 
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool)
 
static void free_opt_info (struct opt_info *)
 
static struct var_to_expand * analyze_insn_to_expand_var (class loop *, rtx_insn *)
 
static bool referenced_in_one_insn_in_loop_p (class loop *, rtx, int *)
 
static struct iv_to_split * analyze_iv_to_split_insn (rtx_insn *)
 
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *)
 
static void insert_var_expansion_initialization (struct var_to_expand *, basic_block)
 
static void combine_var_copies_in_loop_exit (struct var_to_expand *, basic_block)
 
static rtx get_expansion (struct var_to_expand *)
 
static void report_unroll (class loop *loop, dump_location_t locus)
 
void unroll_loops (int flags)
 
static bool loop_exit_at_end_p (class loop *loop)
 
basic_block split_edge_and_insert (edge e, rtx_insn *insns)
 
static rtx_insn * compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx_code_label *label, profile_probability prob, rtx_insn *cinsn)
 
static void reset_debug_uses_in_loop (class loop *loop, rtx reg, int debug_uses)
 
static unsigned determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
 
static void allocate_basic_variable (struct iv_to_split *ivts)
 
static void insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
 
static void split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
 
static void maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
 

Function Documentation

◆ allocate_basic_variable()

static void allocate_basic_variable ( struct iv_to_split * ivts)
static
Allocate basic variable for the induction variable chain.   

References gen_reg_rtx(), GET_MODE, ggc_alloc(), SET_SRC, and single_set().

Referenced by apply_opt_in_copies().

◆ analyze_insn_to_expand_var()

static struct var_to_expand * analyze_insn_to_expand_var ( class loop * loop,
rtx_insn * insn )
static
Determine whether INSN contains an accumulator
  which can be expanded into separate copies,
  one for each copy of the LOOP body.

  for (i = 0 ; i < n; i++)
    sum += a[i];

  ==>

  sum += a[i]
  ....
  i = i+1;
  sum1 += a[i]
  ....
  i = i+1
  sum2 += a[i];
  ....

  Return NULL if INSN contains no opportunity for expansion of accumulator.
  Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
  information and return a pointer to it.

References copy_rtx(), dump_file, FLOAT_MODE_P, GET_CODE, GET_MODE, ggc_alloc(), have_insn_for(), var_to_expand::insn, NULL, print_rtl(), referenced_in_one_insn_in_loop_p(), REG_P, reset_debug_uses_in_loop(), rtx_equal_p(), rtx_referenced_p(), SET_DEST, SET_SRC, single_set(), SUBREG_REG, and XEXP.

Referenced by analyze_insns_in_loop().

◆ analyze_insns_in_loop()

◆ analyze_iv_to_split_insn()

static struct iv_to_split * analyze_iv_to_split_insn ( rtx_insn * insn)
static
Determine whether there is an induction variable in INSN that
we would like to split during unrolling.

I.e. replace

i = i + 1;
...
i = i + 1;
...
i = i + 1;
...

type chains by

i0 = i + 1
...
i = i0 + 1
...
i = i0 + 2
...

Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
an IV_TO_SPLIT structure, fill it with the relevant information and return a
pointer to it.   

References biv_p(), const0_rtx, GET_MODE, ggc_alloc(), iv_to_split::insn, iv_analyze_result(), NULL, NULL_RTX, REG_P, SET_DEST, single_set(), and iv::step.

Referenced by analyze_insns_in_loop().

◆ apply_opt_in_copies()

◆ combine_var_copies_in_loop_exit()

static void combine_var_copies_in_loop_exit ( struct var_to_expand * ve,
basic_block place )
static
Combine the variable expansions at the loop exit.  PLACE is the
loop exit basic block where the summation of the expansions should
take place.   

References BB_HEAD, copy_rtx(), emit_insn_after(), emit_move_insn(), end_sequence(), expr, FOR_EACH_VEC_ELT, force_operand(), gcc_unreachable, get_insns(), GET_MODE, ggc_alloc(), i, var_to_expand::insn, NEXT_INSN(), NOTE_INSN_BASIC_BLOCK_P, simplify_gen_binary(), and start_sequence().

Referenced by apply_opt_in_copies().

◆ compare_and_jump_seq()

static rtx_insn * compare_and_jump_seq ( rtx op0,
rtx op1,
enum rtx_code comp,
rtx_code_label * label,
profile_probability prob,
rtx_insn * cinsn )
static

◆ decide_unroll_constant_iterations()

◆ decide_unroll_runtime_iterations()

◆ decide_unroll_stupid()

◆ decide_unrolling()

◆ determine_split_iv_delta()

static unsigned determine_split_iv_delta ( unsigned n_copy,
unsigned n_copies,
bool unrolling )
static
Determine the number of iterations between initialization of the base
variable and the current copy (N_COPY).  N_COPIES is the total number
of newly created copies.  UNROLLING is true if we are unrolling
(not peeling) the loop.   

References ggc_alloc().

Referenced by apply_opt_in_copies().

◆ expand_var_during_unrolling()

static void expand_var_during_unrolling ( struct var_to_expand * ve,
rtx_insn * insn )
static
Given INSN replace the uses of the accumulator recorded in VE
with a new register.   

References apply_change_group(), gcc_assert, gen_reg_rtx(), get_expansion(), GET_MODE, ggc_alloc(), var_to_expand::insn, SET_DEST, single_set(), and validate_replace_rtx_group().

Referenced by apply_opt_in_copies().

◆ free_opt_info()

◆ get_expansion()

static rtx get_expansion ( struct var_to_expand * ve)
static
Return one expansion of the accumulator recorded in struct VE.   

References ggc_alloc(), and var_to_expand::reg.

Referenced by expand_var_during_unrolling().

◆ insert_base_initialization()

static void insert_base_initialization ( struct iv_to_split * ivts,
rtx_insn * insn )
static
Insert initialization of basic variable of IVTS before INSN, taking
the initial value from INSN.   

References copy_rtx(), emit_insn_before(), emit_move_insn(), end_sequence(), force_operand(), get_insns(), ggc_alloc(), var_to_expand::insn, SET_SRC, single_set(), and start_sequence().

Referenced by apply_opt_in_copies().

◆ insert_var_expansion_initialization()

static void insert_var_expansion_initialization ( struct var_to_expand * ve,
basic_block place )
static
Initialize the variable expansions in loop preheader.  PLACE is the
loop-preheader basic block where the initialization of the
expansions should take place.  The expansions are initialized with
(-0) when the operation is plus or minus to honor signed zeros.  This
way we can prevent cases where the sign of the final result is
affected by the sign of the expansion.  Here is an example to
demonstrate this:

for (i = 0 ; i < n; i++)
  sum += something;

==>

sum += something
....
i = i+1;
sum1 += something
....
i = i+1
sum2 += something;
....

When SUM is initialized with -zero and SOMETHING is also -zero, the
final result of sum should be -zero; thus the expansions sum1 and sum2
should be initialized with -zero as well (otherwise we will get +zero
as the final result).   

References BB_END, CONST0_RTX, CONST1_RTX, emit_insn_after(), emit_move_insn(), end_sequence(), FOR_EACH_VEC_ELT, gcc_unreachable, get_insns(), GET_MODE, ggc_alloc(), i, MODE_HAS_SIGNED_ZEROS, simplify_gen_unary(), and start_sequence().

Referenced by apply_opt_in_copies().

◆ loop_exit_at_end_p()

static bool loop_exit_at_end_p ( class loop * loop)
static

◆ maybe_strip_eq_note_for_split_iv()

static void maybe_strip_eq_note_for_split_iv ( struct opt_info * opt_info,
rtx_insn * insn )
static
Strip away REG_EQUAL notes for IVs we're splitting.

Updating REG_EQUAL notes for IVs we split is tricky: We
cannot tell until after unrolling, DF-rescanning, and liveness
updating, whether an EQ_USE is reached by the split IV while
the IV reg is still live.  See PR55006.

??? We cannot use remove_reg_equal_equiv_notes_for_regno,
because RTL loop-iv requires us to defer rescanning insns and
any notes attached to them.  So resort to old techniques...   

References find_reg_equal_equiv_note(), ggc_alloc(), iv_to_split::insn, opt_info::iv_to_split_head, reg_mentioned_p(), and remove_note().

Referenced by apply_opt_in_copies().

◆ opt_info_start_duplication()

static void opt_info_start_duplication ( struct opt_info * opt_info)
static
Called just before loop duplication.  Records start of duplicated area
to OPT_INFO.   

References cfun, opt_info::first_new_block, and last_basic_block_for_fn.

Referenced by unroll_loop_constant_iterations(), unroll_loop_runtime_iterations(), and unroll_loop_stupid().

◆ referenced_in_one_insn_in_loop_p()

static bool referenced_in_one_insn_in_loop_p ( class loop * loop,
rtx reg,
int * debug_uses )
static
Returns true if REG is referenced in one nondebug insn in LOOP.
Set *DEBUG_USES to the number of debug insns that reference the
variable.   

References DEBUG_INSN_P, FOR_BB_INSNS, free(), get_loop_body(), ggc_alloc(), i, loop::num_nodes, and rtx_referenced_p().

Referenced by analyze_insn_to_expand_var().

◆ report_unroll()

static void report_unroll ( class loop * loop,
dump_location_t locus )
static

◆ reset_debug_uses_in_loop()

static void reset_debug_uses_in_loop ( class loop * loop,
rtx reg,
int debug_uses )
static

◆ split_edge_and_insert()

basic_block split_edge_and_insert ( edge e,
rtx_insn * insns )
Splits edge E and inserts the sequence of instructions INSNS on it, and
returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
and NULL is returned instead.   

References BB_END, emit_insn_after(), insns, NULL, and split_edge().

Referenced by add_test(), and unroll_loop_runtime_iterations().

◆ split_iv()

static void split_iv ( struct iv_to_split * ivts,
rtx_insn * insn,
unsigned delta )
static

◆ unroll_loop_constant_iterations()

◆ unroll_loop_runtime_iterations()

static void unroll_loop_runtime_iterations ( class loop * loop)
static
Unroll LOOP, for which we are able to count the number of iterations at
run time, LOOP->LPT_DECISION.TIMES times.  TIMES + 1 (the number of body
copies in the unrolled loop) must be a power of two.  The transformation
does this (with some extra care for case n < 0):

for (i = 0; i < n; i++)
  body;

==>  (LOOP->LPT_DECISION.TIMES == 3)

i = 0;
mod = n % 4;

switch (mod)
  {
    case 3:
      body; i++;
    case 2:
      body; i++;
    case 1:
      body; i++;
    case 0: ;
  }

while (i < n)
  {
    body; i++;
    body; i++;
    body; i++;
    body; i++;
  }

References profile_probability::always(), analyze_insns_in_loop(), loop::any_estimate, loop::any_likely_upper_bound, apply_opt_in_copies(), bitmap_clear(), bitmap_clear_bit(), bitmap_ones(), bitmap_set_bit, block_label(), CDI_DOMINATORS, compare_and_jump_seq(), const0_rtx, const1_rtx, niter_desc::const_iter, copy_rtx(), basic_block_def::count, DLTHE_FLAG_UPDATE_FREQ, DLTHE_RECORD_COPY_NUMBER, dump_file, duplicate_loop_body_to_header_edge(), EDGE_SUCC, emit_move_insn(), end_sequence(), expand_simple_binop(), flow_bb_inside_loop_p(), FOR_EACH_VEC_ELT, force_operand(), free(), free_opt_info(), gcc_assert, gen_int_mode(), gen_reg_rtx(), get_bb_copy(), get_dominated_by(), get_insns(), get_loop_body(), get_simple_loop_desc(), ggc_alloc(), i, niter_desc::in_edge, profile_probability::invert(), iterate_fix_dominators(), loop_exit_at_end_p(), loop_latch_edge(), loop_preheader_edge(), loop::lpt_decision, make_edge(), niter_desc::mode, loop::nb_iterations_estimate, loop::nb_iterations_likely_upper_bound, loop::nb_iterations_upper_bound, niter_desc::niter, niter_desc::niter_expr, niter_desc::noloop_assumptions, NULL, NULL_RTX, num_loop_insns(), loop::num_nodes, opt_info_start_duplication(), OPTAB_LIB_WIDEN, niter_desc::out_edge, remove_path(), set_immediate_dominator(), simplify_gen_binary(), single_pred_edge(), single_succ_edge(), split_edge(), split_edge_and_insert(), start_sequence(), lpt_decision::times, wi::udiv_trunc(), and unshare_all_rtl_in_chain().

Referenced by unroll_loops().

◆ unroll_loop_stupid()

static void unroll_loop_stupid ( class loop * loop)
static
Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

while (cond)
  body;

==>  (LOOP->LPT_DECISION.TIMES == 3)

while (cond)
  {
    body;
    if (!cond) break;
    body;
    if (!cond) break;
    body;
    if (!cond) break;
    body;
  }

References analyze_insns_in_loop(), apply_opt_in_copies(), bitmap_clear(), DLTHE_FLAG_UPDATE_FREQ, DLTHE_RECORD_COPY_NUMBER, dump_file, duplicate_loop_body_to_header_edge(), free_opt_info(), gcc_assert, get_simple_loop_desc(), ggc_alloc(), loop_latch_edge(), loop::lpt_decision, NULL, num_loop_insns(), opt_info_start_duplication(), niter_desc::simple_p, and lpt_decision::times.

Referenced by unroll_loops().

◆ unroll_loops()