|
static void | insert_bb (struct occurrence *new_occ, basic_block idom, struct occurrence **p_head) |
|
static void | register_division_in (basic_block bb, int importance) |
|
static void | compute_merit (struct occurrence *occ) |
|
static bool | is_division_by (gimple *use_stmt, tree def) |
|
static bool | is_mult_by (gimple *use_stmt, tree def, tree a) |
|
static bool | is_square_of (gimple *use_stmt, tree def) |
|
static bool | is_division_by_square (gimple *use_stmt, tree def) |
|
static void | insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, tree def, tree recip_def, tree square_recip_def, int should_insert_square_recip, int threshold) |
|
static void | replace_reciprocal_squares (use_operand_p use_p) |
|
static void | replace_reciprocal (use_operand_p use_p) |
|
static struct occurrence * | free_bb (struct occurrence *occ) |
|
static void | optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def) |
|
static void | execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) |
|
internal_fn | internal_fn_reciprocal (gcall *call) |
|
gimple_opt_pass * | make_pass_cse_reciprocals (gcc::context *ctxt) |
|
static tree | execute_cse_conv_1 (tree name, bool *cfg_changed) |
|
static bool | maybe_record_sincos (vec< gimple * > *stmts, basic_block *top_bb, gimple *use_stmt) |
|
static bool | execute_cse_sincos_1 (tree name) |
|
static int | powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache) |
|
static int | powi_cost (HOST_WIDE_INT n) |
|
static tree | powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type, unsigned HOST_WIDE_INT n, tree *cache) |
|
tree | powi_as_mults (gimple_stmt_iterator *gsi, location_t loc, tree arg0, HOST_WIDE_INT n) |
|
static tree | gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc, tree arg0, HOST_WIDE_INT n) |
|
static tree | build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc, tree fn, tree arg) |
|
static tree | build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc, const char *name, enum tree_code code, tree arg0, tree arg1) |
|
static tree | build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, tree type, tree val) |
|
bool | representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n, struct pow_synth_sqrt_info *info) |
|
static tree | get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi, tree fn, location_t loc, tree *cache) |
|
static void | print_nested_fn (FILE *stream, const char *fname, const char *arg, unsigned int n) |
|
static void | dump_fractional_sqrt_sequence (FILE *stream, const char *arg, struct pow_synth_sqrt_info *info) |
|
static void | dump_integer_part (FILE *stream, const char *arg, HOST_WIDE_INT n) |
|
static tree | expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc, tree arg0, tree arg1, HOST_WIDE_INT max_depth) |
|
static tree | gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, tree arg0, tree arg1) |
|
gimple_opt_pass * | make_pass_cse_sincos (gcc::context *ctxt) |
|
gimple_opt_pass * | make_pass_expand_pow (gcc::context *ctxt) |
|
static bool | widening_mult_conversion_strippable_p (tree result_type, gimple *stmt) |
|
static bool | is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, tree *new_rhs_out) |
|
static bool | is_widening_mult_p (gimple *stmt, tree *type1_out, tree *rhs1_out, tree *type2_out, tree *rhs2_out) |
|
static bool | is_copysign_call_with_1 (gimple *call) |
|
static bool | convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi) |
|
static bool | convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi) |
|
static bool | convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt, enum tree_code code) |
|
static void | convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2) |
|
static void | cancel_fma_deferring (fma_deferring_state *state) |
|
static gphi * | result_of_phi (tree op) |
|
static bool | last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, hash_set< tree > *last_result_set) |
|
static bool | convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, fma_deferring_state *state, tree mul_cond=NULL_TREE, tree mul_len=NULL_TREE, tree mul_bias=NULL_TREE) |
|
static void | maybe_optimize_guarding_check (vec< gimple * > &mul_stmts, gimple *cond_stmt, gimple *div_stmt, bool *cfg_changed) |
|
static bool | arith_cast_equal_p (tree val1, tree val2) |
|
static int | arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt, tree maxval, tree *other) |
|
bool | gimple_unsigned_integer_sat_add (tree, tree *, tree(*)(tree)) |
|
bool | gimple_unsigned_integer_sat_sub (tree, tree *, tree(*)(tree)) |
|
bool | gimple_unsigned_integer_sat_trunc (tree, tree *, tree(*)(tree)) |
|
bool | gimple_signed_integer_sat_add (tree, tree *, tree(*)(tree)) |
|
static void | build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, tree lhs, tree op_0, tree op_1) |
|
static void | build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, gphi *phi, internal_fn fn, tree lhs, tree op_0, tree op_1) |
|
static void | match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) |
|
static void | match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi) |
|
static void | match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) |
|
static void | match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi) |
|
static void | match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt) |
|
static bool | match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, enum tree_code code, bool *cfg_changed) |
|
static gimple * | uaddc_cast (gimple *g) |
|
static gimple * | uaddc_ne0 (gimple *g) |
|
static bool | uaddc_is_cplxpart (gimple *g, tree_code part) |
|
static bool | match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code) |
|
static void | match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt) |
|
static bool | target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode) |
|
static bool | divmod_candidate_p (gassign *stmt) |
|
static bool | convert_to_divmod (gassign *stmt) |
|
static bool | convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi) |
|
static void | optimize_spaceship (gcond *stmt) |
|
gimple_opt_pass * | make_pass_optimize_widening_mul (gcc::context *ctxt) |
|
To evaluate powi(x,n), the floating point value x raised to the
constant integer exponent n, we use a hybrid algorithm that
combines the "window method" with look-up tables. For an
introduction to exponentiation algorithms and "addition chains",
see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
"Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.
Provide a default value for POWI_MAX_MULTS, the maximum number of
multiplications to inline before calling the system library's pow
function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
so this default never requires calling pow, powf or powl.
Referenced by expand_pow_as_sqrts(), gimple_expand_builtin_pow(), and gimple_expand_builtin_powi().
Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
with uses in additions and subtractions to form fused multiply-add
operations. Returns true if successful and MUL_STMT should be removed.
If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
on MUL_COND, otherwise it is unconditional.
If STATE indicates that we are deferring FMA transformation, that means
that we do not produce FMAs for basic blocks which look like:
<bb 6>
# accumulator_111 = PHI <0.0(5), accumulator_66(6)>
_65 = _14 * _16;
accumulator_66 = _65 + accumulator_111;
or its unrolled version, i.e. with several FMA candidates that feed result
of one into the addend of another. Instead, we add them to a list in STATE
and if we later discover an FMA candidate that is not part of such a chain,
we go back and perform all deferred past candidates.
References ANY_INTEGRAL_TYPE_P, bb_optimization_type(), can_interpret_as_conditional_op_p(), cancel_fma_deferring(), convert_mult_to_fma_1(), dbg_cnt(), direct_internal_fn_supported_p(), dump_file, dump_flags, FLOAT_TYPE_P, FOR_EACH_IMM_USE_FAST, FOR_EACH_PHI_OR_STMT_USE, FP_CONTRACT_FAST, gcc_assert, gcc_checking_assert, gimple_assign_lhs(), gimple_assign_rhs_code(), gimple_bb(), gimple_get_lhs(), has_single_use(), has_zero_uses(), integer_truep(), INTEGRAL_TYPE_P, is_gimple_assign(), is_gimple_debug(), fma_transformation_info::mul_result, fma_transformation_info::mul_stmt, fma_transformation_info::op1, fma_transformation_info::op2, poly_int_tree_p(), print_gimple_stmt(), result_of_phi(), single_imm_use(), SSA_NAME_DEF_STMT, SSA_OP_USE, TDF_DETAILS, TDF_NONE, wi::to_widest(), TREE_CODE, tree_to_poly_int64(), TREE_TYPE, type_has_mode_precision_p(), TYPE_OVERFLOW_TRAPS, TYPE_SIZE, USE_FROM_PTR, and USE_STMT.
This function looks for:
t1 = a TRUNC_DIV_EXPR b;
t2 = a TRUNC_MOD_EXPR b;
and transforms it to the following sequence:
complex_tmp = DIVMOD (a, b);
t1 = REALPART_EXPR(a);
t2 = IMAGPART_EXPR(b);
For conditions enabling the transform see divmod_candidate_p().
The pass has three parts:
1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
other trunc_div_expr and trunc_mod_expr stmts.
2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
to stmts vector.
3) Insert DIVMOD call just before top_stmt and update entries in
stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
IMAGPART_EXPR for mod).
References build_complex_type(), CDI_DOMINATORS, cfun, divmod_candidate_p(), dominated_by_p(), fold_build1, FOR_EACH_IMM_USE_STMT, gcc_unreachable, gimple_assign_rhs1(), gimple_assign_rhs2(), gimple_assign_rhs_code(), gimple_assign_set_rhs_from_tree(), gimple_bb(), gimple_build_call_internal(), gimple_call_set_lhs(), gimple_call_set_nothrow(), gimple_uid(), gsi_for_stmt(), gsi_insert_before(), GSI_SAME_STMT, i, is_gimple_assign(), make_temp_ssa_name(), operand_equal_p(), stmt_can_throw_internal(), TREE_TYPE, update_stmt(), and widen_mul_stats.
Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
square roots. Place at GSI and LOC. Limit the maximum depth
of the sqrt chains to MAX_DEPTH. Return the tree holding the
result of the expanded sequence or NULL_TREE if the expansion failed.
This routine assumes that ARG1 is a real number with a fractional part
(the integer exponent case will have been handled earlier in
gimple_expand_builtin_pow).
For ARG1 > 0.0:
* For ARG1 composed of a whole part WHOLE_PART and a fractional part
FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
FRAC_PART == ARG1 - WHOLE_PART:
Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
POW (ARG0, FRAC_PART) is expanded as a product of square root chains
if it can be expressed as such, that is if FRAC_PART satisfies:
FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
where integer a[i] is either 0 or 1.
Example:
POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
--> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
For ARG1 < 0.0 there are two approaches:
* (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
is calculated as above.
Example:
POW (x, -5.625) == 1.0 / POW (x, 5.625)
--> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
* (B) : WHOLE_PART := - ceil (abs (ARG1))
FRAC_PART := ARG1 - WHOLE_PART
and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
Example:
POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
--> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
For ARG1 < 0.0 we choose between (A) and (B) depending on
how many multiplications we'd have to do.
So, for the example in (B): POW (x, -5.875), if we were to
follow algorithm (A) we would produce:
1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
which contains more multiplications than approach (B).
Hopefully, this approach will eliminate potentially expensive POW library
calls when unsafe floating point math is enabled and allow the compiler to
further optimise the multiplies, square roots and divides produced by this
function.
References build_and_insert_binop(), build_real(), cache, dconst0, dconst1, pow_synth_sqrt_info::deepest, dump_file, dump_fractional_sqrt_sequence(), dump_integer_part(), exp(), pow_synth_sqrt_info::factors, gcc_assert, get_fn_chain(), gimple_expand_builtin_powi(), i, mathfn_built_in(), NULL_TREE, pow_synth_sqrt_info::num_mults, powi_cost(), POWI_MAX_MULTS, real_arithmetic(), real_ceil(), real_floor(), real_from_integer(), real_identical(), real_to_decimal(), real_to_integer(), real_value_abs(), REAL_VALUE_NEGATIVE, REAL_VALUE_TYPE, representable_as_half_series_p(), SIGNED, TREE_CODE, TREE_REAL_CST, TREE_TYPE, and TYPE_MODE.
Referenced by gimple_expand_builtin_pow().
Walk the subset of the dominator tree rooted at OCC, setting the
RECIP_DEF field to a definition of 1.0 / DEF that can be used in
the given basic block. The field may be left NULL, of course,
if it is not possible or profitable to do the optimization.
DEF_BSI is an iterator pointing at the statement defining DEF.
If RECIP_DEF is set, a dominator already has a computation that can
be used.
If should_insert_square_recip is set, then this also inserts
the square of the reciprocal immediately after the definition
of the reciprocal.
References occurrence::bb, occurrence::bb_has_division, build_one_cst(), occurrence::children, create_tmp_reg(), gimple_build_assign(), gsi_after_labels(), gsi_bb(), gsi_end_p(), gsi_insert_after(), gsi_insert_before(), GSI_NEW_STMT, gsi_next(), GSI_SAME_STMT, gsi_stmt(), insert_reciprocals(), is_division_by(), is_division_by_square(), occurrence::next, occurrence::num_divisions, occurrence::recip_def, occurrence::recip_def_stmt, reciprocal_stats, occurrence::square_recip_def, TREE_TYPE, and type().
Referenced by execute_cse_reciprocals_1(), and insert_reciprocals().
Recognize for unsigned x
x = y - z;
if (x > y)
where there are other uses of x and replace it with
_7 = .SUB_OVERFLOW (y, z);
x = REALPART_EXPR <_7>;
_8 = IMAGPART_EXPR <_7>;
if (_8)
and similarly for addition.
Also recognize:
yc = (type) y;
zc = (type) z;
x = yc + zc;
if (x > max)
where y and z have unsigned types with maximum max
and there are other uses of x and all of those cast x
back to that unsigned type and again replace it with
_7 = .ADD_OVERFLOW (y, z);
_9 = REALPART_EXPR <_7>;
_8 = IMAGPART_EXPR <_7>;
if (_8)
and replace (utype) x with _9.
Or with x >> popcount (max) instead of x > max.
Also recognize:
x = ~z;
if (y > x)
and replace it with
_7 = .ADD_OVERFLOW (y, z);
_8 = IMAGPART_EXPR <_7>;
if (_8)
And also recognize:
z = x * y;
if (x != 0)
goto <bb 3>; [50.00%]
else
goto <bb 4>; [50.00%]
<bb 3> [local count: 536870913]:
_2 = z / x;
_9 = _2 != y;
_10 = (int) _9;
<bb 4> [local count: 1073741824]:
# iftmp.0_3 = PHI <_10(3), 0(2)>
and replace it with
_7 = .MUL_OVERFLOW (x, y);
z = IMAGPART_EXPR <_7>;
_8 = IMAGPART_EXPR <_7>;
_9 = _8 != 0;
iftmp.0_3 = (int) _9;
References arith_overflow_check_p(), as_a(), boolean_type_node, build1(), build2(), build_complex_type(), build_int_cst(), can_mult_highpart_p(), cfg_changed, fold_convert, FOR_EACH_IMM_USE_FAST, FOR_EACH_IMM_USE_STMT, g, gcc_assert, gcc_checking_assert, GET_MODE_BITSIZE(), gimple_assign_cast_p(), gimple_assign_lhs(), gimple_assign_rhs1(), gimple_assign_rhs2(), gimple_assign_rhs_class(), gimple_assign_rhs_code(), gimple_assign_set_rhs1(), gimple_assign_set_rhs2(), gimple_assign_set_rhs_code(), gimple_assign_set_rhs_with_ops(), gimple_bb(), GIMPLE_BINARY_RHS, gimple_build_assign(), gimple_build_call_internal(), gimple_call_set_lhs(), gimple_cond_set_code(), gimple_cond_set_lhs(), gimple_cond_set_rhs(), gsi_end_p(), gsi_for_stmt(), gsi_insert_after(), gsi_insert_before(), GSI_NEW_STMT, gsi_next_nondebug(), gsi_prev_nondebug(), gsi_remove(), gsi_replace(), GSI_SAME_STMT, gsi_stmt(), has_zero_uses(), i, INTEGRAL_TYPE_P, is_gimple_assign(), is_gimple_debug(), make_ssa_name(), wi::max_value(), maybe_optimize_guarding_check(), wi::ne_p(), NULL, NULL_TREE, operand_equal_p(), optab_handler(), release_ssa_name(), wi::rshift(), sc, SCALAR_INT_TYPE_MODE, single_imm_use(), SSA_NAME_DEF_STMT, SSA_NAME_OCCURS_IN_ABNORMAL_PHI, wi::to_wide(), TREE_CODE, TREE_TYPE, TYPE_MODE, TYPE_PRECISION, TYPE_UNSIGNED, UNSIGNED, update_stmt(), USE_STMT, useless_type_conversion_p(), and wide_int_to_tree().
Try to match e.g.
_29 = .ADD_OVERFLOW (_3, _4);
_30 = REALPART_EXPR <_29>;
_31 = IMAGPART_EXPR <_29>;
_32 = .ADD_OVERFLOW (_30, _38);
_33 = REALPART_EXPR <_32>;
_34 = IMAGPART_EXPR <_32>;
_35 = _31 + _34;
as
_36 = .UADDC (_3, _4, _38);
_33 = REALPART_EXPR <_36>;
_35 = IMAGPART_EXPR <_36>;
or
_22 = .SUB_OVERFLOW (_6, _5);
_23 = REALPART_EXPR <_22>;
_24 = IMAGPART_EXPR <_22>;
_25 = .SUB_OVERFLOW (_23, _37);
_26 = REALPART_EXPR <_25>;
_27 = IMAGPART_EXPR <_25>;
_28 = _24 | _27;
as
_29 = .USUBC (_6, _5, _37);
_26 = REALPART_EXPR <_29>;
_288 = IMAGPART_EXPR <_29>;
provided _38 or _37 above have [0, 1] range
and _3, _4 and _30 or _6, _5 and _23 are unsigned
integral types with the same precision. Whether + or | or ^ is
used on the IMAGPART_EXPR results doesn't matter, with one of
added or subtracted operands in [0, 1] range at most one
.ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow.
References build1(), build_complex_type(), build_zero_cst(), const_unop(), fold_convert, FOR_EACH_IMM_USE_FAST, g, gcc_checking_assert, gimple_assign_cast_p(), gimple_assign_lhs(), gimple_assign_rhs1(), gimple_assign_rhs2(), gimple_assign_rhs_code(), gimple_build_assign(), gimple_build_call_internal(), gimple_call_arg(), gimple_call_internal_fn(), gimple_call_internal_p(), gimple_call_lhs(), gimple_call_set_lhs(), gsi_for_stmt(), gsi_insert_before(), gsi_remove(), gsi_replace(), GSI_SAME_STMT, has_single_use(), i, INTEGRAL_TYPE_P, is_gimple_assign(), is_gimple_call(), is_gimple_debug(), make_ssa_name(), NULL, NULL_TREE, num_imm_uses(), optab_handler(), r, release_defs(), SSA_NAME_DEF_STMT, TREE_CODE, TREE_OPERAND, TREE_TYPE, tree_zero_one_valued_p(), TYPE_MODE, TYPE_PRECISION, TYPE_UNSIGNED, types_compatible_p(), uaddc_cast(), uaddc_is_cplxpart(), uaddc_ne0(), and USE_STMT.
static void maybe_optimize_guarding_check |
( |
vec< gimple * > & | mul_stmts, |
|
|
gimple * | cond_stmt, |
|
|
gimple * | div_stmt, |
|
|
bool * | cfg_changed ) |
|
static |
Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
a check for non-zero like:
_1 = x_4(D) * y_5(D);
*res_7(D) = _1;
if (x_4(D) != 0)
goto <bb 3>; [50.00%]
else
goto <bb 4>; [50.00%]
<bb 3> [local count: 536870913]:
_2 = _1 / x_4(D);
_9 = _2 != y_5(D);
_10 = (int) _9;
<bb 4> [local count: 1073741824]:
# iftmp.0_3 = PHI <_10(3), 0(2)>
then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
optimize the x_4(D) != 0 condition to 1.
References as_a(), cfg_changed, EDGE_COUNT, EDGE_SUCC, FOR_EACH_VEC_ELT, g, gimple_assign_cast_p(), gimple_assign_lhs(), gimple_assign_rhs1(), gimple_assign_rhs2(), gimple_assign_rhs3(), gimple_assign_rhs_code(), gimple_bb(), gimple_cond_code(), gimple_cond_lhs(), gimple_cond_make_false(), gimple_cond_make_true(), gimple_cond_rhs(), gimple_phi_arg_def(), gsi_after_labels(), gsi_end_p(), gsi_last_bb(), gsi_next(), gsi_next_nondebug(), gsi_start_phis(), gsi_stmt(), i, integer_onep(), integer_zerop(), INTEGRAL_TYPE_P, is_gimple_debug(), NULL, operand_equal_p(), safe_dyn_cast(), single_pred_edge(), single_pred_p(), single_succ(), single_succ_edge(), single_succ_p(), SSA_NAME_DEF_STMT, basic_block_def::succs, TREE_CODE, TREE_TYPE, TYPE_PRECISION, and update_stmt().
Referenced by match_arith_overflow().
Transform sequences like
t = sqrt (a)
x = 1.0 / t;
r1 = x * x;
r2 = a * x;
into:
t = sqrt (a)
r1 = 1.0 / a;
r2 = t;
x = r1 * r2;
depending on the uses of x, r1, r2. This removes one multiplication and
allows the sqrt and division operations to execute in parallel.
DEF_GSI is the gsi of the initial division by sqrt that defines
DEF (x in the example above).
References a, dconst1, dump_file, dyn_cast(), fold_stmt_inplace(), FOR_EACH_IMM_USE_STMT, FOR_EACH_VEC_ELT, gcc_assert, gimple_assign_rhs1(), gimple_assign_rhs2(), gimple_assign_rhs_code(), gimple_assign_set_rhs_from_tree(), gimple_bb(), gimple_build_assign(), gimple_call_arg(), gimple_call_combined_fn(), gimple_call_lhs(), gsi_for_stmt(), gsi_insert_after(), gsi_insert_before(), GSI_NEW_STMT, gsi_remove(), GSI_SAME_STMT, gsi_stmt(), i, is_gimple_debug(), is_mult_by(), is_square_of(), make_temp_ssa_name(), NULL, NULL_TREE, print_gimple_stmt(), real_equal(), release_defs(), release_ssa_name(), SSA_NAME_DEF_STMT, TDF_NONE, TREE_CODE, TREE_REAL_CST, TREE_TYPE, and update_stmt().
static void optimize_spaceship |
( |
gcond * | stmt | ) |
|
|
static |
If target has spaceship<MODE>3 expander, pattern recognize
<bb 2> [local count: 1073741824]:
if (a_2(D) == b_3(D))
goto <bb 6>; [34.00%]
else
goto <bb 3>; [66.00%]
<bb 3> [local count: 708669601]:
if (a_2(D) < b_3(D))
goto <bb 6>; [1.04%]
else
goto <bb 4>; [98.96%]
<bb 4> [local count: 701299439]:
if (a_2(D) > b_3(D))
goto <bb 5>; [48.89%]
else
goto <bb 6>; [51.11%]
<bb 5> [local count: 342865295]:
<bb 6> [local count: 1073741824]:
and turn it into:
<bb 2> [local count: 1073741824]:
_1 = .SPACESHIP (a_2(D), b_3(D));
if (_1 == 0)
goto <bb 6>; [34.00%]
else
goto <bb 3>; [66.00%]
<bb 3> [local count: 708669601]:
if (_1 == -1)
goto <bb 6>; [1.04%]
else
goto <bb 4>; [98.96%]
<bb 4> [local count: 701299439]:
if (_1 == 1)
goto <bb 5>; [48.89%]
else
goto <bb 6>; [51.11%]
<bb 5> [local count: 342865295]:
<bb 6> [local count: 1073741824]:
so that the backend can emit optimal comparison and
conditional jump sequence.
References as_a(), cond_only_block_p(), EDGE_SUCC, g, gcc_assert, gimple_bb(), gimple_build_call_internal(), gimple_call_set_lhs(), gimple_cond_code(), gimple_cond_lhs(), gimple_cond_rhs(), gimple_cond_set_code(), gimple_cond_set_lhs(), gimple_cond_set_rhs(), gsi_for_stmt(), gsi_insert_before(), gsi_last_bb(), GSI_SAME_STMT, HONOR_NANS(), i, integer_minus_one_node, integer_one_node, integer_type_node, integer_zero_node, make_ssa_name(), wi::minus_one(), NULL, operand_equal_p(), optab_handler(), safe_dyn_cast(), SCALAR_FLOAT_TYPE_P, set_range_info(), single_pred_p(), TREE_TYPE, wi::two(), TYPE_MODE, TYPE_PRECISION, and update_stmt().