Branch data Line data Source code
1 : : /* Straight-line strength reduction.
2 : : Copyright (C) 2012-2025 Free Software Foundation, Inc.
3 : : Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : /* There are many algorithms for performing strength reduction on
22 : : loops. This is not one of them. IVOPTS handles strength reduction
23 : : of induction variables just fine. This pass is intended to pick
24 : : up the crumbs it leaves behind, by considering opportunities for
25 : : strength reduction along dominator paths.
26 : :
27 : : Strength reduction addresses explicit multiplies, and certain
28 : : multiplies implicit in addressing expressions. It would also be
29 : : possible to apply strength reduction to divisions and modulos,
30 : : but such opportunities are relatively uncommon.
31 : :
32 : : Strength reduction is also currently restricted to integer operations.
33 : : If desired, it could be extended to floating-point operations under
34 : : control of something like -funsafe-math-optimizations. */
35 : :
36 : : #include "config.h"
37 : : #include "system.h"
38 : : #include "coretypes.h"
39 : : #include "backend.h"
40 : : #include "rtl.h"
41 : : #include "tree.h"
42 : : #include "gimple.h"
43 : : #include "cfghooks.h"
44 : : #include "tree-pass.h"
45 : : #include "ssa.h"
46 : : #include "expmed.h"
47 : : #include "gimple-pretty-print.h"
48 : : #include "fold-const.h"
49 : : #include "gimple-iterator.h"
50 : : #include "gimplify-me.h"
51 : : #include "stor-layout.h"
52 : : #include "cfgloop.h"
53 : : #include "tree-cfg.h"
54 : : #include "domwalk.h"
55 : : #include "tree-ssa-address.h"
56 : : #include "tree-affine.h"
57 : : #include "tree-eh.h"
58 : : #include "builtins.h"
59 : : #include "tree-ssa-dce.h"
60 : :
61 : : /* Information about a strength reduction candidate. Each statement
62 : : in the candidate table represents an expression of one of the
63 : : following forms (the special case of CAND_REF will be described
64 : : later):
65 : :
66 : : (CAND_MULT) S1: X = (B + i) * S
67 : : (CAND_ADD) S1: X = B + (i * S)
68 : :
69 : : Here X and B are SSA names, i is an integer constant, and S is
70 : : either an SSA name or a constant. We call B the "base," i the
71 : : "index", and S the "stride."
72 : :
73 : : Any statement S0 that dominates S1 and is of the form:
74 : :
75 : : (CAND_MULT) S0: Y = (B + i') * S
76 : : (CAND_ADD) S0: Y = B + (i' * S)
77 : :
78 : : is called a "basis" for S1. In both cases, S1 may be replaced by
79 : :
80 : : S1': X = Y + (i - i') * S,
81 : :
82 : : where (i - i') * S is folded to the extent possible.
83 : :
84 : : All gimple statements are visited in dominator order, and each
85 : : statement that may contribute to one of the forms of S1 above is
86 : : given at least one entry in the candidate table. Such statements
87 : : include addition, pointer addition, subtraction, multiplication,
88 : : negation, copies, and nontrivial type casts. If a statement may
89 : : represent more than one expression of the forms of S1 above,
90 : : multiple "interpretations" are stored in the table and chained
91 : : together. Examples:
92 : :
93 : : * An add of two SSA names may treat either operand as the base.
94 : : * A multiply of two SSA names, likewise.
95 : : * A copy or cast may be thought of as either a CAND_MULT with
96 : : i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
97 : :
98 : : Candidate records are allocated from an obstack. They are addressed
99 : : both from a hash table keyed on S1, and from a vector of candidate
100 : : pointers arranged in predominator order.
101 : :
102 : : Opportunity note
103 : : ----------------
104 : : Currently we don't recognize:
105 : :
106 : : S0: Y = (S * i') - B
107 : : S1: X = (S * i) - B
108 : :
109 : : as a strength reduction opportunity, even though this S1 would
110 : : also be replaceable by the S1' above. This can be added if it
111 : : comes up in practice.
112 : :
113 : : Strength reduction in addressing
114 : : --------------------------------
115 : : There is another kind of candidate known as CAND_REF. A CAND_REF
116 : : describes a statement containing a memory reference having
117 : : complex addressing that might benefit from strength reduction.
118 : : Specifically, we are interested in references for which
119 : : get_inner_reference returns a base address, offset, and bitpos as
120 : : follows:
121 : :
122 : : base: MEM_REF (T1, C1)
123 : : offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
124 : : bitpos: C4 * BITS_PER_UNIT
125 : :
126 : : Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
127 : : arbitrary integer constants. Note that C2 may be zero, in which
128 : : case the offset will be MULT_EXPR (T2, C3).
129 : :
130 : : When this pattern is recognized, the original memory reference
131 : : can be replaced with:
132 : :
133 : : MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
134 : : C1 + (C2 * C3) + C4)
135 : :
136 : : which distributes the multiply to allow constant folding. When
137 : : two or more addressing expressions can be represented by MEM_REFs
138 : : of this form, differing only in the constants C1, C2, and C4,
139 : : making this substitution produces more efficient addressing during
140 : : the RTL phases. When there are not at least two expressions with
141 : : the same values of T1, T2, and C3, there is nothing to be gained
142 : : by the replacement.
143 : :
144 : : Strength reduction of CAND_REFs uses the same infrastructure as
145 : : that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
146 : : field, MULT_EXPR (T2, C3) in the stride (S) field, and
147 : : C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
148 : : is thus another CAND_REF with the same B and S values. When at
149 : : least two CAND_REFs are chained together using the basis relation,
150 : : each of them is replaced as above, resulting in improved code
151 : : generation for addressing.
152 : :
153 : : Conditional candidates
154 : : ======================
155 : :
156 : : Conditional candidates are best illustrated with an example.
157 : : Consider the code sequence:
158 : :
159 : : (1) x_0 = ...;
160 : : (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
161 : : if (...)
162 : : (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
163 : : (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
164 : : (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
165 : : (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
166 : :
167 : : Here strength reduction is complicated by the uncertain value of x_2.
168 : : A legitimate transformation is:
169 : :
170 : : (1) x_0 = ...;
171 : : (2) a_0 = x_0 * 5;
172 : : if (...)
173 : : {
174 : : (3) [x_1 = x_0 + 1;]
175 : : (3a) t_1 = a_0 + 5;
176 : : }
177 : : (4) [x_2 = PHI <x_0, x_1>;]
178 : : (4a) t_2 = PHI <a_0, t_1>;
179 : : (5) [x_3 = x_2 + 1;]
180 : : (6r) a_1 = t_2 + 5;
181 : :
182 : : where the bracketed instructions may go dead.
183 : :
184 : : To recognize this opportunity, we have to observe that statement (6)
185 : : has a "hidden basis" (2). The hidden basis is unlike a normal basis
186 : : in that the statement and the hidden basis have different base SSA
187 : : names (x_2 and x_0, respectively). The relationship is established
188 : : when a statement's base name (x_2) is defined by a phi statement (4),
189 : : each argument of which (x_0, x_1) has an identical "derived base name."
190 : : If the argument is defined by a candidate (as x_1 is by (3)) that is a
191 : : CAND_ADD having a stride of 1, the derived base name of the argument is
192 : : the base name of the candidate (x_0). Otherwise, the argument itself
193 : : is its derived base name (as is the case with argument x_0).
194 : :
195 : : The hidden basis for statement (6) is the nearest dominating candidate
196 : : whose base name is the derived base name (x_0) of the feeding phi (4),
197 : : and whose stride is identical to that of the statement. We can then
198 : : create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
199 : : allowing the final replacement of (6) by the strength-reduced (6r).
200 : :
201 : : To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
202 : : A CAND_PHI is not a candidate for replacement, but is maintained in the
203 : : candidate table to ease discovery of hidden bases. Any phi statement
204 : : whose arguments share a common derived base name is entered into the
205 : : table with the derived base name, an (arbitrary) index of zero, and a
206 : : stride of 1. A statement with a hidden basis can then be detected by
207 : : simply looking up its feeding phi definition in the candidate table,
208 : : extracting the derived base name, and searching for a basis in the
209 : : usual manner after substituting the derived base name.
210 : :
211 : : Note that the transformation is only valid when the original phi and
212 : : the statements that define the phi's arguments are all at the same
213 : : position in the loop hierarchy. */
214 : :
215 : :
216 : : /* Index into the candidate vector, offset by 1. VECs are zero-based,
217 : : while cand_idx's are one-based, with zero indicating null. */
218 : : typedef unsigned cand_idx;
219 : :
220 : : /* The kind of candidate. */
221 : : enum cand_kind
222 : : {
223 : : CAND_MULT,
224 : : CAND_ADD,
225 : : CAND_REF,
226 : : CAND_PHI
227 : : };
228 : :
229 : : class slsr_cand_d
230 : : {
231 : : public:
232 : : /* The candidate statement S1. */
233 : : gimple *cand_stmt;
234 : :
235 : : /* The base expression B: often an SSA name, but not always. */
236 : : tree base_expr;
237 : :
238 : : /* The stride S. */
239 : : tree stride;
240 : :
241 : : /* The index constant i. */
242 : : offset_int index;
243 : :
244 : : /* The type of the candidate. This is normally the type of base_expr,
245 : : but casts may have occurred when combining feeding instructions.
246 : : A candidate can only be a basis for candidates of the same final type.
247 : : (For CAND_REFs, this is the type to be used for operand 1 of the
248 : : replacement MEM_REF.) */
249 : : tree cand_type;
250 : :
251 : : /* The type to be used to interpret the stride field when the stride
252 : : is not a constant. Normally the same as the type of the recorded
253 : : stride, but when the stride has been cast we need to maintain that
254 : : knowledge in order to make legal substitutions without losing
255 : : precision. When the stride is a constant, this will be sizetype. */
256 : : tree stride_type;
257 : :
258 : : /* The kind of candidate (CAND_MULT, etc.). */
259 : : enum cand_kind kind;
260 : :
261 : : /* Index of this candidate in the candidate vector. */
262 : : cand_idx cand_num;
263 : :
264 : : /* Index of the next candidate record for the same statement.
265 : : A statement may be useful in more than one way (e.g., due to
266 : : commutativity). So we can have multiple "interpretations"
267 : : of a statement. */
268 : : cand_idx next_interp;
269 : :
270 : : /* Index of the first candidate record in a chain for the same
271 : : statement. */
272 : : cand_idx first_interp;
273 : :
274 : : /* Index of the basis statement S0, if any, in the candidate vector. */
275 : : cand_idx basis;
276 : :
277 : : /* First candidate for which this candidate is a basis, if one exists. */
278 : : cand_idx dependent;
279 : :
280 : : /* Next candidate having the same basis as this one. */
281 : : cand_idx sibling;
282 : :
283 : : /* If this is a conditional candidate, the CAND_PHI candidate
284 : : that defines the base SSA name B. */
285 : : cand_idx def_phi;
286 : :
287 : : /* Savings that can be expected from eliminating dead code if this
288 : : candidate is replaced. */
289 : : int dead_savings;
290 : :
291 : : /* For PHI candidates, use a visited flag to keep from processing the
292 : : same PHI twice from multiple paths. */
293 : : int visited;
294 : :
295 : : /* We sometimes have to cache a phi basis with a phi candidate to
296 : : avoid processing it twice. Valid only if visited==1. */
297 : : tree cached_basis;
298 : : };
299 : :
300 : : typedef class slsr_cand_d slsr_cand, *slsr_cand_t;
301 : : typedef const class slsr_cand_d *const_slsr_cand_t;
302 : :
303 : : /* Pointers to candidates are chained together as part of a mapping
304 : : from base expressions to the candidates that use them. */
305 : :
306 : : struct cand_chain_d
307 : : {
308 : : /* Base expression for the chain of candidates: often, but not
309 : : always, an SSA name. */
310 : : tree base_expr;
311 : :
312 : : /* Pointer to a candidate. */
313 : : slsr_cand_t cand;
314 : :
315 : : /* Chain pointer. */
316 : : struct cand_chain_d *next;
317 : :
318 : : };
319 : :
320 : : typedef struct cand_chain_d cand_chain, *cand_chain_t;
321 : : typedef const struct cand_chain_d *const_cand_chain_t;
322 : :
323 : : /* Information about a unique "increment" associated with candidates
324 : : having an SSA name for a stride. An increment is the difference
325 : : between the index of the candidate and the index of its basis,
326 : : i.e., (i - i') as discussed in the module commentary.
327 : :
328 : : When we are not going to generate address arithmetic we treat
329 : : increments that differ only in sign as the same, allowing sharing
330 : : of the cost of initializers. The absolute value of the increment
331 : : is stored in the incr_info. */
332 : :
333 : : class incr_info_d
334 : : {
335 : : public:
336 : : /* The increment that relates a candidate to its basis. */
337 : : offset_int incr;
338 : :
339 : : /* How many times the increment occurs in the candidate tree. */
340 : : unsigned count;
341 : :
342 : : /* Cost of replacing candidates using this increment. Negative and
343 : : zero costs indicate replacement should be performed. */
344 : : int cost;
345 : :
346 : : /* If this increment is profitable but is not -1, 0, or 1, it requires
347 : : an initializer T_0 = stride * incr to be found or introduced in the
348 : : nearest common dominator of all candidates. This field holds T_0
349 : : for subsequent use. */
350 : : tree initializer;
351 : :
352 : : /* If the initializer was found to already exist, this is the block
353 : : where it was found. */
354 : : basic_block init_bb;
355 : : };
356 : :
357 : : typedef class incr_info_d incr_info, *incr_info_t;
358 : :
359 : : /* Candidates are maintained in a vector. If candidate X dominates
360 : : candidate Y, then X appears before Y in the vector; but the
361 : : converse does not necessarily hold. */
362 : : static vec<slsr_cand_t> cand_vec;
363 : :
364 : : enum cost_consts
365 : : {
366 : : COST_NEUTRAL = 0,
367 : : COST_INFINITE = 1000
368 : : };
369 : :
370 : : enum stride_status
371 : : {
372 : : UNKNOWN_STRIDE = 0,
373 : : KNOWN_STRIDE = 1
374 : : };
375 : :
376 : : enum phi_adjust_status
377 : : {
378 : : NOT_PHI_ADJUST = 0,
379 : : PHI_ADJUST = 1
380 : : };
381 : :
382 : : enum count_phis_status
383 : : {
384 : : DONT_COUNT_PHIS = 0,
385 : : COUNT_PHIS = 1
386 : : };
387 : :
388 : : /* Constrain how many PHI nodes we will visit for a conditional
389 : : candidate (depth and breadth). */
390 : : const int MAX_SPREAD = 16;
391 : :
392 : : /* Pointer map embodying a mapping from statements to candidates. */
393 : : static hash_map<gimple *, slsr_cand_t> *stmt_cand_map;
394 : :
395 : : /* Obstack for candidates. */
396 : : static struct obstack cand_obstack;
397 : :
398 : : /* Obstack for candidate chains. */
399 : : static struct obstack chain_obstack;
400 : :
401 : : /* An array INCR_VEC of incr_infos is used during analysis of related
402 : : candidates having an SSA name for a stride. INCR_VEC_LEN describes
403 : : its current length. MAX_INCR_VEC_LEN is used to avoid costly
404 : : pathological cases. */
405 : : static incr_info_t incr_vec;
406 : : static unsigned incr_vec_len;
407 : : const int MAX_INCR_VEC_LEN = 16;
408 : :
409 : : /* For a chain of candidates with unknown stride, indicates whether or not
410 : : we must generate pointer arithmetic when replacing statements. */
411 : : static bool address_arithmetic_p;
412 : :
413 : : /* Forward function declarations. */
414 : : static slsr_cand_t base_cand_from_table (tree);
415 : : static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
416 : : static bool legal_cast_p_1 (tree, tree);
417 : :
418 : : /* Produce a pointer to the IDX'th candidate in the candidate vector. */
419 : :
420 : : static slsr_cand_t
421 : 7306903 : lookup_cand (cand_idx idx)
422 : : {
423 : 0 : return cand_vec[idx];
424 : : }
425 : :
426 : : /* Helper for hashing a candidate chain header. */
427 : :
428 : : struct cand_chain_hasher : nofree_ptr_hash <cand_chain>
429 : : {
430 : : static inline hashval_t hash (const cand_chain *);
431 : : static inline bool equal (const cand_chain *, const cand_chain *);
432 : : };
433 : :
434 : : inline hashval_t
435 : 29671122 : cand_chain_hasher::hash (const cand_chain *p)
436 : : {
437 : 29671122 : tree base_expr = p->base_expr;
438 : 29671122 : return iterative_hash_expr (base_expr, 0);
439 : : }
440 : :
441 : : inline bool
442 : 23747389 : cand_chain_hasher::equal (const cand_chain *chain1, const cand_chain *chain2)
443 : : {
444 : 23747389 : return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
445 : : }
446 : :
447 : : /* Hash table embodying a mapping from base exprs to chains of candidates. */
448 : : static hash_table<cand_chain_hasher> *base_cand_map;
449 : :
450 : : /* Pointer map used by tree_to_aff_combination_expand. */
451 : : static hash_map<tree, name_expansion *> *name_expansions;
452 : : /* Pointer map embodying a mapping from bases to alternative bases. */
453 : : static hash_map<tree, tree> *alt_base_map;
454 : :
455 : : /* Given BASE, use the tree affine combiniation facilities to
456 : : find the underlying tree expression for BASE, with any
457 : : immediate offset excluded.
458 : :
459 : : N.B. we should eliminate this backtracking with better forward
460 : : analysis in a future release. */
461 : :
462 : : static tree
463 : 52466 : get_alternative_base (tree base)
464 : : {
465 : 52466 : tree *result = alt_base_map->get (base);
466 : :
467 : 52466 : if (result == NULL)
468 : : {
469 : 8752 : tree expr;
470 : 8752 : aff_tree aff;
471 : :
472 : 8752 : tree_to_aff_combination_expand (base, TREE_TYPE (base),
473 : : &aff, &name_expansions);
474 : 8752 : aff.offset = 0;
475 : 8752 : expr = aff_combination_to_tree (&aff);
476 : :
477 : 17379 : bool existed = alt_base_map->put (base, base == expr ? NULL : expr);
478 : 8752 : gcc_assert (!existed);
479 : :
480 : 8752 : return expr == base ? NULL : expr;
481 : 8752 : }
482 : :
483 : 43714 : return *result;
484 : : }
485 : :
486 : : /* Look in the candidate table for a CAND_PHI that defines BASE and
487 : : return it if found; otherwise return NULL. */
488 : :
489 : : static cand_idx
490 : 2279784 : find_phi_def (tree base)
491 : : {
492 : 2279784 : slsr_cand_t c;
493 : :
494 : 2279784 : if (TREE_CODE (base) != SSA_NAME)
495 : : return 0;
496 : :
497 : 2279784 : c = base_cand_from_table (base);
498 : :
499 : 155035 : if (!c || c->kind != CAND_PHI
500 : 2280756 : || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (c->cand_stmt)))
501 : : return 0;
502 : :
503 : 967 : return c->cand_num;
504 : : }
505 : :
506 : : /* Determine whether all uses of NAME are directly or indirectly
507 : : used by STMT. That is, we want to know whether if STMT goes
508 : : dead, the definition of NAME also goes dead. */
509 : : static bool
510 : 187994 : uses_consumed_by_stmt (tree name, gimple *stmt, unsigned recurse = 0)
511 : : {
512 : 187994 : gimple *use_stmt;
513 : 187994 : imm_use_iterator iter;
514 : 187994 : bool retval = true;
515 : :
516 : 359607 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, name)
517 : : {
518 : 269177 : if (use_stmt == stmt || is_gimple_debug (use_stmt))
519 : 169090 : continue;
520 : :
521 : 100087 : if (!is_gimple_assign (use_stmt)
522 : 37587 : || !gimple_get_lhs (use_stmt)
523 : 37587 : || !is_gimple_reg (gimple_get_lhs (use_stmt))
524 : 30701 : || recurse >= 10
525 : 130781 : || !uses_consumed_by_stmt (gimple_get_lhs (use_stmt), stmt,
526 : : recurse + 1))
527 : : {
528 : : retval = false;
529 : : break;
530 : : }
531 : 187994 : }
532 : :
533 : 187994 : return retval;
534 : : }
535 : :
536 : : /* Helper routine for find_basis_for_candidate. May be called twice:
537 : : once for the candidate's base expr, and optionally again either for
538 : : the candidate's phi definition or for a CAND_REF's alternative base
539 : : expression. */
540 : :
541 : : static slsr_cand_t
542 : 7308506 : find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
543 : : {
544 : 7308506 : cand_chain mapping_key;
545 : 7308506 : cand_chain_t chain;
546 : 7308506 : slsr_cand_t basis = NULL;
547 : :
548 : : // Limit potential of N^2 behavior for long candidate chains.
549 : 7308506 : int iters = 0;
550 : 7308506 : int max_iters = param_max_slsr_candidate_scan;
551 : :
552 : 7308506 : mapping_key.base_expr = base_expr;
553 : 7308506 : chain = base_cand_map->find (&mapping_key);
554 : :
555 : 28807536 : for (; chain && iters < max_iters; chain = chain->next, ++iters)
556 : : {
557 : 21499030 : slsr_cand_t one_basis = chain->cand;
558 : :
559 : 39330703 : if (one_basis->kind != c->kind
560 : 12414378 : || one_basis->cand_stmt == c->cand_stmt
561 : 12413659 : || !operand_equal_p (one_basis->stride, c->stride, 0)
562 : 6602575 : || !types_compatible_p (one_basis->cand_type, c->cand_type)
563 : 5460512 : || !types_compatible_p (one_basis->stride_type, c->stride_type)
564 : 26959530 : || !dominated_by_p (CDI_DOMINATORS,
565 : 5460500 : gimple_bb (c->cand_stmt),
566 : 5460500 : gimple_bb (one_basis->cand_stmt)))
567 : 17831673 : continue;
568 : :
569 : 3667357 : tree lhs = gimple_assign_lhs (one_basis->cand_stmt);
570 : 3667357 : if (lhs && TREE_CODE (lhs) == SSA_NAME
571 : 7316063 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
572 : 34 : continue;
573 : :
574 : 3667323 : if (!basis || basis->cand_num < one_basis->cand_num)
575 : 21499030 : basis = one_basis;
576 : : }
577 : :
578 : 7308506 : return basis;
579 : : }
580 : :
581 : : /* Use the base expr from candidate C to look for possible candidates
582 : : that can serve as a basis for C. Each potential basis must also
583 : : appear in a block that dominates the candidate statement and have
584 : : the same stride and type. If more than one possible basis exists,
585 : : the one with highest index in the vector is chosen; this will be
586 : : the most immediately dominating basis. */
587 : :
588 : : static int
589 : 7307493 : find_basis_for_candidate (slsr_cand_t c)
590 : : {
591 : 7307493 : slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
592 : :
593 : : /* If a candidate doesn't have a basis using its base expression,
594 : : it may have a basis hidden by one or more intervening phis. */
595 : 7307493 : if (!basis && c->def_phi)
596 : : {
597 : 861 : basic_block basis_bb, phi_bb;
598 : 861 : slsr_cand_t phi_cand = lookup_cand (c->def_phi);
599 : 861 : basis = find_basis_for_base_expr (c, phi_cand->base_expr);
600 : :
601 : 861 : if (basis)
602 : : {
603 : : /* A hidden basis must dominate the phi-definition of the
604 : : candidate's base name. */
605 : 78 : phi_bb = gimple_bb (phi_cand->cand_stmt);
606 : 78 : basis_bb = gimple_bb (basis->cand_stmt);
607 : :
608 : 78 : if (phi_bb == basis_bb
609 : 78 : || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
610 : : {
611 : 8 : basis = NULL;
612 : 8 : c->basis = 0;
613 : : }
614 : :
615 : : /* If we found a hidden basis, estimate additional dead-code
616 : : savings if the phi and its feeding statements can be removed. */
617 : 78 : tree feeding_var = gimple_phi_result (phi_cand->cand_stmt);
618 : 78 : if (basis && uses_consumed_by_stmt (feeding_var, c->cand_stmt))
619 : 33 : c->dead_savings += phi_cand->dead_savings;
620 : : }
621 : : }
622 : :
623 : 7307493 : if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
624 : : {
625 : 19667 : tree alt_base_expr = get_alternative_base (c->base_expr);
626 : 19667 : if (alt_base_expr)
627 : 152 : basis = find_basis_for_base_expr (c, alt_base_expr);
628 : : }
629 : :
630 : 6268737 : if (basis)
631 : : {
632 : 1152203 : c->sibling = basis->dependent;
633 : 1152203 : basis->dependent = c->cand_num;
634 : 1152203 : return basis->cand_num;
635 : : }
636 : :
637 : : return 0;
638 : : }
639 : :
640 : : /* Record a mapping from BASE to C, indicating that C may potentially serve
641 : : as a basis using that base expression. BASE may be the same as
642 : : C->BASE_EXPR; alternatively BASE can be a different tree that share the
643 : : underlining expression of C->BASE_EXPR. */
644 : :
645 : : static void
646 : 7319022 : record_potential_basis (slsr_cand_t c, tree base)
647 : : {
648 : 7319022 : cand_chain_t node;
649 : 7319022 : cand_chain **slot;
650 : :
651 : 7319022 : gcc_assert (base);
652 : :
653 : 7319022 : node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
654 : 7319022 : node->base_expr = base;
655 : 7319022 : node->cand = c;
656 : 7319022 : node->next = NULL;
657 : 7319022 : slot = base_cand_map->find_slot (node, INSERT);
658 : :
659 : 7319022 : if (*slot)
660 : : {
661 : 3541291 : cand_chain_t head = (cand_chain_t) (*slot);
662 : 3541291 : node->next = head->next;
663 : 3541291 : head->next = node;
664 : : }
665 : : else
666 : 3777731 : *slot = node;
667 : 7319022 : }
668 : :
669 : : /* Allocate storage for a new candidate and initialize its fields.
670 : : Attempt to find a basis for the candidate.
671 : :
672 : : For CAND_REF, an alternative base may also be recorded and used
673 : : to find a basis. This helps cases where the expression hidden
674 : : behind BASE (which is usually an SSA_NAME) has immediate offset,
675 : : e.g.
676 : :
677 : : a2[i][j] = 1;
678 : : a2[i + 20][j] = 2; */
679 : :
680 : : static slsr_cand_t
681 : 7318812 : alloc_cand_and_find_basis (enum cand_kind kind, gimple *gs, tree base,
682 : : const offset_int &index, tree stride, tree ctype,
683 : : tree stype, unsigned savings)
684 : : {
685 : 7318812 : slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
686 : : sizeof (slsr_cand));
687 : 7318812 : c->cand_stmt = gs;
688 : 7318812 : c->base_expr = base;
689 : 7318812 : c->stride = stride;
690 : 7318812 : c->index = index;
691 : 7318812 : c->cand_type = ctype;
692 : 7318812 : c->stride_type = stype;
693 : 7318812 : c->kind = kind;
694 : 7318812 : c->cand_num = cand_vec.length ();
695 : 7318812 : c->next_interp = 0;
696 : 7318812 : c->first_interp = c->cand_num;
697 : 7318812 : c->dependent = 0;
698 : 7318812 : c->sibling = 0;
699 : 7318812 : c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
700 : 7318812 : c->dead_savings = savings;
701 : 7318812 : c->visited = 0;
702 : 7318812 : c->cached_basis = NULL_TREE;
703 : :
704 : 7318812 : cand_vec.safe_push (c);
705 : :
706 : 7318812 : if (kind == CAND_PHI)
707 : 11319 : c->basis = 0;
708 : : else
709 : 7307493 : c->basis = find_basis_for_candidate (c);
710 : :
711 : 7318812 : record_potential_basis (c, base);
712 : 7318812 : if (flag_expensive_optimizations && kind == CAND_REF)
713 : : {
714 : 32799 : tree alt_base = get_alternative_base (base);
715 : 32799 : if (alt_base)
716 : 210 : record_potential_basis (c, alt_base);
717 : : }
718 : :
719 : 7318812 : return c;
720 : : }
721 : :
722 : : /* Determine the target cost of statement GS when compiling according
723 : : to SPEED. */
724 : :
725 : : static int
726 : 1233250 : stmt_cost (gimple *gs, bool speed)
727 : : {
728 : 1233250 : tree lhs, rhs1, rhs2;
729 : 1233250 : machine_mode lhs_mode;
730 : :
731 : 1233250 : gcc_assert (is_gimple_assign (gs));
732 : 1233250 : lhs = gimple_assign_lhs (gs);
733 : 1233250 : rhs1 = gimple_assign_rhs1 (gs);
734 : 1233250 : lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
735 : :
736 : 1233250 : switch (gimple_assign_rhs_code (gs))
737 : : {
738 : 370857 : case MULT_EXPR:
739 : 370857 : rhs2 = gimple_assign_rhs2 (gs);
740 : :
741 : 370857 : if (tree_fits_shwi_p (rhs2))
742 : 300163 : return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);
743 : :
744 : 70694 : gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
745 : 70694 : return mul_cost (speed, lhs_mode);
746 : :
747 : 209793 : case PLUS_EXPR:
748 : 209793 : case POINTER_PLUS_EXPR:
749 : 209793 : case MINUS_EXPR:
750 : 209793 : return add_cost (speed, lhs_mode);
751 : :
752 : 10643 : case NEGATE_EXPR:
753 : 10643 : return neg_cost (speed, lhs_mode);
754 : :
755 : 608428 : CASE_CONVERT:
756 : 1216856 : return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
757 : :
758 : : /* Note that we don't assign costs to copies that in most cases
759 : : will go away. */
760 : : case SSA_NAME:
761 : : return 0;
762 : :
763 : 0 : default:
764 : 0 : ;
765 : : }
766 : :
767 : 0 : gcc_unreachable ();
768 : : }
769 : :
770 : : /* Look up the defining statement for BASE_IN and return a pointer
771 : : to its candidate in the candidate table, if any; otherwise NULL.
772 : : Only CAND_ADD and CAND_MULT candidates are returned. */
773 : :
774 : : static slsr_cand_t
775 : 14263824 : base_cand_from_table (tree base_in)
776 : : {
777 : 14263824 : slsr_cand_t *result;
778 : :
779 : 14263824 : gimple *def = SSA_NAME_DEF_STMT (base_in);
780 : 14263824 : if (!def)
781 : : return (slsr_cand_t) NULL;
782 : :
783 : 14263824 : result = stmt_cand_map->get (def);
784 : :
785 : 14263824 : if (result && (*result)->kind != CAND_REF)
786 : : return *result;
787 : :
788 : : return (slsr_cand_t) NULL;
789 : : }
790 : :
791 : : /* Add an entry to the statement-to-candidate mapping. */
792 : :
793 : : static void
794 : 5256112 : add_cand_for_stmt (gimple *gs, slsr_cand_t c)
795 : : {
796 : 5256112 : bool existed = stmt_cand_map->put (gs, c);
797 : 5256112 : gcc_assert (!existed);
798 : 5256112 : }
799 : :
800 : : /* Given PHI which contains a phi statement, determine whether it
801 : : satisfies all the requirements of a phi candidate. If so, create
802 : : a candidate. Note that a CAND_PHI never has a basis itself, but
803 : : is used to help find a basis for subsequent candidates. */
804 : :
805 : : static void
806 : 4312834 : slsr_process_phi (gphi *phi, bool speed)
807 : : {
808 : 4312834 : unsigned i;
809 : 4312834 : tree arg0_base = NULL_TREE, base_type;
810 : 4312834 : slsr_cand_t c;
811 : 4312834 : class loop *cand_loop = gimple_bb (phi)->loop_father;
812 : 4312834 : unsigned savings = 0;
813 : :
814 : : /* A CAND_PHI requires each of its arguments to have the same
815 : : derived base name. (See the module header commentary for a
816 : : definition of derived base names.) Furthermore, all feeding
817 : : definitions must be in the same position in the loop hierarchy
818 : : as PHI. */
819 : :
820 : 4580845 : for (i = 0; i < gimple_phi_num_args (phi); i++)
821 : : {
822 : 4569526 : slsr_cand_t arg_cand;
823 : 4569526 : tree arg = gimple_phi_arg_def (phi, i);
824 : 4569526 : tree derived_base_name = NULL_TREE;
825 : 4569526 : gimple *arg_stmt = NULL;
826 : 4569526 : basic_block arg_bb = NULL;
827 : :
828 : 4569526 : if (TREE_CODE (arg) != SSA_NAME)
829 : : return;
830 : :
831 : 4169106 : arg_cand = base_cand_from_table (arg);
832 : :
833 : 4169106 : if (arg_cand)
834 : : {
835 : 293119 : while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
836 : : {
837 : 23251 : if (!arg_cand->next_interp)
838 : : return;
839 : :
840 : 3790 : arg_cand = lookup_cand (arg_cand->next_interp);
841 : : }
842 : :
843 : 269868 : if (!integer_onep (arg_cand->stride))
844 : : return;
845 : :
846 : 157201 : derived_base_name = arg_cand->base_expr;
847 : 157201 : arg_stmt = arg_cand->cand_stmt;
848 : 157201 : arg_bb = gimple_bb (arg_stmt);
849 : :
850 : : /* Gather potential dead code savings if the phi statement
851 : : can be removed later on. */
852 : 157201 : if (uses_consumed_by_stmt (arg, phi))
853 : : {
854 : 87851 : if (gimple_code (arg_stmt) == GIMPLE_PHI)
855 : 3006 : savings += arg_cand->dead_savings;
856 : : else
857 : 84845 : savings += stmt_cost (arg_stmt, speed);
858 : : }
859 : : }
860 : 3879777 : else if (SSA_NAME_IS_DEFAULT_DEF (arg))
861 : : {
862 : 193249 : derived_base_name = arg;
863 : 193249 : arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
864 : : }
865 : :
866 : 350450 : if (!arg_bb || arg_bb->loop_father != cand_loop)
867 : 3755777 : return;
868 : :
869 : 281201 : if (i == 0)
870 : : arg0_base = derived_base_name;
871 : 45520 : else if (!operand_equal_p (derived_base_name, arg0_base, 0))
872 : : return;
873 : : }
874 : :
875 : : /* Create the candidate. "alloc_cand_and_find_basis" is named
876 : : misleadingly for this case, as no basis will be sought for a
877 : : CAND_PHI. */
878 : 11319 : base_type = TREE_TYPE (arg0_base);
879 : :
880 : 11319 : c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base,
881 : 11319 : 0, integer_one_node, base_type,
882 : : sizetype, savings);
883 : :
884 : : /* Add the candidate to the statement-candidate mapping. */
885 : 11319 : add_cand_for_stmt (phi, c);
886 : : }
887 : :
888 : : /* Given PBASE which is a pointer to tree, look up the defining
889 : : statement for it and check whether the candidate is in the
890 : : form of:
891 : :
892 : : X = B + (1 * S), S is integer constant
893 : : X = B + (i * S), S is integer one
894 : :
895 : : If so, set PBASE to the candidate's base_expr and return double
896 : : int (i * S).
897 : : Otherwise, just return double int zero. */
898 : :
899 : : static offset_int
900 : 42530 : backtrace_base_for_ref (tree *pbase)
901 : : {
902 : 42530 : tree base_in = *pbase;
903 : 42530 : slsr_cand_t base_cand;
904 : :
905 : 42530 : STRIP_NOPS (base_in);
906 : :
907 : : /* Strip off widening conversion(s) to handle cases where
908 : : e.g. 'B' is widened from an 'int' in order to calculate
909 : : a 64-bit address. */
910 : 40483 : if (CONVERT_EXPR_P (base_in)
911 : 44577 : && legal_cast_p_1 (TREE_TYPE (base_in),
912 : 2047 : TREE_TYPE (TREE_OPERAND (base_in, 0))))
913 : 1733 : base_in = get_unwidened (base_in, NULL_TREE);
914 : :
915 : 42530 : if (TREE_CODE (base_in) != SSA_NAME)
916 : 1217 : return 0;
917 : :
918 : 41313 : base_cand = base_cand_from_table (base_in);
919 : :
920 : 104046 : while (base_cand && base_cand->kind != CAND_PHI)
921 : : {
922 : 43033 : if (base_cand->kind == CAND_ADD
923 : 41810 : && base_cand->index == 1
924 : 66504 : && TREE_CODE (base_cand->stride) == INTEGER_CST)
925 : : {
926 : : /* X = B + (1 * S), S is integer constant. */
927 : 3767 : *pbase = base_cand->base_expr;
928 : 3767 : return wi::to_offset (base_cand->stride);
929 : : }
930 : 39266 : else if (base_cand->kind == CAND_ADD
931 : 38043 : && TREE_CODE (base_cand->stride) == INTEGER_CST
932 : 57112 : && integer_onep (base_cand->stride))
933 : : {
934 : : /* X = B + (i * S), S is integer one. */
935 : 17846 : *pbase = base_cand->base_expr;
936 : 17846 : return base_cand->index;
937 : : }
938 : :
939 : 21420 : base_cand = lookup_cand (base_cand->next_interp);
940 : : }
941 : :
942 : 19700 : return 0;
943 : : }
944 : :
945 : : /* Look for the following pattern:
946 : :
947 : : *PBASE: MEM_REF (T1, C1)
948 : :
949 : : *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
950 : : or
951 : : MULT_EXPR (PLUS_EXPR (T2, C2), C3)
952 : : or
953 : : MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
954 : :
955 : : *PINDEX: C4 * BITS_PER_UNIT
956 : :
957 : : If not present, leave the input values unchanged and return FALSE.
958 : : Otherwise, modify the input values as follows and return TRUE:
959 : :
960 : : *PBASE: T1
961 : : *POFFSET: MULT_EXPR (T2, C3)
962 : : *PINDEX: C1 + (C2 * C3) + C4
963 : :
964 : : When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
965 : : will be further restructured to:
966 : :
967 : : *PBASE: T1
968 : : *POFFSET: MULT_EXPR (T2', C3)
969 : : *PINDEX: C1 + (C2 * C3) + C4 + (C5 * C3) */
970 : :
971 : : static bool
972 : 5596643 : restructure_reference (tree *pbase, tree *poffset, offset_int *pindex,
973 : : tree *ptype)
974 : : {
975 : 5596643 : tree base = *pbase, offset = *poffset;
976 : 5596643 : offset_int index = *pindex;
977 : 5596643 : tree mult_op0, t1, t2, type;
978 : 5596643 : offset_int c1, c2, c3, c4, c5;
979 : 5596643 : offset_int mem_offset;
980 : :
981 : 5596643 : if (!base
982 : 5596643 : || !offset
983 : 187957 : || TREE_CODE (base) != MEM_REF
984 : 68208 : || !mem_ref_offset (base).is_constant (&mem_offset)
985 : 68208 : || TREE_CODE (offset) != MULT_EXPR
986 : 43104 : || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
987 : 5639745 : || wi::umod_floor (index, BITS_PER_UNIT) != 0)
988 : 5553541 : return false;
989 : :
990 : 43102 : t1 = TREE_OPERAND (base, 0);
991 : 43102 : c1 = offset_int::from (mem_offset, SIGNED);
992 : 43102 : type = TREE_TYPE (TREE_OPERAND (base, 1));
993 : :
994 : 43102 : mult_op0 = TREE_OPERAND (offset, 0);
995 : 43102 : c3 = wi::to_offset (TREE_OPERAND (offset, 1));
996 : :
997 : 43102 : if (TREE_CODE (mult_op0) == PLUS_EXPR)
998 : :
999 : 2869 : if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
1000 : : {
1001 : 2297 : t2 = TREE_OPERAND (mult_op0, 0);
1002 : 2297 : c2 = wi::to_offset (TREE_OPERAND (mult_op0, 1));
1003 : : }
1004 : : else
1005 : : return false;
1006 : :
1007 : 40233 : else if (TREE_CODE (mult_op0) == MINUS_EXPR)
1008 : :
1009 : 0 : if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
1010 : : {
1011 : 0 : t2 = TREE_OPERAND (mult_op0, 0);
1012 : 0 : c2 = -wi::to_offset (TREE_OPERAND (mult_op0, 1));
1013 : : }
1014 : : else
1015 : : return false;
1016 : :
1017 : : else
1018 : : {
1019 : 40233 : t2 = mult_op0;
1020 : 40233 : c2 = 0;
1021 : : }
1022 : :
1023 : 42530 : c4 = index >> LOG2_BITS_PER_UNIT;
1024 : 42530 : c5 = backtrace_base_for_ref (&t2);
1025 : :
1026 : 42530 : *pbase = t1;
1027 : 42530 : *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
1028 : : wide_int_to_tree (sizetype, c3));
1029 : 42530 : *pindex = c1 + c2 * c3 + c4 + c5 * c3;
1030 : 42530 : *ptype = type;
1031 : :
1032 : 42530 : return true;
1033 : : }
1034 : :
1035 : : /* Given GS which contains a data reference, create a CAND_REF entry in
1036 : : the candidate table and attempt to find a basis. */
1037 : :
1038 : : static void
1039 : 11760463 : slsr_process_ref (gimple *gs)
1040 : : {
1041 : 11760463 : tree ref_expr, base, offset, type;
1042 : 11760463 : poly_int64 bitsize, bitpos;
1043 : 11760463 : machine_mode mode;
1044 : 11760463 : int unsignedp, reversep, volatilep;
1045 : 11760463 : slsr_cand_t c;
1046 : :
1047 : 23520926 : if (gimple_vdef (gs))
1048 : 6653331 : ref_expr = gimple_assign_lhs (gs);
1049 : : else
1050 : 5107132 : ref_expr = gimple_assign_rhs1 (gs);
1051 : :
1052 : 11760463 : if (!handled_component_p (ref_expr)
1053 : 5681319 : || TREE_CODE (ref_expr) == BIT_FIELD_REF
1054 : 5654442 : || (TREE_CODE (ref_expr) == COMPONENT_REF
1055 : 4937008 : && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
1056 : 11717933 : return;
1057 : :
1058 : 5597300 : base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
1059 : : &unsignedp, &reversep, &volatilep);
1060 : 5597300 : HOST_WIDE_INT cbitpos;
1061 : 5597300 : if (reversep || !bitpos.is_constant (&cbitpos))
1062 : : return;
1063 : 5596643 : offset_int index = cbitpos;
1064 : :
1065 : 5596643 : if (!restructure_reference (&base, &offset, &index, &type))
1066 : : return;
1067 : :
1068 : 42530 : c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
1069 : : type, sizetype, 0);
1070 : :
1071 : : /* Add the candidate to the statement-candidate mapping. */
1072 : 42530 : add_cand_for_stmt (gs, c);
1073 : : }
1074 : :
1075 : : /* Create a candidate entry for a statement GS, where GS multiplies
1076 : : two SSA names BASE_IN and STRIDE_IN. Propagate any known information
1077 : : about the two SSA names into the new candidate. Return the new
1078 : : candidate. */
1079 : :
1080 : : static slsr_cand_t
1081 : 276612 : create_mul_ssa_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
1082 : : {
1083 : 276612 : tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1084 : 276612 : tree stype = NULL_TREE;
1085 : 276612 : offset_int index;
1086 : 276612 : unsigned savings = 0;
1087 : 276612 : slsr_cand_t c;
1088 : 276612 : slsr_cand_t base_cand = base_cand_from_table (base_in);
1089 : :
1090 : : /* Look at all interpretations of the base candidate, if necessary,
1091 : : to find information to propagate into this candidate. */
1092 : 651631 : while (base_cand && !base && base_cand->kind != CAND_PHI)
1093 : : {
1094 : :
1095 : 98407 : if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
1096 : : {
1097 : : /* Y = (B + i') * 1
1098 : : X = Y * Z
1099 : : ================
1100 : : X = (B + i') * Z */
1101 : 92 : base = base_cand->base_expr;
1102 : 92 : index = base_cand->index;
1103 : 92 : stride = stride_in;
1104 : 92 : ctype = base_cand->cand_type;
1105 : 92 : stype = TREE_TYPE (stride_in);
1106 : 92 : if (has_single_use (base_in))
1107 : 138 : savings = (base_cand->dead_savings
1108 : 69 : + stmt_cost (base_cand->cand_stmt, speed));
1109 : : }
1110 : 98315 : else if (base_cand->kind == CAND_ADD
1111 : 83537 : && TREE_CODE (base_cand->stride) == INTEGER_CST)
1112 : : {
1113 : : /* Y = B + (i' * S), S constant
1114 : : X = Y * Z
1115 : : ============================
1116 : : X = B + ((i' * S) * Z) */
1117 : 57732 : base = base_cand->base_expr;
1118 : 57732 : index = base_cand->index * wi::to_offset (base_cand->stride);
1119 : 57732 : stride = stride_in;
1120 : 57732 : ctype = base_cand->cand_type;
1121 : 57732 : stype = TREE_TYPE (stride_in);
1122 : 57732 : if (has_single_use (base_in))
1123 : 72250 : savings = (base_cand->dead_savings
1124 : 36125 : + stmt_cost (base_cand->cand_stmt, speed));
1125 : : }
1126 : :
1127 : 98407 : base_cand = lookup_cand (base_cand->next_interp);
1128 : : }
1129 : :
1130 : 276612 : if (!base)
1131 : : {
1132 : : /* No interpretations had anything useful to propagate, so
1133 : : produce X = (Y + 0) * Z. */
1134 : 218788 : base = base_in;
1135 : 218788 : index = 0;
1136 : 218788 : stride = stride_in;
1137 : 218788 : ctype = TREE_TYPE (base_in);
1138 : 218788 : stype = TREE_TYPE (stride_in);
1139 : : }
1140 : :
1141 : 276612 : c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1142 : : ctype, stype, savings);
1143 : 276612 : return c;
1144 : : }
1145 : :
1146 : : /* Create a candidate entry for a statement GS, where GS multiplies
1147 : : SSA name BASE_IN by constant STRIDE_IN. Propagate any known
1148 : : information about BASE_IN into the new candidate. Return the new
1149 : : candidate. */
1150 : :
1151 : : static slsr_cand_t
1152 : 604526 : create_mul_imm_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
1153 : : {
1154 : 604526 : tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1155 : 604526 : offset_int index, temp;
1156 : 604526 : unsigned savings = 0;
1157 : 604526 : slsr_cand_t c;
1158 : 604526 : slsr_cand_t base_cand = base_cand_from_table (base_in);
1159 : :
1160 : : /* Look at all interpretations of the base candidate, if necessary,
1161 : : to find information to propagate into this candidate. */
1162 : 1571653 : while (base_cand && !base && base_cand->kind != CAND_PHI)
1163 : : {
1164 : 362601 : if (base_cand->kind == CAND_MULT
1165 : 31114 : && TREE_CODE (base_cand->stride) == INTEGER_CST)
1166 : : {
1167 : : /* Y = (B + i') * S, S constant
1168 : : X = Y * c
1169 : : ============================
1170 : : X = (B + i') * (S * c) */
1171 : 11767 : temp = wi::to_offset (base_cand->stride) * wi::to_offset (stride_in);
1172 : 11767 : if (wi::fits_to_tree_p (temp, TREE_TYPE (stride_in)))
1173 : : {
1174 : 11336 : base = base_cand->base_expr;
1175 : 11336 : index = base_cand->index;
1176 : 11336 : stride = wide_int_to_tree (TREE_TYPE (stride_in), temp);
1177 : 11336 : ctype = base_cand->cand_type;
1178 : 11336 : if (has_single_use (base_in))
1179 : 12850 : savings = (base_cand->dead_savings
1180 : 6425 : + stmt_cost (base_cand->cand_stmt, speed));
1181 : : }
1182 : : }
1183 : 350834 : else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
1184 : : {
1185 : : /* Y = B + (i' * 1)
1186 : : X = Y * c
1187 : : ===========================
1188 : : X = (B + i') * c */
1189 : 237161 : base = base_cand->base_expr;
1190 : 237161 : index = base_cand->index;
1191 : 237161 : stride = stride_in;
1192 : 237161 : ctype = base_cand->cand_type;
1193 : 237161 : if (has_single_use (base_in))
1194 : 387392 : savings = (base_cand->dead_savings
1195 : 193696 : + stmt_cost (base_cand->cand_stmt, speed));
1196 : : }
1197 : 113673 : else if (base_cand->kind == CAND_ADD
1198 : 189466 : && base_cand->index == 1
1199 : 189466 : && TREE_CODE (base_cand->stride) == INTEGER_CST)
1200 : : {
1201 : : /* Y = B + (1 * S), S constant
1202 : : X = Y * c
1203 : : ===========================
1204 : : X = (B + S) * c */
1205 : 0 : base = base_cand->base_expr;
1206 : 0 : index = wi::to_offset (base_cand->stride);
1207 : 0 : stride = stride_in;
1208 : 0 : ctype = base_cand->cand_type;
1209 : 0 : if (has_single_use (base_in))
1210 : 0 : savings = (base_cand->dead_savings
1211 : 0 : + stmt_cost (base_cand->cand_stmt, speed));
1212 : : }
1213 : :
1214 : 362601 : base_cand = lookup_cand (base_cand->next_interp);
1215 : : }
1216 : :
1217 : 604526 : if (!base)
1218 : : {
1219 : : /* No interpretations had anything useful to propagate, so
1220 : : produce X = (Y + 0) * c. */
1221 : 356029 : base = base_in;
1222 : 356029 : index = 0;
1223 : 356029 : stride = stride_in;
1224 : 356029 : ctype = TREE_TYPE (base_in);
1225 : : }
1226 : :
1227 : 604526 : c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1228 : : ctype, sizetype, savings);
1229 : 604526 : return c;
1230 : : }
1231 : :
1232 : : /* Given GS which is a multiply of scalar integers, make an appropriate
1233 : : entry in the candidate table. If this is a multiply of two SSA names,
1234 : : create two CAND_MULT interpretations and attempt to find a basis for
1235 : : each of them. Otherwise, create a single CAND_MULT and attempt to
1236 : : find a basis. */
1237 : :
1238 : : static void
1239 : 663467 : slsr_process_mul (gimple *gs, tree rhs1, tree rhs2, bool speed)
1240 : : {
1241 : 663467 : slsr_cand_t c, c2;
1242 : :
1243 : : /* If this is a multiply of an SSA name with itself, it is highly
1244 : : unlikely that we will get a strength reduction opportunity, so
1245 : : don't record it as a candidate. This simplifies the logic for
1246 : : finding a basis, so if this is removed that must be considered. */
1247 : 663467 : if (rhs1 == rhs2)
1248 : : return;
1249 : :
1250 : 656049 : if (TREE_CODE (rhs2) == SSA_NAME)
1251 : : {
1252 : : /* Record an interpretation of this statement in the candidate table
1253 : : assuming RHS1 is the base expression and RHS2 is the stride. */
1254 : 138306 : c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1255 : :
1256 : : /* Add the first interpretation to the statement-candidate mapping. */
1257 : 138306 : add_cand_for_stmt (gs, c);
1258 : :
1259 : : /* Record another interpretation of this statement assuming RHS1
1260 : : is the stride and RHS2 is the base expression. */
1261 : 138306 : c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1262 : 138306 : c->next_interp = c2->cand_num;
1263 : 138306 : c2->first_interp = c->cand_num;
1264 : : }
1265 : 517743 : else if (TREE_CODE (rhs2) == INTEGER_CST && !integer_zerop (rhs2))
1266 : : {
1267 : : /* Record an interpretation for the multiply-immediate. */
1268 : 517743 : c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1269 : :
1270 : : /* Add the interpretation to the statement-candidate mapping. */
1271 : 517743 : add_cand_for_stmt (gs, c);
1272 : : }
1273 : : }
1274 : :
1275 : : /* Create a candidate entry for a statement GS, where GS adds two
1276 : : SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1277 : : subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
1278 : : information about the two SSA names into the new candidate.
1279 : : Return the new candidate. */
1280 : :
1281 : : static slsr_cand_t
1282 : 1780089 : create_add_ssa_cand (gimple *gs, tree base_in, tree addend_in,
1283 : : bool subtract_p, bool speed)
1284 : : {
1285 : 1780089 : tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1286 : 1780089 : tree stype = NULL_TREE;
1287 : 1780089 : offset_int index;
1288 : 1780089 : unsigned savings = 0;
1289 : 1780089 : slsr_cand_t c;
1290 : 1780089 : slsr_cand_t base_cand = base_cand_from_table (base_in);
1291 : 1780089 : slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1292 : :
1293 : : /* The most useful transformation is a multiply-immediate feeding
1294 : : an add or subtract. Look for that first. */
1295 : 5039410 : while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1296 : : {
1297 : 1479232 : if (addend_cand->kind == CAND_MULT
1298 : 1626410 : && addend_cand->index == 0
1299 : 2177060 : && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1300 : : {
1301 : : /* Z = (B + 0) * S, S constant
1302 : : X = Y +/- Z
1303 : : ===========================
1304 : : X = Y + ((+/-1 * S) * B) */
1305 : 550650 : base = base_in;
1306 : 550650 : index = wi::to_offset (addend_cand->stride);
1307 : 550650 : if (subtract_p)
1308 : 39962 : index = -index;
1309 : 550650 : stride = addend_cand->base_expr;
1310 : 550650 : ctype = TREE_TYPE (base_in);
1311 : 550650 : stype = addend_cand->cand_type;
1312 : 550650 : if (has_single_use (addend_in))
1313 : 915956 : savings = (addend_cand->dead_savings
1314 : 457978 : + stmt_cost (addend_cand->cand_stmt, speed));
1315 : : }
1316 : :
1317 : 1479232 : addend_cand = lookup_cand (addend_cand->next_interp);
1318 : : }
1319 : :
1320 : 2505444 : while (base_cand && !base && base_cand->kind != CAND_PHI)
1321 : : {
1322 : 725355 : if (base_cand->kind == CAND_ADD
1323 : 725355 : && (base_cand->index == 0
1324 : 418162 : || operand_equal_p (base_cand->stride,
1325 : 418162 : integer_zero_node, 0)))
1326 : : {
1327 : : /* Y = B + (i' * S), i' * S = 0
1328 : : X = Y +/- Z
1329 : : ============================
1330 : : X = B + (+/-1 * Z) */
1331 : 92869 : base = base_cand->base_expr;
1332 : 174915 : index = subtract_p ? -1 : 1;
1333 : 92869 : stride = addend_in;
1334 : 92869 : ctype = base_cand->cand_type;
1335 : 185738 : stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
1336 : 92869 : : TREE_TYPE (addend_in));
1337 : 92869 : if (has_single_use (base_in))
1338 : 138612 : savings = (base_cand->dead_savings
1339 : 69306 : + stmt_cost (base_cand->cand_stmt, speed));
1340 : : }
1341 : 632486 : else if (subtract_p)
1342 : : {
1343 : 67543 : slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1344 : :
1345 : 160169 : while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1346 : : {
1347 : 25083 : if (subtrahend_cand->kind == CAND_MULT
1348 : 33250 : && subtrahend_cand->index == 0
1349 : 33250 : && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1350 : : {
1351 : : /* Z = (B + 0) * S, S constant
1352 : : X = Y - Z
1353 : : ===========================
1354 : : Value: X = Y + ((-1 * S) * B) */
1355 : 0 : base = base_in;
1356 : 0 : index = wi::to_offset (subtrahend_cand->stride);
1357 : 0 : index = -index;
1358 : 0 : stride = subtrahend_cand->base_expr;
1359 : 0 : ctype = TREE_TYPE (base_in);
1360 : 0 : stype = subtrahend_cand->cand_type;
1361 : 0 : if (has_single_use (addend_in))
1362 : 0 : savings = (subtrahend_cand->dead_savings
1363 : 0 : + stmt_cost (subtrahend_cand->cand_stmt, speed));
1364 : : }
1365 : :
1366 : 25083 : subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1367 : : }
1368 : : }
1369 : :
1370 : 725355 : base_cand = lookup_cand (base_cand->next_interp);
1371 : : }
1372 : :
1373 : 1780089 : if (!base)
1374 : : {
1375 : : /* No interpretations had anything useful to propagate, so
1376 : : produce X = Y + (1 * Z). */
1377 : 1136570 : base = base_in;
1378 : 2114694 : index = subtract_p ? -1 : 1;
1379 : 1136570 : stride = addend_in;
1380 : 1136570 : ctype = TREE_TYPE (base_in);
1381 : 2273140 : stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
1382 : 1136570 : : TREE_TYPE (addend_in));
1383 : : }
1384 : :
1385 : 1780089 : c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1386 : : ctype, stype, savings);
1387 : 1780089 : return c;
1388 : : }
1389 : :
1390 : : /* Create a candidate entry for a statement GS, where GS adds SSA
1391 : : name BASE_IN to constant INDEX_IN. Propagate any known information
1392 : : about BASE_IN into the new candidate. Return the new candidate. */
1393 : :
1394 : : static slsr_cand_t
1395 : 1735968 : create_add_imm_cand (gimple *gs, tree base_in, const offset_int &index_in,
1396 : : bool speed)
1397 : : {
1398 : 1735968 : enum cand_kind kind = CAND_ADD;
1399 : 1735968 : tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1400 : 1735968 : tree stype = NULL_TREE;
1401 : 1735968 : offset_int index, multiple;
1402 : 1735968 : unsigned savings = 0;
1403 : 1735968 : slsr_cand_t c;
1404 : 1735968 : slsr_cand_t base_cand = base_cand_from_table (base_in);
1405 : :
1406 : 3861193 : while (base_cand && !base && base_cand->kind != CAND_PHI)
1407 : : {
1408 : 389257 : signop sign = TYPE_SIGN (TREE_TYPE (base_cand->stride));
1409 : :
1410 : 389257 : if (TREE_CODE (base_cand->stride) == INTEGER_CST
1411 : 389257 : && wi::multiple_of_p (index_in, wi::to_offset (base_cand->stride),
1412 : : sign, &multiple))
1413 : : {
1414 : : /* Y = (B + i') * S, S constant, c = kS for some integer k
1415 : : X = Y + c
1416 : : ============================
1417 : : X = (B + (i'+ k)) * S
1418 : : OR
1419 : : Y = B + (i' * S), S constant, c = kS for some integer k
1420 : : X = Y + c
1421 : : ============================
1422 : : X = (B + (i'+ k)) * S */
1423 : 227081 : kind = base_cand->kind;
1424 : 227081 : base = base_cand->base_expr;
1425 : 227081 : index = base_cand->index + multiple;
1426 : 227081 : stride = base_cand->stride;
1427 : 227081 : ctype = base_cand->cand_type;
1428 : 227081 : stype = base_cand->stride_type;
1429 : 227081 : if (has_single_use (base_in))
1430 : 295430 : savings = (base_cand->dead_savings
1431 : 147715 : + stmt_cost (base_cand->cand_stmt, speed));
1432 : : }
1433 : :
1434 : 389257 : base_cand = lookup_cand (base_cand->next_interp);
1435 : : }
1436 : :
1437 : 1735968 : if (!base)
1438 : : {
1439 : : /* No interpretations had anything useful to propagate, so
1440 : : produce X = Y + (c * 1). */
1441 : 1508887 : kind = CAND_ADD;
1442 : 1508887 : base = base_in;
1443 : 1508887 : index = index_in;
1444 : 1508887 : stride = integer_one_node;
1445 : 1508887 : ctype = TREE_TYPE (base_in);
1446 : 1508887 : stype = sizetype;
1447 : : }
1448 : :
1449 : 1735968 : c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1450 : : ctype, stype, savings);
1451 : 1735968 : return c;
1452 : : }
1453 : :
1454 : : /* Given GS which is an add or subtract of scalar integers or pointers,
1455 : : make at least one appropriate entry in the candidate table. */
1456 : :
1457 : : static void
1458 : 2930781 : slsr_process_add (gimple *gs, tree rhs1, tree rhs2, bool speed)
1459 : : {
1460 : 2930781 : bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1461 : 2930781 : slsr_cand_t c = NULL, c2;
1462 : :
1463 : 2930781 : if (TREE_CODE (rhs2) == SSA_NAME)
1464 : : {
1465 : : /* First record an interpretation assuming RHS1 is the base expression
1466 : : and RHS2 is the stride. But it doesn't make sense for the
1467 : : stride to be a pointer, so don't record a candidate in that case. */
1468 : 1194813 : if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1469 : : {
1470 : 1194813 : c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1471 : :
1472 : : /* Add the first interpretation to the statement-candidate
1473 : : mapping. */
1474 : 1194813 : add_cand_for_stmt (gs, c);
1475 : : }
1476 : :
1477 : : /* If the two RHS operands are identical, or this is a subtract,
1478 : : we're done. */
1479 : 1194813 : if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1480 : : return;
1481 : :
1482 : : /* Otherwise, record another interpretation assuming RHS2 is the
1483 : : base expression and RHS1 is the stride, again provided that the
1484 : : stride is not a pointer. */
1485 : 985582 : if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1486 : : {
1487 : 585276 : c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1488 : 585276 : if (c)
1489 : : {
1490 : 585276 : c->next_interp = c2->cand_num;
1491 : 585276 : c2->first_interp = c->cand_num;
1492 : : }
1493 : : else
1494 : 0 : add_cand_for_stmt (gs, c2);
1495 : : }
1496 : : }
1497 : 1735968 : else if (TREE_CODE (rhs2) == INTEGER_CST)
1498 : : {
1499 : : /* Record an interpretation for the add-immediate. */
1500 : 1735968 : offset_int index = wi::to_offset (rhs2);
1501 : 1735968 : if (subtract_p)
1502 : 23386 : index = -index;
1503 : :
1504 : 1735968 : c = create_add_imm_cand (gs, rhs1, index, speed);
1505 : :
1506 : : /* Add the interpretation to the statement-candidate mapping. */
1507 : 1735968 : add_cand_for_stmt (gs, c);
1508 : : }
1509 : : }
1510 : :
1511 : : /* Given GS which is a negate of a scalar integer, make an appropriate
1512 : : entry in the candidate table. A negate is equivalent to a multiply
1513 : : by -1. */
1514 : :
1515 : : static void
1516 : 86783 : slsr_process_neg (gimple *gs, tree rhs1, bool speed)
1517 : : {
1518 : : /* Record a CAND_MULT interpretation for the multiply by -1. */
1519 : 86783 : slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1520 : :
1521 : : /* Add the interpretation to the statement-candidate mapping. */
1522 : 86783 : add_cand_for_stmt (gs, c);
1523 : 86783 : }
1524 : :
1525 : : /* Help function for legal_cast_p, operating on two trees. Checks
1526 : : whether it's allowable to cast from RHS to LHS. See legal_cast_p
1527 : : for more details. */
1528 : :
1529 : : static bool
1530 : 2665897 : legal_cast_p_1 (tree lhs_type, tree rhs_type)
1531 : : {
1532 : 2665897 : unsigned lhs_size, rhs_size;
1533 : 2665897 : bool lhs_wraps, rhs_wraps;
1534 : :
1535 : 2665897 : lhs_size = TYPE_PRECISION (lhs_type);
1536 : 2665897 : rhs_size = TYPE_PRECISION (rhs_type);
1537 : 2665897 : lhs_wraps = ANY_INTEGRAL_TYPE_P (lhs_type) && TYPE_OVERFLOW_WRAPS (lhs_type);
1538 : 2665897 : rhs_wraps = ANY_INTEGRAL_TYPE_P (rhs_type) && TYPE_OVERFLOW_WRAPS (rhs_type);
1539 : :
1540 : 2665897 : if (lhs_size < rhs_size
1541 : 2414241 : || (rhs_wraps && !lhs_wraps)
1542 : 1561799 : || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1543 : 1273372 : return false;
1544 : :
1545 : : return true;
1546 : : }
1547 : :
1548 : : /* Return TRUE if GS is a statement that defines an SSA name from
1549 : : a conversion and is legal for us to combine with an add and multiply
1550 : : in the candidate table. For example, suppose we have:
1551 : :
1552 : : A = B + i;
1553 : : C = (type) A;
1554 : : D = C * S;
1555 : :
1556 : : Without the type-cast, we would create a CAND_MULT for D with base B,
1557 : : index i, and stride S. We want to record this candidate only if it
1558 : : is equivalent to apply the type cast following the multiply:
1559 : :
1560 : : A = B + i;
1561 : : E = A * S;
1562 : : D = (type) E;
1563 : :
1564 : : We will record the type with the candidate for D. This allows us
1565 : : to use a similar previous candidate as a basis. If we have earlier seen
1566 : :
1567 : : A' = B + i';
1568 : : C' = (type) A';
1569 : : D' = C' * S;
1570 : :
1571 : : we can replace D with
1572 : :
1573 : : D = D' + (i - i') * S;
1574 : :
1575 : : But if moving the type-cast would change semantics, we mustn't do this.
1576 : :
1577 : : This is legitimate for casts from a non-wrapping integral type to
1578 : : any integral type of the same or larger size. It is not legitimate
1579 : : to convert a wrapping type to a non-wrapping type, or to a wrapping
1580 : : type of a different size. I.e., with a wrapping type, we must
1581 : : assume that the addition B + i could wrap, in which case performing
1582 : : the multiply before or after one of the "illegal" type casts will
1583 : : have different semantics. */
1584 : :
1585 : : static bool
1586 : 2661941 : legal_cast_p (gimple *gs, tree rhs)
1587 : : {
1588 : 2661941 : if (!is_gimple_assign (gs)
1589 : 2661941 : || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1590 : : return false;
1591 : :
1592 : 2661941 : return legal_cast_p_1 (TREE_TYPE (gimple_assign_lhs (gs)), TREE_TYPE (rhs));
1593 : : }
1594 : :
1595 : : /* Given GS which is a cast to a scalar integer type, determine whether
1596 : : the cast is legal for strength reduction. If so, make at least one
1597 : : appropriate entry in the candidate table. */
1598 : :
1599 : : static void
1600 : 2661941 : slsr_process_cast (gimple *gs, tree rhs1, bool speed)
1601 : : {
1602 : 2661941 : tree lhs, ctype;
1603 : 2661941 : slsr_cand_t base_cand, c = NULL, c2;
1604 : 2661941 : unsigned savings = 0;
1605 : :
1606 : 2661941 : if (!legal_cast_p (gs, rhs1))
1607 : : return;
1608 : :
1609 : 1388917 : lhs = gimple_assign_lhs (gs);
1610 : 1388917 : base_cand = base_cand_from_table (rhs1);
1611 : 1388917 : ctype = TREE_TYPE (lhs);
1612 : :
1613 : 1388917 : if (base_cand && base_cand->kind != CAND_PHI)
1614 : : {
1615 : : slsr_cand_t first_cand = NULL;
1616 : :
1617 : 581151 : while (base_cand)
1618 : : {
1619 : : /* Propagate all data from the base candidate except the type,
1620 : : which comes from the cast, and the base candidate's cast,
1621 : : which is no longer applicable. */
1622 : 331736 : if (has_single_use (rhs1))
1623 : 383226 : savings = (base_cand->dead_savings
1624 : 191613 : + stmt_cost (base_cand->cand_stmt, speed));
1625 : :
1626 : 663472 : c = alloc_cand_and_find_basis (base_cand->kind, gs,
1627 : : base_cand->base_expr,
1628 : 331736 : base_cand->index, base_cand->stride,
1629 : : ctype, base_cand->stride_type,
1630 : : savings);
1631 : 331736 : if (!first_cand)
1632 : 249415 : first_cand = c;
1633 : :
1634 : 331736 : if (first_cand != c)
1635 : 82321 : c->first_interp = first_cand->cand_num;
1636 : :
1637 : 331736 : base_cand = lookup_cand (base_cand->next_interp);
1638 : : }
1639 : : }
1640 : : else
1641 : : {
1642 : : /* If nothing is known about the RHS, create fresh CAND_ADD and
1643 : : CAND_MULT interpretations:
1644 : :
1645 : : X = Y + (0 * 1)
1646 : : X = (Y + 0) * 1
1647 : :
1648 : : The first of these is somewhat arbitrary, but the choice of
1649 : : 1 for the stride simplifies the logic for propagating casts
1650 : : into their uses. */
1651 : 1139502 : c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
1652 : : integer_one_node, ctype, sizetype, 0);
1653 : 1139502 : c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
1654 : : integer_one_node, ctype, sizetype, 0);
1655 : 1139502 : c->next_interp = c2->cand_num;
1656 : 1139502 : c2->first_interp = c->cand_num;
1657 : : }
1658 : :
1659 : : /* Add the first (or only) interpretation to the statement-candidate
1660 : : mapping. */
1661 : 1388917 : add_cand_for_stmt (gs, c);
1662 : : }
1663 : :
1664 : : /* Given GS which is a copy of a scalar integer type, make at least one
1665 : : appropriate entry in the candidate table.
1666 : :
1667 : : This interface is included for completeness, but is unnecessary
1668 : : if this pass immediately follows a pass that performs copy
1669 : : propagation, such as DOM. */
1670 : :
1671 : : static void
1672 : 139733 : slsr_process_copy (gimple *gs, tree rhs1, bool speed)
1673 : : {
1674 : 139733 : slsr_cand_t base_cand, c = NULL, c2;
1675 : 139733 : unsigned savings = 0;
1676 : :
1677 : 139733 : base_cand = base_cand_from_table (rhs1);
1678 : :
1679 : 139733 : if (base_cand && base_cand->kind != CAND_PHI)
1680 : : {
1681 : : slsr_cand_t first_cand = NULL;
1682 : :
1683 : 84014 : while (base_cand)
1684 : : {
1685 : : /* Propagate all data from the base candidate. */
1686 : 48530 : if (has_single_use (rhs1))
1687 : 90874 : savings = (base_cand->dead_savings
1688 : 45437 : + stmt_cost (base_cand->cand_stmt, speed));
1689 : :
1690 : 97060 : c = alloc_cand_and_find_basis (base_cand->kind, gs,
1691 : : base_cand->base_expr,
1692 : 48530 : base_cand->index, base_cand->stride,
1693 : : base_cand->cand_type,
1694 : : base_cand->stride_type, savings);
1695 : 48530 : if (!first_cand)
1696 : 35484 : first_cand = c;
1697 : :
1698 : 48530 : if (first_cand != c)
1699 : 13046 : c->first_interp = first_cand->cand_num;
1700 : :
1701 : 48530 : base_cand = lookup_cand (base_cand->next_interp);
1702 : : }
1703 : : }
1704 : : else
1705 : : {
1706 : : /* If nothing is known about the RHS, create fresh CAND_ADD and
1707 : : CAND_MULT interpretations:
1708 : :
1709 : : X = Y + (0 * 1)
1710 : : X = (Y + 0) * 1
1711 : :
1712 : : The first of these is somewhat arbitrary, but the choice of
1713 : : 1 for the stride simplifies the logic for propagating casts
1714 : : into their uses. */
1715 : 104249 : c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
1716 : 104249 : integer_one_node, TREE_TYPE (rhs1),
1717 : : sizetype, 0);
1718 : 104249 : c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
1719 : 104249 : integer_one_node, TREE_TYPE (rhs1),
1720 : : sizetype, 0);
1721 : 104249 : c->next_interp = c2->cand_num;
1722 : 104249 : c2->first_interp = c->cand_num;
1723 : : }
1724 : :
1725 : : /* Add the first (or only) interpretation to the statement-candidate
1726 : : mapping. */
1727 : 139733 : add_cand_for_stmt (gs, c);
1728 : 139733 : }
1729 : :
1730 : 2001530 : class find_candidates_dom_walker : public dom_walker
1731 : : {
1732 : : public:
1733 : 1000765 : find_candidates_dom_walker (cdi_direction direction)
1734 : 2001530 : : dom_walker (direction) {}
1735 : : edge before_dom_children (basic_block) final override;
1736 : : };
1737 : :
1738 : : /* Find strength-reduction candidates in block BB. */
1739 : :
1740 : : edge
1741 : 10603827 : find_candidates_dom_walker::before_dom_children (basic_block bb)
1742 : : {
1743 : 10603827 : bool speed = optimize_bb_for_speed_p (bb);
1744 : :
1745 : 14916661 : for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
1746 : 4312834 : gsi_next (&gsi))
1747 : 4312834 : slsr_process_phi (gsi.phi (), speed);
1748 : :
1749 : 96347299 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1750 : 75139645 : gsi_next (&gsi))
1751 : : {
1752 : 75139645 : gimple *gs = gsi_stmt (gsi);
1753 : :
1754 : 75139645 : if (stmt_could_throw_p (cfun, gs))
1755 : 3111723 : continue;
1756 : :
1757 : 98288898 : if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1758 : 11760463 : slsr_process_ref (gs);
1759 : :
1760 : 60267459 : else if (is_gimple_assign (gs)
1761 : 60267459 : && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (gs)))
1762 : 2914467 : || POINTER_TYPE_P (TREE_TYPE (gimple_assign_lhs (gs)))))
1763 : : {
1764 : 9150626 : tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1765 : :
1766 : 9150626 : switch (gimple_assign_rhs_code (gs))
1767 : : {
1768 : 2728325 : case MULT_EXPR:
1769 : 2728325 : case PLUS_EXPR:
1770 : 2728325 : rhs1 = gimple_assign_rhs1 (gs);
1771 : 2728325 : rhs2 = gimple_assign_rhs2 (gs);
1772 : : /* Should never happen, but currently some buggy situations
1773 : : in earlier phases put constants in rhs1. */
1774 : 2728325 : if (TREE_CODE (rhs1) != SSA_NAME)
1775 : 1 : continue;
1776 : : break;
1777 : :
1778 : : /* Possible future opportunity: rhs1 of a ptr+ can be
1779 : : an ADDR_EXPR. */
1780 : 963860 : case POINTER_PLUS_EXPR:
1781 : 963860 : case MINUS_EXPR:
1782 : 963860 : rhs2 = gimple_assign_rhs2 (gs);
1783 : 4057429 : gcc_fallthrough ();
1784 : :
1785 : 4057429 : CASE_CONVERT:
1786 : 4057429 : case SSA_NAME:
1787 : 4057429 : case NEGATE_EXPR:
1788 : 4057429 : rhs1 = gimple_assign_rhs1 (gs);
1789 : 4057429 : if (TREE_CODE (rhs1) != SSA_NAME)
1790 : 303048 : continue;
1791 : : break;
1792 : :
1793 : 8847577 : default:
1794 : 8847577 : ;
1795 : : }
1796 : :
1797 : 8847577 : switch (gimple_assign_rhs_code (gs))
1798 : : {
1799 : 663467 : case MULT_EXPR:
1800 : 663467 : slsr_process_mul (gs, rhs1, rhs2, speed);
1801 : 663467 : break;
1802 : :
1803 : 2930781 : case PLUS_EXPR:
1804 : 2930781 : case POINTER_PLUS_EXPR:
1805 : 2930781 : case MINUS_EXPR:
1806 : 2930781 : slsr_process_add (gs, rhs1, rhs2, speed);
1807 : 2930781 : break;
1808 : :
1809 : 86783 : case NEGATE_EXPR:
1810 : 86783 : slsr_process_neg (gs, rhs1, speed);
1811 : 86783 : break;
1812 : :
1813 : 2661941 : CASE_CONVERT:
1814 : 2661941 : slsr_process_cast (gs, rhs1, speed);
1815 : 2661941 : break;
1816 : :
1817 : 139733 : case SSA_NAME:
1818 : 139733 : slsr_process_copy (gs, rhs1, speed);
1819 : 139733 : break;
1820 : :
1821 : : default:
1822 : : ;
1823 : : }
1824 : : }
1825 : : }
1826 : 10603827 : return NULL;
1827 : : }
1828 : :
1829 : : /* Dump a candidate for debug. */
1830 : :
1831 : : static void
1832 : 33 : dump_candidate (slsr_cand_t c)
1833 : : {
1834 : 33 : fprintf (dump_file, "%3d [%d] ", c->cand_num,
1835 : 33 : gimple_bb (c->cand_stmt)->index);
1836 : 33 : print_gimple_stmt (dump_file, c->cand_stmt, 0);
1837 : 33 : switch (c->kind)
1838 : : {
1839 : 5 : case CAND_MULT:
1840 : 5 : fputs (" MULT : (", dump_file);
1841 : 5 : print_generic_expr (dump_file, c->base_expr);
1842 : 5 : fputs (" + ", dump_file);
1843 : 5 : print_decs (c->index, dump_file);
1844 : 5 : fputs (") * ", dump_file);
1845 : 5 : if (TREE_CODE (c->stride) != INTEGER_CST
1846 : 5 : && c->stride_type != TREE_TYPE (c->stride))
1847 : : {
1848 : 0 : fputs ("(", dump_file);
1849 : 0 : print_generic_expr (dump_file, c->stride_type);
1850 : 0 : fputs (")", dump_file);
1851 : : }
1852 : 5 : print_generic_expr (dump_file, c->stride);
1853 : 5 : fputs (" : ", dump_file);
1854 : 5 : break;
1855 : 17 : case CAND_ADD:
1856 : 17 : fputs (" ADD : ", dump_file);
1857 : 17 : print_generic_expr (dump_file, c->base_expr);
1858 : 17 : fputs (" + (", dump_file);
1859 : 17 : print_decs (c->index, dump_file);
1860 : 17 : fputs (" * ", dump_file);
1861 : 17 : if (TREE_CODE (c->stride) != INTEGER_CST
1862 : 17 : && c->stride_type != TREE_TYPE (c->stride))
1863 : : {
1864 : 0 : fputs ("(", dump_file);
1865 : 0 : print_generic_expr (dump_file, c->stride_type);
1866 : 0 : fputs (")", dump_file);
1867 : : }
1868 : 17 : print_generic_expr (dump_file, c->stride);
1869 : 17 : fputs (") : ", dump_file);
1870 : 17 : break;
1871 : 11 : case CAND_REF:
1872 : 11 : fputs (" REF : ", dump_file);
1873 : 11 : print_generic_expr (dump_file, c->base_expr);
1874 : 11 : fputs (" + (", dump_file);
1875 : 11 : print_generic_expr (dump_file, c->stride);
1876 : 11 : fputs (") + ", dump_file);
1877 : 11 : print_decs (c->index, dump_file);
1878 : 11 : fputs (" : ", dump_file);
1879 : 11 : break;
1880 : 0 : case CAND_PHI:
1881 : 0 : fputs (" PHI : ", dump_file);
1882 : 0 : print_generic_expr (dump_file, c->base_expr);
1883 : 0 : fputs (" + (unknown * ", dump_file);
1884 : 0 : print_generic_expr (dump_file, c->stride);
1885 : 0 : fputs (") : ", dump_file);
1886 : 0 : break;
1887 : 0 : default:
1888 : 0 : gcc_unreachable ();
1889 : : }
1890 : 33 : print_generic_expr (dump_file, c->cand_type);
1891 : 33 : fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1892 : : c->basis, c->dependent, c->sibling);
1893 : 33 : fprintf (dump_file,
1894 : : " next-interp: %d first-interp: %d dead-savings: %d\n",
1895 : : c->next_interp, c->first_interp, c->dead_savings);
1896 : 33 : if (c->def_phi)
1897 : 0 : fprintf (dump_file, " phi: %d\n", c->def_phi);
1898 : 33 : fputs ("\n", dump_file);
1899 : 33 : }
1900 : :
1901 : : /* Dump the candidate vector for debug. */
1902 : :
1903 : : static void
1904 : 3 : dump_cand_vec (void)
1905 : : {
1906 : 3 : unsigned i;
1907 : 3 : slsr_cand_t c;
1908 : :
1909 : 3 : fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1910 : :
1911 : 42 : FOR_EACH_VEC_ELT (cand_vec, i, c)
1912 : 36 : if (c != NULL)
1913 : 33 : dump_candidate (c);
1914 : 3 : }
1915 : :
1916 : : /* Callback used to dump the candidate chains hash table. */
1917 : :
1918 : : int
1919 : 14 : ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1920 : : {
1921 : 14 : const_cand_chain_t chain = *slot;
1922 : 14 : cand_chain_t p;
1923 : :
1924 : 14 : print_generic_expr (dump_file, chain->base_expr);
1925 : 14 : fprintf (dump_file, " -> %d", chain->cand->cand_num);
1926 : :
1927 : 42 : for (p = chain->next; p; p = p->next)
1928 : 28 : fprintf (dump_file, " -> %d", p->cand->cand_num);
1929 : :
1930 : 14 : fputs ("\n", dump_file);
1931 : 14 : return 1;
1932 : : }
1933 : :
1934 : : /* Dump the candidate chains. */
1935 : :
1936 : : static void
1937 : 3 : dump_cand_chains (void)
1938 : : {
1939 : 3 : fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1940 : 3 : base_cand_map->traverse_noresize <void *, ssa_base_cand_dump_callback>
1941 : 17 : (NULL);
1942 : 3 : fputs ("\n", dump_file);
1943 : 3 : }
1944 : :
1945 : : /* Dump the increment vector for debug. */
1946 : :
1947 : : static void
1948 : 35803 : dump_incr_vec (void)
1949 : : {
1950 : 35803 : if (dump_file && (dump_flags & TDF_DETAILS))
1951 : : {
1952 : 0 : unsigned i;
1953 : :
1954 : 0 : fprintf (dump_file, "\nIncrement vector:\n\n");
1955 : :
1956 : 0 : for (i = 0; i < incr_vec_len; i++)
1957 : : {
1958 : 0 : fprintf (dump_file, "%3d increment: ", i);
1959 : 0 : print_decs (incr_vec[i].incr, dump_file);
1960 : 0 : fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1961 : 0 : fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1962 : 0 : fputs ("\n initializer: ", dump_file);
1963 : 0 : print_generic_expr (dump_file, incr_vec[i].initializer);
1964 : 0 : fputs ("\n\n", dump_file);
1965 : : }
1966 : : }
1967 : 35803 : }
1968 : :
1969 : : /* Replace *EXPR in candidate C with an equivalent strength-reduced
1970 : : data reference. */
1971 : :
1972 : : static void
1973 : 17554 : replace_ref (tree *expr, slsr_cand_t c)
1974 : : {
1975 : 17554 : tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1976 : 17554 : unsigned HOST_WIDE_INT misalign;
1977 : 17554 : unsigned align;
1978 : :
1979 : : /* Ensure the memory reference carries the minimum alignment
1980 : : requirement for the data type. See PR58041. */
1981 : 17554 : get_object_alignment_1 (*expr, &align, &misalign);
1982 : 17554 : if (misalign != 0)
1983 : 261 : align = least_bit_hwi (misalign);
1984 : 17554 : if (align < TYPE_ALIGN (acc_type))
1985 : 14 : acc_type = build_aligned_type (acc_type, align);
1986 : :
1987 : 17554 : add_expr = fold_build2 (POINTER_PLUS_EXPR, c->cand_type,
1988 : : c->base_expr, c->stride);
1989 : 17554 : mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1990 : : wide_int_to_tree (c->cand_type, c->index));
1991 : :
1992 : : /* Gimplify the base addressing expression for the new MEM_REF tree. */
1993 : 17554 : gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1994 : 17554 : TREE_OPERAND (mem_ref, 0)
1995 : 17554 : = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1996 : : /*simple_p=*/true, NULL,
1997 : : /*before=*/true, GSI_SAME_STMT);
1998 : 17554 : copy_ref_info (mem_ref, *expr);
1999 : 17554 : *expr = mem_ref;
2000 : 17554 : update_stmt (c->cand_stmt);
2001 : 17554 : }
2002 : :
2003 : : /* Return true if CAND_REF candidate C is a valid memory reference. */
2004 : :
2005 : : static bool
2006 : 6136 : valid_mem_ref_cand_p (slsr_cand_t c)
2007 : : {
2008 : 6136 : if (TREE_CODE (TREE_OPERAND (c->stride, 1)) != INTEGER_CST)
2009 : : return false;
2010 : :
2011 : 6136 : struct mem_address addr
2012 : 12272 : = { NULL_TREE, c->base_expr, TREE_OPERAND (c->stride, 0),
2013 : 6136 : TREE_OPERAND (c->stride, 1), wide_int_to_tree (sizetype, c->index) };
2014 : :
2015 : 6136 : return
2016 : 6136 : valid_mem_ref_p (TYPE_MODE (c->cand_type), TYPE_ADDR_SPACE (c->cand_type),
2017 : : &addr);
2018 : : }
2019 : :
2020 : : /* Replace CAND_REF candidate C, each sibling of candidate C, and each
2021 : : dependent of candidate C with an equivalent strength-reduced data
2022 : : reference. */
2023 : :
2024 : : static void
2025 : 6848 : replace_refs (slsr_cand_t c)
2026 : : {
2027 : : /* Replacing a chain of only 2 candidates which are valid memory references
2028 : : is generally counter-productive because you cannot recoup the additional
2029 : : calculation added in front of them. */
2030 : 20397 : if (c->basis == 0
2031 : 6431 : && c->dependent
2032 : 6431 : && !lookup_cand (c->dependent)->dependent
2033 : 3293 : && valid_mem_ref_cand_p (c)
2034 : 23240 : && valid_mem_ref_cand_p (lookup_cand (c->dependent)))
2035 : : return;
2036 : :
2037 : 17554 : if (dump_file && (dump_flags & TDF_DETAILS))
2038 : : {
2039 : 9 : fputs ("Replacing reference: ", dump_file);
2040 : 9 : print_gimple_stmt (dump_file, c->cand_stmt, 0);
2041 : : }
2042 : :
2043 : 35108 : if (gimple_vdef (c->cand_stmt))
2044 : : {
2045 : 6219 : tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
2046 : 6219 : replace_ref (lhs, c);
2047 : : }
2048 : : else
2049 : : {
2050 : 11335 : tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
2051 : 11335 : replace_ref (rhs, c);
2052 : : }
2053 : :
2054 : 17554 : if (dump_file && (dump_flags & TDF_DETAILS))
2055 : : {
2056 : 9 : fputs ("With: ", dump_file);
2057 : 9 : print_gimple_stmt (dump_file, c->cand_stmt, 0);
2058 : 9 : fputs ("\n", dump_file);
2059 : : }
2060 : :
2061 : 17554 : if (c->sibling)
2062 : 417 : replace_refs (lookup_cand (c->sibling));
2063 : :
2064 : 17554 : if (c->dependent)
2065 : 13549 : replace_refs (lookup_cand (c->dependent));
2066 : : }
2067 : :
2068 : : /* Return TRUE if candidate C is dependent upon a PHI. */
2069 : :
2070 : : static bool
2071 : 2421271 : phi_dependent_cand_p (slsr_cand_t c)
2072 : : {
2073 : : /* A candidate is not necessarily dependent upon a PHI just because
2074 : : it has a phi definition for its base name. It may have a basis
2075 : : that relies upon the same phi definition, in which case the PHI
2076 : : is irrelevant to this candidate. */
2077 : 2421271 : return (c->def_phi
2078 : 360 : && c->basis
2079 : 2421631 : && lookup_cand (c->basis)->def_phi != c->def_phi);
2080 : : }
2081 : :
2082 : : /* Calculate the increment required for candidate C relative to
2083 : : its basis. */
2084 : :
2085 : : static offset_int
2086 : 1265416 : cand_increment (slsr_cand_t c)
2087 : : {
2088 : 1265416 : slsr_cand_t basis;
2089 : :
2090 : : /* If the candidate doesn't have a basis, just return its own
2091 : : index. This is useful in record_increments to help us find
2092 : : an existing initializer. Also, if the candidate's basis is
2093 : : hidden by a phi, then its own index will be the increment
2094 : : from the newly introduced phi basis. */
2095 : 1265416 : if (!c->basis || phi_dependent_cand_p (c))
2096 : 35846 : return c->index;
2097 : :
2098 : 1229570 : basis = lookup_cand (c->basis);
2099 : 1229570 : gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
2100 : 1229570 : return c->index - basis->index;
2101 : : }
2102 : :
2103 : : /* Calculate the increment required for candidate C relative to
2104 : : its basis. If we aren't going to generate pointer arithmetic
2105 : : for this candidate, return the absolute value of that increment
2106 : : instead. */
2107 : :
2108 : : static inline offset_int
2109 : 75444 : cand_abs_increment (slsr_cand_t c)
2110 : : {
2111 : 75444 : offset_int increment = cand_increment (c);
2112 : :
2113 : 75444 : if (!address_arithmetic_p && wi::neg_p (increment))
2114 : 1985 : increment = -increment;
2115 : :
2116 : 75444 : return increment;
2117 : : }
2118 : :
2119 : : /* Return TRUE iff candidate C has already been replaced under
2120 : : another interpretation. */
2121 : :
2122 : : static inline bool
2123 : 1387164 : cand_already_replaced (slsr_cand_t c)
2124 : : {
2125 : 538 : return (gimple_bb (c->cand_stmt) == 0);
2126 : : }
2127 : :
2128 : : /* Common logic used by replace_unconditional_candidate and
2129 : : replace_conditional_candidate. */
2130 : :
2131 : : static void
2132 : 1065492 : replace_mult_candidate (slsr_cand_t c, tree basis_name, offset_int bump,
2133 : : auto_bitmap &sdce_worklist)
2134 : : {
2135 : 1065492 : tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
2136 : 1065492 : enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
2137 : :
2138 : : /* It is not useful to replace casts, copies, negates, or adds of
2139 : : an SSA name and a constant. */
2140 : 1065492 : if (cand_code == SSA_NAME
2141 : : || CONVERT_EXPR_CODE_P (cand_code)
2142 : : || cand_code == PLUS_EXPR
2143 : : || cand_code == POINTER_PLUS_EXPR
2144 : : || cand_code == MINUS_EXPR
2145 : : || cand_code == NEGATE_EXPR)
2146 : : return;
2147 : :
2148 : 74441 : enum tree_code code = PLUS_EXPR;
2149 : 74441 : tree bump_tree;
2150 : 74441 : gimple *stmt_to_print = NULL;
2151 : :
2152 : 74441 : if (wi::neg_p (bump))
2153 : : {
2154 : 6995 : code = MINUS_EXPR;
2155 : 6995 : bump = -bump;
2156 : : }
2157 : :
2158 : : /* It is possible that the resulting bump doesn't fit in target_type.
2159 : : Abandon the replacement in this case. This does not affect
2160 : : siblings or dependents of C. */
2161 : 74441 : if (bump != wi::ext (bump, TYPE_PRECISION (target_type),
2162 : 74441 : TYPE_SIGN (target_type)))
2163 : : return;
2164 : :
2165 : 73436 : bump_tree = wide_int_to_tree (target_type, bump);
2166 : :
2167 : : /* If the basis name and the candidate's LHS have incompatible types,
2168 : : introduce a cast. */
2169 : 73436 : if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
2170 : 0 : basis_name = introduce_cast_before_cand (c, target_type, basis_name);
2171 : :
2172 : 73436 : if (dump_file && (dump_flags & TDF_DETAILS))
2173 : : {
2174 : 0 : fputs ("Replacing: ", dump_file);
2175 : 0 : print_gimple_stmt (dump_file, c->cand_stmt, 0);
2176 : : }
2177 : :
2178 : 73436 : if (bump == 0)
2179 : : {
2180 : 53137 : tree lhs = gimple_assign_lhs (c->cand_stmt);
2181 : 53137 : gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
2182 : 53137 : gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2183 : 53137 : slsr_cand_t cc = lookup_cand (c->first_interp);
2184 : 53137 : gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2185 : 53137 : gsi_replace (&gsi, copy_stmt, false);
2186 : 159411 : while (cc)
2187 : : {
2188 : 53137 : cc->cand_stmt = copy_stmt;
2189 : 53137 : cc = lookup_cand (cc->next_interp);
2190 : : }
2191 : 53137 : if (dump_file && (dump_flags & TDF_DETAILS))
2192 : : stmt_to_print = copy_stmt;
2193 : : }
2194 : : else
2195 : : {
2196 : 20299 : tree rhs1, rhs2;
2197 : 20299 : if (cand_code != NEGATE_EXPR) {
2198 : 20299 : rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2199 : 20299 : rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2200 : : /* Mark the 2 original rhs for maybe DCEing. */
2201 : 20299 : if (TREE_CODE (rhs1) == SSA_NAME)
2202 : 20299 : bitmap_set_bit (sdce_worklist, SSA_NAME_VERSION (rhs1));
2203 : 20299 : if (TREE_CODE (rhs2) == SSA_NAME)
2204 : 0 : bitmap_set_bit (sdce_worklist, SSA_NAME_VERSION (rhs2));
2205 : : }
2206 : 20299 : if (cand_code != NEGATE_EXPR
2207 : 20299 : && ((operand_equal_p (rhs1, basis_name, 0)
2208 : 0 : && operand_equal_p (rhs2, bump_tree, 0))
2209 : 20299 : || (operand_equal_p (rhs1, bump_tree, 0)
2210 : 0 : && operand_equal_p (rhs2, basis_name, 0))))
2211 : : {
2212 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
2213 : : {
2214 : 0 : fputs ("(duplicate, not actually replacing)", dump_file);
2215 : 0 : stmt_to_print = c->cand_stmt;
2216 : : }
2217 : : }
2218 : : else
2219 : : {
2220 : 20299 : gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2221 : 20299 : slsr_cand_t cc = lookup_cand (c->first_interp);
2222 : 20299 : gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree);
2223 : 20299 : update_stmt (gsi_stmt (gsi));
2224 : 60897 : while (cc)
2225 : : {
2226 : 20299 : cc->cand_stmt = gsi_stmt (gsi);
2227 : 20299 : cc = lookup_cand (cc->next_interp);
2228 : : }
2229 : 20299 : if (dump_file && (dump_flags & TDF_DETAILS))
2230 : 0 : stmt_to_print = gsi_stmt (gsi);
2231 : : }
2232 : : }
2233 : :
2234 : 73436 : if (dump_file && (dump_flags & TDF_DETAILS))
2235 : : {
2236 : 0 : fputs ("With: ", dump_file);
2237 : 0 : print_gimple_stmt (dump_file, stmt_to_print, 0);
2238 : 0 : fputs ("\n", dump_file);
2239 : : }
2240 : : }
2241 : :
2242 : : /* Replace candidate C with an add or subtract. Note that we only
2243 : : operate on CAND_MULTs with known strides, so we will never generate
2244 : : a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
2245 : : X = Y + ((i - i') * S), as described in the module commentary. The
2246 : : folded value ((i - i') * S) is referred to here as the "bump." */
2247 : :
2248 : : static void
2249 : 1065477 : replace_unconditional_candidate (slsr_cand_t c, auto_bitmap &sdce_worklist)
2250 : : {
2251 : 1065477 : slsr_cand_t basis;
2252 : :
2253 : 1065477 : if (cand_already_replaced (c))
2254 : 0 : return;
2255 : :
2256 : 1065477 : basis = lookup_cand (c->basis);
2257 : 1065477 : offset_int bump = cand_increment (c) * wi::to_offset (c->stride);
2258 : :
2259 : 1065477 : replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump,
2260 : : sdce_worklist);
2261 : : }
2262 : :
2263 : : /* Return the index in the increment vector of the given INCREMENT,
2264 : : or -1 if not found. The latter can occur if more than
2265 : : MAX_INCR_VEC_LEN increments have been found. */
2266 : :
2267 : : static inline int
2268 : 69879 : incr_vec_index (const offset_int &increment)
2269 : : {
2270 : 69879 : unsigned i;
2271 : :
2272 : 123180 : for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
2273 : : ;
2274 : :
2275 : 69879 : if (i < incr_vec_len)
2276 : : return i;
2277 : : else
2278 : 0 : return -1;
2279 : : }
2280 : :
2281 : : /* Create a new statement along edge E to add BASIS_NAME to the product
2282 : : of INCREMENT and the stride of candidate C. Create and return a new
2283 : : SSA name from *VAR to be used as the LHS of the new statement.
2284 : : KNOWN_STRIDE is true iff C's stride is a constant. */
2285 : :
2286 : : static tree
2287 : 29 : create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
2288 : : offset_int increment, edge e, location_t loc,
2289 : : bool known_stride)
2290 : : {
2291 : 29 : tree lhs, basis_type;
2292 : 29 : gassign *new_stmt, *cast_stmt = NULL;
2293 : :
2294 : : /* If the add candidate along this incoming edge has the same
2295 : : index as C's hidden basis, the hidden basis represents this
2296 : : edge correctly. */
2297 : 29 : if (increment == 0)
2298 : : return basis_name;
2299 : :
2300 : 25 : basis_type = TREE_TYPE (basis_name);
2301 : 25 : lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
2302 : :
2303 : : /* Occasionally people convert integers to pointers without a
2304 : : cast, leading us into trouble if we aren't careful. */
2305 : 50 : enum tree_code plus_code
2306 : 25 : = POINTER_TYPE_P (basis_type) ? POINTER_PLUS_EXPR : PLUS_EXPR;
2307 : :
2308 : 25 : if (known_stride)
2309 : : {
2310 : 17 : tree bump_tree;
2311 : 17 : enum tree_code code = plus_code;
2312 : 17 : offset_int bump = increment * wi::to_offset (c->stride);
2313 : 17 : if (wi::neg_p (bump) && !POINTER_TYPE_P (basis_type))
2314 : : {
2315 : 1 : code = MINUS_EXPR;
2316 : 1 : bump = -bump;
2317 : : }
2318 : :
2319 : 17 : tree stride_type = POINTER_TYPE_P (basis_type) ? sizetype : basis_type;
2320 : 17 : bump_tree = wide_int_to_tree (stride_type, bump);
2321 : 17 : new_stmt = gimple_build_assign (lhs, code, basis_name, bump_tree);
2322 : : }
2323 : : else
2324 : : {
2325 : 8 : int i;
2326 : 8 : bool negate_incr = !POINTER_TYPE_P (basis_type) && wi::neg_p (increment);
2327 : 8 : i = incr_vec_index (negate_incr ? -increment : increment);
2328 : 8 : gcc_assert (i >= 0);
2329 : :
2330 : 8 : if (incr_vec[i].initializer)
2331 : : {
2332 : 8 : enum tree_code code = negate_incr ? MINUS_EXPR : plus_code;
2333 : 8 : new_stmt = gimple_build_assign (lhs, code, basis_name,
2334 : : incr_vec[i].initializer);
2335 : : }
2336 : : else {
2337 : 0 : tree stride;
2338 : :
2339 : 0 : if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
2340 : : {
2341 : 0 : tree cast_stride = make_temp_ssa_name (c->stride_type, NULL,
2342 : : "slsr");
2343 : 0 : cast_stmt = gimple_build_assign (cast_stride, NOP_EXPR,
2344 : : c->stride);
2345 : 0 : stride = cast_stride;
2346 : : }
2347 : : else
2348 : 0 : stride = c->stride;
2349 : :
2350 : 0 : if (increment == 1)
2351 : 0 : new_stmt = gimple_build_assign (lhs, plus_code, basis_name, stride);
2352 : 0 : else if (increment == -1)
2353 : 0 : new_stmt = gimple_build_assign (lhs, MINUS_EXPR, basis_name, stride);
2354 : : else
2355 : 0 : gcc_unreachable ();
2356 : : }
2357 : : }
2358 : :
2359 : 25 : if (cast_stmt)
2360 : : {
2361 : 0 : gimple_set_location (cast_stmt, loc);
2362 : 0 : gsi_insert_on_edge (e, cast_stmt);
2363 : : }
2364 : :
2365 : 25 : gimple_set_location (new_stmt, loc);
2366 : 25 : gsi_insert_on_edge (e, new_stmt);
2367 : :
2368 : 25 : if (dump_file && (dump_flags & TDF_DETAILS))
2369 : : {
2370 : 0 : if (cast_stmt)
2371 : : {
2372 : 0 : fprintf (dump_file, "Inserting cast on edge %d->%d: ",
2373 : 0 : e->src->index, e->dest->index);
2374 : 0 : print_gimple_stmt (dump_file, cast_stmt, 0);
2375 : : }
2376 : 0 : fprintf (dump_file, "Inserting on edge %d->%d: ", e->src->index,
2377 : 0 : e->dest->index);
2378 : 0 : print_gimple_stmt (dump_file, new_stmt, 0);
2379 : : }
2380 : :
2381 : : return lhs;
2382 : : }
2383 : :
2384 : : /* Clear the visited field for a tree of PHI candidates. */
2385 : :
2386 : : static void
2387 : 91 : clear_visited (gphi *phi)
2388 : : {
2389 : 91 : unsigned i;
2390 : 91 : slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2391 : :
2392 : 91 : if (phi_cand->visited)
2393 : : {
2394 : 91 : phi_cand->visited = 0;
2395 : :
2396 : 259 : for (i = 0; i < gimple_phi_num_args (phi); i++)
2397 : : {
2398 : 168 : tree arg = gimple_phi_arg_def (phi, i);
2399 : 168 : gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2400 : 168 : if (gimple_code (arg_def) == GIMPLE_PHI)
2401 : 2 : clear_visited (as_a <gphi *> (arg_def));
2402 : : }
2403 : : }
2404 : 91 : }
2405 : :
2406 : : /* Recursive helper function for create_phi_basis. */
2407 : :
2408 : : static tree
2409 : 24 : create_phi_basis_1 (slsr_cand_t c, gimple *from_phi, tree basis_name,
2410 : : location_t loc, bool known_stride)
2411 : : {
2412 : 24 : int i;
2413 : 24 : tree name, phi_arg;
2414 : 24 : gphi *phi;
2415 : 24 : slsr_cand_t basis = lookup_cand (c->basis);
2416 : 24 : int nargs = gimple_phi_num_args (from_phi);
2417 : 24 : basic_block phi_bb = gimple_bb (from_phi);
2418 : 24 : slsr_cand_t phi_cand = *stmt_cand_map->get (from_phi);
2419 : 24 : auto_vec<tree> phi_args (nargs);
2420 : :
2421 : 24 : if (phi_cand->visited)
2422 : 0 : return phi_cand->cached_basis;
2423 : 24 : phi_cand->visited = 1;
2424 : :
2425 : : /* Process each argument of the existing phi that represents
2426 : : conditionally-executed add candidates. */
2427 : 61 : for (i = 0; i < nargs; i++)
2428 : : {
2429 : 37 : edge e = (*phi_bb->preds)[i];
2430 : 37 : tree arg = gimple_phi_arg_def (from_phi, i);
2431 : 37 : tree feeding_def;
2432 : :
2433 : : /* If the phi argument is the base name of the CAND_PHI, then
2434 : : this incoming arc should use the hidden basis. */
2435 : 37 : if (operand_equal_p (arg, phi_cand->base_expr, 0))
2436 : 7 : if (basis->index == 0)
2437 : 7 : feeding_def = gimple_assign_lhs (basis->cand_stmt);
2438 : : else
2439 : : {
2440 : 0 : offset_int incr = -basis->index;
2441 : 0 : feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2442 : : e, loc, known_stride);
2443 : : }
2444 : : else
2445 : : {
2446 : 30 : gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2447 : :
2448 : : /* If there is another phi along this incoming edge, we must
2449 : : process it in the same fashion to ensure that all basis
2450 : : adjustments are made along its incoming edges. */
2451 : 30 : if (gimple_code (arg_def) == GIMPLE_PHI)
2452 : 1 : feeding_def = create_phi_basis_1 (c, arg_def, basis_name,
2453 : : loc, known_stride);
2454 : : else
2455 : : {
2456 : 29 : slsr_cand_t arg_cand = base_cand_from_table (arg);
2457 : 29 : offset_int diff = arg_cand->index - basis->index;
2458 : 29 : feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2459 : : e, loc, known_stride);
2460 : : }
2461 : : }
2462 : :
2463 : : /* Because of recursion, we need to save the arguments in a vector
2464 : : so we can create the PHI statement all at once. Otherwise the
2465 : : storage for the half-created PHI can be reclaimed. */
2466 : 37 : phi_args.safe_push (feeding_def);
2467 : : }
2468 : :
2469 : : /* Create the new phi basis. */
2470 : 24 : name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2471 : 24 : phi = create_phi_node (name, phi_bb);
2472 : 24 : SSA_NAME_DEF_STMT (name) = phi;
2473 : :
2474 : 61 : FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2475 : : {
2476 : 37 : edge e = (*phi_bb->preds)[i];
2477 : 37 : add_phi_arg (phi, phi_arg, e, loc);
2478 : : }
2479 : :
2480 : 24 : update_stmt (phi);
2481 : :
2482 : 24 : if (dump_file && (dump_flags & TDF_DETAILS))
2483 : : {
2484 : 0 : fputs ("Introducing new phi basis: ", dump_file);
2485 : 0 : print_gimple_stmt (dump_file, phi, 0);
2486 : : }
2487 : :
2488 : 24 : phi_cand->cached_basis = name;
2489 : 24 : return name;
2490 : 24 : }
2491 : :
2492 : : /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2493 : : is hidden by the phi node FROM_PHI, create a new phi node in the same
2494 : : block as FROM_PHI. The new phi is suitable for use as a basis by C,
2495 : : with its phi arguments representing conditional adjustments to the
2496 : : hidden basis along conditional incoming paths. Those adjustments are
2497 : : made by creating add statements (and sometimes recursively creating
2498 : : phis) along those incoming paths. LOC is the location to attach to
2499 : : the introduced statements. KNOWN_STRIDE is true iff C's stride is a
2500 : : constant. */
2501 : :
2502 : : static tree
2503 : 23 : create_phi_basis (slsr_cand_t c, gimple *from_phi, tree basis_name,
2504 : : location_t loc, bool known_stride)
2505 : : {
2506 : 23 : tree retval = create_phi_basis_1 (c, from_phi, basis_name, loc,
2507 : : known_stride);
2508 : 23 : gcc_assert (retval);
2509 : 23 : clear_visited (as_a <gphi *> (from_phi));
2510 : 23 : return retval;
2511 : : }
2512 : :
2513 : : /* Given a candidate C whose basis is hidden by at least one intervening
2514 : : phi, introduce a matching number of new phis to represent its basis
2515 : : adjusted by conditional increments along possible incoming paths. Then
2516 : : replace C as though it were an unconditional candidate, using the new
2517 : : basis. */
2518 : :
2519 : : static void
2520 : 15 : replace_conditional_candidate (slsr_cand_t c, auto_bitmap &sdce_worklist)
2521 : :
2522 : : {
2523 : 15 : tree basis_name, name;
2524 : 15 : slsr_cand_t basis;
2525 : 15 : location_t loc;
2526 : :
2527 : : /* Look up the LHS SSA name from C's basis. This will be the
2528 : : RHS1 of the adds we will introduce to create new phi arguments. */
2529 : 15 : basis = lookup_cand (c->basis);
2530 : 15 : basis_name = gimple_assign_lhs (basis->cand_stmt);
2531 : :
2532 : : /* Create a new phi statement which will represent C's true basis
2533 : : after the transformation is complete. */
2534 : 15 : loc = gimple_location (c->cand_stmt);
2535 : 15 : name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2536 : : basis_name, loc, KNOWN_STRIDE);
2537 : :
2538 : : /* Replace C with an add of the new basis phi and a constant. */
2539 : 15 : offset_int bump = c->index * wi::to_offset (c->stride);
2540 : :
2541 : 15 : replace_mult_candidate (c, name, bump, sdce_worklist);
2542 : 15 : }
2543 : :
2544 : : /* Recursive helper function for phi_add_costs. SPREAD is a measure of
2545 : : how many PHI nodes we have visited at this point in the tree walk. */
2546 : :
2547 : : static int
2548 : 28 : phi_add_costs_1 (gimple *phi, slsr_cand_t c, int one_add_cost, int *spread)
2549 : : {
2550 : 28 : unsigned i;
2551 : 28 : int cost = 0;
2552 : 28 : slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2553 : :
2554 : 28 : if (phi_cand->visited)
2555 : : return 0;
2556 : :
2557 : 28 : phi_cand->visited = 1;
2558 : 28 : (*spread)++;
2559 : :
2560 : : /* If we work our way back to a phi that isn't dominated by the hidden
2561 : : basis, this isn't a candidate for replacement. Indicate this by
2562 : : returning an unreasonably high cost. It's not easy to detect
2563 : : these situations when determining the basis, so we defer the
2564 : : decision until now. */
2565 : 28 : basic_block phi_bb = gimple_bb (phi);
2566 : 28 : slsr_cand_t basis = lookup_cand (c->basis);
2567 : 28 : basic_block basis_bb = gimple_bb (basis->cand_stmt);
2568 : :
2569 : 28 : if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
2570 : 0 : return COST_INFINITE;
2571 : :
2572 : 81 : for (i = 0; i < gimple_phi_num_args (phi); i++)
2573 : : {
2574 : 53 : tree arg = gimple_phi_arg_def (phi, i);
2575 : :
2576 : 53 : if (arg != phi_cand->base_expr)
2577 : : {
2578 : 52 : gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2579 : :
2580 : 52 : if (gimple_code (arg_def) == GIMPLE_PHI)
2581 : : {
2582 : 1 : cost += phi_add_costs_1 (arg_def, c, one_add_cost, spread);
2583 : :
2584 : 1 : if (cost >= COST_INFINITE || *spread > MAX_SPREAD)
2585 : : return COST_INFINITE;
2586 : : }
2587 : : else
2588 : : {
2589 : 51 : slsr_cand_t arg_cand = base_cand_from_table (arg);
2590 : :
2591 : 51 : if (arg_cand->index != c->index)
2592 : 48 : cost += one_add_cost;
2593 : : }
2594 : : }
2595 : : }
2596 : :
2597 : : return cost;
2598 : : }
2599 : :
2600 : : /* Compute the expected costs of inserting basis adjustments for
2601 : : candidate C with phi-definition PHI. The cost of inserting
2602 : : one adjustment is given by ONE_ADD_COST. If PHI has arguments
2603 : : which are themselves phi results, recursively calculate costs
2604 : : for those phis as well. */
2605 : :
2606 : : static int
2607 : 27 : phi_add_costs (gimple *phi, slsr_cand_t c, int one_add_cost)
2608 : : {
2609 : 27 : int spread = 0;
2610 : 27 : int retval = phi_add_costs_1 (phi, c, one_add_cost, &spread);
2611 : 27 : clear_visited (as_a <gphi *> (phi));
2612 : 27 : return retval;
2613 : : }
2614 : : /* For candidate C, each sibling of candidate C, and each dependent of
2615 : : candidate C, determine whether the candidate is dependent upon a
2616 : : phi that hides its basis. If not, replace the candidate unconditionally.
2617 : : Otherwise, determine whether the cost of introducing compensation code
2618 : : for the candidate is offset by the gains from strength reduction. If
2619 : : so, replace the candidate and introduce the compensation code. */
2620 : :
2621 : : static void
2622 : 560310 : replace_uncond_cands_and_profitable_phis (slsr_cand_t c,
2623 : : auto_bitmap &sdce_worklist)
2624 : : {
2625 : 1065535 : if (phi_dependent_cand_p (c))
2626 : : {
2627 : : /* A multiply candidate with a stride of 1 is just an artifice
2628 : : of a copy or cast; there is no value in replacing it. */
2629 : 58 : if (c->kind == CAND_MULT && wi::to_offset (c->stride) != 1)
2630 : : {
2631 : : /* A candidate dependent upon a phi will replace a multiply by
2632 : : a constant with an add, and will insert at most one add for
2633 : : each phi argument. Add these costs with the potential dead-code
2634 : : savings to determine profitability. */
2635 : 27 : bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2636 : 27 : int mult_savings = stmt_cost (c->cand_stmt, speed);
2637 : 27 : gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
2638 : 27 : tree phi_result = gimple_phi_result (phi);
2639 : 108 : int one_add_cost = add_cost (speed,
2640 : 27 : TYPE_MODE (TREE_TYPE (phi_result)));
2641 : 27 : int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2642 : 27 : int cost = add_costs - mult_savings - c->dead_savings;
2643 : :
2644 : 27 : if (dump_file && (dump_flags & TDF_DETAILS))
2645 : : {
2646 : 0 : fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num);
2647 : 0 : fprintf (dump_file, " add_costs = %d\n", add_costs);
2648 : 0 : fprintf (dump_file, " mult_savings = %d\n", mult_savings);
2649 : 0 : fprintf (dump_file, " dead_savings = %d\n", c->dead_savings);
2650 : 0 : fprintf (dump_file, " cost = %d\n", cost);
2651 : 0 : if (cost <= COST_NEUTRAL)
2652 : 0 : fputs (" Replacing...\n", dump_file);
2653 : : else
2654 : 0 : fputs (" Not replaced.\n", dump_file);
2655 : : }
2656 : :
2657 : 27 : if (cost <= COST_NEUTRAL)
2658 : 15 : replace_conditional_candidate (c, sdce_worklist);
2659 : : }
2660 : : }
2661 : : else
2662 : 1065477 : replace_unconditional_candidate (c, sdce_worklist);
2663 : :
2664 : 1065535 : if (c->sibling)
2665 : 42756 : replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling),
2666 : : sdce_worklist);
2667 : :
2668 : 1065535 : if (c->dependent)
2669 : 505225 : replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent),
2670 : : sdce_worklist);
2671 : 560310 : }
2672 : :
2673 : : /* Count the number of candidates in the tree rooted at C that have
2674 : : not already been replaced under other interpretations. */
2675 : :
2676 : : static int
2677 : 39088 : count_candidates (slsr_cand_t c)
2678 : : {
2679 : 105653 : unsigned count = cand_already_replaced (c) ? 0 : 1;
2680 : :
2681 : 105653 : if (c->sibling)
2682 : 3285 : count += count_candidates (lookup_cand (c->sibling));
2683 : :
2684 : 105653 : if (c->dependent)
2685 : 66565 : count += count_candidates (lookup_cand (c->dependent));
2686 : :
2687 : 39088 : return count;
2688 : : }
2689 : :
2690 : : /* Increase the count of INCREMENT by one in the increment vector.
2691 : : INCREMENT is associated with candidate C. If INCREMENT is to be
2692 : : conditionally executed as part of a conditional candidate replacement,
2693 : : IS_PHI_ADJUST is true, otherwise false. If an initializer
2694 : : T_0 = stride * I is provided by a candidate that dominates all
2695 : : candidates with the same increment, also record T_0 for subsequent use. */
2696 : :
2697 : : static void
2698 : 105677 : record_increment (slsr_cand_t c, offset_int increment, bool is_phi_adjust)
2699 : : {
2700 : 105677 : bool found = false;
2701 : 105677 : unsigned i;
2702 : :
2703 : : /* Treat increments that differ only in sign as identical so as to
2704 : : share initializers, unless we are generating pointer arithmetic. */
2705 : 105677 : if (!address_arithmetic_p && wi::neg_p (increment))
2706 : 5590 : increment = -increment;
2707 : :
2708 : 158971 : for (i = 0; i < incr_vec_len; i++)
2709 : : {
2710 : 95775 : if (incr_vec[i].incr == increment)
2711 : : {
2712 : 42481 : incr_vec[i].count++;
2713 : 42481 : found = true;
2714 : :
2715 : : /* If we previously recorded an initializer that doesn't
2716 : : dominate this candidate, it's not going to be useful to
2717 : : us after all. */
2718 : 42481 : if (incr_vec[i].initializer
2719 : 42481 : && !dominated_by_p (CDI_DOMINATORS,
2720 : 979 : gimple_bb (c->cand_stmt),
2721 : 979 : incr_vec[i].init_bb))
2722 : : {
2723 : 0 : incr_vec[i].initializer = NULL_TREE;
2724 : 0 : incr_vec[i].init_bb = NULL;
2725 : : }
2726 : :
2727 : : break;
2728 : : }
2729 : : }
2730 : :
2731 : 63196 : if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
2732 : : {
2733 : : /* The first time we see an increment, create the entry for it.
2734 : : If this is the root candidate which doesn't have a basis, set
2735 : : the count to zero. We're only processing it so it can possibly
2736 : : provide an initializer for other candidates. */
2737 : 63196 : incr_vec[incr_vec_len].incr = increment;
2738 : 63196 : incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
2739 : 63196 : incr_vec[incr_vec_len].cost = COST_INFINITE;
2740 : :
2741 : : /* Optimistically record the first occurrence of this increment
2742 : : as providing an initializer (if it does); we will revise this
2743 : : opinion later if it doesn't dominate all other occurrences.
2744 : : Exception: increments of 0, 1 never need initializers;
2745 : : and phi adjustments don't ever provide initializers. */
2746 : 63196 : if (c->kind == CAND_ADD
2747 : 52035 : && !is_phi_adjust
2748 : 52023 : && c->index == increment
2749 : 22032 : && (increment > 1 || increment < 0)
2750 : 68286 : && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
2751 : 3317 : || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
2752 : : {
2753 : 4126 : tree t0 = NULL_TREE;
2754 : 4126 : tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2755 : 4126 : tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2756 : 4126 : if (operand_equal_p (rhs1, c->base_expr, 0))
2757 : : t0 = rhs2;
2758 : 1285 : else if (operand_equal_p (rhs2, c->base_expr, 0))
2759 : : t0 = rhs1;
2760 : 4120 : if (t0
2761 : 4120 : && SSA_NAME_DEF_STMT (t0)
2762 : 8240 : && gimple_bb (SSA_NAME_DEF_STMT (t0)))
2763 : : {
2764 : 4120 : incr_vec[incr_vec_len].initializer = t0;
2765 : 4120 : incr_vec[incr_vec_len++].init_bb
2766 : 4120 : = gimple_bb (SSA_NAME_DEF_STMT (t0));
2767 : : }
2768 : : else
2769 : : {
2770 : 6 : incr_vec[incr_vec_len].initializer = NULL_TREE;
2771 : 6 : incr_vec[incr_vec_len++].init_bb = NULL;
2772 : : }
2773 : : }
2774 : : else
2775 : : {
2776 : 59070 : incr_vec[incr_vec_len].initializer = NULL_TREE;
2777 : 59070 : incr_vec[incr_vec_len++].init_bb = NULL;
2778 : : }
2779 : : }
2780 : 105677 : }
2781 : :
2782 : : /* Recursive helper function for record_phi_increments. */
2783 : :
2784 : : static void
2785 : 12 : record_phi_increments_1 (slsr_cand_t basis, gimple *phi)
2786 : : {
2787 : 12 : unsigned i;
2788 : 12 : slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2789 : :
2790 : 12 : if (phi_cand->visited)
2791 : : return;
2792 : 12 : phi_cand->visited = 1;
2793 : :
2794 : 36 : for (i = 0; i < gimple_phi_num_args (phi); i++)
2795 : : {
2796 : 24 : tree arg = gimple_phi_arg_def (phi, i);
2797 : 24 : gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2798 : :
2799 : 24 : if (gimple_code (arg_def) == GIMPLE_PHI)
2800 : 0 : record_phi_increments_1 (basis, arg_def);
2801 : : else
2802 : : {
2803 : 24 : offset_int diff;
2804 : :
2805 : 24 : if (operand_equal_p (arg, phi_cand->base_expr, 0))
2806 : : {
2807 : 7 : diff = -basis->index;
2808 : 7 : record_increment (phi_cand, diff, PHI_ADJUST);
2809 : : }
2810 : : else
2811 : : {
2812 : 17 : slsr_cand_t arg_cand = base_cand_from_table (arg);
2813 : 17 : diff = arg_cand->index - basis->index;
2814 : 17 : record_increment (arg_cand, diff, PHI_ADJUST);
2815 : : }
2816 : : }
2817 : : }
2818 : : }
2819 : :
2820 : : /* Given phi statement PHI that hides a candidate from its BASIS, find
2821 : : the increments along each incoming arc (recursively handling additional
2822 : : phis that may be present) and record them. These increments are the
2823 : : difference in index between the index-adjusting statements and the
2824 : : index of the basis. */
2825 : :
2826 : : static void
2827 : 12 : record_phi_increments (slsr_cand_t basis, gimple *phi)
2828 : : {
2829 : 12 : record_phi_increments_1 (basis, phi);
2830 : 12 : clear_visited (as_a <gphi *> (phi));
2831 : 12 : }
2832 : :
2833 : : /* Determine how many times each unique increment occurs in the set
2834 : : of candidates rooted at C's parent, recording the data in the
2835 : : increment vector. For each unique increment I, if an initializer
2836 : : T_0 = stride * I is provided by a candidate that dominates all
2837 : : candidates with the same increment, also record T_0 for subsequent
2838 : : use. */
2839 : :
2840 : : static void
2841 : 39088 : record_increments (slsr_cand_t c)
2842 : : {
2843 : 105653 : if (!cand_already_replaced (c))
2844 : : {
2845 : 105653 : if (!phi_dependent_cand_p (c))
2846 : 105641 : record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2847 : : else
2848 : : {
2849 : : /* A candidate with a basis hidden by a phi will have one
2850 : : increment for its relationship to the index represented by
2851 : : the phi, and potentially additional increments along each
2852 : : incoming edge. For the root of the dependency tree (which
2853 : : has no basis), process just the initial index in case it has
2854 : : an initializer that can be used by subsequent candidates. */
2855 : 12 : record_increment (c, c->index, NOT_PHI_ADJUST);
2856 : :
2857 : 12 : if (c->basis)
2858 : 12 : record_phi_increments (lookup_cand (c->basis),
2859 : 12 : lookup_cand (c->def_phi)->cand_stmt);
2860 : : }
2861 : : }
2862 : :
2863 : 105653 : if (c->sibling)
2864 : 3285 : record_increments (lookup_cand (c->sibling));
2865 : :
2866 : 105653 : if (c->dependent)
2867 : 66565 : record_increments (lookup_cand (c->dependent));
2868 : 39088 : }
2869 : :
2870 : : /* Recursive helper function for phi_incr_cost. */
2871 : :
2872 : : static int
2873 : 15 : phi_incr_cost_1 (slsr_cand_t c, const offset_int &incr, gimple *phi,
2874 : : int *savings)
2875 : : {
2876 : 15 : unsigned i;
2877 : 15 : int cost = 0;
2878 : 15 : slsr_cand_t basis = lookup_cand (c->basis);
2879 : 15 : slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2880 : :
2881 : 15 : if (phi_cand->visited)
2882 : : return 0;
2883 : 15 : phi_cand->visited = 1;
2884 : :
2885 : 45 : for (i = 0; i < gimple_phi_num_args (phi); i++)
2886 : : {
2887 : 30 : tree arg = gimple_phi_arg_def (phi, i);
2888 : 30 : gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2889 : :
2890 : 30 : if (gimple_code (arg_def) == GIMPLE_PHI)
2891 : : {
2892 : 0 : int feeding_savings = 0;
2893 : 0 : tree feeding_var = gimple_phi_result (arg_def);
2894 : 0 : cost += phi_incr_cost_1 (c, incr, arg_def, &feeding_savings);
2895 : 0 : if (uses_consumed_by_stmt (feeding_var, phi))
2896 : 0 : *savings += feeding_savings;
2897 : : }
2898 : : else
2899 : : {
2900 : 30 : offset_int diff;
2901 : 30 : slsr_cand_t arg_cand;
2902 : :
2903 : : /* When the PHI argument is just a pass-through to the base
2904 : : expression of the hidden basis, the difference is zero minus
2905 : : the index of the basis. There is no potential savings by
2906 : : eliminating a statement in this case. */
2907 : 30 : if (operand_equal_p (arg, phi_cand->base_expr, 0))
2908 : : {
2909 : 7 : arg_cand = (slsr_cand_t)NULL;
2910 : 7 : diff = -basis->index;
2911 : : }
2912 : : else
2913 : : {
2914 : 23 : arg_cand = base_cand_from_table (arg);
2915 : 23 : diff = arg_cand->index - basis->index;
2916 : : }
2917 : :
2918 : 30 : if (incr == diff)
2919 : : {
2920 : 14 : tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2921 : 14 : cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2922 : 14 : if (arg_cand)
2923 : : {
2924 : 14 : tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2925 : 14 : if (uses_consumed_by_stmt (lhs, phi))
2926 : 14 : *savings += stmt_cost (arg_cand->cand_stmt, true);
2927 : : }
2928 : : }
2929 : : }
2930 : : }
2931 : :
2932 : : return cost;
2933 : : }
2934 : :
2935 : : /* Add up and return the costs of introducing add statements that
2936 : : require the increment INCR on behalf of candidate C and phi
2937 : : statement PHI. Accumulate into *SAVINGS the potential savings
2938 : : from removing existing statements that feed PHI and have no other
2939 : : uses. */
2940 : :
2941 : : static int
2942 : 15 : phi_incr_cost (slsr_cand_t c, const offset_int &incr, gimple *phi,
2943 : : int *savings)
2944 : : {
2945 : 15 : int retval = phi_incr_cost_1 (c, incr, phi, savings);
2946 : 15 : clear_visited (as_a <gphi *> (phi));
2947 : 15 : return retval;
2948 : : }
2949 : :
2950 : : /* Return the first candidate in the tree rooted at C that has not
2951 : : already been replaced, favoring siblings over dependents. */
2952 : :
2953 : : static slsr_cand_t
2954 : 35803 : unreplaced_cand_in_tree (slsr_cand_t c)
2955 : : {
2956 : 35803 : if (!cand_already_replaced (c))
2957 : : return c;
2958 : :
2959 : 0 : if (c->sibling)
2960 : : {
2961 : 0 : slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2962 : 0 : if (sib)
2963 : : return sib;
2964 : : }
2965 : :
2966 : 0 : if (c->dependent)
2967 : : {
2968 : 0 : slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2969 : 0 : if (dep)
2970 : : return dep;
2971 : : }
2972 : :
2973 : : return NULL;
2974 : : }
2975 : :
2976 : : /* Return TRUE if the candidates in the tree rooted at C should be
2977 : : optimized for speed, else FALSE. We estimate this based on the block
2978 : : containing the most dominant candidate in the tree that has not yet
2979 : : been replaced. */
2980 : :
2981 : : static bool
2982 : 35803 : optimize_cands_for_speed_p (slsr_cand_t c)
2983 : : {
2984 : 35803 : slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2985 : 35803 : gcc_assert (c2);
2986 : 35803 : return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2987 : : }
2988 : :
2989 : : /* Add COST_IN to the lowest cost of any dependent path starting at
2990 : : candidate C or any of its siblings, counting only candidates along
2991 : : such paths with increment INCR. Assume that replacing a candidate
2992 : : reduces cost by REPL_SAVINGS. Also account for savings from any
2993 : : statements that would go dead. If COUNT_PHIS is true, include
2994 : : costs of introducing feeding statements for conditional candidates. */
2995 : :
2996 : : static int
2997 : 4190 : lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
2998 : : const offset_int &incr, bool count_phis)
2999 : : {
3000 : 4190 : int local_cost, sib_cost, savings = 0;
3001 : 4190 : offset_int cand_incr = cand_abs_increment (c);
3002 : :
3003 : 4190 : if (cand_already_replaced (c))
3004 : : local_cost = cost_in;
3005 : 4190 : else if (incr == cand_incr)
3006 : 2892 : local_cost = cost_in - repl_savings - c->dead_savings;
3007 : : else
3008 : 1298 : local_cost = cost_in - c->dead_savings;
3009 : :
3010 : 4190 : if (count_phis
3011 : 283 : && phi_dependent_cand_p (c)
3012 : 4204 : && !cand_already_replaced (c))
3013 : : {
3014 : 14 : gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
3015 : 14 : local_cost += phi_incr_cost (c, incr, phi, &savings);
3016 : :
3017 : 14 : if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
3018 : 8 : local_cost -= savings;
3019 : : }
3020 : :
3021 : 4190 : if (c->dependent)
3022 : 1802 : local_cost = lowest_cost_path (local_cost, repl_savings,
3023 : : lookup_cand (c->dependent), incr,
3024 : : count_phis);
3025 : :
3026 : 4190 : if (c->sibling)
3027 : : {
3028 : 93 : sib_cost = lowest_cost_path (cost_in, repl_savings,
3029 : : lookup_cand (c->sibling), incr,
3030 : : count_phis);
3031 : 93 : local_cost = MIN (local_cost, sib_cost);
3032 : : }
3033 : :
3034 : 4190 : return local_cost;
3035 : : }
3036 : :
3037 : : /* Compute the total savings that would accrue from all replacements
3038 : : in the candidate tree rooted at C, counting only candidates with
3039 : : increment INCR. Assume that replacing a candidate reduces cost
3040 : : by REPL_SAVINGS. Also account for savings from statements that
3041 : : would go dead. */
3042 : :
3043 : : static int
3044 : 177 : total_savings (int repl_savings, slsr_cand_t c, const offset_int &incr,
3045 : : bool count_phis)
3046 : : {
3047 : 177 : int savings = 0;
3048 : 177 : offset_int cand_incr = cand_abs_increment (c);
3049 : :
3050 : 177 : if (incr == cand_incr && !cand_already_replaced (c))
3051 : 132 : savings += repl_savings + c->dead_savings;
3052 : :
3053 : 177 : if (count_phis
3054 : 102 : && phi_dependent_cand_p (c)
3055 : 178 : && !cand_already_replaced (c))
3056 : : {
3057 : 1 : int phi_savings = 0;
3058 : 1 : gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
3059 : 1 : savings -= phi_incr_cost (c, incr, phi, &phi_savings);
3060 : :
3061 : 1 : if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
3062 : 1 : savings += phi_savings;
3063 : : }
3064 : :
3065 : 177 : if (c->dependent)
3066 : 106 : savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
3067 : : count_phis);
3068 : :
3069 : 177 : if (c->sibling)
3070 : 0 : savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
3071 : : count_phis);
3072 : :
3073 : 177 : return savings;
3074 : : }
3075 : :
3076 : : /* Use target-specific costs to determine and record which increments
3077 : : in the current candidate tree are profitable to replace, assuming
3078 : : MODE and SPEED. FIRST_DEP is the first dependent of the root of
3079 : : the candidate tree.
3080 : :
3081 : : One slight limitation here is that we don't account for the possible
3082 : : introduction of casts in some cases. See replace_one_candidate for
3083 : : the cases where these are introduced. This should probably be cleaned
3084 : : up sometime. */
3085 : :
3086 : : static void
3087 : 35803 : analyze_increments (slsr_cand_t first_dep, machine_mode mode, bool speed)
3088 : : {
3089 : 35803 : unsigned i;
3090 : :
3091 : 98999 : for (i = 0; i < incr_vec_len; i++)
3092 : : {
3093 : 63196 : HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
3094 : :
3095 : : /* If somehow this increment is bigger than a HWI, we won't
3096 : : be optimizing candidates that use it. And if the increment
3097 : : has a count of zero, nothing will be done with it. */
3098 : 63196 : if (!wi::fits_shwi_p (incr_vec[i].incr) || !incr_vec[i].count)
3099 : 26910 : incr_vec[i].cost = COST_INFINITE;
3100 : :
3101 : : /* Increments of 0, 1, and -1 are always profitable to replace,
3102 : : because they always replace a multiply or add with an add or
3103 : : copy, and may cause one or more existing instructions to go
3104 : : dead. Exception: -1 can't be assumed to be profitable for
3105 : : pointer addition. */
3106 : 36286 : else if (incr == 0
3107 : 36286 : || incr == 1
3108 : 2400 : || (incr == -1
3109 : 8 : && !POINTER_TYPE_P (first_dep->cand_type)))
3110 : 33886 : incr_vec[i].cost = COST_NEUTRAL;
3111 : :
3112 : : /* If we need to add an initializer, give up if a cast from the
3113 : : candidate's type to its stride's type can lose precision.
3114 : : Note that this already takes into account that the stride may
3115 : : have been cast to a wider type, in which case this test won't
3116 : : fire. Example:
3117 : :
3118 : : short int _1;
3119 : : _2 = (int) _1;
3120 : : _3 = _2 * 10;
3121 : : _4 = x + _3; ADD: x + (10 * (int)_1) : int
3122 : : _5 = _2 * 15;
3123 : : _6 = x + _5; ADD: x + (15 * (int)_1) : int
3124 : :
3125 : : Although the stride was a short int initially, the stride
3126 : : used in the analysis has been widened to an int, and such
3127 : : widening will be done in the initializer as well. */
3128 : 2400 : else if (!incr_vec[i].initializer
3129 : 1909 : && TREE_CODE (first_dep->stride) != INTEGER_CST
3130 : 4309 : && !legal_cast_p_1 (first_dep->stride_type,
3131 : 1909 : TREE_TYPE (gimple_assign_lhs
3132 : : (first_dep->cand_stmt))))
3133 : 34 : incr_vec[i].cost = COST_INFINITE;
3134 : :
3135 : : /* If we need to add an initializer, make sure we don't introduce
3136 : : a multiply by a pointer type, which can happen in certain cast
3137 : : scenarios. */
3138 : 2366 : else if (!incr_vec[i].initializer
3139 : 1875 : && TREE_CODE (first_dep->stride) != INTEGER_CST
3140 : 1875 : && POINTER_TYPE_P (first_dep->stride_type))
3141 : 0 : incr_vec[i].cost = COST_INFINITE;
3142 : :
3143 : : /* For any other increment, if this is a multiply candidate, we
3144 : : must introduce a temporary T and initialize it with
3145 : : T_0 = stride * increment. When optimizing for speed, walk the
3146 : : candidate tree to calculate the best cost reduction along any
3147 : : path; if it offsets the fixed cost of inserting the initializer,
3148 : : replacing the increment is profitable. When optimizing for
3149 : : size, instead calculate the total cost reduction from replacing
3150 : : all candidates with this increment. */
3151 : 2366 : else if (first_dep->kind == CAND_MULT)
3152 : : {
3153 : 113 : int cost = mult_by_coeff_cost (incr, mode, speed);
3154 : 113 : int repl_savings;
3155 : :
3156 : 113 : if (tree_fits_shwi_p (first_dep->stride))
3157 : : {
3158 : 0 : HOST_WIDE_INT hwi_stride = tree_to_shwi (first_dep->stride);
3159 : 0 : repl_savings = mult_by_coeff_cost (hwi_stride, mode, speed);
3160 : : }
3161 : : else
3162 : 113 : repl_savings = mul_cost (speed, mode);
3163 : 113 : repl_savings -= add_cost (speed, mode);
3164 : :
3165 : 113 : if (speed)
3166 : 107 : cost = lowest_cost_path (cost, repl_savings, first_dep,
3167 : 107 : incr_vec[i].incr, COUNT_PHIS);
3168 : : else
3169 : 6 : cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
3170 : : COUNT_PHIS);
3171 : :
3172 : 113 : incr_vec[i].cost = cost;
3173 : : }
3174 : :
3175 : : /* If this is an add candidate, the initializer may already
3176 : : exist, so only calculate the cost of the initializer if it
3177 : : doesn't. We are replacing one add with another here, so the
3178 : : known replacement savings is zero. We will account for removal
3179 : : of dead instructions in lowest_cost_path or total_savings. */
3180 : : else
3181 : : {
3182 : 2253 : int cost = 0;
3183 : 2253 : if (!incr_vec[i].initializer)
3184 : 1762 : cost = mult_by_coeff_cost (incr, mode, speed);
3185 : :
3186 : 2253 : if (speed)
3187 : 2188 : cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
3188 : : DONT_COUNT_PHIS);
3189 : : else
3190 : 65 : cost -= total_savings (0, first_dep, incr_vec[i].incr,
3191 : : DONT_COUNT_PHIS);
3192 : :
3193 : 2253 : incr_vec[i].cost = cost;
3194 : : }
3195 : : }
3196 : 35803 : }
3197 : :
3198 : : /* Return the nearest common dominator of BB1 and BB2. If the blocks
3199 : : are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
3200 : : if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
3201 : : return C2 in *WHERE; and if the NCD matches neither, return NULL in
3202 : : *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
3203 : :
3204 : : static basic_block
3205 : 413 : ncd_for_two_cands (basic_block bb1, basic_block bb2,
3206 : : slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
3207 : : {
3208 : 413 : basic_block ncd;
3209 : :
3210 : 413 : if (!bb1)
3211 : : {
3212 : 247 : *where = c2;
3213 : 247 : return bb2;
3214 : : }
3215 : :
3216 : 166 : if (!bb2)
3217 : : {
3218 : 0 : *where = c1;
3219 : 0 : return bb1;
3220 : : }
3221 : :
3222 : 166 : ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
3223 : :
3224 : : /* If both candidates are in the same block, the earlier
3225 : : candidate wins. */
3226 : 166 : if (bb1 == ncd && bb2 == ncd)
3227 : : {
3228 : 144 : if (!c1 || (c2 && c2->cand_num < c1->cand_num))
3229 : 144 : *where = c2;
3230 : : else
3231 : 0 : *where = c1;
3232 : : }
3233 : :
3234 : : /* Otherwise, if one of them produced a candidate in the
3235 : : dominator, that one wins. */
3236 : 22 : else if (bb1 == ncd)
3237 : 6 : *where = c1;
3238 : :
3239 : 16 : else if (bb2 == ncd)
3240 : 5 : *where = c2;
3241 : :
3242 : : /* If neither matches the dominator, neither wins. */
3243 : : else
3244 : 11 : *where = NULL;
3245 : :
3246 : : return ncd;
3247 : : }
3248 : :
3249 : : /* Consider all candidates that feed PHI. Find the nearest common
3250 : : dominator of those candidates requiring the given increment INCR.
3251 : : Further find and return the nearest common dominator of this result
3252 : : with block NCD. If the returned block contains one or more of the
3253 : : candidates, return the earliest candidate in the block in *WHERE. */
3254 : :
3255 : : static basic_block
3256 : 8 : ncd_with_phi (slsr_cand_t c, const offset_int &incr, gphi *phi,
3257 : : basic_block ncd, slsr_cand_t *where)
3258 : : {
3259 : 8 : unsigned i;
3260 : 8 : slsr_cand_t basis = lookup_cand (c->basis);
3261 : 8 : slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
3262 : :
3263 : 24 : for (i = 0; i < gimple_phi_num_args (phi); i++)
3264 : : {
3265 : 16 : tree arg = gimple_phi_arg_def (phi, i);
3266 : 16 : gimple *arg_def = SSA_NAME_DEF_STMT (arg);
3267 : :
3268 : 16 : if (gimple_code (arg_def) == GIMPLE_PHI)
3269 : 0 : ncd = ncd_with_phi (c, incr, as_a <gphi *> (arg_def), ncd, where);
3270 : : else
3271 : : {
3272 : 16 : offset_int diff;
3273 : :
3274 : 16 : if (operand_equal_p (arg, phi_cand->base_expr, 0))
3275 : 6 : diff = -basis->index;
3276 : : else
3277 : : {
3278 : 10 : slsr_cand_t arg_cand = base_cand_from_table (arg);
3279 : 10 : diff = arg_cand->index - basis->index;
3280 : : }
3281 : :
3282 : 16 : basic_block pred = gimple_phi_arg_edge (phi, i)->src;
3283 : :
3284 : 25 : if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
3285 : 8 : ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where);
3286 : : }
3287 : : }
3288 : :
3289 : 8 : return ncd;
3290 : : }
3291 : :
3292 : : /* Consider the candidate C together with any candidates that feed
3293 : : C's phi dependence (if any). Find and return the nearest common
3294 : : dominator of those candidates requiring the given increment INCR.
3295 : : If the returned block contains one or more of the candidates,
3296 : : return the earliest candidate in the block in *WHERE. */
3297 : :
3298 : : static basic_block
3299 : 1227 : ncd_of_cand_and_phis (slsr_cand_t c, const offset_int &incr, slsr_cand_t *where)
3300 : : {
3301 : 1227 : basic_block ncd = NULL;
3302 : :
3303 : 1227 : if (cand_abs_increment (c) == incr)
3304 : : {
3305 : 400 : ncd = gimple_bb (c->cand_stmt);
3306 : 400 : *where = c;
3307 : : }
3308 : :
3309 : 1227 : if (phi_dependent_cand_p (c))
3310 : 8 : ncd = ncd_with_phi (c, incr,
3311 : 8 : as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt),
3312 : : ncd, where);
3313 : :
3314 : 1227 : return ncd;
3315 : : }
3316 : :
3317 : : /* Consider all candidates in the tree rooted at C for which INCR
3318 : : represents the required increment of C relative to its basis.
3319 : : Find and return the basic block that most nearly dominates all
3320 : : such candidates. If the returned block contains one or more of
3321 : : the candidates, return the earliest candidate in the block in
3322 : : *WHERE. */
3323 : :
3324 : : static basic_block
3325 : 1227 : nearest_common_dominator_for_cands (slsr_cand_t c, const offset_int &incr,
3326 : : slsr_cand_t *where)
3327 : : {
3328 : 1227 : basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
3329 : 1227 : slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
3330 : :
3331 : : /* First find the NCD of all siblings and dependents. */
3332 : 1227 : if (c->sibling)
3333 : 57 : sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
3334 : : incr, &sib_where);
3335 : 1227 : if (c->dependent)
3336 : 928 : dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
3337 : : incr, &dep_where);
3338 : 1227 : if (!sib_ncd && !dep_ncd)
3339 : : {
3340 : 504 : new_where = NULL;
3341 : 504 : ncd = NULL;
3342 : : }
3343 : 723 : else if (sib_ncd && !dep_ncd)
3344 : : {
3345 : 49 : new_where = sib_where;
3346 : 49 : ncd = sib_ncd;
3347 : : }
3348 : 674 : else if (dep_ncd && !sib_ncd)
3349 : : {
3350 : 674 : new_where = dep_where;
3351 : 674 : ncd = dep_ncd;
3352 : : }
3353 : : else
3354 : 0 : ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
3355 : : dep_where, &new_where);
3356 : :
3357 : : /* If the candidate's increment doesn't match the one we're interested
3358 : : in (and nor do any increments for feeding defs of a phi-dependence),
3359 : : then the result depends only on siblings and dependents. */
3360 : 1227 : this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
3361 : :
3362 : 1227 : if (!this_ncd || cand_already_replaced (c))
3363 : : {
3364 : 822 : *where = new_where;
3365 : 822 : return ncd;
3366 : : }
3367 : :
3368 : : /* Otherwise, compare this candidate with the result from all siblings
3369 : : and dependents. */
3370 : 405 : ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
3371 : :
3372 : 405 : return ncd;
3373 : : }
3374 : :
3375 : : /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
3376 : :
3377 : : static inline bool
3378 : 133067 : profitable_increment_p (unsigned index)
3379 : : {
3380 : 133067 : return (incr_vec[index].cost <= COST_NEUTRAL);
3381 : : }
3382 : :
3383 : : /* For each profitable increment in the increment vector not equal to
3384 : : 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
3385 : : dominator of all statements in the candidate chain rooted at C
3386 : : that require that increment, and insert an initializer
3387 : : T_0 = stride * increment at that location. Record T_0 with the
3388 : : increment record. */
3389 : :
3390 : : static void
3391 : 35803 : insert_initializers (slsr_cand_t c)
3392 : : {
3393 : 35803 : unsigned i;
3394 : :
3395 : 98999 : for (i = 0; i < incr_vec_len; i++)
3396 : : {
3397 : 63196 : basic_block bb;
3398 : 63196 : slsr_cand_t where = NULL;
3399 : 63196 : gassign *init_stmt;
3400 : 63196 : gassign *cast_stmt = NULL;
3401 : 63196 : tree new_name, incr_tree, init_stride;
3402 : 63196 : offset_int incr = incr_vec[i].incr;
3403 : :
3404 : 125659 : if (!profitable_increment_p (i)
3405 : 34619 : || incr == 1
3406 : 31710 : || (incr == -1
3407 : 4 : && (!POINTER_TYPE_P (lookup_cand (c->basis)->cand_type)))
3408 : 94906 : || incr == 0)
3409 : 62954 : continue;
3410 : :
3411 : : /* We may have already identified an existing initializer that
3412 : : will suffice. */
3413 : 733 : if (incr_vec[i].initializer)
3414 : : {
3415 : 491 : if (dump_file && (dump_flags & TDF_DETAILS))
3416 : : {
3417 : 0 : fputs ("Using existing initializer: ", dump_file);
3418 : 0 : print_gimple_stmt (dump_file,
3419 : 0 : SSA_NAME_DEF_STMT (incr_vec[i].initializer),
3420 : : 0, TDF_NONE);
3421 : : }
3422 : 491 : continue;
3423 : : }
3424 : :
3425 : : /* Find the block that most closely dominates all candidates
3426 : : with this increment. If there is at least one candidate in
3427 : : that block, the earliest one will be returned in WHERE. */
3428 : 242 : bb = nearest_common_dominator_for_cands (c, incr, &where);
3429 : :
3430 : : /* If the NCD is not dominated by the block containing the
3431 : : definition of the stride, we can't legally insert a
3432 : : single initializer. Mark the increment as unprofitable
3433 : : so we don't make any replacements. FIXME: Multiple
3434 : : initializers could be placed with more analysis. */
3435 : 242 : gimple *stride_def = SSA_NAME_DEF_STMT (c->stride);
3436 : 242 : basic_block stride_bb = gimple_bb (stride_def);
3437 : :
3438 : 242 : if (stride_bb && !dominated_by_p (CDI_DOMINATORS, bb, stride_bb))
3439 : : {
3440 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3441 : 0 : fprintf (dump_file,
3442 : : "Initializer #%d cannot be legally placed\n", i);
3443 : 0 : incr_vec[i].cost = COST_INFINITE;
3444 : 0 : continue;
3445 : : }
3446 : :
3447 : : /* If the nominal stride has a different type than the recorded
3448 : : stride type, build a cast from the nominal stride to that type. */
3449 : 242 : if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
3450 : : {
3451 : 107 : init_stride = make_temp_ssa_name (c->stride_type, NULL, "slsr");
3452 : 107 : cast_stmt = gimple_build_assign (init_stride, NOP_EXPR, c->stride);
3453 : : }
3454 : : else
3455 : 135 : init_stride = c->stride;
3456 : :
3457 : : /* Create a new SSA name to hold the initializer's value. */
3458 : 242 : new_name = make_temp_ssa_name (c->stride_type, NULL, "slsr");
3459 : 242 : incr_vec[i].initializer = new_name;
3460 : :
3461 : : /* Create the initializer and insert it in the latest possible
3462 : : dominating position. */
3463 : 242 : incr_tree = wide_int_to_tree (c->stride_type, incr);
3464 : 242 : init_stmt = gimple_build_assign (new_name, MULT_EXPR,
3465 : : init_stride, incr_tree);
3466 : 242 : if (where)
3467 : : {
3468 : 228 : gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
3469 : 228 : location_t loc = gimple_location (where->cand_stmt);
3470 : :
3471 : 228 : if (cast_stmt)
3472 : : {
3473 : 107 : gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3474 : 107 : gimple_set_location (cast_stmt, loc);
3475 : : }
3476 : :
3477 : 228 : gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3478 : 228 : gimple_set_location (init_stmt, loc);
3479 : : }
3480 : : else
3481 : : {
3482 : 14 : gimple_stmt_iterator gsi = gsi_last_bb (bb);
3483 : 14 : gimple *basis_stmt = lookup_cand (c->basis)->cand_stmt;
3484 : 14 : location_t loc = gimple_location (basis_stmt);
3485 : :
3486 : 14 : if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
3487 : : {
3488 : 14 : if (cast_stmt)
3489 : : {
3490 : 0 : gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3491 : 0 : gimple_set_location (cast_stmt, loc);
3492 : : }
3493 : 14 : gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3494 : : }
3495 : : else
3496 : : {
3497 : 0 : if (cast_stmt)
3498 : : {
3499 : 0 : gsi_insert_after (&gsi, cast_stmt, GSI_NEW_STMT);
3500 : 0 : gimple_set_location (cast_stmt, loc);
3501 : : }
3502 : 0 : gsi_insert_after (&gsi, init_stmt, GSI_NEW_STMT);
3503 : : }
3504 : :
3505 : 14 : gimple_set_location (init_stmt, gimple_location (basis_stmt));
3506 : : }
3507 : :
3508 : 242 : if (dump_file && (dump_flags & TDF_DETAILS))
3509 : : {
3510 : 0 : if (cast_stmt)
3511 : : {
3512 : 0 : fputs ("Inserting stride cast: ", dump_file);
3513 : 0 : print_gimple_stmt (dump_file, cast_stmt, 0);
3514 : : }
3515 : 0 : fputs ("Inserting initializer: ", dump_file);
3516 : 0 : print_gimple_stmt (dump_file, init_stmt, 0);
3517 : : }
3518 : : }
3519 : 35803 : }
3520 : :
3521 : : /* Recursive helper function for all_phi_incrs_profitable. */
3522 : :
3523 : : static bool
3524 : 12 : all_phi_incrs_profitable_1 (slsr_cand_t c, gphi *phi, int *spread)
3525 : : {
3526 : 12 : unsigned i;
3527 : 12 : slsr_cand_t basis = lookup_cand (c->basis);
3528 : 12 : slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
3529 : :
3530 : 12 : if (phi_cand->visited)
3531 : : return true;
3532 : :
3533 : 12 : phi_cand->visited = 1;
3534 : 12 : (*spread)++;
3535 : :
3536 : : /* If the basis doesn't dominate the PHI (including when the PHI is
3537 : : in the same block as the basis), we won't be able to create a PHI
3538 : : using the basis here. */
3539 : 12 : basic_block basis_bb = gimple_bb (basis->cand_stmt);
3540 : 12 : basic_block phi_bb = gimple_bb (phi);
3541 : :
3542 : 12 : if (phi_bb == basis_bb
3543 : 12 : || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
3544 : 0 : return false;
3545 : :
3546 : 29 : for (i = 0; i < gimple_phi_num_args (phi); i++)
3547 : : {
3548 : : /* If the PHI arg resides in a block not dominated by the basis,
3549 : : we won't be able to create a PHI using the basis here. */
3550 : 21 : basic_block pred_bb = gimple_phi_arg_edge (phi, i)->src;
3551 : :
3552 : 21 : if (!dominated_by_p (CDI_DOMINATORS, pred_bb, basis_bb))
3553 : : return false;
3554 : :
3555 : 21 : tree arg = gimple_phi_arg_def (phi, i);
3556 : 21 : gimple *arg_def = SSA_NAME_DEF_STMT (arg);
3557 : :
3558 : 21 : if (gimple_code (arg_def) == GIMPLE_PHI)
3559 : : {
3560 : 0 : if (!all_phi_incrs_profitable_1 (c, as_a <gphi *> (arg_def), spread)
3561 : 0 : || *spread > MAX_SPREAD)
3562 : : return false;
3563 : : }
3564 : : else
3565 : : {
3566 : 21 : int j;
3567 : 21 : offset_int increment;
3568 : :
3569 : 21 : if (operand_equal_p (arg, phi_cand->base_expr, 0))
3570 : 7 : increment = -basis->index;
3571 : : else
3572 : : {
3573 : 14 : slsr_cand_t arg_cand = base_cand_from_table (arg);
3574 : 14 : increment = arg_cand->index - basis->index;
3575 : : }
3576 : :
3577 : 21 : if (!address_arithmetic_p && wi::neg_p (increment))
3578 : 1 : increment = -increment;
3579 : :
3580 : 21 : j = incr_vec_index (increment);
3581 : :
3582 : 21 : if (dump_file && (dump_flags & TDF_DETAILS))
3583 : : {
3584 : 0 : fprintf (dump_file, " Conditional candidate %d, phi: ",
3585 : : c->cand_num);
3586 : 0 : print_gimple_stmt (dump_file, phi, 0);
3587 : 0 : fputs (" increment: ", dump_file);
3588 : 0 : print_decs (increment, dump_file);
3589 : 0 : if (j < 0)
3590 : 0 : fprintf (dump_file,
3591 : : "\n Not replaced; incr_vec overflow.\n");
3592 : : else {
3593 : 0 : fprintf (dump_file, "\n cost: %d\n", incr_vec[j].cost);
3594 : 0 : if (profitable_increment_p (j))
3595 : 0 : fputs (" Replacing...\n", dump_file);
3596 : : else
3597 : 0 : fputs (" Not replaced.\n", dump_file);
3598 : : }
3599 : : }
3600 : :
3601 : 21 : if (j < 0 || !profitable_increment_p (j))
3602 : 4 : return false;
3603 : : }
3604 : : }
3605 : :
3606 : : return true;
3607 : : }
3608 : :
3609 : : /* Return TRUE iff all required increments for candidates feeding PHI
3610 : : are profitable (and legal!) to replace on behalf of candidate C. */
3611 : :
3612 : : static bool
3613 : 12 : all_phi_incrs_profitable (slsr_cand_t c, gphi *phi)
3614 : : {
3615 : 12 : int spread = 0;
3616 : 12 : bool retval = all_phi_incrs_profitable_1 (c, phi, &spread);
3617 : 12 : clear_visited (phi);
3618 : 12 : return retval;
3619 : : }
3620 : :
3621 : : /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3622 : : type TO_TYPE, and insert it in front of the statement represented
3623 : : by candidate C. Use *NEW_VAR to create the new SSA name. Return
3624 : : the new SSA name. */
3625 : :
3626 : : static tree
3627 : 980 : introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3628 : : {
3629 : 980 : tree cast_lhs;
3630 : 980 : gassign *cast_stmt;
3631 : 980 : gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3632 : :
3633 : 980 : cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3634 : 980 : cast_stmt = gimple_build_assign (cast_lhs, NOP_EXPR, from_expr);
3635 : 980 : gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3636 : 980 : gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3637 : :
3638 : 980 : if (dump_file && (dump_flags & TDF_DETAILS))
3639 : : {
3640 : 0 : fputs (" Inserting: ", dump_file);
3641 : 0 : print_gimple_stmt (dump_file, cast_stmt, 0);
3642 : : }
3643 : :
3644 : 980 : return cast_lhs;
3645 : : }
3646 : :
3647 : : /* Replace the RHS of the statement represented by candidate C with
3648 : : NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3649 : : leave C unchanged or just interchange its operands. The original
3650 : : operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3651 : : If the replacement was made and we are doing a details dump,
3652 : : return the revised statement, else NULL. */
3653 : :
3654 : : static gimple *
3655 : 6981 : replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3656 : : enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3657 : : slsr_cand_t c)
3658 : : {
3659 : 6981 : if (new_code != old_code
3660 : 6981 : || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3661 : 0 : || !operand_equal_p (new_rhs2, old_rhs2, 0))
3662 : 3089 : && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3663 : 0 : || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3664 : : {
3665 : 6981 : gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3666 : 6981 : slsr_cand_t cc = lookup_cand (c->first_interp);
3667 : 6981 : gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3668 : 6981 : update_stmt (gsi_stmt (gsi));
3669 : 26719 : while (cc)
3670 : : {
3671 : 12757 : cc->cand_stmt = gsi_stmt (gsi);
3672 : 12757 : cc = lookup_cand (cc->next_interp);
3673 : : }
3674 : :
3675 : 6981 : if (dump_file && (dump_flags & TDF_DETAILS))
3676 : 0 : return gsi_stmt (gsi);
3677 : : }
3678 : :
3679 : 0 : else if (dump_file && (dump_flags & TDF_DETAILS))
3680 : 0 : fputs (" (duplicate, not actually replacing)\n", dump_file);
3681 : :
3682 : : return NULL;
3683 : : }
3684 : :
3685 : : /* Strength-reduce the statement represented by candidate C by replacing
3686 : : it with an equivalent addition or subtraction. I is the index into
3687 : : the increment vector identifying C's increment. NEW_VAR is used to
3688 : : create a new SSA name if a cast needs to be introduced. BASIS_NAME
3689 : : is the rhs1 to use in creating the add/subtract. */
3690 : :
3691 : : static void
3692 : 18854 : replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name,
3693 : : auto_bitmap &sdce_worklist)
3694 : : {
3695 : 18854 : gimple *stmt_to_print = NULL;
3696 : 18854 : tree orig_rhs1, orig_rhs2;
3697 : 18854 : tree rhs2;
3698 : 18854 : enum tree_code orig_code, repl_code;
3699 : 18854 : offset_int cand_incr;
3700 : :
3701 : 18854 : orig_code = gimple_assign_rhs_code (c->cand_stmt);
3702 : 18854 : orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
3703 : 18854 : orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
3704 : 18854 : cand_incr = cand_increment (c);
3705 : :
3706 : : /* If orig_rhs2 is NULL, we have already replaced this in situ with
3707 : : a copy statement under another interpretation. */
3708 : 18854 : if (!orig_rhs2)
3709 : 0 : return;
3710 : :
3711 : : /* Mark the 2 original rhs for maybe DCEing. */
3712 : 18854 : if (TREE_CODE (orig_rhs1) == SSA_NAME)
3713 : 18854 : bitmap_set_bit (sdce_worklist, SSA_NAME_VERSION (orig_rhs1));
3714 : 18854 : if (TREE_CODE (orig_rhs2) == SSA_NAME)
3715 : 18854 : bitmap_set_bit (sdce_worklist, SSA_NAME_VERSION (orig_rhs2));
3716 : :
3717 : 18854 : if (dump_file && (dump_flags & TDF_DETAILS))
3718 : : {
3719 : 0 : fputs ("Replacing: ", dump_file);
3720 : 0 : print_gimple_stmt (dump_file, c->cand_stmt, 0);
3721 : 0 : stmt_to_print = c->cand_stmt;
3722 : : }
3723 : :
3724 : 18854 : if (address_arithmetic_p)
3725 : : repl_code = POINTER_PLUS_EXPR;
3726 : : else
3727 : 15810 : repl_code = PLUS_EXPR;
3728 : :
3729 : : /* If the increment has an initializer T_0, replace the candidate
3730 : : statement with an add of the basis name and the initializer. */
3731 : 18854 : if (incr_vec[i].initializer)
3732 : : {
3733 : 1376 : tree init_type = TREE_TYPE (incr_vec[i].initializer);
3734 : 1376 : tree orig_type = TREE_TYPE (orig_rhs2);
3735 : :
3736 : 1376 : if (types_compatible_p (orig_type, init_type))
3737 : 1376 : rhs2 = incr_vec[i].initializer;
3738 : : else
3739 : 0 : rhs2 = introduce_cast_before_cand (c, orig_type,
3740 : 0 : incr_vec[i].initializer);
3741 : :
3742 : 1376 : if (incr_vec[i].incr != cand_incr)
3743 : : {
3744 : 120 : gcc_assert (repl_code == PLUS_EXPR);
3745 : : repl_code = MINUS_EXPR;
3746 : : }
3747 : :
3748 : 1376 : stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3749 : : orig_code, orig_rhs1, orig_rhs2,
3750 : : c);
3751 : : }
3752 : :
3753 : : /* Otherwise, the increment is one of -1, 0, and 1. Replace
3754 : : with a subtract of the stride from the basis name, a copy
3755 : : from the basis name, or an add of the stride to the basis
3756 : : name, respectively. It may be necessary to introduce a
3757 : : cast (or reuse an existing cast). */
3758 : 17478 : else if (cand_incr == 1)
3759 : : {
3760 : 5605 : tree stride_type = TREE_TYPE (c->stride);
3761 : 5605 : tree orig_type = TREE_TYPE (orig_rhs2);
3762 : :
3763 : 5605 : if (types_compatible_p (orig_type, stride_type))
3764 : 4681 : rhs2 = c->stride;
3765 : : else
3766 : 924 : rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3767 : :
3768 : 5605 : stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3769 : : orig_code, orig_rhs1, orig_rhs2,
3770 : : c);
3771 : : }
3772 : :
3773 : 11873 : else if (cand_incr == -1)
3774 : : {
3775 : 323 : tree stride_type = TREE_TYPE (c->stride);
3776 : 323 : tree orig_type = TREE_TYPE (orig_rhs2);
3777 : 323 : gcc_assert (repl_code != POINTER_PLUS_EXPR);
3778 : :
3779 : 323 : if (types_compatible_p (orig_type, stride_type))
3780 : 267 : rhs2 = c->stride;
3781 : : else
3782 : 56 : rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3783 : :
3784 : 323 : if (orig_code != MINUS_EXPR
3785 : 42 : || !operand_equal_p (basis_name, orig_rhs1, 0)
3786 : 323 : || !operand_equal_p (rhs2, orig_rhs2, 0))
3787 : : {
3788 : 323 : gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3789 : 323 : slsr_cand_t cc = lookup_cand (c->first_interp);
3790 : 323 : gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
3791 : 323 : update_stmt (gsi_stmt (gsi));
3792 : 1250 : while (cc)
3793 : : {
3794 : 604 : cc->cand_stmt = gsi_stmt (gsi);
3795 : 604 : cc = lookup_cand (cc->next_interp);
3796 : : }
3797 : :
3798 : 323 : if (dump_file && (dump_flags & TDF_DETAILS))
3799 : 0 : stmt_to_print = gsi_stmt (gsi);
3800 : : }
3801 : 0 : else if (dump_file && (dump_flags & TDF_DETAILS))
3802 : 0 : fputs (" (duplicate, not actually replacing)\n", dump_file);
3803 : : }
3804 : :
3805 : 11550 : else if (cand_incr == 0)
3806 : : {
3807 : 11550 : tree lhs = gimple_assign_lhs (c->cand_stmt);
3808 : 11550 : tree lhs_type = TREE_TYPE (lhs);
3809 : 11550 : tree basis_type = TREE_TYPE (basis_name);
3810 : :
3811 : 11550 : if (types_compatible_p (lhs_type, basis_type))
3812 : : {
3813 : 11550 : gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
3814 : 11550 : gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3815 : 11550 : slsr_cand_t cc = lookup_cand (c->first_interp);
3816 : 11550 : gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
3817 : 11550 : gsi_replace (&gsi, copy_stmt, false);
3818 : 42420 : while (cc)
3819 : : {
3820 : 19320 : cc->cand_stmt = copy_stmt;
3821 : 19320 : cc = lookup_cand (cc->next_interp);
3822 : : }
3823 : :
3824 : 11550 : if (dump_file && (dump_flags & TDF_DETAILS))
3825 : : stmt_to_print = copy_stmt;
3826 : : }
3827 : : else
3828 : : {
3829 : 0 : gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3830 : 0 : gassign *cast_stmt = gimple_build_assign (lhs, NOP_EXPR, basis_name);
3831 : 0 : slsr_cand_t cc = lookup_cand (c->first_interp);
3832 : 0 : gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3833 : 0 : gsi_replace (&gsi, cast_stmt, false);
3834 : 0 : while (cc)
3835 : : {
3836 : 0 : cc->cand_stmt = cast_stmt;
3837 : 0 : cc = lookup_cand (cc->next_interp);
3838 : : }
3839 : :
3840 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3841 : : stmt_to_print = cast_stmt;
3842 : : }
3843 : : }
3844 : : else
3845 : 0 : gcc_unreachable ();
3846 : :
3847 : 18854 : if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
3848 : : {
3849 : 0 : fputs ("With: ", dump_file);
3850 : 0 : print_gimple_stmt (dump_file, stmt_to_print, 0);
3851 : 0 : fputs ("\n", dump_file);
3852 : : }
3853 : : }
3854 : :
3855 : : /* For each candidate in the tree rooted at C, replace it with
3856 : : an increment if such has been shown to be profitable. */
3857 : :
3858 : : static void
3859 : 39088 : replace_profitable_candidates (slsr_cand_t c, auto_bitmap &sdce_worklist)
3860 : : {
3861 : 69850 : if (!cand_already_replaced (c))
3862 : : {
3863 : 69850 : offset_int increment = cand_abs_increment (c);
3864 : 69850 : enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3865 : 69850 : int i;
3866 : :
3867 : 69850 : i = incr_vec_index (increment);
3868 : :
3869 : : /* Only process profitable increments. Nothing useful can be done
3870 : : to a cast or copy. */
3871 : 69850 : if (i >= 0
3872 : 69850 : && profitable_increment_p (i)
3873 : 68151 : && orig_code != SSA_NAME
3874 : 122542 : && !CONVERT_EXPR_CODE_P (orig_code))
3875 : : {
3876 : 18858 : if (phi_dependent_cand_p (c))
3877 : : {
3878 : 12 : gphi *phi = as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt);
3879 : :
3880 : 12 : if (all_phi_incrs_profitable (c, phi))
3881 : : {
3882 : : /* Look up the LHS SSA name from C's basis. This will be
3883 : : the RHS1 of the adds we will introduce to create new
3884 : : phi arguments. */
3885 : 8 : slsr_cand_t basis = lookup_cand (c->basis);
3886 : 8 : tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3887 : :
3888 : : /* Create a new phi statement that will represent C's true
3889 : : basis after the transformation is complete. */
3890 : 8 : location_t loc = gimple_location (c->cand_stmt);
3891 : 8 : tree name = create_phi_basis (c, phi, basis_name,
3892 : : loc, UNKNOWN_STRIDE);
3893 : :
3894 : : /* Replace C with an add of the new basis phi and the
3895 : : increment. */
3896 : 8 : replace_one_candidate (c, i, name, sdce_worklist);
3897 : : }
3898 : : }
3899 : : else
3900 : : {
3901 : 18846 : slsr_cand_t basis = lookup_cand (c->basis);
3902 : 18846 : tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3903 : 18846 : replace_one_candidate (c, i, basis_name, sdce_worklist);
3904 : : }
3905 : : }
3906 : : }
3907 : :
3908 : 69850 : if (c->sibling)
3909 : 3285 : replace_profitable_candidates (lookup_cand (c->sibling), sdce_worklist);
3910 : :
3911 : 69850 : if (c->dependent)
3912 : 30762 : replace_profitable_candidates (lookup_cand (c->dependent), sdce_worklist);
3913 : 39088 : }
3914 : :
3915 : : /* Analyze costs of related candidates in the candidate vector,
3916 : : and make beneficial replacements. */
3917 : :
3918 : : static void
3919 : 1000765 : analyze_candidates_and_replace (void)
3920 : : {
3921 : 1000765 : unsigned i;
3922 : 1000765 : slsr_cand_t c;
3923 : 1000765 : auto_bitmap simple_dce_worklist;
3924 : :
3925 : : /* Each candidate that has a null basis and a non-null
3926 : : dependent is the root of a tree of related statements.
3927 : : Analyze each tree to determine a subset of those
3928 : : statements that can be replaced with maximum benefit.
3929 : :
3930 : : Note the first NULL element is skipped. */
3931 : 8319577 : FOR_EACH_VEC_ELT_FROM (cand_vec, i, c, 1)
3932 : : {
3933 : 7318812 : slsr_cand_t first_dep;
3934 : :
3935 : 7318812 : if (c->basis != 0 || c->dependent == 0)
3936 : 6759024 : continue;
3937 : :
3938 : 559788 : if (dump_file && (dump_flags & TDF_DETAILS))
3939 : 7 : fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
3940 : : c->cand_num);
3941 : :
3942 : 559788 : first_dep = lookup_cand (c->dependent);
3943 : :
3944 : : /* If this is a chain of CAND_REFs, unconditionally replace
3945 : : each of them with a strength-reduced data reference. */
3946 : 559788 : if (c->kind == CAND_REF)
3947 : 6431 : replace_refs (c);
3948 : :
3949 : : /* If the common stride of all related candidates is a known
3950 : : constant, each candidate without a phi-dependence can be
3951 : : profitably replaced. Each replaces a multiply by a single
3952 : : add, with the possibility that a feeding add also goes dead.
3953 : : A candidate with a phi-dependence is replaced only if the
3954 : : compensation code it requires is offset by the strength
3955 : : reduction savings. */
3956 : 553357 : else if (TREE_CODE (c->stride) == INTEGER_CST)
3957 : 517554 : replace_uncond_cands_and_profitable_phis (first_dep,
3958 : : simple_dce_worklist);
3959 : :
3960 : : /* When the stride is an SSA name, it may still be profitable
3961 : : to replace some or all of the dependent candidates, depending
3962 : : on whether the introduced increments can be reused, or are
3963 : : less expensive to calculate than the replaced statements. */
3964 : : else
3965 : : {
3966 : 35803 : machine_mode mode;
3967 : 35803 : bool speed;
3968 : :
3969 : : /* Determine whether we'll be generating pointer arithmetic
3970 : : when replacing candidates. */
3971 : 71606 : address_arithmetic_p = (c->kind == CAND_ADD
3972 : 35803 : && POINTER_TYPE_P (c->cand_type));
3973 : :
3974 : : /* If all candidates have already been replaced under other
3975 : : interpretations, nothing remains to be done. */
3976 : 35803 : if (!count_candidates (c))
3977 : 0 : continue;
3978 : :
3979 : : /* Construct an array of increments for this candidate chain. */
3980 : 35803 : incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
3981 : 35803 : incr_vec_len = 0;
3982 : 35803 : record_increments (c);
3983 : :
3984 : : /* Determine which increments are profitable to replace. */
3985 : 35803 : mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
3986 : 35803 : speed = optimize_cands_for_speed_p (c);
3987 : 35803 : analyze_increments (first_dep, mode, speed);
3988 : :
3989 : : /* Insert initializers of the form T_0 = stride * increment
3990 : : for use in profitable replacements. */
3991 : 35803 : insert_initializers (first_dep);
3992 : 35803 : dump_incr_vec ();
3993 : :
3994 : : /* Perform the replacements. */
3995 : 35803 : replace_profitable_candidates (first_dep, simple_dce_worklist);
3996 : 35803 : free (incr_vec);
3997 : : }
3998 : : }
3999 : :
4000 : : /* For conditional candidates, we may have uncommitted insertions
4001 : : on edges to clean up. */
4002 : 1000765 : gsi_commit_edge_inserts ();
4003 : :
4004 : 1000765 : simple_dce_from_worklist (simple_dce_worklist);
4005 : 1000765 : }
4006 : :
4007 : : namespace {
4008 : :
4009 : : const pass_data pass_data_strength_reduction =
4010 : : {
4011 : : GIMPLE_PASS, /* type */
4012 : : "slsr", /* name */
4013 : : OPTGROUP_NONE, /* optinfo_flags */
4014 : : TV_GIMPLE_SLSR, /* tv_id */
4015 : : ( PROP_cfg | PROP_ssa ), /* properties_required */
4016 : : 0, /* properties_provided */
4017 : : 0, /* properties_destroyed */
4018 : : 0, /* todo_flags_start */
4019 : : 0, /* todo_flags_finish */
4020 : : };
4021 : :
4022 : : class pass_strength_reduction : public gimple_opt_pass
4023 : : {
4024 : : public:
4025 : 280831 : pass_strength_reduction (gcc::context *ctxt)
4026 : 561662 : : gimple_opt_pass (pass_data_strength_reduction, ctxt)
4027 : : {}
4028 : :
4029 : : /* opt_pass methods: */
4030 : 1000779 : bool gate (function *) final override { return flag_tree_slsr; }
4031 : : unsigned int execute (function *) final override;
4032 : :
4033 : : }; // class pass_strength_reduction
4034 : :
4035 : : unsigned
4036 : 1000765 : pass_strength_reduction::execute (function *fun)
4037 : : {
4038 : : /* Create the obstack where candidates will reside. */
4039 : 1000765 : gcc_obstack_init (&cand_obstack);
4040 : :
4041 : : /* Allocate the candidate vector and initialize the first NULL element. */
4042 : 1000765 : cand_vec.create (128);
4043 : 1000765 : cand_vec.safe_push (NULL);
4044 : :
4045 : : /* Allocate the mapping from statements to candidate indices. */
4046 : 1000765 : stmt_cand_map = new hash_map<gimple *, slsr_cand_t>;
4047 : :
4048 : : /* Create the obstack where candidate chains will reside. */
4049 : 1000765 : gcc_obstack_init (&chain_obstack);
4050 : :
4051 : : /* Allocate the mapping from base expressions to candidate chains. */
4052 : 1000765 : base_cand_map = new hash_table<cand_chain_hasher> (500);
4053 : :
4054 : : /* Allocate the mapping from bases to alternative bases. */
4055 : 1000765 : alt_base_map = new hash_map<tree, tree>;
4056 : :
4057 : : /* Initialize the loop optimizer. We need to detect flow across
4058 : : back edges, and this gives us dominator information as well. */
4059 : 1000765 : loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4060 : :
4061 : : /* Walk the CFG in predominator order looking for strength reduction
4062 : : candidates. */
4063 : 1000765 : find_candidates_dom_walker (CDI_DOMINATORS)
4064 : 1000765 : .walk (fun->cfg->x_entry_block_ptr);
4065 : :
4066 : 1000765 : if (dump_file && (dump_flags & TDF_DETAILS))
4067 : : {
4068 : 3 : dump_cand_vec ();
4069 : 3 : dump_cand_chains ();
4070 : : }
4071 : :
4072 : 2001530 : delete alt_base_map;
4073 : 1000765 : free_affine_expand_cache (&name_expansions);
4074 : :
4075 : : /* Analyze costs and make appropriate replacements. */
4076 : 1000765 : analyze_candidates_and_replace ();
4077 : :
4078 : 1000765 : loop_optimizer_finalize ();
4079 : 1000765 : delete base_cand_map;
4080 : 1000765 : base_cand_map = NULL;
4081 : 1000765 : obstack_free (&chain_obstack, NULL);
4082 : 2001530 : delete stmt_cand_map;
4083 : 1000765 : cand_vec.release ();
4084 : 1000765 : obstack_free (&cand_obstack, NULL);
4085 : :
4086 : 1000765 : return 0;
4087 : : }
4088 : :
4089 : : } // anon namespace
4090 : :
4091 : : gimple_opt_pass *
4092 : 280831 : make_pass_strength_reduction (gcc::context *ctxt)
4093 : : {
4094 : 280831 : return new pass_strength_reduction (ctxt);
4095 : : }
|