Line data Source code
1 : /* Induction variable optimizations.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* This pass tries to find the optimal set of induction variables for the loop.
21 : It optimizes just the basic linear induction variables (although adding
22 : support for other types should not be too hard). It includes the
23 : optimizations commonly known as strength reduction, induction variable
24 : coalescing and induction variable elimination. It does it in the
25 : following steps:
26 :
27 : 1) The interesting uses of induction variables are found. This includes
28 :
29 : -- uses of induction variables in non-linear expressions
30 : -- addresses of arrays
31 : -- comparisons of induction variables
32 :
33 : Note the interesting uses are categorized and handled in groups.
34 : Generally, address type uses are grouped together if their iv bases
35 : differ only in a constant offset.
36 :
37 : 2) Candidates for the induction variables are found. This includes
38 :
39 : -- old induction variables
40 : -- the variables defined by expressions derived from the "interesting
41 : groups/uses" above
42 :
43 : 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 : cost function assigns a cost to sets of induction variables and consists
45 : of three parts:
46 :
47 : -- The group/use costs. Each of the interesting groups/uses chooses
48 : the best induction variable in the set and adds its cost to the sum.
49 : The cost reflects the time spent on modifying the induction variables
50 : value to be usable for the given purpose (adding base and offset for
51 : arrays, etc.).
52 : -- The variable costs. Each of the variables has a cost assigned that
53 : reflects the costs associated with incrementing the value of the
54 : variable. The original variables are somewhat preferred.
55 : -- The set cost. Depending on the size of the set, extra cost may be
56 : added to reflect register pressure.
57 :
58 : All the costs are defined in a machine-specific way, using the target
59 : hooks and machine descriptions to determine them.
60 :
61 : 4) The trees are transformed to use the new variables, the dead code is
62 : removed.
63 :
64 : All of this is done loop by loop. Doing it globally is theoretically
65 : possible, it might give a better performance and it might enable us
66 : to decide costs more precisely, but getting all the interactions right
67 : would be complicated.
68 :
69 : For the targets supporting low-overhead loops, IVOPTs has to take care of
70 : the loops which will probably be transformed in RTL doloop optimization,
71 : to try to make selected IV candidate set optimal. The process of doloop
72 : support includes:
73 :
74 : 1) Analyze whether the current loop will be transformed to doloop or not, find and
75 : mark its compare type IV use as doloop use (iv_group field doloop_p), and
76 : set flag doloop_use_p of ivopts_data to notify subsequent processings on
77 : doloop. See analyze_and_mark_doloop_use and its callees for the details.
78 : The target hook predict_doloop_p can be used for target specific checks.
79 :
80 : 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 : set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
82 : like biv. For cost determination between doloop IV cand and IV use, the
83 : target hooks doloop_cost_for_generic and doloop_cost_for_address are
84 : provided to add on extra costs for generic type and address type IV use.
85 : Zero cost is assigned to the pair between doloop IV cand and doloop IV
86 : use, and bound zero is set for IV elimination.
87 :
88 : 3) With the cost setting in step 2), the current cost model based IV
89 : selection algorithm will process as usual, pick up doloop dedicated IV if
90 : profitable. */
91 :
92 : #include "config.h"
93 : #include "system.h"
94 : #include "coretypes.h"
95 : #include "backend.h"
96 : #include "rtl.h"
97 : #include "tree.h"
98 : #include "gimple.h"
99 : #include "cfghooks.h"
100 : #include "tree-pass.h"
101 : #include "memmodel.h"
102 : #include "tm_p.h"
103 : #include "ssa.h"
104 : #include "expmed.h"
105 : #include "insn-config.h"
106 : #include "emit-rtl.h"
107 : #include "recog.h"
108 : #include "cgraph.h"
109 : #include "gimple-pretty-print.h"
110 : #include "alias.h"
111 : #include "fold-const.h"
112 : #include "stor-layout.h"
113 : #include "tree-eh.h"
114 : #include "gimplify.h"
115 : #include "gimple-iterator.h"
116 : #include "gimplify-me.h"
117 : #include "tree-cfg.h"
118 : #include "tree-ssa-loop-ivopts.h"
119 : #include "tree-ssa-loop-manip.h"
120 : #include "tree-ssa-loop-niter.h"
121 : #include "tree-ssa-loop.h"
122 : #include "explow.h"
123 : #include "expr.h"
124 : #include "tree-dfa.h"
125 : #include "tree-ssa.h"
126 : #include "cfgloop.h"
127 : #include "tree-scalar-evolution.h"
128 : #include "tree-affine.h"
129 : #include "tree-ssa-propagate.h"
130 : #include "tree-ssa-address.h"
131 : #include "builtins.h"
132 : #include "tree-vectorizer.h"
133 : #include "dbgcnt.h"
134 : #include "cfganal.h"
135 : #include "gimple-fold.h"
136 :
137 : /* For lang_hooks.types.type_for_mode. */
138 : #include "langhooks.h"
139 :
140 : /* FIXME: Expressions are expanded to RTL in this pass to determine the
141 : cost of different addressing modes. This should be moved to a TBD
142 : interface between the GIMPLE and RTL worlds. */
143 :
144 : /* The infinite cost. */
145 : #define INFTY 1000000000
146 :
147 : /* Returns the expected number of loop iterations for LOOP.
148 : The average trip count is computed from profile data if it
149 : exists. */
150 :
151 : static inline unsigned HOST_WIDE_INT
152 8724909 : avg_loop_niter (class loop *loop)
153 : {
154 8724909 : HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
155 8724909 : if (niter == -1) /* No estimate: fall back to the likely maximum. */
156 : {
157 4942725 : niter = likely_max_stmt_executions_int (loop);
158 :
159 4942725 : if (niter == -1 || niter > param_avg_loop_niter) /* Unknown, or above the cap. */
160 4162759 : return param_avg_loop_niter;
161 : }
162 :
163 4562150 : return niter; /* An available estimate is returned as is; the cap applies only to the fallback. */
164 : }
165 :
166 : struct iv_use;
167 :
168 : /* Representation of the induction variable. */
169 : struct iv
170 : {
171 : tree base; /* Initial value of the iv. */
172 : tree base_object; /* The memory object that the induction variable points to. */
173 : tree step; /* Step of the iv (constant only). */
174 : tree ssa_name; /* The ssa name with the value. */
175 : struct iv_use *nonlin_use; /* The use recorded for this iv, if it is the case (NOTE(review): presumably its nonlinear use -- confirm). */
176 : bool biv_p; /* Is it a biv? */
177 : bool no_overflow; /* True if the iv doesn't overflow. */
178 : bool have_address_use;/* For biv, indicate if it's used in any address
179 : type use. */
180 : };
181 :
182 : /* Per-ssa version information (induction variable descriptions, etc.). */
183 : struct version_info
184 : {
185 : tree name; /* The ssa name. */
186 : struct iv *iv; /* Induction variable description. */
187 : bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
188 : an expression that is not an induction variable. */
189 : bool preserve_biv; /* For the original biv, whether to preserve it. */
190 : unsigned inv_id; /* Id of an invariant. */
191 : };
192 :
193 : /* Types of uses. */
194 : enum use_type
195 : {
196 : USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
197 : USE_REF_ADDRESS, /* Use is an address for an explicit memory
198 : reference. */
199 : USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
200 : cases where the expansion of the function
201 : will turn the argument into a normal address. */
202 : USE_COMPARE /* Use is a compare. */
203 : };
204 :
205 : /* Cost of a computation. Comparisons order by cost, then by complexity. */
206 : class comp_cost
207 : {
208 : public:
209 130727073 : comp_cost (): cost (0), complexity (0), scratch (0)
210 : {}
211 :
212 25160519 : comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
213 15161206 : : cost (cost), complexity (complexity), scratch (scratch)
214 14366012 : {}
215 :
216 : /* Returns true if this cost is infinite, i.e. its cost field is INFTY. */
217 : bool infinite_cost_p ();
218 :
219 : /* Adds costs COST1 and COST2; the sum is infinite if either one is. */
220 : friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221 :
222 : /* Adds COST to the comp_cost. */
223 : comp_cost operator+= (comp_cost cost);
224 :
225 : /* Adds constant C to this comp_cost. */
226 : comp_cost operator+= (HOST_WIDE_INT c);
227 :
228 : /* Subtracts constant C from this comp_cost. */
229 : comp_cost operator-= (HOST_WIDE_INT c);
230 :
231 : /* Divides the comp_cost by constant C. */
232 : comp_cost operator/= (HOST_WIDE_INT c);
233 :
234 : /* Multiplies the comp_cost by constant C. */
235 : comp_cost operator*= (HOST_WIDE_INT c);
236 :
237 : /* Subtracts COST2 from COST1. */
238 : friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239 :
240 : /* Subtracts COST from this comp_cost. */
241 : comp_cost operator-= (comp_cost cost);
242 :
243 : /* Returns true if COST1 is smaller than COST2. */
244 : friend bool operator< (comp_cost cost1, comp_cost cost2);
245 :
246 : /* Returns true if COST1 and COST2 are equal (scratch is not compared). */
247 : friend bool operator== (comp_cost cost1, comp_cost cost2);
248 :
249 : /* Returns true if COST1 is smaller than or equal to COST2. */
250 : friend bool operator<= (comp_cost cost1, comp_cost cost2);
251 :
252 : int64_t cost; /* The runtime cost. */
253 : unsigned complexity; /* The estimate of the complexity of the code for
254 : the computation (in no concrete units --
255 : complexity field should be larger for more
256 : complex expressions and addressing modes). */
257 : int64_t scratch; /* Scratch used during cost computation. */
258 : };
259 :
260 : static const comp_cost no_cost;
261 : static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 :
263 : bool
264 1822591969 : comp_cost::infinite_cost_p ()
265 : {
266 1822591969 : return cost == INFTY;
267 : }
268 :
269 : comp_cost
270 243518062 : operator+ (comp_cost cost1, comp_cost cost2)
271 : {
272 243518062 : if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
273 1878914 : return infinite_cost; /* An infinite operand makes the sum infinite. */
274 :
275 241639148 : gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
276 241639148 : cost1.cost += cost2.cost;
277 241639148 : cost1.complexity += cost2.complexity;
278 :
279 241639148 : return cost1; /* COST1's scratch is returned unchanged; COST2's is dropped. */
280 : }
281 :
282 : comp_cost
283 208331956 : operator- (comp_cost cost1, comp_cost cost2)
284 : {
285 208331956 : if (cost1.infinite_cost_p ())
286 0 : return infinite_cost; /* An infinite COST1 stays infinite; COST2 is not inspected. */
287 :
288 208331956 : gcc_assert (!cost2.infinite_cost_p ()); /* Finite minus infinite is not supported. */
289 208331956 : gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 :
291 208331956 : cost1.cost -= cost2.cost;
292 208331956 : cost1.complexity -= cost2.complexity; /* Unsigned: wraps if COST2's complexity is the larger one. */
293 :
294 208331956 : return cost1;
295 : }
296 :
297 : comp_cost
298 243518062 : comp_cost::operator+= (comp_cost cost)
299 : {
300 243518062 : *this = *this + cost;
301 243518062 : return *this;
302 : }
303 :
304 : comp_cost
305 860636193 : comp_cost::operator+= (HOST_WIDE_INT c)
306 : {
307 860636193 : if (c >= INFTY)
308 0 : this->cost = INFTY; /* Adding an infinite constant makes this cost infinite. */
309 :
310 860636193 : if (infinite_cost_p ())
311 0 : return *this; /* Infinite costs are left as they are. */
312 :
313 860636193 : gcc_assert (this->cost + c < infinite_cost.cost);
314 860636193 : this->cost += c; /* Only the cost field changes; complexity and scratch are kept. */
315 :
316 860636193 : return *this;
317 : }
318 :
319 : comp_cost
320 541057 : comp_cost::operator-= (HOST_WIDE_INT c)
321 : {
322 541057 : if (infinite_cost_p ())
323 0 : return *this; /* Infinite costs are left as they are. */
324 :
325 541057 : gcc_assert (this->cost - c < infinite_cost.cost);
326 541057 : this->cost -= c; /* Only the cost field changes; complexity and scratch are kept. */
327 :
328 541057 : return *this;
329 : }
330 :
331 : comp_cost
332 0 : comp_cost::operator/= (HOST_WIDE_INT c)
333 : {
334 0 : gcc_assert (c != 0); /* Checked even when this cost is infinite. */
335 0 : if (infinite_cost_p ())
336 0 : return *this; /* Infinite costs are not scaled. */
337 :
338 0 : this->cost /= c; /* Only the cost field is divided. */
339 :
340 0 : return *this;
341 : }
342 :
343 : comp_cost
344 0 : comp_cost::operator*= (HOST_WIDE_INT c)
345 : {
346 0 : if (infinite_cost_p ())
347 0 : return *this; /* Infinite costs are not scaled. */
348 :
349 0 : gcc_assert (this->cost * c < infinite_cost.cost); /* The product must stay finite. */
350 0 : this->cost *= c; /* Only the cost field is multiplied. */
351 :
352 0 : return *this;
353 : }
354 :
355 : comp_cost
356 208331956 : comp_cost::operator-= (comp_cost cost)
357 : {
358 208331956 : *this = *this - cost;
359 208331956 : return *this;
360 : }
361 :
362 : bool
363 182290014 : operator< (comp_cost cost1, comp_cost cost2)
364 : {
365 182290014 : if (cost1.cost == cost2.cost) /* Ties on cost are broken by complexity. */
366 80356049 : return cost1.complexity < cost2.complexity;
367 :
368 101933965 : return cost1.cost < cost2.cost;
369 : }
370 :
371 : bool
372 3926458 : operator== (comp_cost cost1, comp_cost cost2)
373 : {
374 3926458 : return cost1.cost == cost2.cost /* The scratch field takes no part in equality. */
375 3926458 : && cost1.complexity == cost2.complexity;
376 : }
377 :
378 : bool
379 6429350 : operator<= (comp_cost cost1, comp_cost cost2)
380 : {
381 6429350 : return cost1 < cost2 || cost1 == cost2;
382 : }
383 :
384 : struct iv_inv_expr_ent;
385 :
386 : /* The candidate - cost pair. */
387 : class cost_pair
388 : {
389 : public:
390 : struct iv_cand *cand; /* The candidate. */
391 : comp_cost cost; /* The cost. */
392 : enum tree_code comp; /* For iv elimination, the comparison. */
393 : bitmap inv_vars; /* The list of invariant ssa_vars that have to be
394 : preserved when representing iv_use with iv_cand. */
395 : bitmap inv_exprs; /* The list of newly created invariant expressions
396 : when representing iv_use with iv_cand. */
397 : tree value; /* For final value elimination, the expression for
398 : the final value of the iv. For iv elimination,
399 : the new bound to compare with. */
400 : };
401 :
402 : /* A use of an induction variable. */
403 : struct iv_use
404 : {
405 : unsigned id; /* The id of the use. */
406 : unsigned group_id; /* The group id the use belongs to. */
407 : enum use_type type; /* Type of the use. */
408 : tree mem_type; /* The memory type to use when testing whether an
409 : address is legitimate, and what the address's
410 : cost is. */
411 : struct iv *iv; /* The induction variable it is based on. */
412 : gimple *stmt; /* Statement in which it occurs. */
413 : tree *op_p; /* The place where it occurs; may be NULL (dump_use checks for that). */
414 :
415 : tree addr_base; /* Base address with const offset stripped. */
416 : poly_uint64 addr_offset;
417 : /* Const offset stripped from base address. */
418 : };
419 :
420 : /* Group of uses. */
421 : struct iv_group
422 : {
423 : /* The id of the group. */
424 : unsigned id;
425 : /* Uses of the group are of the same type. */
426 : enum use_type type;
427 : /* The set of "related" IV candidates, plus the important ones. */
428 : bitmap related_cands;
429 : /* Number of IV candidates in the cost_map. */
430 : unsigned n_map_members;
431 : /* The costs with respect to the iv candidates. */
432 : class cost_pair *cost_map;
433 : /* The selected candidate for the group. */
434 : struct iv_cand *selected;
435 : /* To indicate this is a doloop use group. */
436 : bool doloop_p;
437 : /* Uses in the group. */
438 : vec<struct iv_use *> vuses;
439 : };
440 :
441 : /* The position where the iv is computed. */
442 : enum iv_position
443 : {
444 : IP_NORMAL, /* At the end, just before the exit condition. */
445 : IP_END, /* At the end of the latch block. */
446 : IP_BEFORE_USE, /* Immediately before a specific use. */
447 : IP_AFTER_USE, /* Immediately after a specific use. */
448 : IP_ORIGINAL /* The original biv. */
449 : };
450 :
451 : /* The induction variable candidate. */
452 : struct iv_cand
453 : {
454 : unsigned id; /* The number of the candidate. */
455 : bool important; /* Whether this is an "important" candidate, i.e. such
456 : that it should be considered by all uses. */
457 : bool involves_undefs; /* Whether the IV involves undefined values. */
458 : ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
459 : gimple *incremented_at;/* For original biv, the statement where it is
460 : incremented. */
461 : tree var_before; /* The variable used for it before increment. */
462 : tree var_after; /* The variable used for it after increment. */
463 : struct iv *iv; /* The value of the candidate. NULL for
464 : "pseudocandidate" used to indicate the possibility
465 : to replace the final value of an iv by direct
466 : computation of the value. */
467 : unsigned cost; /* Cost of the candidate. */
468 : unsigned cost_step; /* Cost of the candidate's increment operation. */
469 : struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
470 : where it is incremented. */
471 : bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
472 : iv_cand. */
473 : bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
474 : handle it as a new invariant expression which will
475 : be hoisted out of loop. */
476 : struct iv *orig_iv; /* The original iv if this cand is added from biv with
477 : smaller type. */
478 : bool doloop_p; /* Whether this is a doloop candidate. */
479 : };
480 :
481 : /* Hashtable entry for common candidate derived from iv uses. */
482 2615704 : class iv_common_cand
483 : {
484 : public:
485 : tree base;
486 : tree step;
487 : /* IV uses from which this common candidate is derived. */
488 : auto_vec<struct iv_use *> uses;
489 : hashval_t hash;
490 : };
491 :
492 : /* Hashtable helpers. */
493 :
494 : struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495 : {
496 : static inline hashval_t hash (const iv_common_cand *);
497 : static inline bool equal (const iv_common_cand *, const iv_common_cand *);
498 : };
499 :
500 : /* Hash function for possible common candidates. */
501 :
502 : inline hashval_t
503 9889635 : iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 : {
505 9889635 : return ccand->hash;
506 : }
507 :
508 : /* Hash table equality function for common candidates: the hashes, bases and steps must match and the base types must have equal precision. */
509 :
510 : inline bool
511 11168079 : iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
512 : const iv_common_cand *ccand2)
513 : {
514 11168079 : return (ccand1->hash == ccand2->hash
515 1631236 : && operand_equal_p (ccand1->base, ccand2->base, 0)
516 1610293 : && operand_equal_p (ccand1->step, ccand2->step, 0)
517 12771408 : && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
518 1603329 : == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
519 : }
520 :
521 : /* Loop invariant expression hashtable entry. */
522 :
523 : struct iv_inv_expr_ent
524 : {
525 : /* Tree expression of the entry. */
526 : tree expr;
527 : /* Unique identifier. */
528 : int id;
529 : /* Hash value. */
530 : hashval_t hash;
531 : };
532 :
533 : /* Comparison callback ordering A and B, each a pointer to an iv_inv_expr_ent pointer, by ascending id field; returns -1, 0 or 1. */
534 :
535 : static int
536 5739 : sort_iv_inv_expr_ent (const void *a, const void *b)
537 : {
538 5739 : const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
539 5739 : const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 :
541 5739 : unsigned id1 = (*e1)->id; /* The int id field is compared as unsigned. */
542 5739 : unsigned id2 = (*e2)->id;
543 :
544 5739 : if (id1 < id2)
545 : return -1;
546 2669 : else if (id1 > id2)
547 : return 1;
548 : else
549 0 : return 0;
550 : }
551 :
552 : /* Hashtable helpers. */
553 :
554 : struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555 : {
556 : static inline hashval_t hash (const iv_inv_expr_ent *);
557 : static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
558 : };
559 :
560 : /* Return true if uses of type TYPE represent some form of address. */
561 :
562 : inline bool
563 8977510 : address_p (use_type type)
564 : {
565 8977510 : return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
566 : }
567 :
568 : /* Hash function for loop invariant expressions. */
569 :
570 : inline hashval_t
571 6716984 : iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 : {
573 6716984 : return expr->hash;
574 : }
575 :
576 : /* Hash table equality function for expressions. */
577 :
578 : inline bool
579 8071571 : iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
580 : const iv_inv_expr_ent *expr2)
581 : {
582 8071571 : return expr1->hash == expr2->hash
583 8071571 : && operand_equal_p (expr1->expr, expr2->expr, 0);
584 : }
585 :
586 : struct ivopts_data
587 : {
588 : /* The currently optimized loop. */
589 : class loop *current_loop;
590 : location_t loop_loc;
591 :
592 : /* Numbers of iterations for all exits of the current loop. */
593 : hash_map<edge, tree_niter_desc *> *niters;
594 :
595 : /* Number of registers used in it. */
596 : unsigned regs_used;
597 :
598 : /* The size of version_info array allocated. */
599 : unsigned version_info_size;
600 :
601 : /* The array of information for the ssa names. */
602 : struct version_info *version_info;
603 :
604 : /* The hashtable of loop invariant expressions created
605 : by ivopt. */
606 : hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607 :
608 : /* The bitmap of indices in version_info whose value was changed. */
609 : bitmap relevant;
610 :
611 : /* The uses of induction variables. */
612 : vec<iv_group *> vgroups;
613 :
614 : /* The candidates. */
615 : vec<iv_cand *> vcands;
616 :
617 : /* A bitmap of important candidates. */
618 : bitmap important_candidates;
619 :
620 : /* Cache used by tree_to_aff_combination_expand. */
621 : hash_map<tree, name_expansion *> *name_expansion_cache;
622 :
623 : /* The hashtable of common candidates derived from iv uses. */
624 : hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625 :
626 : /* The common candidates. */
627 : vec<iv_common_cand *> iv_common_cands;
628 :
629 : /* Hash map recording base object information of tree exp. */
630 : hash_map<tree, tree> *base_object_map;
631 :
632 : /* The maximum invariant variable id. */
633 : unsigned max_inv_var_id;
634 :
635 : /* The maximum invariant expression id. */
636 : unsigned max_inv_expr_id;
637 :
638 : /* Number of no_overflow BIVs which are not used in memory address. */
639 : unsigned bivs_not_used_in_addr;
640 :
641 : /* Obstack for iv structure. */
642 : struct obstack iv_obstack;
643 :
644 : /* Whether all candidates, rather than just the related and important ones, are
645 : considered when replacing a use (NOTE(review): polarity taken from the field name -- confirm). */
646 : bool consider_all_candidates;
647 :
648 : /* Are we optimizing for speed? */
649 : bool speed;
650 :
651 : /* Whether the loop body includes any function calls. */
652 : bool body_includes_call;
653 :
654 : /* Whether the loop body can only be exited via single exit. */
655 : bool loop_single_exit_p;
656 :
657 : /* Whether the loop has doloop comparison use. */
658 : bool doloop_use_p;
659 : };
660 :
661 : /* An assignment of iv candidates to uses. */
662 :
663 : class iv_ca
664 : {
665 : public:
666 : /* The number of uses covered by the assignment. */
667 : unsigned upto;
668 :
669 : /* Number of uses that cannot be expressed by the candidates in the set. */
670 : unsigned bad_groups;
671 :
672 : /* Candidate assigned to a use, together with the related costs. */
673 : class cost_pair **cand_for_group;
674 :
675 : /* Number of times each candidate is used. */
676 : unsigned *n_cand_uses;
677 :
678 : /* The candidates used. */
679 : bitmap cands;
680 :
681 : /* The number of candidates in the set. */
682 : unsigned n_cands;
683 :
684 : /* The number of invariants needed, including both invariant variables and
685 : invariant expressions. */
686 : unsigned n_invs;
687 :
688 : /* Total cost of expressing uses. */
689 : comp_cost cand_use_cost;
690 :
691 : /* Total cost of candidates. */
692 : int64_t cand_cost;
693 :
694 : /* Number of times each invariant variable is used. */
695 : unsigned *n_inv_var_uses;
696 :
697 : /* Number of times each invariant expression is used. */
698 : unsigned *n_inv_expr_uses;
699 :
700 : /* Total cost of the assignment. */
701 : comp_cost cost;
702 : };
703 :
704 : /* Difference of two iv candidate assignments. */
705 :
706 : struct iv_ca_delta
707 : {
708 : /* Changed group. */
709 : struct iv_group *group;
710 :
711 : /* An old assignment (for rollback purposes). */
712 : class cost_pair *old_cp;
713 :
714 : /* A new assignment. */
715 : class cost_pair *new_cp;
716 :
717 : /* Next change in the list. */
718 : struct iv_ca_delta *next;
719 : };
720 :
721 : /* Bound on the number of candidates below which all candidates are considered. */
722 :
723 : #define CONSIDER_ALL_CANDIDATES_BOUND \
724 : ((unsigned) param_iv_consider_all_candidates_bound)
725 :
726 : /* If there are more iv occurrences, we just give up (it is quite unlikely that
727 : optimizing such a loop would help, and it would take ages). */
728 :
729 : #define MAX_CONSIDERED_GROUPS \
730 : ((unsigned) param_iv_max_considered_uses)
731 :
732 : /* If there are at most this number of ivs in the set, try removing unnecessary
733 : ivs from the set always. */
734 :
735 : #define ALWAYS_PRUNE_CAND_SET_BOUND \
736 : ((unsigned) param_iv_always_prune_cand_set_bound)
737 :
738 : /* The list of trees for which the decl_rtl field must be reset is stored
739 : here. */
740 :
741 : static vec<tree> decl_rtl_to_reset;
742 :
743 : static comp_cost force_expr_to_var_cost (tree, bool);
744 :
745 : /* The single loop exit if it dominates the latch (checked with just_once_each_iteration_p on the exit's source block), NULL otherwise. */
746 :
747 : edge
748 700136 : single_dom_exit (class loop *loop)
749 : {
750 700136 : edge exit = single_exit (loop);
751 :
752 700136 : if (!exit) /* LOOP has no single exit. */
753 : return NULL;
754 :
755 466148 : if (!just_once_each_iteration_p (loop, exit->src))
756 : return NULL;
757 :
758 : return exit;
759 : }
760 :
761 : /* Dumps information about the induction variable IV to FILE. Don't dump
762 : variable's name if DUMP_NAME is FALSE. The information is dumped with
763 : preceding spaces indicated by INDENT_LEVEL. */
764 :
765 : void
766 1597 : dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767 : {
768 1597 : const char *p;
769 1597 : const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770 :
771 1597 : if (indent_level > 4) /* SPACES only holds enough for four levels. */
772 : indent_level = 4;
773 1597 : p = spaces + 8 - (indent_level << 1); /* P is a prefix of two spaces per indent level. */
774 :
775 1597 : fprintf (file, "%sIV struct:\n", p);
776 1597 : if (iv->ssa_name && dump_name)
777 : {
778 550 : fprintf (file, "%s SSA_NAME:\t", p);
779 550 : print_generic_expr (file, iv->ssa_name, TDF_SLIM);
780 550 : fprintf (file, "\n");
781 : }
782 :
783 1597 : fprintf (file, "%s Type:\t", p);
784 1597 : print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
785 1597 : fprintf (file, "\n");
786 :
787 1597 : fprintf (file, "%s Base:\t", p);
788 1597 : print_generic_expr (file, iv->base, TDF_SLIM);
789 1597 : fprintf (file, "\n");
790 :
791 1597 : fprintf (file, "%s Step:\t", p);
792 1597 : print_generic_expr (file, iv->step, TDF_SLIM);
793 1597 : fprintf (file, "\n");
794 :
795 1597 : if (iv->base_object) /* The Object line is printed only when a base object is recorded. */
796 : {
797 497 : fprintf (file, "%s Object:\t", p);
798 497 : print_generic_expr (file, iv->base_object, TDF_SLIM);
799 497 : fprintf (file, "\n");
800 : }
801 :
802 2887 : fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803 :
804 1597 : fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
805 1597 : p, iv->no_overflow ? "No-overflow" : "Overflow");
806 1597 : }
807 :
808 : /* Dumps information about the USE to FILE: its group and use ids, its statement, the operand at *op_p (when op_p is set) and its iv. */
809 :
810 : void
811 250 : dump_use (FILE *file, struct iv_use *use)
812 : {
813 250 : fprintf (file, " Use %u.%u:\n", use->group_id, use->id); /* Both ids are unsigned, hence %u. */
814 250 : fprintf (file, " At stmt:\t");
815 250 : print_gimple_stmt (file, use->stmt, 0);
816 250 : fprintf (file, " At pos:\t");
817 250 : if (use->op_p) /* op_p may be NULL; the position is then left empty. */
818 160 : print_generic_expr (file, *use->op_p, TDF_SLIM);
819 250 : fprintf (file, "\n");
820 250 : dump_iv (file, use->iv, false, 2);
821 250 : }
822 :
823 : /* Dumps information about the use groups in DATA->vgroups to FILE: for each group its id, its type and all of its uses. */
824 :
825 : void
826 67 : dump_groups (FILE *file, struct ivopts_data *data)
827 : {
828 67 : unsigned i, j;
829 67 : struct iv_group *group;
830 :
831 287 : for (i = 0; i < data->vgroups.length (); i++)
832 : {
833 220 : group = data->vgroups[i];
834 220 : fprintf (file, "Group %u:\n", group->id); /* The id is unsigned, hence %u. */
835 220 : if (group->type == USE_NONLINEAR_EXPR)
836 90 : fprintf (file, " Type:\tGENERIC\n");
837 130 : else if (group->type == USE_REF_ADDRESS)
838 56 : fprintf (file, " Type:\tREFERENCE ADDRESS\n");
839 74 : else if (group->type == USE_PTR_ADDRESS)
840 0 : fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
841 : else
842 : {
843 74 : gcc_assert (group->type == USE_COMPARE); /* The only remaining use_type. */
844 74 : fprintf (file, " Type:\tCOMPARE\n");
845 : }
846 470 : for (j = 0; j < group->vuses.length (); j++)
847 250 : dump_use (file, group->vuses[j]);
848 : }
849 67 : }
850 :
851 : /* Dumps information about induction variable candidate CAND to FILE: its id, the invariants it depends on, its before/after variables, its increment position and its iv. */
852 :
853 : void
854 797 : dump_cand (FILE *file, struct iv_cand *cand)
855 : {
856 797 : struct iv *iv = cand->iv;
857 :
858 797 : fprintf (file, "Candidate %u:\n", cand->id); /* The id is unsigned, hence %u. */
859 797 : if (cand->inv_vars)
860 : {
861 26 : fprintf (file, " Depend on inv.vars: ");
862 26 : dump_bitmap (file, cand->inv_vars);
863 : }
864 797 : if (cand->inv_exprs)
865 : {
866 0 : fprintf (file, " Depend on inv.exprs: ");
867 0 : dump_bitmap (file, cand->inv_exprs);
868 : }
869 :
870 797 : if (cand->var_before)
871 : {
872 687 : fprintf (file, " Var befor: ");
873 687 : print_generic_expr (file, cand->var_before, TDF_SLIM);
874 687 : fprintf (file, "\n");
875 : }
876 797 : if (cand->var_after)
877 : {
878 687 : fprintf (file, " Var after: ");
879 687 : print_generic_expr (file, cand->var_after, TDF_SLIM);
880 687 : fprintf (file, "\n");
881 : }
882 :
883 797 : switch (cand->pos)
884 : {
885 653 : case IP_NORMAL:
886 653 : fprintf (file, " Incr POS: before exit test\n");
887 653 : break;
888 :
889 0 : case IP_BEFORE_USE:
890 0 : fprintf (file, " Incr POS: before use %u\n", cand->ainc_use->id); /* ainc_use is dereferenced unchecked here. */
891 0 : break;
892 :
893 0 : case IP_AFTER_USE:
894 0 : fprintf (file, " Incr POS: after use %u\n", cand->ainc_use->id);
895 0 : break;
896 :
897 0 : case IP_END:
898 0 : fprintf (file, " Incr POS: at end\n");
899 0 : break;
900 :
901 144 : case IP_ORIGINAL:
902 144 : fprintf (file, " Incr POS: orig biv\n");
903 144 : break;
904 : }
905 :
906 797 : dump_iv (file, iv, false, 1);
907 797 : }
908 :
909 : /* Returns the info for ssa version VER, i.e. entry VER of DATA->version_info. VER is not checked against DATA->version_info_size. */
910 :
911 : static inline struct version_info *
912 116964697 : ver_info (struct ivopts_data *data, unsigned ver)
913 : {
914 116964697 : return data->version_info + ver;
915 : }
916 :
917 : /* Returns the info for ssa name NAME. */
918 :
919 : static inline struct version_info *
920 94770480 : name_info (struct ivopts_data *data, tree name)
921 : {
922 94770480 : return ver_info (data, SSA_NAME_VERSION (name));
923 : }
924 :
925 : /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
926 : emitted in LOOP. */
927 :
928 : static bool
929 33407706 : stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 : {
931 33407706 : basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 :
933 33407706 : gcc_assert (bb);
934 :
935 33407706 : if (sbb == loop->latch)
936 : return true;
937 :
938 33302555 : if (sbb != bb)
939 : return false;
940 :
941 19424765 : return stmt == last_nondebug_stmt (bb);
942 : }
943 :
944 : /* Returns true if STMT is after the place where the original induction
945 : variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
946 : if the positions are identical. */
947 :
948 : static bool
949 7884743 : stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 : {
951 7884743 : basic_block cand_bb = gimple_bb (cand->incremented_at);
952 7884743 : basic_block stmt_bb = gimple_bb (stmt);
953 :
954 7884743 : if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb)) /* STMT's block must be dominated by the increment's block. */
955 : return false;
956 :
957 5426574 : if (stmt_bb != cand_bb)
958 : return true;
959 :
960 5167187 : if (true_if_equal /* Same block: positions are compared through the statement uids. */
961 5167187 : && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
962 : return true;
963 5160638 : return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
964 : }
965 :
966 : /* Returns true if STMT if after the place where the induction variable
967 : CAND is incremented in LOOP. */
968 :
969 : static bool
970 42448207 : stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 : {
972 42448207 : switch (cand->pos)
973 : {
974 : case IP_END:
975 : return false;
976 :
977 33407706 : case IP_NORMAL:
978 33407706 : return stmt_after_ip_normal_pos (loop, stmt);
979 :
980 7874673 : case IP_ORIGINAL:
981 7874673 : case IP_AFTER_USE:
982 7874673 : return stmt_after_inc_pos (cand, stmt, false);
983 :
984 10070 : case IP_BEFORE_USE:
985 10070 : return stmt_after_inc_pos (cand, stmt, true);
986 :
987 0 : default:
988 0 : gcc_unreachable ();
989 : }
990 : }
991 :
992 : /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 :
994 : static tree
995 14563827 : contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 : {
997 14563827 : if (TREE_CODE (*tp) == SSA_NAME
998 14563827 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
999 : return *tp;
1000 :
1001 14563810 : if (!EXPR_P (*tp))
1002 10006985 : *walk_subtrees = 0;
1003 :
1004 : return NULL_TREE;
1005 : }
1006 :
1007 : /* Returns true if EXPR contains a ssa name that occurs in an
1008 : abnormal phi node. */
1009 :
1010 : bool
1011 7873562 : contains_abnormal_ssa_name_p (tree expr)
1012 : {
1013 7873562 : return walk_tree_without_duplicates
1014 7873562 : (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1015 : }
1016 :
1017 : /* Returns the structure describing number of iterations determined from
1018 : EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 :
1020 : static class tree_niter_desc *
1021 4365671 : niter_for_exit (struct ivopts_data *data, edge exit)
1022 : {
1023 4365671 : class tree_niter_desc *desc;
1024 4365671 : tree_niter_desc **slot;
1025 :
1026 4365671 : if (!data->niters)
1027 : {
1028 471899 : data->niters = new hash_map<edge, tree_niter_desc *>;
1029 471899 : slot = NULL;
1030 : }
1031 : else
1032 3893772 : slot = data->niters->get (exit);
1033 :
1034 4365671 : if (!slot)
1035 : {
1036 : /* Try to determine number of iterations. We cannot safely work with ssa
1037 : names that appear in phi nodes on abnormal edges, so that we do not
1038 : create overlapping life ranges for them (PR 27283). */
1039 484081 : desc = XNEW (class tree_niter_desc);
1040 484081 : ::new (static_cast<void*> (desc)) tree_niter_desc ();
1041 484081 : if (!number_of_iterations_exit (data->current_loop,
1042 : exit, desc, true)
1043 484081 : || contains_abnormal_ssa_name_p (desc->niter))
1044 : {
1045 40231 : desc->~tree_niter_desc ();
1046 40231 : XDELETE (desc);
1047 40231 : desc = NULL;
1048 : }
1049 484081 : data->niters->put (exit, desc);
1050 : }
1051 : else
1052 3881590 : desc = *slot;
1053 :
1054 4365671 : return desc;
1055 : }
1056 :
1057 : /* Returns the structure describing number of iterations determined from
1058 : single dominating exit of DATA->current_loop, or NULL if something
1059 : goes wrong. */
1060 :
1061 : static class tree_niter_desc *
1062 67 : niter_for_single_dom_exit (struct ivopts_data *data)
1063 : {
1064 67 : edge exit = single_dom_exit (data->current_loop);
1065 :
1066 67 : if (!exit)
1067 : return NULL;
1068 :
1069 57 : return niter_for_exit (data, exit);
1070 : }
1071 :
1072 : /* Initializes data structures used by the iv optimization pass, stored
1073 : in DATA. */
1074 :
1075 : static void
1076 241428 : tree_ssa_iv_optimize_init (struct ivopts_data *data)
1077 : {
1078 241428 : data->version_info_size = 2 * num_ssa_names;
1079 241428 : data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1080 241428 : data->relevant = BITMAP_ALLOC (NULL);
1081 241428 : data->important_candidates = BITMAP_ALLOC (NULL);
1082 241428 : data->max_inv_var_id = 0;
1083 241428 : data->max_inv_expr_id = 0;
1084 241428 : data->niters = NULL;
1085 241428 : data->vgroups.create (20);
1086 241428 : data->vcands.create (20);
1087 241428 : data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1088 241428 : data->name_expansion_cache = NULL;
1089 241428 : data->base_object_map = NULL;
1090 241428 : data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1091 241428 : data->iv_common_cands.create (20);
1092 241428 : decl_rtl_to_reset.create (20);
1093 241428 : gcc_obstack_init (&data->iv_obstack);
1094 241428 : }
1095 :
1096 : /* walk_tree callback for determine_base_object. */
1097 :
1098 : static tree
1099 19224900 : determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1100 : {
1101 19224900 : tree_code code = TREE_CODE (*tp);
1102 19224900 : tree obj = NULL_TREE;
1103 19224900 : if (code == ADDR_EXPR)
1104 : {
1105 1016422 : tree base = get_base_address (TREE_OPERAND (*tp, 0));
1106 1016422 : if (!base)
1107 0 : obj = *tp;
1108 1016422 : else if (TREE_CODE (base) != MEM_REF)
1109 1016394 : obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1110 : }
1111 18208478 : else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1112 1908574 : obj = fold_convert (ptr_type_node, *tp);
1113 :
1114 2924968 : if (!obj)
1115 : {
1116 16299932 : if (!EXPR_P (*tp))
1117 7130094 : *walk_subtrees = 0;
1118 :
1119 16299932 : return NULL_TREE;
1120 : }
1121 : /* Record special node for multiple base objects and stop. */
1122 2924968 : if (*static_cast<tree *> (wdata))
1123 : {
1124 4254 : *static_cast<tree *> (wdata) = integer_zero_node;
1125 4254 : return integer_zero_node;
1126 : }
1127 : /* Record the base object and continue looking. */
1128 2920714 : *static_cast<tree *> (wdata) = obj;
1129 2920714 : return NULL_TREE;
1130 : }
1131 :
1132 : /* Returns a memory object to that EXPR points with caching. Return NULL if we
1133 : are able to determine that it does not point to any such object; specially
1134 : return integer_zero_node if EXPR contains multiple base objects. */
1135 :
1136 : static tree
1137 10375137 : determine_base_object (struct ivopts_data *data, tree expr)
1138 : {
1139 10375137 : tree *slot, obj = NULL_TREE;
1140 10375137 : if (data->base_object_map)
1141 : {
1142 10211864 : if ((slot = data->base_object_map->get(expr)) != NULL)
1143 4739772 : return *slot;
1144 : }
1145 : else
1146 163273 : data->base_object_map = new hash_map<tree, tree>;
1147 :
1148 5635365 : (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1149 5635365 : data->base_object_map->put (expr, obj);
1150 5635365 : return obj;
1151 : }
1152 :
1153 : /* Allocates an induction variable with given initial value BASE and step STEP
1154 : for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1155 :
1156 : static struct iv *
1157 10375137 : alloc_iv (struct ivopts_data *data, tree base, tree step,
1158 : bool no_overflow = false)
1159 : {
1160 10375137 : tree expr = base;
1161 10375137 : struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1162 : sizeof (struct iv));
1163 10375137 : gcc_assert (step != NULL_TREE);
1164 :
1165 : /* Canonicalize the address expression in base if it were an unsigned
1166 : computation. That leads to more equalities being detected and results in:
1167 :
1168 : 1) More accurate cost can be computed for address expressions;
1169 : 2) Duplicate candidates won't be created for bases in different
1170 : forms, like &a[0] and &a.
1171 : 3) Duplicate candidates won't be created for IV expressions that differ
1172 : only in their sign. */
1173 10375137 : aff_tree comb;
1174 10375137 : STRIP_NOPS (expr);
1175 10375137 : expr = fold_convert (unsigned_type_for (TREE_TYPE (expr)), expr);
1176 10375137 : tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1177 10375137 : base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1178 :
1179 10375137 : iv->base = base;
1180 10375137 : iv->base_object = determine_base_object (data, base);
1181 10375137 : iv->step = step;
1182 10375137 : iv->biv_p = false;
1183 10375137 : iv->nonlin_use = NULL;
1184 10375137 : iv->ssa_name = NULL_TREE;
1185 10375137 : if (!no_overflow
1186 10375137 : && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1187 : base, step))
1188 : no_overflow = true;
1189 10375137 : iv->no_overflow = no_overflow;
1190 10375137 : iv->have_address_use = false;
1191 :
1192 20750274 : return iv;
1193 10375137 : }
1194 :
1195 : /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1196 : doesn't overflow. */
1197 :
1198 : static void
1199 4886313 : set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1200 : bool no_overflow)
1201 : {
1202 4886313 : struct version_info *info = name_info (data, iv);
1203 :
1204 4886313 : gcc_assert (!info->iv);
1205 :
1206 4886313 : bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1207 4886313 : info->iv = alloc_iv (data, base, step, no_overflow);
1208 4886313 : info->iv->ssa_name = iv;
1209 4886313 : }
1210 :
1211 : /* Finds induction variable declaration for VAR. */
1212 :
1213 : static struct iv *
1214 44422237 : get_iv (struct ivopts_data *data, tree var)
1215 : {
1216 44422237 : basic_block bb;
1217 44422237 : tree type = TREE_TYPE (var);
1218 :
1219 44422237 : if (!POINTER_TYPE_P (type)
1220 35251568 : && !INTEGRAL_TYPE_P (type))
1221 : return NULL;
1222 :
1223 38727649 : if (!name_info (data, var)->iv)
1224 : {
1225 18005748 : bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1226 :
1227 18005748 : if (!bb
1228 18005748 : || !flow_bb_inside_loop_p (data->current_loop, bb))
1229 : {
1230 800175 : if (POINTER_TYPE_P (type))
1231 317450 : type = sizetype;
1232 800175 : set_iv (data, var, var, build_int_cst (type, 0), true);
1233 : }
1234 : }
1235 :
1236 38727649 : return name_info (data, var)->iv;
1237 : }
1238 :
1239 : /* Return the first non-invariant ssa var found in EXPR. */
1240 :
1241 : static tree
1242 4055524 : extract_single_var_from_expr (tree expr)
1243 : {
1244 4055524 : int i, n;
1245 4055524 : tree tmp;
1246 4055524 : enum tree_code code;
1247 :
1248 4055524 : if (!expr || is_gimple_min_invariant (expr))
1249 3390806 : return NULL;
1250 :
1251 664718 : code = TREE_CODE (expr);
1252 664718 : if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1253 : {
1254 359873 : n = TREE_OPERAND_LENGTH (expr);
1255 719817 : for (i = 0; i < n; i++)
1256 : {
1257 359944 : tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1258 :
1259 359944 : if (tmp)
1260 : return tmp;
1261 : }
1262 : }
1263 304845 : return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1264 : }
1265 :
1266 : /* Finds basic ivs. */
1267 :
1268 : static bool
1269 631201 : find_bivs (struct ivopts_data *data)
1270 : {
1271 631201 : gphi *phi;
1272 631201 : affine_iv iv;
1273 631201 : tree step, type, base, stop;
1274 631201 : bool found = false;
1275 631201 : class loop *loop = data->current_loop;
1276 631201 : gphi_iterator psi;
1277 :
1278 2357997 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1279 : {
1280 1726796 : phi = psi.phi ();
1281 :
1282 1726796 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1283 237 : continue;
1284 :
1285 1726559 : if (virtual_operand_p (PHI_RESULT (phi)))
1286 414184 : continue;
1287 :
1288 1312375 : if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1289 439346 : continue;
1290 :
1291 873029 : if (integer_zerop (iv.step))
1292 0 : continue;
1293 :
1294 873029 : step = iv.step;
1295 873029 : base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1296 : /* Stop expanding iv base at the first ssa var referred by iv step.
1297 : Ideally we should stop at any ssa var, because that's expensive
1298 : and unusual to happen, we just do it on the first one.
1299 :
1300 : See PR64705 for the rationale. */
1301 873029 : stop = extract_single_var_from_expr (step);
1302 873029 : base = expand_simple_operations (base, stop);
1303 873029 : if (contains_abnormal_ssa_name_p (base)
1304 873029 : || contains_abnormal_ssa_name_p (step))
1305 10 : continue;
1306 :
1307 873019 : type = TREE_TYPE (PHI_RESULT (phi));
1308 873019 : base = fold_convert (type, base);
1309 873019 : if (step)
1310 : {
1311 873019 : if (POINTER_TYPE_P (type))
1312 165016 : step = convert_to_ptrofftype (step);
1313 : else
1314 708003 : step = fold_convert (type, step);
1315 : }
1316 :
1317 873019 : set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1318 873019 : found = true;
1319 : }
1320 :
1321 631201 : return found;
1322 : }
1323 :
1324 : /* Marks basic ivs. */
1325 :
1326 : static void
1327 503213 : mark_bivs (struct ivopts_data *data)
1328 : {
1329 503213 : gphi *phi;
1330 503213 : gimple *def;
1331 503213 : tree var;
1332 503213 : struct iv *iv, *incr_iv;
1333 503213 : class loop *loop = data->current_loop;
1334 503213 : basic_block incr_bb;
1335 503213 : gphi_iterator psi;
1336 :
1337 503213 : data->bivs_not_used_in_addr = 0;
1338 1956455 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1339 : {
1340 1453242 : phi = psi.phi ();
1341 :
1342 1453242 : iv = get_iv (data, PHI_RESULT (phi));
1343 1453242 : if (!iv)
1344 580223 : continue;
1345 :
1346 873019 : var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1347 873019 : def = SSA_NAME_DEF_STMT (var);
1348 : /* Don't mark iv peeled from other one as biv. */
1349 874600 : if (def
1350 873019 : && gimple_code (def) == GIMPLE_PHI
1351 875733 : && gimple_bb (def) == loop->header)
1352 1581 : continue;
1353 :
1354 871438 : incr_iv = get_iv (data, var);
1355 871438 : if (!incr_iv)
1356 1144 : continue;
1357 :
1358 : /* If the increment is in the subloop, ignore it. */
1359 870294 : incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1360 870294 : if (incr_bb->loop_father != data->current_loop
1361 870294 : || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1362 0 : continue;
1363 :
1364 870294 : iv->biv_p = true;
1365 870294 : incr_iv->biv_p = true;
1366 870294 : if (iv->no_overflow)
1367 580183 : data->bivs_not_used_in_addr++;
1368 870294 : if (incr_iv->no_overflow)
1369 571663 : data->bivs_not_used_in_addr++;
1370 : }
1371 503213 : }
1372 :
1373 : /* Checks whether STMT defines a linear induction variable and stores its
1374 : parameters to IV. */
1375 :
1376 : static bool
1377 12541086 : find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1378 : {
1379 12541086 : tree lhs, stop;
1380 12541086 : class loop *loop = data->current_loop;
1381 :
1382 12541086 : iv->base = NULL_TREE;
1383 12541086 : iv->step = NULL_TREE;
1384 :
1385 12541086 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
1386 : return false;
1387 :
1388 10511727 : lhs = gimple_assign_lhs (stmt);
1389 10511727 : if (TREE_CODE (lhs) != SSA_NAME)
1390 : return false;
1391 :
1392 18811590 : if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1393 : return false;
1394 :
1395 : /* Stop expanding iv base at the first ssa var referred by iv step.
1396 : Ideally we should stop at any ssa var, because that's expensive
1397 : and unusual to happen, we just do it on the first one.
1398 :
1399 : See PR64705 for the rationale. */
1400 2822551 : stop = extract_single_var_from_expr (iv->step);
1401 2822551 : iv->base = expand_simple_operations (iv->base, stop);
1402 2822551 : if (contains_abnormal_ssa_name_p (iv->base)
1403 2822551 : || contains_abnormal_ssa_name_p (iv->step))
1404 6 : return false;
1405 :
1406 : /* If STMT could throw, then do not consider STMT as defining a GIV.
1407 : While this will suppress optimizations, we cannot safely delete this
1408 : GIV and associated statements, even if it appears it is not used. */
1409 2822545 : if (stmt_could_throw_p (cfun, stmt))
1410 : return false;
1411 :
1412 : return true;
1413 : }
1414 :
1415 : /* Finds general ivs in statement STMT. */
1416 :
1417 : static void
1418 12541086 : find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1419 : {
1420 12541086 : affine_iv iv;
1421 :
1422 12541086 : if (!find_givs_in_stmt_scev (data, stmt, &iv))
1423 9718549 : return;
1424 :
1425 2822537 : set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1426 : }
1427 :
1428 : /* Finds general ivs in basic block BB. */
1429 :
1430 : static void
1431 2809936 : find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1432 : {
1433 2809936 : gimple_stmt_iterator bsi;
1434 :
1435 27322660 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1436 21702788 : if (!is_gimple_debug (gsi_stmt (bsi)))
1437 12541086 : find_givs_in_stmt (data, gsi_stmt (bsi));
1438 2809936 : }
1439 :
1440 : /* Finds general ivs. */
1441 :
1442 : static void
1443 503213 : find_givs (struct ivopts_data *data, basic_block *body)
1444 : {
1445 503213 : class loop *loop = data->current_loop;
1446 503213 : unsigned i;
1447 :
1448 3313149 : for (i = 0; i < loop->num_nodes; i++)
1449 2809936 : find_givs_in_bb (data, body[i]);
1450 503213 : }
1451 :
1452 : /* For each ssa name defined in LOOP determines whether it is an induction
1453 : variable and if so, its initial value and step. */
1454 :
1455 : static bool
1456 631201 : find_induction_variables (struct ivopts_data *data, basic_block *body)
1457 : {
1458 631201 : unsigned i;
1459 631201 : bitmap_iterator bi;
1460 :
1461 631201 : if (!find_bivs (data))
1462 : return false;
1463 :
1464 503213 : find_givs (data, body);
1465 503213 : mark_bivs (data);
1466 :
1467 503213 : if (dump_file && (dump_flags & TDF_DETAILS))
1468 : {
1469 67 : class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1470 :
1471 67 : if (niter)
1472 : {
1473 51 : fprintf (dump_file, " number of iterations ");
1474 51 : print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1475 51 : if (!integer_zerop (niter->may_be_zero))
1476 : {
1477 1 : fprintf (dump_file, "; zero if ");
1478 1 : print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1479 : }
1480 51 : fprintf (dump_file, "\n");
1481 67 : };
1482 :
1483 67 : fprintf (dump_file, "\n<Induction Vars>:\n");
1484 819 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1485 : {
1486 752 : struct version_info *info = ver_info (data, i);
1487 752 : if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1488 550 : dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1489 : }
1490 : }
1491 :
1492 : return true;
1493 : }
1494 :
1495 : /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1496 : For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1497 : is the const offset stripped from IV base and MEM_TYPE is the type
1498 : of the memory being addressed. For uses of other types, ADDR_BASE
1499 : and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1500 :
1501 : static struct iv_use *
1502 2086561 : record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1503 : gimple *stmt, enum use_type type, tree mem_type,
1504 : tree addr_base, poly_uint64 addr_offset)
1505 : {
1506 2086561 : struct iv_use *use = XCNEW (struct iv_use);
1507 :
1508 2086561 : use->id = group->vuses.length ();
1509 2086561 : use->group_id = group->id;
1510 2086561 : use->type = type;
1511 2086561 : use->mem_type = mem_type;
1512 2086561 : use->iv = iv;
1513 2086561 : use->stmt = stmt;
1514 2086561 : use->op_p = use_p;
1515 2086561 : use->addr_base = addr_base;
1516 2086561 : use->addr_offset = addr_offset;
1517 :
1518 2086561 : group->vuses.safe_push (use);
1519 2086561 : return use;
1520 : }
1521 :
1522 : /* Checks whether OP is a loop-level invariant and if so, records it.
1523 : NONLINEAR_USE is true if the invariant is used in a way we do not
1524 : handle specially. */
1525 :
1526 : static void
1527 22589962 : record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1528 : {
1529 22589962 : basic_block bb;
1530 22589962 : struct version_info *info;
1531 :
1532 22589962 : if (TREE_CODE (op) != SSA_NAME
1533 22589962 : || virtual_operand_p (op))
1534 : return;
1535 :
1536 21411548 : bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1537 21411548 : if (bb
1538 21411548 : && flow_bb_inside_loop_p (data->current_loop, bb))
1539 : return;
1540 :
1541 3833387 : info = name_info (data, op);
1542 3833387 : info->name = op;
1543 3833387 : info->has_nonlin_use |= nonlinear_use;
1544 3833387 : if (!info->inv_id)
1545 1335224 : info->inv_id = ++data->max_inv_var_id;
1546 3833387 : bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1547 : }
1548 :
1549 : /* Record a group of TYPE. */
1550 :
1551 : static struct iv_group *
1552 1810751 : record_group (struct ivopts_data *data, enum use_type type)
1553 : {
1554 1810751 : struct iv_group *group = XCNEW (struct iv_group);
1555 :
1556 1810751 : group->id = data->vgroups.length ();
1557 1810751 : group->type = type;
1558 1810751 : group->related_cands = BITMAP_ALLOC (NULL);
1559 1810751 : group->vuses.create (1);
1560 1810751 : group->doloop_p = false;
1561 :
1562 1810751 : data->vgroups.safe_push (group);
1563 1810751 : return group;
1564 : }
1565 :
1566 : /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1567 : New group will be created if there is no existing group for the use.
1568 : MEM_TYPE is the type of memory being addressed, or NULL if this
1569 : isn't an address reference. */
1570 :
1571 : static struct iv_use *
1572 2086561 : record_group_use (struct ivopts_data *data, tree *use_p,
1573 : struct iv *iv, gimple *stmt, enum use_type type,
1574 : tree mem_type)
1575 : {
1576 2086561 : tree addr_base = NULL;
1577 2086561 : struct iv_group *group = NULL;
1578 2086561 : poly_uint64 addr_offset = 0;
1579 :
1580 : /* Record non address type use in a new group. */
1581 2086561 : if (address_p (type))
1582 : {
1583 857691 : unsigned int i;
1584 :
1585 857691 : gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1586 857691 : tree addr_toffset;
1587 857691 : split_constant_offset (iv->base, &addr_base, &addr_toffset);
1588 857691 : addr_offset = int_cst_value (addr_toffset);
1589 1612900 : for (i = 0; i < data->vgroups.length (); i++)
1590 : {
1591 1083909 : struct iv_use *use;
1592 :
1593 1083909 : group = data->vgroups[i];
1594 1083909 : use = group->vuses[0];
1595 1083909 : if (!address_p (use->type))
1596 334679 : continue;
1597 :
1598 : /* Check if it has the same stripped base and step. */
1599 749230 : if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1600 397315 : && operand_equal_p (iv->step, use->iv->step, OEP_ASSUME_WRAPV)
1601 1143447 : && operand_equal_p (addr_base, use->addr_base, OEP_ASSUME_WRAPV))
1602 : break;
1603 : }
1604 1715382 : if (i == data->vgroups.length ())
1605 528991 : group = NULL;
1606 : }
1607 :
1608 857691 : if (!group)
1609 1757861 : group = record_group (data, type);
1610 :
1611 2086561 : return record_use (group, use_p, iv, stmt, type, mem_type,
1612 2086561 : addr_base, addr_offset);
1613 : }
1614 :
1615 : /* Checks whether the use OP is interesting and if so, records it. */
1616 :
1617 : static struct iv_use *
1618 7245282 : find_interesting_uses_op (struct ivopts_data *data, tree op)
1619 : {
1620 7245282 : struct iv *iv;
1621 7245282 : gimple *stmt;
1622 7245282 : struct iv_use *use;
1623 :
1624 7245282 : if (TREE_CODE (op) != SSA_NAME)
1625 : return NULL;
1626 :
1627 5830288 : iv = get_iv (data, op);
1628 5830288 : if (!iv)
1629 : return NULL;
1630 :
1631 2523071 : if (iv->nonlin_use)
1632 : {
1633 198730 : gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1634 : return iv->nonlin_use;
1635 : }
1636 :
1637 2324341 : if (integer_zerop (iv->step))
1638 : {
1639 1695303 : record_invariant (data, op, true);
1640 1695303 : return NULL;
1641 : }
1642 :
1643 629038 : stmt = SSA_NAME_DEF_STMT (op);
1644 629038 : gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1645 :
1646 629038 : use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1647 629038 : iv->nonlin_use = use;
1648 629038 : return use;
1649 : }
1650 :
/* Indicate how compare type iv_use can be handled.  This is the result
   type of extract_cond_operands.  */
enum comp_iv_rewrite
{
  /* No rewrite applies; extract_cond_operands returns this when neither
     operand of the comparison is an iv with a nonzero step.  */
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1664 :
/* Given a condition in statement STMT, classify it by how its compare
   type uses can be rewritten and hand back its operands.  STMT is either
   a GIMPLE_COND or an assignment whose rhs1/rhs2 are the compared
   operands.

   On return, CONTROL_VAR is set to the location of the operand treated as
   the iv, BOUND to the location of the other operand, and IV_VAR and
   IV_BOUND to the corresponding induction variable descriptions; the
   operands are swapped if only the second one is an iv with nonzero step.
   Each of the four output pointers may be NULL, in which case it is not
   stored to.  An operand that is not an SSA name is described by a static
   iv with zero step.

   The return value is:
     COMP_IV_EXPR_2  both operands are ivs with nonzero step;
     COMP_IV_NA      neither operand is an iv with nonzero step;
     COMP_IV_EXPR    one operand is an iv with nonzero step and the other
		     has no iv description;
     COMP_IV_ELIM    one operand is an iv with nonzero step and the other
		     is described by an iv with zero step (an invariant).  */

static enum comp_iv_rewrite
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  enum comp_iv_rewrite rewrite_type = COMP_IV_NA;

  /* Fetch the locations of the two compared operands.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  /* (Re)initialize the static placeholders on every call.  */
  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  /* Only SSA names are looked up; other operands keep the zero-step
     placeholder iv.  get_iv may return NULL.  */
  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* If both sides of the comparison are ivs, we can express the ivs on
     both ends.  */
  if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      rewrite_type = COMP_IV_EXPR_2;
      goto end;
    }

  /* If neither side of the comparison is an iv.  */
  if ((!iv0 || integer_zerop (iv0->step))
      && (!iv1 || integer_zerop (iv1->step)))
    goto end;

  /* Control variable may be on the other side.  */
  if (!iv0 || integer_zerop (iv0->step))
    {
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  /* If one side is IV and the other side isn't loop invariant.  */
  if (!iv1)
    rewrite_type = COMP_IV_EXPR;
  /* If one side is IV and the other side is loop invariant.  */
  else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
    rewrite_type = COMP_IV_ELIM;

end:
  /* Store the results through whichever output pointers were supplied.  */
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return rewrite_type;
}
1742 :
1743 : /* Checks whether the condition in STMT is interesting and if so,
1744 : records it. */
1745 :
1746 : static void
1747 1546573 : find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1748 : {
1749 1546573 : tree *var_p, *bound_p;
1750 1546573 : struct iv *var_iv, *bound_iv;
1751 1546573 : enum comp_iv_rewrite ret;
1752 :
1753 1546573 : ret = extract_cond_operands (data, stmt,
1754 : &var_p, &bound_p, &var_iv, &bound_iv);
1755 1546573 : if (ret == COMP_IV_NA)
1756 : {
1757 948856 : find_interesting_uses_op (data, *var_p);
1758 948856 : find_interesting_uses_op (data, *bound_p);
1759 948856 : return;
1760 : }
1761 :
1762 597717 : record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1763 : /* Record compare type iv_use for iv on the other side of comparison. */
1764 597717 : if (ret == COMP_IV_EXPR_2)
1765 2115 : record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1766 : }
1767 :
1768 : /* Returns the outermost loop EXPR is obviously invariant in
1769 : relative to the loop LOOP, i.e. if all its operands are defined
1770 : outside of the returned loop. Returns NULL if EXPR is not
1771 : even obviously invariant in LOOP. */
1772 :
1773 : class loop *
1774 247775 : outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1775 : {
1776 247775 : basic_block def_bb;
1777 247775 : unsigned i, len;
1778 :
1779 247775 : if (is_gimple_min_invariant (expr))
1780 37245 : return current_loops->tree_root;
1781 :
1782 210530 : if (TREE_CODE (expr) == SSA_NAME)
1783 : {
1784 129165 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1785 129165 : if (def_bb)
1786 : {
1787 80236 : if (flow_bb_inside_loop_p (loop, def_bb))
1788 : return NULL;
1789 160460 : return superloop_at_depth (loop,
1790 105331 : loop_depth (def_bb->loop_father) + 1);
1791 : }
1792 :
1793 48929 : return current_loops->tree_root;
1794 : }
1795 :
1796 81365 : if (!EXPR_P (expr))
1797 : return NULL;
1798 :
1799 81365 : unsigned maxdepth = 0;
1800 81365 : len = TREE_OPERAND_LENGTH (expr);
1801 211500 : for (i = 0; i < len; i++)
1802 : {
1803 130153 : class loop *ivloop;
1804 130153 : if (!TREE_OPERAND (expr, i))
1805 0 : continue;
1806 :
1807 130153 : ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1808 130153 : if (!ivloop)
1809 : return NULL;
1810 228149 : maxdepth = MAX (maxdepth, loop_depth (ivloop));
1811 : }
1812 :
1813 81347 : return superloop_at_depth (loop, maxdepth);
1814 : }
1815 :
1816 : /* Returns true if expression EXPR is obviously invariant in LOOP,
1817 : i.e. if all its operands are defined outside of the LOOP. LOOP
1818 : should not be the function body. */
1819 :
1820 : bool
1821 12228409 : expr_invariant_in_loop_p (class loop *loop, tree expr)
1822 : {
1823 12228409 : basic_block def_bb;
1824 12228409 : unsigned i, len;
1825 :
1826 12228409 : gcc_assert (loop_depth (loop) > 0);
1827 :
1828 12228409 : if (is_gimple_min_invariant (expr))
1829 : return true;
1830 :
1831 8588802 : if (TREE_CODE (expr) == SSA_NAME)
1832 : {
1833 8140661 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1834 8140661 : if (def_bb
1835 8140661 : && flow_bb_inside_loop_p (loop, def_bb))
1836 : return false;
1837 :
1838 4143866 : return true;
1839 : }
1840 :
1841 448141 : if (!EXPR_P (expr))
1842 : return false;
1843 :
1844 448138 : len = TREE_OPERAND_LENGTH (expr);
1845 953444 : for (i = 0; i < len; i++)
1846 565518 : if (TREE_OPERAND (expr, i)
1847 565518 : && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1848 : return false;
1849 :
1850 : return true;
1851 : }
1852 :
1853 : /* Given expression EXPR which computes inductive values with respect
1854 : to loop recorded in DATA, this function returns biv from which EXPR
1855 : is derived by tracing definition chains of ssa variables in EXPR. */
1856 :
1857 : static struct iv*
1858 867058 : find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1859 : {
1860 1400957 : struct iv *iv;
1861 1400957 : unsigned i, n;
1862 1400957 : tree e2, e1;
1863 1400957 : enum tree_code code;
1864 1400957 : gimple *stmt;
1865 :
1866 1400957 : if (expr == NULL_TREE)
1867 : return NULL;
1868 :
1869 1400644 : if (is_gimple_min_invariant (expr))
1870 : return NULL;
1871 :
1872 1117923 : code = TREE_CODE (expr);
1873 1117923 : if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1874 : {
1875 21567 : n = TREE_OPERAND_LENGTH (expr);
1876 23675 : for (i = 0; i < n; i++)
1877 : {
1878 23152 : iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1879 23152 : if (iv)
1880 : return iv;
1881 : }
1882 : }
1883 :
1884 : /* Stop if it's not ssa name. */
1885 1096879 : if (code != SSA_NAME)
1886 : return NULL;
1887 :
1888 1095738 : iv = get_iv (data, expr);
1889 1095738 : if (!iv || integer_zerop (iv->step))
1890 46478 : return NULL;
1891 1049260 : else if (iv->biv_p)
1892 : return iv;
1893 :
1894 779930 : stmt = SSA_NAME_DEF_STMT (expr);
1895 779930 : if (gphi *phi = dyn_cast <gphi *> (stmt))
1896 : {
1897 1855 : ssa_op_iter iter;
1898 1855 : use_operand_p use_p;
1899 1855 : basic_block phi_bb = gimple_bb (phi);
1900 :
1901 : /* Skip loop header PHI that doesn't define biv. */
1902 1855 : if (phi_bb->loop_father == data->current_loop)
1903 : return NULL;
1904 :
1905 0 : if (virtual_operand_p (gimple_phi_result (phi)))
1906 : return NULL;
1907 :
1908 0 : FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1909 : {
1910 0 : tree use = USE_FROM_PTR (use_p);
1911 0 : iv = find_deriving_biv_for_expr (data, use);
1912 0 : if (iv)
1913 : return iv;
1914 : }
1915 : return NULL;
1916 : }
1917 778075 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
1918 : return NULL;
1919 :
1920 778075 : e1 = gimple_assign_rhs1 (stmt);
1921 778075 : code = gimple_assign_rhs_code (stmt);
1922 778075 : if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1923 : return find_deriving_biv_for_expr (data, e1);
1924 :
1925 768044 : switch (code)
1926 : {
1927 571662 : case MULT_EXPR:
1928 571662 : case PLUS_EXPR:
1929 571662 : case MINUS_EXPR:
1930 571662 : case POINTER_PLUS_EXPR:
1931 : /* Increments, decrements and multiplications by a constant
1932 : are simple. */
1933 571662 : e2 = gimple_assign_rhs2 (stmt);
1934 571662 : iv = find_deriving_biv_for_expr (data, e2);
1935 571662 : if (iv)
1936 : return iv;
1937 523868 : gcc_fallthrough ();
1938 :
1939 523868 : CASE_CONVERT:
1940 : /* Casts are simple. */
1941 523868 : return find_deriving_biv_for_expr (data, e1);
1942 :
1943 : default:
1944 : break;
1945 : }
1946 :
1947 : return NULL;
1948 : }
1949 :
1950 : /* Record BIV, its predecessor and successor that they are used in
1951 : address type uses. */
1952 :
1953 : static void
1954 600393 : record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1955 : {
1956 600393 : unsigned i;
1957 600393 : tree type, base_1, base_2;
1958 600393 : bitmap_iterator bi;
1959 :
1960 597479 : if (!biv || !biv->biv_p || integer_zerop (biv->step)
1961 1197872 : || biv->have_address_use || !biv->no_overflow)
1962 335892 : return;
1963 :
1964 532059 : type = TREE_TYPE (biv->base);
1965 532059 : if (!INTEGRAL_TYPE_P (type))
1966 : return;
1967 :
1968 264501 : biv->have_address_use = true;
1969 264501 : data->bivs_not_used_in_addr--;
1970 264501 : base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1971 2434417 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1972 : {
1973 2169916 : struct iv *iv = ver_info (data, i)->iv;
1974 :
1975 1957429 : if (!iv || !iv->biv_p || integer_zerop (iv->step)
1976 3071685 : || iv->have_address_use || !iv->no_overflow)
1977 1877743 : continue;
1978 :
1979 292173 : if (type != TREE_TYPE (iv->base)
1980 292173 : || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1981 31233 : continue;
1982 :
1983 260940 : if (!operand_equal_p (biv->step, iv->step, 0))
1984 5794 : continue;
1985 :
1986 255146 : base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1987 255146 : if (operand_equal_p (base_1, iv->base, 0)
1988 255146 : || operand_equal_p (base_2, biv->base, 0))
1989 : {
1990 228561 : iv->have_address_use = true;
1991 228561 : data->bivs_not_used_in_addr--;
1992 : }
1993 : }
1994 : }
1995 :
1996 : /* Cumulates the steps of indices into DATA and replaces their values with the
1997 : initial ones. Returns false when the value of the index cannot be determined.
1998 : Callback for for_each_index. */
1999 :
2000 : struct ifs_ivopts_data
2001 : {
2002 : struct ivopts_data *ivopts_data;
2003 : gimple *stmt;
2004 : tree step;
2005 : };
2006 :
2007 : static bool
2008 2226726 : idx_find_step (tree base, tree *idx, void *data)
2009 : {
2010 2226726 : struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2011 2226726 : struct iv *iv;
2012 2226726 : bool use_overflow_semantics = false;
2013 2226726 : tree step, iv_base, iv_step, lbound, off;
2014 2226726 : class loop *loop = dta->ivopts_data->current_loop;
2015 :
2016 : /* If base is a component ref, require that the offset of the reference
2017 : be invariant. */
2018 2226726 : if (TREE_CODE (base) == COMPONENT_REF)
2019 : {
2020 78 : off = component_ref_field_offset (base);
2021 78 : return expr_invariant_in_loop_p (loop, off);
2022 : }
2023 :
2024 : /* If base is array, first check whether we will be able to move the
2025 : reference out of the loop (in order to take its address in strength
2026 : reduction). In order for this to work we need both lower bound
2027 : and step to be loop invariants. */
2028 2226648 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2029 : {
2030 : /* Moreover, for a range, the size needs to be invariant as well. */
2031 522832 : if (TREE_CODE (base) == ARRAY_RANGE_REF
2032 522832 : && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2033 : return false;
2034 :
2035 522832 : step = array_ref_element_size (base);
2036 522832 : lbound = array_ref_low_bound (base);
2037 :
2038 522832 : if (!expr_invariant_in_loop_p (loop, step)
2039 522832 : || !expr_invariant_in_loop_p (loop, lbound))
2040 3150 : return false;
2041 : }
2042 :
2043 2223498 : if (TREE_CODE (*idx) != SSA_NAME)
2044 : return true;
2045 :
2046 1805267 : iv = get_iv (dta->ivopts_data, *idx);
2047 1805267 : if (!iv)
2048 : return false;
2049 :
2050 : /* XXX We produce for a base of *D42 with iv->base being &x[0]
2051 : *&x[0], which is not folded and does not trigger the
2052 : ARRAY_REF path below. */
2053 1168113 : *idx = iv->base;
2054 :
2055 1168113 : if (integer_zerop (iv->step))
2056 : return true;
2057 :
2058 874189 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2059 : {
2060 307927 : step = array_ref_element_size (base);
2061 :
2062 : /* We only handle addresses whose step is an integer constant. */
2063 307927 : if (TREE_CODE (step) != INTEGER_CST)
2064 : return false;
2065 : }
2066 : else
2067 : /* The step for pointer arithmetics already is 1 byte. */
2068 566262 : step = size_one_node;
2069 :
2070 874172 : iv_base = iv->base;
2071 874172 : iv_step = iv->step;
2072 874172 : if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2073 : use_overflow_semantics = true;
2074 :
2075 874172 : if (!convert_affine_scev (dta->ivopts_data->current_loop,
2076 : sizetype, &iv_base, &iv_step, dta->stmt,
2077 : use_overflow_semantics))
2078 : {
2079 : /* The index might wrap. */
2080 : return false;
2081 : }
2082 :
2083 870855 : step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2084 870855 : dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2085 :
2086 870855 : if (dta->ivopts_data->bivs_not_used_in_addr)
2087 : {
2088 600393 : if (!iv->biv_p)
2089 272244 : iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2090 :
2091 600393 : record_biv_for_address_use (dta->ivopts_data, iv);
2092 : }
2093 : return true;
2094 : }
2095 :
2096 : /* Records use in index IDX. Callback for for_each_index. Ivopts data
2097 : object is passed to it in DATA. */
2098 :
2099 : static bool
2100 1818267 : idx_record_use (tree base, tree *idx,
2101 : void *vdata)
2102 : {
2103 1818267 : struct ivopts_data *data = (struct ivopts_data *) vdata;
2104 1818267 : find_interesting_uses_op (data, *idx);
2105 1818267 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2106 : {
2107 231880 : if (TREE_OPERAND (base, 2))
2108 5502 : find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2109 231880 : if (TREE_OPERAND (base, 3))
2110 16796 : find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2111 : }
2112 1818267 : return true;
2113 : }
2114 :
2115 : /* If we can prove that TOP = cst * BOT for some constant cst,
2116 : store cst to MUL and return true. Otherwise return false.
2117 : The returned value is always sign-extended, regardless of the
2118 : signedness of TOP and BOT. */
2119 :
2120 : static bool
2121 17231471 : constant_multiple_of (tree top, tree bot, widest_int *mul,
2122 : struct ivopts_data *data)
2123 : {
2124 34462942 : aff_tree aff_top, aff_bot;
2125 17231471 : tree_to_aff_combination_expand (top, TREE_TYPE (top), &aff_top,
2126 : &data->name_expansion_cache);
2127 17231471 : tree_to_aff_combination_expand (bot, TREE_TYPE (bot), &aff_bot,
2128 : &data->name_expansion_cache);
2129 :
2130 17231471 : poly_widest_int poly_mul;
2131 17231471 : if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul)
2132 17231471 : && poly_mul.is_constant (mul))
2133 14294337 : return true;
2134 :
2135 : return false;
2136 17231471 : }
2137 :
2138 : /* Return true if memory reference REF with step STEP may be unaligned. */
2139 :
2140 : static bool
2141 0 : may_be_unaligned_p (tree ref, tree step)
2142 : {
2143 : /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2144 : thus they are not misaligned. */
2145 0 : if (TREE_CODE (ref) == TARGET_MEM_REF)
2146 : return false;
2147 :
2148 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2149 0 : if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2150 0 : align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2151 :
2152 0 : unsigned HOST_WIDE_INT bitpos;
2153 0 : unsigned int ref_align;
2154 0 : get_object_alignment_1 (ref, &ref_align, &bitpos);
2155 0 : if (ref_align < align
2156 0 : || (bitpos % align) != 0
2157 0 : || (bitpos % BITS_PER_UNIT) != 0)
2158 : return true;
2159 :
2160 0 : unsigned int trailing_zeros = tree_ctz (step);
2161 0 : if (trailing_zeros < HOST_BITS_PER_INT
2162 0 : && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2163 : return true;
2164 :
2165 : return false;
2166 : }
2167 :
2168 : /* Return true if EXPR may be non-addressable. */
2169 :
2170 : bool
2171 13061655 : may_be_nonaddressable_p (tree expr)
2172 : {
2173 13942003 : switch (TREE_CODE (expr))
2174 : {
2175 9266949 : case VAR_DECL:
2176 : /* Check if it's a register variable. */
2177 9266949 : return DECL_HARD_REGISTER (expr);
2178 :
2179 : case TARGET_MEM_REF:
2180 : /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2181 : target, thus they are always addressable. */
2182 : return false;
2183 :
2184 1962260 : case MEM_REF:
2185 : /* Likewise for MEM_REFs, modulo the storage order. */
2186 1962260 : return REF_REVERSE_STORAGE_ORDER (expr);
2187 :
2188 80 : case BIT_FIELD_REF:
2189 80 : if (REF_REVERSE_STORAGE_ORDER (expr))
2190 : return true;
2191 80 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2192 :
2193 1259374 : case COMPONENT_REF:
2194 1259374 : if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2195 : return true;
2196 1259374 : return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2197 1259374 : || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2198 :
2199 858830 : case ARRAY_REF:
2200 858830 : case ARRAY_RANGE_REF:
2201 858830 : if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2202 : return true;
2203 858830 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2204 :
2205 21448 : case VIEW_CONVERT_EXPR:
2206 : /* This kind of view-conversions may wrap non-addressable objects
2207 : and make them look addressable. After some processing the
2208 : non-addressability may be uncovered again, causing ADDR_EXPRs
2209 : of inappropriate objects to be built. */
2210 21448 : if (is_gimple_reg (TREE_OPERAND (expr, 0))
2211 21448 : || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2212 : return true;
2213 21438 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2214 :
2215 : CASE_CONVERT:
2216 : return true;
2217 :
2218 : default:
2219 : break;
2220 : }
2221 :
2222 : return false;
2223 : }
2224 :
2225 : /* Finds addresses in *OP_P inside STMT. */
2226 :
2227 : static void
2228 2702505 : find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2229 : tree *op_p)
2230 : {
2231 2702505 : tree base = *op_p, step = size_zero_node;
2232 2702505 : struct iv *civ;
2233 2702505 : struct ifs_ivopts_data ifs_ivopts_data;
2234 :
2235 : /* Do not play with volatile memory references. A bit too conservative,
2236 : perhaps, but safe. */
2237 5405010 : if (gimple_has_volatile_ops (stmt))
2238 7529 : goto fail;
2239 :
2240 : /* Ignore bitfields for now. Not really something terribly complicated
2241 : to handle. TODO. */
2242 2694976 : if (TREE_CODE (base) == BIT_FIELD_REF)
2243 94332 : goto fail;
2244 :
2245 2600644 : base = unshare_expr (base);
2246 :
2247 2600644 : if (TREE_CODE (base) == TARGET_MEM_REF)
2248 : {
2249 314785 : tree type = build_pointer_type (TREE_TYPE (base));
2250 314785 : tree astep;
2251 :
2252 314785 : if (TMR_BASE (base)
2253 314785 : && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2254 : {
2255 292603 : civ = get_iv (data, TMR_BASE (base));
2256 292603 : if (!civ)
2257 257436 : goto fail;
2258 :
2259 35167 : TMR_BASE (base) = civ->base;
2260 35167 : step = civ->step;
2261 : }
2262 57349 : if (TMR_INDEX2 (base)
2263 57349 : && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2264 : {
2265 14056 : civ = get_iv (data, TMR_INDEX2 (base));
2266 14056 : if (!civ)
2267 4981 : goto fail;
2268 :
2269 9075 : TMR_INDEX2 (base) = civ->base;
2270 9075 : step = civ->step;
2271 : }
2272 52368 : if (TMR_INDEX (base)
2273 52368 : && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2274 : {
2275 52368 : civ = get_iv (data, TMR_INDEX (base));
2276 52368 : if (!civ)
2277 52368 : goto fail;
2278 :
2279 0 : TMR_INDEX (base) = civ->base;
2280 0 : astep = civ->step;
2281 :
2282 0 : if (astep)
2283 : {
2284 0 : if (TMR_STEP (base))
2285 0 : astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2286 :
2287 0 : step = fold_build2 (PLUS_EXPR, type, step, astep);
2288 : }
2289 : }
2290 :
2291 0 : if (integer_zerop (step))
2292 0 : goto fail;
2293 0 : base = tree_mem_ref_addr (type, base);
2294 : }
2295 : else
2296 : {
2297 2285859 : ifs_ivopts_data.ivopts_data = data;
2298 2285859 : ifs_ivopts_data.stmt = stmt;
2299 2285859 : ifs_ivopts_data.step = size_zero_node;
2300 2285859 : if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2301 2285859 : || integer_zerop (ifs_ivopts_data.step))
2302 1416972 : goto fail;
2303 868887 : step = ifs_ivopts_data.step;
2304 :
2305 : /* Check that the base expression is addressable. This needs
2306 : to be done after substituting bases of IVs into it. */
2307 868887 : if (may_be_nonaddressable_p (base))
2308 782 : goto fail;
2309 :
2310 : /* Moreover, on strict alignment platforms, check that it is
2311 : sufficiently aligned. */
2312 868105 : if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2313 : goto fail;
2314 :
2315 868105 : base = build_fold_addr_expr (base);
2316 :
2317 : /* Substituting bases of IVs into the base expression might
2318 : have caused folding opportunities. */
2319 868105 : if (TREE_CODE (base) == ADDR_EXPR)
2320 : {
2321 462858 : tree *ref = &TREE_OPERAND (base, 0);
2322 1601848 : while (handled_component_p (*ref))
2323 676132 : ref = &TREE_OPERAND (*ref, 0);
2324 462858 : if (TREE_CODE (*ref) == MEM_REF)
2325 : {
2326 303940 : tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2327 : TREE_OPERAND (*ref, 0),
2328 : TREE_OPERAND (*ref, 1));
2329 303940 : if (tem)
2330 0 : *ref = tem;
2331 : }
2332 : }
2333 : }
2334 :
2335 868105 : civ = alloc_iv (data, base, step);
2336 : /* Fail if base object of this memory reference is unknown. */
2337 868105 : if (civ->base_object == NULL_TREE)
2338 11209 : goto fail;
2339 :
2340 856896 : record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2341 856896 : return;
2342 :
2343 1845609 : fail:
2344 1845609 : for_each_index (op_p, idx_record_use, data);
2345 : }
2346 :
2347 : /* Finds and records invariants used in STMT. */
2348 :
2349 : static void
2350 15400139 : find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2351 : {
2352 15400139 : ssa_op_iter iter;
2353 15400139 : use_operand_p use_p;
2354 15400139 : tree op;
2355 :
2356 51304355 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2357 : {
2358 20504077 : op = USE_FROM_PTR (use_p);
2359 20504077 : record_invariant (data, op, false);
2360 : }
2361 15400139 : }
2362 :
2363 : /* CALL calls an internal function. If operand *OP_P will become an
2364 : address when the call is expanded, return the type of the memory
2365 : being addressed, otherwise return null. */
2366 :
2367 : static tree
2368 1783 : get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2369 : {
2370 1783 : switch (gimple_call_internal_fn (call))
2371 : {
2372 371 : case IFN_MASK_LOAD:
2373 371 : case IFN_MASK_LOAD_LANES:
2374 371 : case IFN_MASK_LEN_LOAD_LANES:
2375 371 : case IFN_LEN_LOAD:
2376 371 : case IFN_MASK_LEN_LOAD:
2377 371 : if (op_p == gimple_call_arg_ptr (call, 0))
2378 371 : return TREE_TYPE (gimple_call_lhs (call));
2379 : return NULL_TREE;
2380 :
2381 424 : case IFN_MASK_STORE:
2382 424 : case IFN_MASK_STORE_LANES:
2383 424 : case IFN_MASK_LEN_STORE_LANES:
2384 424 : case IFN_LEN_STORE:
2385 424 : case IFN_MASK_LEN_STORE:
2386 424 : {
2387 424 : if (op_p == gimple_call_arg_ptr (call, 0))
2388 : {
2389 424 : internal_fn ifn = gimple_call_internal_fn (call);
2390 424 : int index = internal_fn_stored_value_index (ifn);
2391 424 : return TREE_TYPE (gimple_call_arg (call, index));
2392 : }
2393 : return NULL_TREE;
2394 : }
2395 :
2396 : default:
2397 : return NULL_TREE;
2398 : }
2399 : }
2400 :
2401 : /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2402 : Return true if the operand will become an address when STMT
2403 : is expanded and record the associated address use if so. */
2404 :
2405 : static bool
2406 1704965 : find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2407 : struct iv *iv)
2408 : {
2409 : /* Fail if base object of this memory reference is unknown. */
2410 1704965 : if (iv->base_object == NULL_TREE)
2411 : return false;
2412 :
2413 643900 : tree mem_type = NULL_TREE;
2414 643900 : if (gcall *call = dyn_cast <gcall *> (stmt))
2415 122166 : if (gimple_call_internal_p (call))
2416 1783 : mem_type = get_mem_type_for_internal_fn (call, op_p);
2417 1783 : if (mem_type)
2418 : {
2419 795 : iv = alloc_iv (data, iv->base, iv->step);
2420 795 : record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2421 795 : return true;
2422 : }
2423 : return false;
2424 : }
2425 :
2426 : /* Finds interesting uses of induction variables in the statement STMT. */
2427 :
2428 : static void
2429 15400139 : find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2430 : {
2431 15400139 : struct iv *iv;
2432 15400139 : tree op, *lhs, *rhs;
2433 15400139 : ssa_op_iter iter;
2434 15400139 : use_operand_p use_p;
2435 15400139 : enum tree_code code;
2436 :
2437 15400139 : find_invariants_stmt (data, stmt);
2438 :
2439 15400139 : if (gimple_code (stmt) == GIMPLE_COND)
2440 : {
2441 1462615 : find_interesting_uses_cond (data, stmt);
2442 9020520 : return;
2443 : }
2444 :
2445 13937524 : if (is_gimple_assign (stmt))
2446 : {
2447 10511727 : lhs = gimple_assign_lhs_ptr (stmt);
2448 10511727 : rhs = gimple_assign_rhs1_ptr (stmt);
2449 :
2450 10511727 : if (TREE_CODE (*lhs) == SSA_NAME)
2451 : {
2452 : /* If the statement defines an induction variable, the uses are not
2453 : interesting by themselves. */
2454 :
2455 9405795 : iv = get_iv (data, *lhs);
2456 :
2457 9405795 : if (iv && !integer_zerop (iv->step))
2458 : return;
2459 : }
2460 :
2461 8173112 : code = gimple_assign_rhs_code (stmt);
2462 8173112 : if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2463 8173112 : && (REFERENCE_CLASS_P (*rhs)
2464 1251597 : || is_gimple_val (*rhs)))
2465 : {
2466 2799698 : if (REFERENCE_CLASS_P (*rhs))
2467 1742870 : find_interesting_uses_address (data, stmt, rhs);
2468 : else
2469 1056828 : find_interesting_uses_op (data, *rhs);
2470 :
2471 2799698 : if (REFERENCE_CLASS_P (*lhs))
2472 959635 : find_interesting_uses_address (data, stmt, lhs);
2473 2799698 : return;
2474 : }
2475 5373414 : else if (TREE_CODE_CLASS (code) == tcc_comparison)
2476 : {
2477 83958 : find_interesting_uses_cond (data, stmt);
2478 83958 : return;
2479 : }
2480 :
2481 : /* TODO -- we should also handle address uses of type
2482 :
2483 : memory = call (whatever);
2484 :
2485 : and
2486 :
2487 : call (memory). */
2488 : }
2489 :
2490 8715253 : if (gimple_code (stmt) == GIMPLE_PHI
2491 8715253 : && gimple_bb (stmt) == data->current_loop->header)
2492 : {
2493 1453242 : iv = get_iv (data, PHI_RESULT (stmt));
2494 :
2495 1453242 : if (iv && !integer_zerop (iv->step))
2496 : return;
2497 : }
2498 :
2499 26351655 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2500 : {
2501 10667187 : op = USE_FROM_PTR (use_p);
2502 :
2503 10667187 : if (TREE_CODE (op) != SSA_NAME)
2504 518016 : continue;
2505 :
2506 10149171 : iv = get_iv (data, op);
2507 10149171 : if (!iv)
2508 8444206 : continue;
2509 :
2510 1704965 : if (!find_address_like_use (data, stmt, use_p->use, iv))
2511 1704170 : find_interesting_uses_op (data, op);
2512 : }
2513 : }
2514 :
2515 : /* Finds interesting uses of induction variables outside of loops
2516 : on loop exit edge EXIT. */
2517 :
2518 : static void
2519 891275 : find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2520 : {
2521 891275 : gphi *phi;
2522 891275 : gphi_iterator psi;
2523 891275 : tree def;
2524 :
2525 1991487 : for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2526 : {
2527 1100212 : phi = psi.phi ();
2528 1100212 : def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2529 2108976 : if (!virtual_operand_p (def))
2530 543735 : find_interesting_uses_op (data, def);
2531 : }
2532 891275 : }
2533 :
2534 : /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2535 : mode for memory reference represented by USE. */
2536 :
2537 : static GTY (()) vec<rtx, va_gc> *addr_list;
2538 :
2539 : static bool
2540 217506 : addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2541 : {
2542 217506 : rtx reg, addr;
2543 217506 : unsigned list_index;
2544 217506 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2545 217506 : machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2546 :
2547 217506 : list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2548 217506 : if (list_index >= vec_safe_length (addr_list))
2549 10136 : vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2550 :
2551 217506 : addr = (*addr_list)[list_index];
2552 217506 : if (!addr)
2553 : {
2554 13270 : addr_mode = targetm.addr_space.address_mode (as);
2555 13270 : reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2556 13270 : addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2557 13270 : (*addr_list)[list_index] = addr;
2558 : }
2559 : else
2560 204236 : addr_mode = GET_MODE (addr);
2561 :
2562 217506 : XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2563 217506 : return (memory_address_addr_space_p (mem_mode, addr, as));
2564 : }
2565 :
2566 : /* Comparison function to sort group in ascending order of addr_offset. */
2567 :
2568 : static int
2569 3144916 : group_compare_offset (const void *a, const void *b)
2570 : {
2571 3144916 : const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2572 3144916 : const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2573 :
2574 3144916 : return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2575 : }
2576 :
2577 : /* Check if small groups should be split. Return true if no group
2578 : contains more than two uses with distinct addr_offsets. Return
2579 : false otherwise. We want to split such groups because:
2580 :
2581 : 1) Small groups don't have much benefit and may interfer with
2582 : general candidate selection.
2583 : 2) Size for problem with only small groups is usually small and
2584 : general algorithm can handle it well.
2585 :
2586 : TODO -- Above claim may not hold when we want to merge memory
2587 : accesses with conseuctive addresses. */
2588 :
2589 : static bool
2590 503213 : split_small_address_groups_p (struct ivopts_data *data)
2591 : {
2592 503213 : unsigned int i, j, distinct = 1;
2593 503213 : struct iv_use *pre;
2594 503213 : struct iv_group *group;
2595 :
2596 2099361 : for (i = 0; i < data->vgroups.length (); i++)
2597 : {
2598 1596148 : group = data->vgroups[i];
2599 1596148 : if (group->vuses.length () == 1)
2600 1457423 : continue;
2601 :
2602 138725 : gcc_assert (address_p (group->type));
2603 138725 : if (group->vuses.length () == 2)
2604 : {
2605 79214 : if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2606 79214 : group->vuses[1]->addr_offset) > 0)
2607 19214 : std::swap (group->vuses[0], group->vuses[1]);
2608 : }
2609 : else
2610 59511 : group->vuses.qsort (group_compare_offset);
2611 :
2612 138725 : if (distinct > 2)
2613 13979 : continue;
2614 :
2615 124746 : distinct = 1;
2616 1788052 : for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2617 : {
2618 191904 : if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2619 : {
2620 133569 : pre = group->vuses[j];
2621 133569 : distinct++;
2622 : }
2623 :
2624 191904 : if (distinct > 2)
2625 : break;
2626 : }
2627 : }
2628 :
2629 503213 : return (distinct <= 2);
2630 : }
2631 :
2632 : /* For each group of address type uses, this function further groups
2633 : these uses according to the maximum offset supported by target's
2634 : [base + offset] addressing mode. */
2635 :
2636 : static void
2637 503213 : split_address_groups (struct ivopts_data *data)
2638 : {
2639 503213 : unsigned int i, j;
2640 : /* Always split group. */
2641 503213 : bool split_p = split_small_address_groups_p (data);
2642 :
2643 2152251 : for (i = 0; i < data->vgroups.length (); i++)
2644 : {
2645 1649038 : struct iv_group *new_group = NULL;
2646 1649038 : struct iv_group *group = data->vgroups[i];
2647 1649038 : struct iv_use *use = group->vuses[0];
2648 :
2649 1649038 : use->id = 0;
2650 1649038 : use->group_id = group->id;
2651 1649038 : if (group->vuses.length () == 1)
2652 1504747 : continue;
2653 :
2654 144291 : gcc_assert (address_p (use->type));
2655 :
2656 1983790 : for (j = 1; j < group->vuses.length ();)
2657 : {
2658 334752 : struct iv_use *next = group->vuses[j];
2659 334752 : poly_int64 offset = next->addr_offset - use->addr_offset;
2660 :
2661 : /* Split group if aksed to, or the offset against the first
2662 : use can't fit in offset part of addressing mode. IV uses
2663 : having the same offset are still kept in one group. */
2664 393694 : if (maybe_ne (offset, 0)
2665 334752 : && (split_p || !addr_offset_valid_p (use, offset)))
2666 : {
2667 58942 : if (!new_group)
2668 52890 : new_group = record_group (data, group->type);
2669 58942 : group->vuses.ordered_remove (j);
2670 58942 : new_group->vuses.safe_push (next);
2671 58942 : continue;
2672 : }
2673 :
2674 275810 : next->id = j;
2675 275810 : next->group_id = group->id;
2676 275810 : j++;
2677 : }
2678 : }
2679 503213 : }
2680 :
2681 : /* Finds uses of the induction variables that are interesting. */
2682 :
2683 : static void
2684 503213 : find_interesting_uses (struct ivopts_data *data, basic_block *body)
2685 : {
2686 503213 : basic_block bb;
2687 503213 : gimple_stmt_iterator bsi;
2688 503213 : unsigned i;
2689 503213 : edge e;
2690 :
2691 3313149 : for (i = 0; i < data->current_loop->num_nodes; i++)
2692 : {
2693 2809936 : edge_iterator ei;
2694 2809936 : bb = body[i];
2695 :
2696 7169414 : FOR_EACH_EDGE (e, ei, bb->succs)
2697 4359478 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2698 4359478 : && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2699 891275 : find_interesting_uses_outside (data, e);
2700 :
2701 5668989 : for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2702 2859053 : find_interesting_uses_stmt (data, gsi_stmt (bsi));
2703 27322660 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2704 21702788 : if (!is_gimple_debug (gsi_stmt (bsi)))
2705 12541086 : find_interesting_uses_stmt (data, gsi_stmt (bsi));
2706 : }
2707 :
2708 503213 : split_address_groups (data);
2709 :
2710 503213 : if (dump_file && (dump_flags & TDF_DETAILS))
2711 : {
2712 67 : fprintf (dump_file, "\n<IV Groups>:\n");
2713 67 : dump_groups (dump_file, data);
2714 67 : fprintf (dump_file, "\n");
2715 : }
2716 503213 : }
2717 :
2718 : /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2719 : is true, assume we are inside an address. If TOP_COMPREF is true, assume
2720 : we are at the top-level of the processed address. */
2721 :
2722 : static tree
2723 3402412 : strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2724 : poly_int64 *offset)
2725 : {
2726 3402412 : tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2727 3402412 : enum tree_code code;
2728 3402412 : tree type, orig_type = TREE_TYPE (expr);
2729 3402412 : poly_int64 off0, off1;
2730 3402412 : HOST_WIDE_INT st;
2731 3402412 : tree orig_expr = expr;
2732 :
2733 3402412 : STRIP_NOPS (expr);
2734 :
2735 3402412 : type = TREE_TYPE (expr);
2736 3402412 : code = TREE_CODE (expr);
2737 3402412 : *offset = 0;
2738 :
2739 3402412 : switch (code)
2740 : {
2741 626484 : case POINTER_PLUS_EXPR:
2742 626484 : case PLUS_EXPR:
2743 626484 : case MINUS_EXPR:
2744 626484 : op0 = TREE_OPERAND (expr, 0);
2745 626484 : op1 = TREE_OPERAND (expr, 1);
2746 :
2747 626484 : op0 = strip_offset_1 (op0, false, false, &off0);
2748 626484 : op1 = strip_offset_1 (op1, false, false, &off1);
2749 :
2750 626484 : *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2751 626484 : if (op0 == TREE_OPERAND (expr, 0)
2752 626484 : && op1 == TREE_OPERAND (expr, 1))
2753 : return orig_expr;
2754 :
2755 388156 : if (integer_zerop (op1))
2756 : expr = op0;
2757 3179 : else if (integer_zerop (op0))
2758 : {
2759 602 : if (code == MINUS_EXPR)
2760 : {
2761 602 : if (TYPE_OVERFLOW_UNDEFINED (type))
2762 : {
2763 0 : type = unsigned_type_for (type);
2764 0 : op1 = fold_convert (type, op1);
2765 : }
2766 602 : expr = fold_build1 (NEGATE_EXPR, type, op1);
2767 : }
2768 : else
2769 : expr = op1;
2770 : }
2771 : else
2772 : {
2773 2577 : if (TYPE_OVERFLOW_UNDEFINED (type))
2774 : {
2775 0 : type = unsigned_type_for (type);
2776 0 : if (code == POINTER_PLUS_EXPR)
2777 0 : code = PLUS_EXPR;
2778 0 : op0 = fold_convert (type, op0);
2779 0 : op1 = fold_convert (type, op1);
2780 : }
2781 2577 : expr = fold_build2 (code, type, op0, op1);
2782 : }
2783 :
2784 388156 : return fold_convert (orig_type, expr);
2785 :
2786 222206 : case MULT_EXPR:
2787 222206 : op1 = TREE_OPERAND (expr, 1);
2788 222206 : if (!cst_and_fits_in_hwi (op1))
2789 : return orig_expr;
2790 :
2791 182507 : op0 = TREE_OPERAND (expr, 0);
2792 182507 : op0 = strip_offset_1 (op0, false, false, &off0);
2793 182507 : if (op0 == TREE_OPERAND (expr, 0))
2794 : return orig_expr;
2795 :
2796 7244 : *offset = off0 * int_cst_value (op1);
2797 7244 : if (integer_zerop (op0))
2798 : expr = op0;
2799 : else
2800 : {
2801 7244 : if (TYPE_OVERFLOW_UNDEFINED (type))
2802 : {
2803 0 : type = unsigned_type_for (type);
2804 0 : op0 = fold_convert (type, op0);
2805 0 : op1 = fold_convert (type, op1);
2806 : }
2807 7244 : expr = fold_build2 (MULT_EXPR, type, op0, op1);
2808 : }
2809 :
2810 7244 : return fold_convert (orig_type, expr);
2811 :
2812 11 : case ARRAY_REF:
2813 11 : case ARRAY_RANGE_REF:
2814 11 : if (!inside_addr)
2815 : return orig_expr;
2816 :
2817 11 : step = array_ref_element_size (expr);
2818 11 : if (!cst_and_fits_in_hwi (step))
2819 : break;
2820 :
2821 11 : st = int_cst_value (step);
2822 11 : op1 = TREE_OPERAND (expr, 1);
2823 11 : op1 = strip_offset_1 (op1, false, false, &off1);
2824 11 : *offset = off1 * st;
2825 :
2826 11 : if (top_compref
2827 11 : && integer_zerop (op1))
2828 : {
2829 : /* Strip the component reference completely. */
2830 9 : op0 = TREE_OPERAND (expr, 0);
2831 9 : op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2832 9 : *offset += off0;
2833 9 : return op0;
2834 : }
2835 : break;
2836 :
2837 1 : case COMPONENT_REF:
2838 1 : {
2839 1 : tree field;
2840 :
2841 1 : if (!inside_addr)
2842 : return orig_expr;
2843 :
2844 1 : tmp = component_ref_field_offset (expr);
2845 1 : field = TREE_OPERAND (expr, 1);
2846 1 : if (top_compref
2847 1 : && cst_and_fits_in_hwi (tmp)
2848 2 : && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2849 : {
2850 1 : HOST_WIDE_INT boffset, abs_off;
2851 :
2852 : /* Strip the component reference completely. */
2853 1 : op0 = TREE_OPERAND (expr, 0);
2854 1 : op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2855 1 : boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2856 1 : abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2857 1 : if (boffset < 0)
2858 0 : abs_off = -abs_off;
2859 :
2860 1 : *offset = off0 + int_cst_value (tmp) + abs_off;
2861 1 : return op0;
2862 : }
2863 : }
2864 : break;
2865 :
2866 318044 : case ADDR_EXPR:
2867 318044 : op0 = TREE_OPERAND (expr, 0);
2868 318044 : op0 = strip_offset_1 (op0, true, true, &off0);
2869 318044 : *offset += off0;
2870 :
2871 318044 : if (op0 == TREE_OPERAND (expr, 0))
2872 : return orig_expr;
2873 :
2874 10 : expr = build_fold_addr_expr (op0);
2875 10 : return fold_convert (orig_type, expr);
2876 :
2877 : case MEM_REF:
2878 : /* ??? Offset operand? */
2879 : inside_addr = false;
2880 : break;
2881 :
2882 2235664 : default:
2883 2235664 : if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2884 870754 : return build_int_cst (orig_type, 0);
2885 : return orig_expr;
2886 : }
2887 :
2888 : /* Default handling of expressions for that we want to recurse into
2889 : the first operand. */
2890 4 : op0 = TREE_OPERAND (expr, 0);
2891 4 : op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2892 4 : *offset += off0;
2893 :
2894 4 : if (op0 == TREE_OPERAND (expr, 0)
2895 4 : && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2896 : return orig_expr;
2897 :
2898 1 : expr = copy_node (expr);
2899 1 : TREE_OPERAND (expr, 0) = op0;
2900 1 : if (op1)
2901 1 : TREE_OPERAND (expr, 1) = op1;
2902 :
2903 : /* Inside address, we might strip the top level component references,
2904 : thus changing type of the expression. Handling of ADDR_EXPR
2905 : will fix that. */
2906 1 : expr = fold_convert (orig_type, expr);
2907 :
2908 1 : return expr;
2909 : }
2910 :
2911 : /* Strips constant offsets from EXPR and stores them to OFFSET. */
2912 :
2913 : static tree
2914 1648868 : strip_offset (tree expr, poly_uint64 *offset)
2915 : {
2916 1648868 : poly_int64 off;
2917 1648868 : tree core = strip_offset_1 (expr, false, false, &off);
2918 1648868 : *offset = off;
2919 1648868 : return core;
2920 : }
2921 :
2922 : /* Returns variant of TYPE that can be used as base for different uses.
2923 : We return unsigned type with the same precision, which avoids problems
2924 : with overflows. */
2925 :
2926 : static tree
2927 8062128 : generic_type_for (tree type)
2928 : {
2929 8062128 : if (POINTER_TYPE_P (type))
2930 1433348 : return unsigned_type_for (type);
2931 :
2932 6628780 : if (TYPE_UNSIGNED (type))
2933 : return type;
2934 :
2935 3098046 : return unsigned_type_for (type);
2936 : }
2937 :
/* Private data for walk_tree; find_inv_vars fills it in and passes it to
   find_inv_vars_cb as the callback's DATA argument.  */

struct walk_tree_data
{
  /* Address of the bitmap that collects the inv_id of each invariant
     found.  The callback allocates the bitmap when *INV_VARS is still
     NULL.  */
  bitmap *inv_vars;
  /* Pass-wide ivopts data, used by the callback to look up information
     about SSA names and the current loop.  */
  struct ivopts_data *idata;
};
2945 :
2946 : /* Callback function for walk_tree, it records invariants and symbol
2947 : reference in *EXPR_P. DATA is the structure storing result info. */
2948 :
2949 : static tree
2950 34394643 : find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2951 : {
2952 34394643 : tree op = *expr_p;
2953 34394643 : struct version_info *info;
2954 34394643 : struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2955 :
2956 34394643 : if (TREE_CODE (op) != SSA_NAME)
2957 : return NULL_TREE;
2958 :
2959 8062814 : info = name_info (wdata->idata, op);
2960 : /* Because we expand simple operations when finding IVs, loop invariant
2961 : variable that isn't referred by the original loop could be used now.
2962 : Record such invariant variables here. */
2963 8062814 : if (!info->iv)
2964 : {
2965 390582 : struct ivopts_data *idata = wdata->idata;
2966 390582 : basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2967 :
2968 390582 : if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2969 : {
2970 390582 : tree steptype = TREE_TYPE (op);
2971 390582 : if (POINTER_TYPE_P (steptype))
2972 197301 : steptype = sizetype;
2973 390582 : set_iv (idata, op, op, build_int_cst (steptype, 0), true);
2974 390582 : record_invariant (idata, op, false);
2975 : }
2976 : }
2977 8062814 : if (!info->inv_id || info->has_nonlin_use)
2978 : return NULL_TREE;
2979 :
2980 6694662 : if (!*wdata->inv_vars)
2981 5185681 : *wdata->inv_vars = BITMAP_ALLOC (NULL);
2982 6694662 : bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2983 :
2984 6694662 : return NULL_TREE;
2985 : }
2986 :
2987 : /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
2988 : store it. */
2989 :
2990 : static inline void
2991 27777017 : find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2992 : {
2993 27777017 : struct walk_tree_data wdata;
2994 :
2995 27777017 : if (!inv_vars)
2996 11805253 : return;
2997 :
2998 15971764 : wdata.idata = data;
2999 15971764 : wdata.inv_vars = inv_vars;
3000 15971764 : walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3001 : }
3002 :
3003 : /* Get entry from invariant expr hash table for INV_EXPR. New entry
3004 : will be recorded if it doesn't exist yet. Given below two exprs:
3005 : inv_expr + cst1, inv_expr + cst2
3006 : It's hard to make decision whether constant part should be stripped
3007 : or not. We choose to not strip based on below facts:
3008 : 1) We need to count ADD cost for constant part if it's stripped,
3009 : which isn't always trivial where this functions is called.
3010 : 2) Stripping constant away may be conflict with following loop
3011 : invariant hoisting pass.
3012 : 3) Not stripping constant away results in more invariant exprs,
3013 : which usually leads to decision preferring lower reg pressure. */
3014 :
3015 : static iv_inv_expr_ent *
3016 2650729 : get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3017 : {
3018 2650729 : STRIP_NOPS (inv_expr);
3019 :
3020 2650729 : if (poly_int_tree_p (inv_expr)
3021 2650729 : || TREE_CODE (inv_expr) == SSA_NAME)
3022 : return NULL;
3023 :
3024 : /* Don't strip constant part away as we used to. */
3025 :
3026 : /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3027 2561698 : struct iv_inv_expr_ent ent;
3028 2561698 : ent.expr = inv_expr;
3029 2561698 : ent.hash = iterative_hash_expr (inv_expr, 0);
3030 2561698 : struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3031 :
3032 2561698 : if (!*slot)
3033 : {
3034 1152670 : *slot = XNEW (struct iv_inv_expr_ent);
3035 1152670 : (*slot)->expr = inv_expr;
3036 1152670 : (*slot)->hash = ent.hash;
3037 1152670 : (*slot)->id = ++data->max_inv_expr_id;
3038 : }
3039 :
3040 2561698 : return *slot;
3041 : }
3042 :
3043 :
3044 : /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3045 : unsuitable as ivopts candidates for potentially involving undefined
3046 : behavior. */
3047 :
3048 : static tree
3049 15403626 : find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3050 : {
3051 15403626 : basic_block bb = (basic_block) bb_;
3052 15403626 : if (TREE_CODE (*tp) == SSA_NAME
3053 2263613 : && ssa_name_maybe_undef_p (*tp)
3054 15412241 : && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3055 3080 : return *tp;
3056 15400546 : if (!EXPR_P (*tp))
3057 10422673 : *walk_subtrees = 0;
3058 : return NULL;
3059 : }
3060 :
3061 : /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3062 : position to POS. If USE is not NULL, the candidate is set as related to
3063 : it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3064 : replacement of the final value of the iv by a direct computation. */
3065 :
3066 : static struct iv_cand *
3067 9038969 : add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3068 : enum iv_position pos, struct iv_use *use,
3069 : gimple *incremented_at, struct iv *orig_iv = NULL,
3070 : bool doloop = false)
3071 : {
3072 9038969 : unsigned i;
3073 9038969 : struct iv_cand *cand = NULL;
3074 9038969 : tree type, orig_type;
3075 :
3076 9038969 : gcc_assert (base && step);
3077 :
3078 : /* -fkeep-gc-roots-live means that we have to keep a real pointer
3079 : live, but the ivopts code may replace a real pointer with one
3080 : pointing before or after the memory block that is then adjusted
3081 : into the memory block during the loop. FIXME: It would likely be
3082 : better to actually force the pointer live and still use ivopts;
3083 : for example, it would be enough to write the pointer into memory
3084 : and keep it there until after the loop. */
3085 9038969 : if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3086 : return NULL;
3087 :
3088 : /* If BASE contains undefined SSA names make sure we only record
3089 : the original IV. */
3090 8932824 : bool involves_undefs = false;
3091 8932824 : if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3092 : {
3093 3080 : if (pos != IP_ORIGINAL)
3094 : return NULL;
3095 : important = false;
3096 : involves_undefs = true;
3097 : }
3098 :
3099 : /* For non-original variables, make sure their values are computed in a type
3100 : that does not invoke undefined behavior on overflows (since in general,
3101 : we cannot prove that these induction variables are non-wrapping). */
3102 8929744 : if (pos != IP_ORIGINAL)
3103 : {
3104 8062128 : orig_type = TREE_TYPE (base);
3105 8062128 : type = generic_type_for (orig_type);
3106 8062128 : if (type != orig_type)
3107 : {
3108 4531394 : base = fold_convert (type, base);
3109 4531394 : step = fold_convert (type, step);
3110 : }
3111 : }
3112 :
3113 44710290 : for (i = 0; i < data->vcands.length (); i++)
3114 : {
3115 40090366 : cand = data->vcands[i];
3116 :
3117 40090366 : if (cand->pos != pos)
3118 9808055 : continue;
3119 :
3120 30282311 : if (cand->incremented_at != incremented_at
3121 29794263 : || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3122 0 : && cand->ainc_use != use))
3123 488048 : continue;
3124 :
3125 29794263 : if (operand_equal_p (base, cand->iv->base, 0)
3126 9453590 : && operand_equal_p (step, cand->iv->step, 0)
3127 35484599 : && (TYPE_PRECISION (TREE_TYPE (base))
3128 5690336 : == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3129 : break;
3130 : }
3131 :
3132 17860230 : if (i == data->vcands.length ())
3133 : {
3134 4619924 : cand = XCNEW (struct iv_cand);
3135 4619924 : cand->id = i;
3136 4619924 : cand->iv = alloc_iv (data, base, step);
3137 4619924 : cand->pos = pos;
3138 4619924 : if (pos != IP_ORIGINAL)
3139 : {
3140 3752090 : if (doloop)
3141 0 : cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3142 : else
3143 3752090 : cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3144 3752090 : cand->var_after = cand->var_before;
3145 : }
3146 4619924 : cand->important = important;
3147 4619924 : cand->involves_undefs = involves_undefs;
3148 4619924 : cand->incremented_at = incremented_at;
3149 4619924 : cand->doloop_p = doloop;
3150 4619924 : data->vcands.safe_push (cand);
3151 :
3152 4619924 : if (!poly_int_tree_p (step))
3153 : {
3154 182422 : find_inv_vars (data, &step, &cand->inv_vars);
3155 :
3156 182422 : iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3157 : /* Share bitmap between inv_vars and inv_exprs for cand. */
3158 182422 : if (inv_expr != NULL)
3159 : {
3160 100512 : cand->inv_exprs = cand->inv_vars;
3161 100512 : cand->inv_vars = NULL;
3162 100512 : if (cand->inv_exprs)
3163 83258 : bitmap_clear (cand->inv_exprs);
3164 : else
3165 17254 : cand->inv_exprs = BITMAP_ALLOC (NULL);
3166 :
3167 100512 : bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3168 : }
3169 : }
3170 :
3171 4619924 : if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3172 0 : cand->ainc_use = use;
3173 : else
3174 4619924 : cand->ainc_use = NULL;
3175 :
3176 4619924 : cand->orig_iv = orig_iv;
3177 4619924 : if (dump_file && (dump_flags & TDF_DETAILS))
3178 686 : dump_cand (dump_file, cand);
3179 : }
3180 :
3181 8930115 : cand->important |= important;
3182 8930115 : cand->doloop_p |= doloop;
3183 :
3184 : /* Relate candidate to the group for which it is added. */
3185 8930115 : if (use)
3186 2503760 : bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3187 :
3188 : return cand;
3189 : }
3190 :
3191 : /* Returns true if incrementing the induction variable at the end of the LOOP
3192 : is allowed.
3193 :
3194 : The purpose is to avoid splitting latch edge with a biv increment, thus
3195 : creating a jump, possibly confusing other optimization passes and leaving
3196 : less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3197 : available (so we do not have a better alternative), or if the latch edge
3198 : is already nonempty. */
3199 :
3200 : static bool
3201 7947152 : allow_ip_end_pos_p (class loop *loop)
3202 : {
3203 : /* Do not allow IP_END when creating the IV would need to split the
3204 : latch edge as that makes all IP_NORMAL invalid. */
3205 7947152 : auto pos = gsi_last_bb (ip_end_pos (loop));
3206 7947152 : if (!gsi_end_p (pos) && stmt_ends_bb_p (*pos))
3207 : return false;
3208 :
3209 7947152 : if (!ip_normal_pos (loop))
3210 : return true;
3211 :
3212 7850396 : if (!empty_block_p (ip_end_pos (loop)))
3213 : return true;
3214 :
3215 : return false;
3216 : }
3217 :
3218 : /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3219 : Important field is set to IMPORTANT. */
3220 :
3221 : static void
3222 581878 : add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3223 : bool important, struct iv_use *use)
3224 : {
3225 581878 : basic_block use_bb = gimple_bb (use->stmt);
3226 581878 : machine_mode mem_mode;
3227 581878 : unsigned HOST_WIDE_INT cstepi;
3228 :
3229 : /* If we insert the increment in any position other than the standard
3230 : ones, we must ensure that it is incremented once per iteration.
3231 : It must not be in an inner nested loop, or one side of an if
3232 : statement. */
3233 581878 : if (use_bb->loop_father != data->current_loop
3234 580466 : || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3235 553426 : || stmt_can_throw_internal (cfun, use->stmt)
3236 1131483 : || !cst_and_fits_in_hwi (step))
3237 62112 : return;
3238 :
3239 519766 : cstepi = int_cst_value (step);
3240 :
3241 519766 : mem_mode = TYPE_MODE (use->mem_type);
3242 : if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3243 : || USE_STORE_PRE_INCREMENT (mem_mode))
3244 : && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3245 : || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3246 : || USE_STORE_PRE_DECREMENT (mem_mode))
3247 : && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3248 : {
3249 : enum tree_code code = MINUS_EXPR;
3250 : tree new_base;
3251 : tree new_step = step;
3252 :
3253 : if (POINTER_TYPE_P (TREE_TYPE (base)))
3254 : {
3255 : new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3256 : code = POINTER_PLUS_EXPR;
3257 : }
3258 : else
3259 : new_step = fold_convert (TREE_TYPE (base), new_step);
3260 : new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3261 : add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3262 : use->stmt);
3263 : }
3264 : if (((USE_LOAD_POST_INCREMENT (mem_mode)
3265 : || USE_STORE_POST_INCREMENT (mem_mode))
3266 : && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3267 : || ((USE_LOAD_POST_DECREMENT (mem_mode)
3268 : || USE_STORE_POST_DECREMENT (mem_mode))
3269 : && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3270 : {
3271 : add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3272 : use->stmt);
3273 : }
3274 : }
3275 :
3276 : /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3277 : position to POS. If USE is not NULL, the candidate is set as related to
3278 : it. The candidate computation is scheduled before exit condition and at
3279 : the end of loop. */
3280 :
3281 : static void
3282 6987629 : add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3283 : struct iv_use *use, struct iv *orig_iv = NULL,
3284 : bool doloop = false)
3285 : {
3286 6987629 : if (ip_normal_pos (data->current_loop))
3287 6905614 : add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3288 : doloop);
3289 : /* Exclude doloop candidate here since it requires decrement then comparison
3290 : and jump, the IP_END position doesn't match. */
3291 6987629 : if (!doloop && ip_end_pos (data->current_loop)
3292 13975258 : && allow_ip_end_pos_p (data->current_loop))
3293 273247 : add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3294 6987629 : }
3295 :
3296 : /* Adds standard iv candidates. */
3297 :
3298 : static void
3299 503212 : add_standard_iv_candidates (struct ivopts_data *data)
3300 : {
3301 503212 : add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3302 :
3303 : /* The same for a double-integer type if it is still fast enough. */
3304 503212 : if (TYPE_PRECISION
3305 503212 : (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3306 503212 : && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3307 455369 : add_candidate (data, build_int_cst (long_integer_type_node, 0),
3308 : build_int_cst (long_integer_type_node, 1), true, NULL);
3309 :
3310 : /* The same for a double-integer type if it is still fast enough. */
3311 503212 : if (TYPE_PRECISION
3312 503212 : (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3313 551043 : && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3314 12 : add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3315 : build_int_cst (long_long_integer_type_node, 1), true, NULL);
3316 503212 : }
3317 :
3318 :
3319 : /* Adds candidates bases on the old induction variable IV. */
3320 :
3321 : static void
3322 1740429 : add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3323 : {
3324 1740429 : gimple *phi;
3325 1740429 : tree def;
3326 1740429 : struct iv_cand *cand;
3327 :
3328 : /* Check if this biv is used in address type use. */
3329 1151687 : if (iv->no_overflow && iv->have_address_use
3330 493062 : && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3331 2233491 : && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3332 : {
3333 279976 : tree base = fold_convert (sizetype, iv->base);
3334 279976 : tree step = fold_convert (sizetype, iv->step);
3335 :
3336 : /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3337 279976 : add_candidate (data, base, step, true, NULL, iv);
3338 : /* Add iv cand of the original type only if it has nonlinear use. */
3339 279976 : if (iv->nonlin_use)
3340 28385 : add_candidate (data, iv->base, iv->step, true, NULL);
3341 : }
3342 : else
3343 1460453 : add_candidate (data, iv->base, iv->step, true, NULL);
3344 :
3345 : /* The same, but with initial value zero. */
3346 1740429 : if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3347 326814 : add_candidate (data, size_int (0), iv->step, true, NULL);
3348 : else
3349 1413615 : add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3350 : iv->step, true, NULL);
3351 :
3352 1740429 : phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3353 1740429 : if (gimple_code (phi) == GIMPLE_PHI)
3354 : {
3355 : /* Additionally record the possibility of leaving the original iv
3356 : untouched. */
3357 870291 : def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3358 : /* Don't add candidate if it's from another PHI node because
3359 : it's an affine iv appearing in the form of PEELED_CHREC. */
3360 870291 : phi = SSA_NAME_DEF_STMT (def);
3361 870291 : if (gimple_code (phi) != GIMPLE_PHI)
3362 : {
3363 1740582 : cand = add_candidate_1 (data,
3364 : iv->base, iv->step, true, IP_ORIGINAL, NULL,
3365 870291 : SSA_NAME_DEF_STMT (def));
3366 870291 : if (cand)
3367 : {
3368 867987 : cand->var_before = iv->ssa_name;
3369 867987 : cand->var_after = def;
3370 : }
3371 : }
3372 : else
3373 0 : gcc_assert (gimple_bb (phi) == data->current_loop->header);
3374 : }
3375 1740429 : }
3376 :
3377 : /* Adds candidates based on the old induction variables. */
3378 :
3379 : static void
3380 503212 : add_iv_candidate_for_bivs (struct ivopts_data *data)
3381 : {
3382 503212 : unsigned i;
3383 503212 : struct iv *iv;
3384 503212 : bitmap_iterator bi;
3385 :
3386 5448841 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3387 : {
3388 4945629 : iv = ver_info (data, i)->iv;
3389 4945629 : if (iv && iv->biv_p && !integer_zerop (iv->step))
3390 1740429 : add_iv_candidate_for_biv (data, iv);
3391 : }
3392 503212 : }
3393 :
3394 : /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3395 :
3396 : static void
3397 4168495 : record_common_cand (struct ivopts_data *data, tree base,
3398 : tree step, struct iv_use *use)
3399 : {
3400 4168495 : class iv_common_cand ent;
3401 4168495 : class iv_common_cand **slot;
3402 :
3403 4168495 : ent.base = base;
3404 4168495 : ent.step = step;
3405 4168495 : ent.hash = iterative_hash_expr (base, 0);
3406 4168495 : ent.hash = iterative_hash_expr (step, ent.hash);
3407 :
3408 4168495 : slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3409 4168495 : if (*slot == NULL)
3410 : {
3411 2615704 : *slot = new iv_common_cand ();
3412 2615704 : (*slot)->base = base;
3413 2615704 : (*slot)->step = step;
3414 2615704 : (*slot)->uses.create (8);
3415 2615704 : (*slot)->hash = ent.hash;
3416 2615704 : data->iv_common_cands.safe_push ((*slot));
3417 : }
3418 :
3419 4168495 : gcc_assert (use != NULL);
3420 4168495 : (*slot)->uses.safe_push (use);
3421 4168495 : return;
3422 4168495 : }
3423 :
3424 : /* Comparison function used to sort common candidates. */
3425 :
3426 : static int
3427 19113271 : common_cand_cmp (const void *p1, const void *p2)
3428 : {
3429 19113271 : unsigned n1, n2;
3430 19113271 : const class iv_common_cand *const *const ccand1
3431 : = (const class iv_common_cand *const *)p1;
3432 19113271 : const class iv_common_cand *const *const ccand2
3433 : = (const class iv_common_cand *const *)p2;
3434 :
3435 19113271 : n1 = (*ccand1)->uses.length ();
3436 19113271 : n2 = (*ccand2)->uses.length ();
3437 19113271 : return n2 - n1;
3438 : }
3439 :
3440 : /* Adds IV candidates based on common candidated recorded. */
3441 :
3442 : static void
3443 503212 : add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3444 : {
3445 503212 : unsigned i, j;
3446 503212 : struct iv_cand *cand_1, *cand_2;
3447 :
3448 503212 : data->iv_common_cands.qsort (common_cand_cmp);
3449 1462735 : for (i = 0; i < data->iv_common_cands.length (); i++)
3450 : {
3451 1446876 : class iv_common_cand *ptr = data->iv_common_cands[i];
3452 :
3453 : /* Only add IV candidate if it's derived from multiple uses. */
3454 1446876 : if (ptr->uses.length () <= 1)
3455 : break;
3456 :
3457 959523 : cand_1 = NULL;
3458 959523 : cand_2 = NULL;
3459 959523 : if (ip_normal_pos (data->current_loop))
3460 944782 : cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3461 : false, IP_NORMAL, NULL, NULL);
3462 :
3463 959523 : if (ip_end_pos (data->current_loop)
3464 959523 : && allow_ip_end_pos_p (data->current_loop))
3465 45035 : cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3466 : false, IP_END, NULL, NULL);
3467 :
3468 : /* Bind deriving uses and the new candidates. */
3469 3471837 : for (j = 0; j < ptr->uses.length (); j++)
3470 : {
3471 2512314 : struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3472 2512314 : if (cand_1)
3473 2437992 : bitmap_set_bit (group->related_cands, cand_1->id);
3474 2512314 : if (cand_2)
3475 135017 : bitmap_set_bit (group->related_cands, cand_2->id);
3476 : }
3477 : }
3478 :
3479 : /* Release data since it is useless from this point. */
3480 503212 : data->iv_common_cand_tab->empty ();
3481 503212 : data->iv_common_cands.truncate (0);
3482 503212 : }
3483 :
3484 : /* Adds candidates based on the value of USE's iv. */
3485 :
3486 : static void
3487 1649034 : add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3488 : {
3489 1649034 : poly_uint64 offset;
3490 1649034 : tree base;
3491 1649034 : struct iv *iv = use->iv;
3492 1649034 : tree basetype = TREE_TYPE (iv->base);
3493 :
3494 : /* Don't add candidate for iv_use with non integer, pointer or non-mode
3495 : precision types, instead, add candidate for the corresponding scev in
3496 : unsigned type with the same precision. See PR93674 for more info. */
3497 777216 : if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3498 2426095 : || !type_has_mode_precision_p (basetype))
3499 : {
3500 166 : basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3501 166 : TYPE_UNSIGNED (basetype));
3502 166 : add_candidate (data, fold_convert (basetype, iv->base),
3503 : fold_convert (basetype, iv->step), false, NULL);
3504 166 : return;
3505 : }
3506 :
3507 1648868 : add_candidate (data, iv->base, iv->step, false, use);
3508 :
3509 : /* Record common candidate for use in case it can be shared by others. */
3510 1648868 : record_common_cand (data, iv->base, iv->step, use);
3511 :
3512 : /* Record common candidate with initial value zero. */
3513 1648868 : basetype = TREE_TYPE (iv->base);
3514 1648868 : if (POINTER_TYPE_P (basetype))
3515 777061 : basetype = sizetype;
3516 1648868 : record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3517 :
3518 : /* Compare the cost of an address with an unscaled index with the cost of
3519 : an address with a scaled index and add candidate if useful. */
3520 1648868 : poly_int64 step;
3521 1648868 : if (use != NULL
3522 1648868 : && poly_int_tree_p (iv->step, &step)
3523 1413028 : && address_p (use->type))
3524 : {
3525 531835 : poly_int64 new_step;
3526 531835 : unsigned int fact = preferred_mem_scale_factor
3527 531835 : (use->iv->base,
3528 531835 : TYPE_MODE (use->mem_type),
3529 531835 : optimize_loop_for_speed_p (data->current_loop));
3530 :
3531 531835 : if (fact != 1
3532 531835 : && multiple_p (step, fact, &new_step))
3533 0 : add_candidate (data, size_int (0),
3534 0 : wide_int_to_tree (sizetype, new_step),
3535 : true, NULL);
3536 : }
3537 :
3538 : /* Record common candidate with constant offset stripped in base.
3539 : Like the use itself, we also add candidate directly for it. */
3540 1648868 : base = strip_offset (iv->base, &offset);
3541 1648868 : if (maybe_ne (offset, 0U) || base != iv->base)
3542 : {
3543 870759 : record_common_cand (data, base, iv->step, use);
3544 870759 : add_candidate (data, base, iv->step, false, use);
3545 : }
3546 :
3547 : /* Record common candidate with base_object removed in base. */
3548 1648868 : base = iv->base;
3549 1648868 : STRIP_NOPS (base);
3550 1648868 : if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3551 : {
3552 0 : tree step = iv->step;
3553 :
3554 0 : STRIP_NOPS (step);
3555 0 : base = TREE_OPERAND (base, 1);
3556 0 : step = fold_convert (sizetype, step);
3557 0 : record_common_cand (data, base, step, use);
3558 : /* Also record common candidate with offset stripped. */
3559 0 : tree alt_base, alt_offset;
3560 0 : split_constant_offset (base, &alt_base, &alt_offset);
3561 0 : if (!integer_zerop (alt_offset))
3562 0 : record_common_cand (data, alt_base, step, use);
3563 : }
3564 :
3565 : /* At last, add auto-incremental candidates. Make such variables
3566 : important since other iv uses with same base object may be based
3567 : on it. */
3568 1648868 : if (use != NULL && address_p (use->type))
3569 581878 : add_autoinc_candidates (data, iv->base, iv->step, true, use);
3570 : }
3571 :
3572 : /* Adds candidates based on the uses. */
3573 :
3574 : static void
3575 503212 : add_iv_candidate_for_groups (struct ivopts_data *data)
3576 : {
3577 503212 : unsigned i;
3578 :
3579 : /* Only add candidate for the first use in group. */
3580 2152246 : for (i = 0; i < data->vgroups.length (); i++)
3581 : {
3582 1649034 : struct iv_group *group = data->vgroups[i];
3583 :
3584 1649034 : gcc_assert (group->vuses[0] != NULL);
3585 1649034 : add_iv_candidate_for_use (data, group->vuses[0]);
3586 : }
3587 503212 : add_iv_candidate_derived_from_uses (data);
3588 503212 : }
3589 :
3590 : /* Record important candidates and add them to related_cands bitmaps. */
3591 :
3592 : static void
3593 503212 : record_important_candidates (struct ivopts_data *data)
3594 : {
3595 503212 : unsigned i;
3596 503212 : struct iv_group *group;
3597 :
3598 5123136 : for (i = 0; i < data->vcands.length (); i++)
3599 : {
3600 4619924 : struct iv_cand *cand = data->vcands[i];
3601 :
3602 4619924 : if (cand->important)
3603 3698662 : bitmap_set_bit (data->important_candidates, i);
3604 : }
3605 :
3606 503212 : data->consider_all_candidates = (data->vcands.length ()
3607 503212 : <= CONSIDER_ALL_CANDIDATES_BOUND);
3608 :
3609 : /* Add important candidates to groups' related_cands bitmaps. */
3610 2152246 : for (i = 0; i < data->vgroups.length (); i++)
3611 : {
3612 1649034 : group = data->vgroups[i];
3613 1649034 : bitmap_ior_into (group->related_cands, data->important_candidates);
3614 : }
3615 503212 : }
3616 :
3617 : /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3618 : If consider_all_candidates is true, we use a two-dimensional array, otherwise
3619 : we allocate a simple list to every use. */
3620 :
3621 : static void
3622 503212 : alloc_use_cost_map (struct ivopts_data *data)
3623 : {
3624 503212 : unsigned i, size, s;
3625 :
3626 2152246 : for (i = 0; i < data->vgroups.length (); i++)
3627 : {
3628 1649034 : struct iv_group *group = data->vgroups[i];
3629 :
3630 1649034 : if (data->consider_all_candidates)
3631 1639334 : size = data->vcands.length ();
3632 : else
3633 : {
3634 9700 : s = bitmap_count_bits (group->related_cands);
3635 :
3636 : /* Round up to the power of two, so that moduling by it is fast. */
3637 19400 : size = s ? (1 << ceil_log2 (s)) : 1;
3638 : }
3639 :
3640 1649034 : group->n_map_members = size;
3641 1649034 : group->cost_map = XCNEWVEC (class cost_pair, size);
3642 : }
3643 503212 : }
3644 :
3645 : /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3646 : on invariants INV_VARS and that the value used in expressing it is
3647 : VALUE, and in case of iv elimination the comparison operator is COMP. */
3648 :
3649 : static void
3650 17761107 : set_group_iv_cost (struct ivopts_data *data,
3651 : struct iv_group *group, struct iv_cand *cand,
3652 : comp_cost cost, bitmap inv_vars, tree value,
3653 : enum tree_code comp, bitmap inv_exprs)
3654 : {
3655 17761107 : unsigned i, s;
3656 :
3657 17761107 : if (cost.infinite_cost_p ())
3658 : {
3659 6130571 : BITMAP_FREE (inv_vars);
3660 6130571 : BITMAP_FREE (inv_exprs);
3661 6130571 : return;
3662 : }
3663 :
3664 11630536 : if (data->consider_all_candidates)
3665 : {
3666 11487021 : group->cost_map[cand->id].cand = cand;
3667 11487021 : group->cost_map[cand->id].cost = cost;
3668 11487021 : group->cost_map[cand->id].inv_vars = inv_vars;
3669 11487021 : group->cost_map[cand->id].inv_exprs = inv_exprs;
3670 11487021 : group->cost_map[cand->id].value = value;
3671 11487021 : group->cost_map[cand->id].comp = comp;
3672 11487021 : return;
3673 : }
3674 :
3675 : /* n_map_members is a power of two, so this computes modulo. */
3676 143515 : s = cand->id & (group->n_map_members - 1);
3677 152833 : for (i = s; i < group->n_map_members; i++)
3678 152795 : if (!group->cost_map[i].cand)
3679 143477 : goto found;
3680 52 : for (i = 0; i < s; i++)
3681 52 : if (!group->cost_map[i].cand)
3682 38 : goto found;
3683 :
3684 0 : gcc_unreachable ();
3685 :
3686 143515 : found:
3687 143515 : group->cost_map[i].cand = cand;
3688 143515 : group->cost_map[i].cost = cost;
3689 143515 : group->cost_map[i].inv_vars = inv_vars;
3690 143515 : group->cost_map[i].inv_exprs = inv_exprs;
3691 143515 : group->cost_map[i].value = value;
3692 143515 : group->cost_map[i].comp = comp;
3693 : }
3694 :
3695 : /* Gets cost of (GROUP, CAND) pair. */
3696 :
3697 : static class cost_pair *
3698 203301834 : get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3699 : struct iv_cand *cand)
3700 : {
3701 203301834 : unsigned i, s;
3702 203301834 : class cost_pair *ret;
3703 :
3704 203301834 : if (!cand)
3705 : return NULL;
3706 :
3707 197492211 : if (data->consider_all_candidates)
3708 : {
3709 183730528 : ret = group->cost_map + cand->id;
3710 183730528 : if (!ret->cand)
3711 : return NULL;
3712 :
3713 108593751 : return ret;
3714 : }
3715 :
3716 : /* n_map_members is a power of two, so this computes modulo. */
3717 13761683 : s = cand->id & (group->n_map_members - 1);
3718 18679173 : for (i = s; i < group->n_map_members; i++)
3719 18617295 : if (group->cost_map[i].cand == cand)
3720 : return group->cost_map + i;
3721 10361613 : else if (group->cost_map[i].cand == NULL)
3722 : return NULL;
3723 191397 : for (i = 0; i < s; i++)
3724 171164 : if (group->cost_map[i].cand == cand)
3725 : return group->cost_map + i;
3726 169085 : else if (group->cost_map[i].cand == NULL)
3727 : return NULL;
3728 :
3729 : return NULL;
3730 : }
3731 :
3732 : /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3733 : static rtx
3734 41465 : produce_memory_decl_rtl (tree obj, int *regno)
3735 : {
3736 41465 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3737 41465 : machine_mode address_mode = targetm.addr_space.address_mode (as);
3738 41465 : rtx x;
3739 :
3740 41465 : gcc_assert (obj);
3741 41465 : if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3742 : {
3743 41465 : const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3744 41465 : x = gen_rtx_SYMBOL_REF (address_mode, name);
3745 41465 : SET_SYMBOL_REF_DECL (x, obj);
3746 41465 : x = gen_rtx_MEM (DECL_MODE (obj), x);
3747 41465 : set_mem_addr_space (x, as);
3748 41465 : targetm.encode_section_info (obj, x, true);
3749 : }
3750 : else
3751 : {
3752 0 : x = gen_raw_REG (address_mode, (*regno)++);
3753 0 : x = gen_rtx_MEM (DECL_MODE (obj), x);
3754 0 : set_mem_addr_space (x, as);
3755 : }
3756 :
3757 41465 : return x;
3758 : }
3759 :
3760 : /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3761 : walk_tree. DATA contains the actual fake register number. */
3762 :
3763 : static tree
3764 580510 : prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3765 : {
3766 580510 : tree obj = NULL_TREE;
3767 580510 : rtx x = NULL_RTX;
3768 580510 : int *regno = (int *) data;
3769 :
3770 580510 : switch (TREE_CODE (*expr_p))
3771 : {
3772 165860 : case ADDR_EXPR:
3773 165860 : for (expr_p = &TREE_OPERAND (*expr_p, 0);
3774 165860 : handled_component_p (*expr_p);
3775 0 : expr_p = &TREE_OPERAND (*expr_p, 0))
3776 0 : continue;
3777 165860 : obj = *expr_p;
3778 165860 : if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3779 0 : x = produce_memory_decl_rtl (obj, regno);
3780 : break;
3781 :
3782 0 : case SSA_NAME:
3783 0 : *ws = 0;
3784 0 : obj = SSA_NAME_VAR (*expr_p);
3785 : /* Defer handling of anonymous SSA_NAMEs to the expander. */
3786 0 : if (!obj)
3787 : return NULL_TREE;
3788 0 : if (!DECL_RTL_SET_P (obj))
3789 0 : x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3790 : break;
3791 :
3792 165860 : case VAR_DECL:
3793 165860 : case PARM_DECL:
3794 165860 : case RESULT_DECL:
3795 165860 : *ws = 0;
3796 165860 : obj = *expr_p;
3797 :
3798 165860 : if (DECL_RTL_SET_P (obj))
3799 : break;
3800 :
3801 0 : if (DECL_MODE (obj) == BLKmode)
3802 0 : x = produce_memory_decl_rtl (obj, regno);
3803 : else
3804 0 : x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3805 :
3806 : break;
3807 :
3808 : default:
3809 : break;
3810 : }
3811 :
3812 0 : if (x)
3813 : {
3814 0 : decl_rtl_to_reset.safe_push (obj);
3815 0 : SET_DECL_RTL (obj, x);
3816 : }
3817 :
3818 : return NULL_TREE;
3819 : }
3820 :
3821 : /* Predict whether the given loop will be transformed in the RTL
3822 : doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3823 : This is only for target independent checks, see targetm.predict_doloop_p
3824 : for the target dependent ones.
3825 :
3826 : Note that according to some initial investigation, some checks like costly
3827 : niter check and invalid stmt scanning don't have much gains among general
3828 : cases, so keep this as simple as possible first.
3829 :
3830 : Some RTL-specific checks seem impossible to perform on GIMPLE; if any new
3831 : or easy checks _are_ missing here, please add them. */
3832 :
3833 : static bool
3834 503212 : generic_predict_doloop_p (struct ivopts_data *data)
3835 : {
3836 503212 : class loop *loop = data->current_loop;
3837 :
3838 : /* Call target hook for target dependent checks. */
3839 503212 : if (!targetm.predict_doloop_p (loop))
3840 : {
3841 503212 : if (dump_file && (dump_flags & TDF_DETAILS))
3842 67 : fprintf (dump_file, "Predict doloop failure due to"
3843 : " target specific checks.\n");
3844 503212 : return false;
3845 : }
3846 :
3847 : /* Similar to doloop_optimize, check the iteration description to see whether
3848 : it is suitable. Keep it as simple as possible; feel free to extend it
3849 : if you find that multiple-exit cases matter. */
3850 0 : edge exit = single_dom_exit (loop);
3851 0 : class tree_niter_desc *niter_desc;
3852 0 : if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3853 : {
3854 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3855 0 : fprintf (dump_file, "Predict doloop failure due to"
3856 : " unexpected niters.\n");
3857 0 : return false;
3858 : }
3859 :
3860 : /* Similar to doloop_optimize, check whether the iteration count is too
3861 : small to be profitable. */
3862 0 : HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3863 0 : if (est_niter == -1)
3864 0 : est_niter = get_likely_max_loop_iterations_int (loop);
3865 0 : if (est_niter >= 0 && est_niter < 3)
3866 : {
3867 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3868 0 : fprintf (dump_file,
3869 : "Predict doloop failure due to"
3870 : " too few iterations (%u).\n",
3871 : (unsigned int) est_niter);
3872 0 : return false;
3873 : }
3874 :
3875 : return true;
3876 : }
3877 :
3878 : /* Determines cost of the computation of EXPR. */
3879 :
3880 : static unsigned
3881 248790 : computation_cost (tree expr, bool speed)
3882 : {
3883 248790 : rtx_insn *seq;
3884 248790 : rtx rslt;
3885 248790 : tree type = TREE_TYPE (expr);
3886 248790 : unsigned cost;
3887 : /* Avoid using hard regs in ways which may be unsupported. */
3888 248790 : int regno = LAST_VIRTUAL_REGISTER + 1;
3889 248790 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
3890 248790 : enum node_frequency real_frequency = node->frequency;
3891 :
3892 248790 : node->frequency = NODE_FREQUENCY_NORMAL;
3893 248790 : crtl->maybe_hot_insn_p = speed;
3894 248790 : walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3895 248790 : start_sequence ();
3896 248790 : rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3897 248790 : seq = end_sequence ();
3898 248790 : default_rtl_profile ();
3899 248790 : node->frequency = real_frequency;
3900 :
3901 248790 : cost = seq_cost (seq, speed);
3902 248790 : if (MEM_P (rslt))
3903 0 : cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3904 0 : TYPE_ADDR_SPACE (type), speed);
3905 248790 : else if (!REG_P (rslt))
3906 497580 : cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3907 :
3908 248790 : return cost;
3909 : }
3910 :
3911 : /* Returns variable containing the value of candidate CAND at statement STMT in LOOP. */
3912 :
3913 : static tree
3914 18484338 : var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3915 : {
3916 18484338 : if (stmt_after_increment (loop, cand, stmt))
3917 4737803 : return cand->var_after;
3918 : else
3919 13746535 : return cand->var_before;
3920 : }
3921 :
3922 : /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3923 : same precision that is at least as wide as the precision of TYPE, stores
3924 : BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3925 : type of A and B. */
3926 :
3927 : static tree
3928 14292024 : determine_common_wider_type (tree *a, tree *b)
3929 : {
3930 14292024 : tree wider_type = NULL;
3931 14292024 : tree suba, subb;
3932 14292024 : tree atype = TREE_TYPE (*a);
3933 :
3934 14292024 : if (CONVERT_EXPR_P (*a))
3935 : {
3936 8017971 : suba = TREE_OPERAND (*a, 0);
3937 8017971 : wider_type = TREE_TYPE (suba);
3938 8017971 : if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3939 : return atype;
3940 : }
3941 : else
3942 : return atype;
3943 :
3944 7999817 : if (CONVERT_EXPR_P (*b))
3945 : {
3946 1588741 : subb = TREE_OPERAND (*b, 0);
3947 1588741 : if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3948 : return atype;
3949 : }
3950 : else
3951 : return atype;
3952 :
3953 1506648 : *a = suba;
3954 1506648 : *b = subb;
3955 1506648 : return wider_type;
3956 : }
3957 :
3958 : /* Determines the expression by which USE is expressed from induction variable
3959 : CAND at statement AT in DATA's current loop. The expression is stored in
3960 : two parts in a decomposed form. The invariant part is stored in AFF_INV;
3961 : the variant part in AFF_VAR. Store ratio of USE.step over CAND.step in
3962 : PRAT if it's non-null. Returns false if USE cannot be expressed using
3963 : CAND. */
3964 :
3965 : static bool
3966 17228747 : get_computation_aff_1 (struct ivopts_data *data, gimple *at, struct iv_use *use,
3967 : struct iv_cand *cand, class aff_tree *aff_inv,
3968 : class aff_tree *aff_var, widest_int *prat = NULL)
3969 : {
3970 17228747 : tree ubase = use->iv->base, ustep = use->iv->step;
3971 17228747 : tree cbase = cand->iv->base, cstep = cand->iv->step;
3972 17228747 : tree common_type, uutype, var, cstep_common;
3973 17228747 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3974 17228747 : aff_tree aff_cbase;
3975 17228747 : widest_int rat;
3976 :
3977 : /* We must have a precision to express the values of use. */
3978 17228747 : if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3979 : return false;
3980 :
3981 17228065 : var = var_at_stmt (data->current_loop, cand, at);
3982 17228065 : uutype = unsigned_type_for (utype);
3983 :
3984 : /* If the conversion is not noop, perform it. */
3985 17228065 : if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3986 : {
3987 264882 : if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3988 1642025 : && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3989 : {
3990 35908 : tree inner_base, inner_step, inner_type;
3991 35908 : inner_base = TREE_OPERAND (cbase, 0);
3992 35908 : if (CONVERT_EXPR_P (cstep))
3993 4499 : inner_step = TREE_OPERAND (cstep, 0);
3994 : else
3995 : inner_step = cstep;
3996 :
3997 35908 : inner_type = TREE_TYPE (inner_base);
3998 : /* If candidate is added from a biv whose type is smaller than
3999 : ctype, we know both candidate and the biv won't overflow.
4000 : In this case, it's safe to skip the conversion in candidate.
4001 : As an example, (unsigned short)((unsigned long)A) equals
4002 : (unsigned short)A, if A has a type no larger than short. */
4003 35908 : if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4004 : {
4005 34948 : cbase = inner_base;
4006 34948 : cstep = inner_step;
4007 : }
4008 : }
4009 1606117 : cbase = fold_convert (uutype, cbase);
4010 1606117 : cstep = fold_convert (uutype, cstep);
4011 1606117 : var = fold_convert (uutype, var);
4012 : }
4013 :
4014 : /* Ratio is 1 when computing the value of biv cand by itself.
4015 : We can't rely on constant_multiple_of in this case because the
4016 : use is created after the original biv is selected. The call
4017 : could fail because of inconsistent fold behavior. See PR68021
4018 : for more information. */
4019 17228065 : if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4020 : {
4021 5320 : gcc_assert (is_gimple_assign (use->stmt));
4022 5320 : gcc_assert (use->iv->ssa_name == cand->var_after);
4023 5320 : gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4024 5320 : rat = 1;
4025 : }
4026 17222745 : else if (!constant_multiple_of (ustep, cstep, &rat, data))
4027 : return false;
4028 :
4029 14292024 : if (prat)
4030 12812100 : *prat = rat;
4031 :
4032 : /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4033 : type, we achieve better folding by computing their difference in this
4034 : wider type, and cast the result to UUTYPE. We do not need to worry about
4035 : overflows, as all the arithmetic will in the end be performed in UUTYPE
4036 : anyway. */
4037 14292024 : common_type = determine_common_wider_type (&ubase, &cbase);
4038 :
4039 : /* use = ubase - ratio * cbase + ratio * var. */
4040 14292024 : tree_to_aff_combination (ubase, common_type, aff_inv);
4041 14292024 : tree_to_aff_combination (cbase, common_type, &aff_cbase);
4042 14292024 : tree_to_aff_combination (var, uutype, aff_var);
4043 :
4044 : /* We need to shift the value if we are after the increment. */
4045 14292024 : if (stmt_after_increment (data->current_loop, cand, at))
4046 : {
4047 3226508 : aff_tree cstep_aff;
4048 :
4049 3226508 : if (common_type != uutype)
4050 842162 : cstep_common = fold_convert (common_type, cstep);
4051 : else
4052 : cstep_common = cstep;
4053 :
4054 3226508 : tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4055 3226508 : aff_combination_add (&aff_cbase, &cstep_aff);
4056 3226508 : }
4057 :
4058 14292024 : aff_combination_scale (&aff_cbase, -rat);
4059 14292024 : aff_combination_add (aff_inv, &aff_cbase);
4060 14292024 : if (common_type != uutype)
4061 9680406 : aff_combination_convert (aff_inv, uutype);
4062 :
4063 14292024 : aff_combination_scale (aff_var, rat);
4064 14292024 : return true;
4065 17228747 : }
4066 :
4067 : /* Determines the expression by which USE is expressed from induction variable
4068 : CAND at statement AT in DATA's current loop. The expression is stored in a
4069 : decomposed form into AFF. Returns false if USE cannot be expressed using
4070 : CAND. */
4071 :
4072 : static bool
4073 1242443 : get_computation_aff (struct ivopts_data *data, gimple *at, struct iv_use *use,
4074 : struct iv_cand *cand, class aff_tree *aff)
4075 : {
4076 1242443 : aff_tree aff_var;
4077 :
4078 1242443 : if (!get_computation_aff_1 (data, at, use, cand, aff, &aff_var))
4079 : return false;
4080 :
4081 1135190 : aff_combination_add (aff, &aff_var);
4082 1135190 : return true;
4083 1242443 : }
4084 :
4085 : /* Return the type of USE. */
4086 :
4087 : static tree
4088 1013167 : get_use_type (struct iv_use *use)
4089 : {
4090 1013167 : tree base_type = TREE_TYPE (use->iv->base);
4091 1013167 : tree type;
4092 :
4093 1013167 : if (use->type == USE_REF_ADDRESS)
4094 : {
4095 : /* The base_type may be a void pointer. Create a pointer type based on
4096 : the mem_ref instead. */
4097 0 : type = build_pointer_type (TREE_TYPE (*use->op_p));
4098 0 : gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4099 : == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4100 : }
4101 : else
4102 : type = base_type;
4103 :
4104 1013167 : return type;
4105 : }
4106 :
4107 : /* Determines the expression by which USE is expressed from induction variable
4108 : CAND at statement AT in DATA's current loop. The computation is
4109 : unshared. */
4110 :
4111 : static tree
4112 384974 : get_computation_at (struct ivopts_data *data, gimple *at,
4113 : struct iv_use *use, struct iv_cand *cand)
4114 : {
4115 384974 : aff_tree aff;
4116 384974 : tree type = get_use_type (use);
4117 :
4118 384974 : if (!get_computation_aff (data, at, use, cand, &aff))
4119 : return NULL_TREE;
4120 277721 : unshare_aff_combination (&aff);
4121 277721 : return fold_convert (type, aff_combination_to_tree (&aff));
4122 384974 : }
4123 :
4124 : /* Like get_computation_at, but try harder, even if the computation
4125 : is more expensive. Intended for debug stmts. */
4126 :
4127 : static tree
4128 178199 : get_debug_computation_at (struct ivopts_data *data, gimple *at,
4129 : struct iv_use *use, struct iv_cand *cand)
4130 : {
4131 178199 : if (tree ret = get_computation_at (data, at, use, cand))
4132 : return ret;
4133 :
4134 107253 : tree ubase = use->iv->base, ustep = use->iv->step;
4135 107253 : tree cbase = cand->iv->base, cstep = cand->iv->step;
4136 107253 : tree var;
4137 107253 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4138 107253 : widest_int rat;
4139 :
4140 : /* This fallback only handles a candidate strictly wider than the use. */
4141 107253 : if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4142 : return NULL_TREE;
4143 :
4144 : /* Try to handle the case that get_computation_at doesn't,
4145 : try to express
4146 : use = ubase + (var - cbase) / ratio. */
4147 8726 : if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4148 : &rat, data))
4149 : return NULL_TREE;
4150 :
4151 7633 : bool neg_p = false;
4152 7633 : if (wi::neg_p (rat))
4153 : {
4154 1057 : if (TYPE_UNSIGNED (ctype))
4155 : return NULL_TREE;
4156 0 : neg_p = true;
4157 0 : rat = wi::neg (rat);
4158 : }
4159 :
4160 : /* If both IVs can wrap around and CAND doesn't have a power of two step,
4161 : it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4162 : the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4163 : uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4164 : ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4165 6576 : if (!use->iv->no_overflow
4166 62 : && !cand->iv->no_overflow
4167 6625 : && !integer_pow2p (cstep))
4168 : return NULL_TREE;
4169 :
4170 6562 : int bits = wi::exact_log2 (rat);
4171 6562 : if (bits == -1)
4172 651 : bits = wi::floor_log2 (rat) + 1;
4173 6562 : if (!cand->iv->no_overflow
4174 6562 : && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4175 : return NULL_TREE;
4176 :
4177 6562 : var = var_at_stmt (data->current_loop, cand, at);
4178 :
4179 6562 : if (POINTER_TYPE_P (ctype))
4180 : {
4181 120 : ctype = unsigned_type_for (ctype);
4182 120 : cbase = fold_convert (ctype, cbase);
4183 120 : cstep = fold_convert (ctype, cstep);
4184 120 : var = fold_convert (ctype, var);
4185 : }
4186 :
4187 6562 : if (stmt_after_increment (data->current_loop, cand, at))
4188 70 : var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4189 : unshare_expr (cstep));
4190 :
4191 6562 : var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4192 6562 : var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4193 : wide_int_to_tree (TREE_TYPE (var), rat));
4194 6562 : if (POINTER_TYPE_P (utype))
4195 : {
4196 0 : var = fold_convert (sizetype, var);
4197 0 : if (neg_p)
4198 0 : var = fold_build1 (NEGATE_EXPR, sizetype, var);
4199 0 : var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4200 : }
4201 : else
4202 : {
4203 6562 : var = fold_convert (utype, var);
4204 13124 : var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4205 : ubase, var);
4206 : }
4207 : return var;
4208 107253 : }
4209 :
4210 : /* Adjust the cost COST for being in loop setup rather than loop body.
4211 : If we're optimizing for space, the loop setup overhead is constant;
4212 : if we're optimizing for speed, amortize it over the per-iteration cost.
4213 : If ROUND_UP_P is true, the result is rounded up rather than towards zero
4214 : when optimizing for speed. */
4215 : static int64_t
4216 10359001 : adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4217 : bool round_up_p = false)
4218 : {
4219 10359001 : if (cost == INFTY)
4220 : return cost;
4221 10359001 : else if (optimize_loop_for_speed_p (data->current_loop))
4222 : {
4223 8724842 : uint64_t niters = avg_loop_niter (data->current_loop);
4224 8724842 : if (niters > (uint64_t) cost)
4225 13396751 : return (round_up_p && cost != 0) ? 1 : 0;
4226 1845468 : return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4227 : }
4228 : else
4229 : return cost;
4230 : }
4231 :
4232 : /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4233 : EXPR operand holding the shift. COST0 and COST1 are the costs for
4234 : calculating the operands of EXPR. Returns true if successful, and returns
4235 : the cost in COST. */
4236 :
4237 : static bool
4238 1441743 : get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4239 : comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4240 : {
4241 1441743 : comp_cost res;
4242 1441743 : tree op1 = TREE_OPERAND (expr, 1);
4243 1441743 : tree cst = TREE_OPERAND (mult, 1);
4244 1441743 : tree multop = TREE_OPERAND (mult, 0);
4245 1441743 : int m = exact_log2 (int_cst_value (cst));
4246 4324719 : int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4247 1441743 : int as_cost, sa_cost;
4248 1441743 : bool mult_in_op1;
4249 :
4250 1441743 : if (!(m >= 0 && m < maxm))
4251 : return false;
4252 :
4253 956614 : STRIP_NOPS (op1);
4254 956614 : mult_in_op1 = operand_equal_p (op1, mult, 0);
4255 :
4256 956614 : as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4257 :
4258 : /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4259 : use that in preference to a shift insn followed by an add insn. */
4260 956614 : sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4261 956614 : ? shiftadd_cost (speed, mode, m)
4262 : : (mult_in_op1
4263 145275 : ? shiftsub1_cost (speed, mode, m)
4264 26262 : : shiftsub0_cost (speed, mode, m)));
4265 :
4266 956614 : res = comp_cost (MIN (as_cost, sa_cost), 0);
4267 1731657 : res += (mult_in_op1 ? cost0 : cost1);
4268 :
4269 956614 : STRIP_NOPS (multop);
4270 956614 : if (!is_gimple_val (multop))
4271 486340 : res += force_expr_to_var_cost (multop, speed);
4272 :
4273 956614 : *cost = res;
4274 956614 : return true;
4275 : }
4276 :
4277 : /* Estimates cost of forcing expression EXPR into a variable. */
4278 :
4279 : static comp_cost
4280 28854400 : force_expr_to_var_cost (tree expr, bool speed)
4281 : {
4282 28854400 : static bool costs_initialized = false;
4283 28854400 : static unsigned integer_cost [2];
4284 28854400 : static unsigned symbol_cost [2];
4285 28854400 : static unsigned address_cost [2];
4286 28854400 : tree op0, op1;
4287 28854400 : comp_cost cost0, cost1, cost;
4288 28854400 : machine_mode mode;
4289 28854400 : scalar_int_mode int_mode;
4290 :
4291 28854400 : if (!costs_initialized)
4292 : {
4293 41465 : tree type = build_pointer_type (integer_type_node);
4294 41465 : tree var, addr;
4295 41465 : rtx x;
4296 41465 : int i;
4297 :
4298 41465 : var = create_tmp_var_raw (integer_type_node, "test_var");
4299 41465 : TREE_STATIC (var) = 1;
4300 41465 : x = produce_memory_decl_rtl (var, NULL);
4301 41465 : SET_DECL_RTL (var, x);
4302 :
4303 41465 : addr = build1 (ADDR_EXPR, type, var);
4304 :
4305 :
4306 165860 : for (i = 0; i < 2; i++)
4307 : {
4308 82930 : integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4309 : 2000), i);
4310 :
4311 82930 : symbol_cost[i] = computation_cost (addr, i) + 1;
4312 :
4313 82930 : address_cost[i]
4314 82930 : = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4315 82930 : if (dump_file && (dump_flags & TDF_DETAILS))
4316 : {
4317 105 : fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4318 70 : fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4319 70 : fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4320 70 : fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4321 70 : fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4322 70 : fprintf (dump_file, "\n");
4323 : }
4324 : }
4325 :
4326 41465 : costs_initialized = true;
4327 : }
4328 :
4329 28854400 : STRIP_NOPS (expr);
4330 :
4331 28854400 : if (SSA_VAR_P (expr))
4332 5445689 : return no_cost;
4333 :
4334 23408711 : if (is_gimple_min_invariant (expr))
4335 : {
4336 13913263 : if (poly_int_tree_p (expr))
4337 11850098 : return comp_cost (integer_cost [speed], 0);
4338 :
4339 2063165 : if (TREE_CODE (expr) == ADDR_EXPR)
4340 : {
4341 2063165 : tree obj = TREE_OPERAND (expr, 0);
4342 :
4343 2063165 : if (VAR_P (obj)
4344 : || TREE_CODE (obj) == PARM_DECL
4345 : || TREE_CODE (obj) == RESULT_DECL)
4346 1999106 : return comp_cost (symbol_cost [speed], 0);
4347 : }
4348 :
4349 64059 : return comp_cost (address_cost [speed], 0);
4350 : }
4351 :
4352 9495448 : switch (TREE_CODE (expr))
4353 : {
4354 8124689 : case POINTER_PLUS_EXPR:
4355 8124689 : case PLUS_EXPR:
4356 8124689 : case MINUS_EXPR:
4357 8124689 : case MULT_EXPR:
4358 8124689 : case EXACT_DIV_EXPR:
4359 8124689 : case TRUNC_DIV_EXPR:
4360 8124689 : case BIT_AND_EXPR:
4361 8124689 : case BIT_IOR_EXPR:
4362 8124689 : case LSHIFT_EXPR:
4363 8124689 : case RSHIFT_EXPR:
4364 8124689 : op0 = TREE_OPERAND (expr, 0);
4365 8124689 : op1 = TREE_OPERAND (expr, 1);
4366 8124689 : STRIP_NOPS (op0);
4367 8124689 : STRIP_NOPS (op1);
4368 8124689 : break;
4369 :
4370 1370719 : CASE_CONVERT:
4371 1370719 : case NEGATE_EXPR:
4372 1370719 : case BIT_NOT_EXPR:
4373 1370719 : op0 = TREE_OPERAND (expr, 0);
4374 1370719 : STRIP_NOPS (op0);
4375 1370719 : op1 = NULL_TREE;
4376 1370719 : break;
4377 : /* See add_iv_candidate_for_doloop: in the doloop may_be_zero case we
4378 : introduce a COND_EXPR for the IV base, so we need cost estimation for
4379 : this COND_EXPR and for tcc_comparison codes. */
4380 0 : case COND_EXPR:
4381 0 : op0 = TREE_OPERAND (expr, 1);
4382 0 : STRIP_NOPS (op0);
4383 0 : op1 = TREE_OPERAND (expr, 2);
4384 0 : STRIP_NOPS (op1);
4385 0 : break;
4386 0 : case LT_EXPR:
4387 0 : case LE_EXPR:
4388 0 : case GT_EXPR:
4389 0 : case GE_EXPR:
4390 0 : case EQ_EXPR:
4391 0 : case NE_EXPR:
4392 0 : case UNORDERED_EXPR:
4393 0 : case ORDERED_EXPR:
4394 0 : case UNLT_EXPR:
4395 0 : case UNLE_EXPR:
4396 0 : case UNGT_EXPR:
4397 0 : case UNGE_EXPR:
4398 0 : case UNEQ_EXPR:
4399 0 : case LTGT_EXPR:
4400 0 : case MAX_EXPR:
4401 0 : case MIN_EXPR:
4402 0 : op0 = TREE_OPERAND (expr, 0);
4403 0 : STRIP_NOPS (op0);
4404 0 : op1 = TREE_OPERAND (expr, 1);
4405 0 : STRIP_NOPS (op1);
4406 0 : break;
4407 :
4408 40 : default:
4409 : /* Just an arbitrary value, FIXME. */
4410 40 : return comp_cost (target_spill_cost[speed], 0);
4411 : }
4412 :
4413 9495408 : if (op0 == NULL_TREE
4414 9495408 : || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4415 4492704 : cost0 = no_cost;
4416 : else
4417 5002704 : cost0 = force_expr_to_var_cost (op0, speed);
4418 :
4419 9495408 : if (op1 == NULL_TREE
4420 8124689 : || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4421 8722510 : cost1 = no_cost;
4422 : else
4423 772898 : cost1 = force_expr_to_var_cost (op1, speed);
4424 :
4425 9495408 : mode = TYPE_MODE (TREE_TYPE (expr));
4426 9495408 : switch (TREE_CODE (expr))
4427 : {
4428 5708341 : case POINTER_PLUS_EXPR:
4429 5708341 : case PLUS_EXPR:
4430 5708341 : case MINUS_EXPR:
4431 5708341 : case NEGATE_EXPR:
4432 5708341 : cost = comp_cost (add_cost (speed, mode), 0);
4433 5708341 : if (TREE_CODE (expr) != NEGATE_EXPR)
4434 : {
4435 5566924 : tree mult = NULL_TREE;
4436 5566924 : comp_cost sa_cost;
4437 5566924 : if (TREE_CODE (op1) == MULT_EXPR)
4438 : mult = op1;
4439 5167088 : else if (TREE_CODE (op0) == MULT_EXPR)
4440 : mult = op0;
4441 :
4442 : if (mult != NULL_TREE
4443 4610310 : && is_a <scalar_int_mode> (mode, &int_mode)
4444 1690893 : && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4445 1441743 : && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4446 : speed, &sa_cost))
4447 956614 : return sa_cost;
4448 : }
4449 : break;
4450 :
4451 1216444 : CASE_CONVERT:
4452 1216444 : {
4453 1216444 : tree inner_mode, outer_mode;
4454 1216444 : outer_mode = TREE_TYPE (expr);
4455 1216444 : inner_mode = TREE_TYPE (op0);
4456 1216444 : cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4457 1216444 : TYPE_MODE (inner_mode), speed), 0);
4458 : }
4459 1216444 : break;
4460 :
4461 2458482 : case MULT_EXPR:
4462 2458482 : if (cst_and_fits_in_hwi (op0))
4463 0 : cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4464 0 : mode, speed), 0);
4465 2458482 : else if (cst_and_fits_in_hwi (op1))
4466 2005773 : cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4467 2005773 : mode, speed), 0);
4468 : else
4469 452709 : return comp_cost (target_spill_cost [speed], 0);
4470 : break;
4471 :
4472 53199 : case EXACT_DIV_EXPR:
4473 53199 : case TRUNC_DIV_EXPR:
4474 : /* Division by power of two is usually cheap, so we allow it. Forbid
4475 : anything else. */
4476 53199 : if (integer_pow2p (TREE_OPERAND (expr, 1)))
4477 53199 : cost = comp_cost (add_cost (speed, mode), 0);
4478 : else
4479 0 : cost = comp_cost (target_spill_cost[speed], 0);
4480 : break;
4481 :
4482 58942 : case BIT_AND_EXPR:
4483 58942 : case BIT_IOR_EXPR:
4484 58942 : case BIT_NOT_EXPR:
4485 58942 : case LSHIFT_EXPR:
4486 58942 : case RSHIFT_EXPR:
4487 58942 : cost = comp_cost (add_cost (speed, mode), 0);
4488 58942 : break;
4489 0 : case COND_EXPR:
4490 0 : op0 = TREE_OPERAND (expr, 0);
4491 0 : STRIP_NOPS (op0);
4492 0 : if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4493 0 : || CONSTANT_CLASS_P (op0))
4494 0 : cost = no_cost;
4495 : else
4496 0 : cost = force_expr_to_var_cost (op0, speed);
4497 : break;
4498 0 : case LT_EXPR:
4499 0 : case LE_EXPR:
4500 0 : case GT_EXPR:
4501 0 : case GE_EXPR:
4502 0 : case EQ_EXPR:
4503 0 : case NE_EXPR:
4504 0 : case UNORDERED_EXPR:
4505 0 : case ORDERED_EXPR:
4506 0 : case UNLT_EXPR:
4507 0 : case UNLE_EXPR:
4508 0 : case UNGT_EXPR:
4509 0 : case UNGE_EXPR:
4510 0 : case UNEQ_EXPR:
4511 0 : case LTGT_EXPR:
4512 0 : case MAX_EXPR:
4513 0 : case MIN_EXPR:
4514 : /* Simply use the add cost for now. FIXME: use a more accurate cost
4515 : estimate if one is available. */
4516 0 : cost = comp_cost (add_cost (speed, mode), 0);
4517 0 : break;
4518 :
4519 0 : default:
4520 0 : gcc_unreachable ();
4521 : }
4522 :
4523 8086085 : cost += cost0;
4524 8086085 : cost += cost1;
4525 8086085 : return cost;
4526 : }
4527 :
4528 : /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4529 : invariants the computation depends on. */
4530 :
4531 : static comp_cost
4532 24635365 : force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4533 : {
4534 24635365 : if (!expr)
4535 2042907 : return no_cost;
4536 :
4537 22592458 : find_inv_vars (data, &expr, inv_vars);
4538 22592458 : return force_expr_to_var_cost (expr, data->speed);
4539 : }
4540 :
4541 : /* Returns cost of auto-modifying address expression in shape base + offset.
4542 : AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4543 : address expression. The address expression has ADDR_MODE in addr space
4544 : AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4545 : speed or size. */
4546 :
4547 : enum ainc_type
4548 : {
4549 : AINC_PRE_INC, /* Pre increment. */
4550 : AINC_PRE_DEC, /* Pre decrement. */
4551 : AINC_POST_INC, /* Post increment. */
4552 : AINC_POST_DEC, /* Post decrement. */
4553 : AINC_NONE /* Also the number of auto increment types. */
4554 : };
4555 :
4556 : struct ainc_cost_data
4557 : {
4558 : int64_t costs[AINC_NONE];
4559 : };
4560 :
4561 : static comp_cost
4562 1800546 : get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4563 : machine_mode addr_mode, machine_mode mem_mode,
4564 : addr_space_t as, bool speed)
4565 : {
4566 1800546 : if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4567 : && !USE_STORE_PRE_DECREMENT (mem_mode)
4568 : && !USE_LOAD_POST_DECREMENT (mem_mode)
4569 : && !USE_STORE_POST_DECREMENT (mem_mode)
4570 : && !USE_LOAD_PRE_INCREMENT (mem_mode)
4571 : && !USE_STORE_PRE_INCREMENT (mem_mode)
4572 : && !USE_LOAD_POST_INCREMENT (mem_mode)
4573 : && !USE_STORE_POST_INCREMENT (mem_mode))
4574 1800546 : return infinite_cost;
4575 :
4576 : static vec<ainc_cost_data *> ainc_cost_data_list;
4577 : unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4578 : if (idx >= ainc_cost_data_list.length ())
4579 : {
4580 : unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4581 :
4582 : gcc_assert (nsize > idx);
4583 : ainc_cost_data_list.safe_grow_cleared (nsize, true);
4584 : }
4585 :
4586 : ainc_cost_data *data = ainc_cost_data_list[idx];
4587 : if (data == NULL)
4588 : {
4589 : rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4590 :
4591 : data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4592 : data->costs[AINC_PRE_DEC] = INFTY;
4593 : data->costs[AINC_POST_DEC] = INFTY;
4594 : data->costs[AINC_PRE_INC] = INFTY;
4595 : data->costs[AINC_POST_INC] = INFTY;
4596 : if (USE_LOAD_PRE_DECREMENT (mem_mode)
4597 : || USE_STORE_PRE_DECREMENT (mem_mode))
4598 : {
4599 : rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4600 :
4601 : if (memory_address_addr_space_p (mem_mode, addr, as))
4602 : data->costs[AINC_PRE_DEC]
4603 : = address_cost (addr, mem_mode, as, speed);
4604 : }
4605 : if (USE_LOAD_POST_DECREMENT (mem_mode)
4606 : || USE_STORE_POST_DECREMENT (mem_mode))
4607 : {
4608 : rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4609 :
4610 : if (memory_address_addr_space_p (mem_mode, addr, as))
4611 : data->costs[AINC_POST_DEC]
4612 : = address_cost (addr, mem_mode, as, speed);
4613 : }
4614 : if (USE_LOAD_PRE_INCREMENT (mem_mode)
4615 : || USE_STORE_PRE_INCREMENT (mem_mode))
4616 : {
4617 : rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4618 :
4619 : if (memory_address_addr_space_p (mem_mode, addr, as))
4620 : data->costs[AINC_PRE_INC]
4621 : = address_cost (addr, mem_mode, as, speed);
4622 : }
4623 : if (USE_LOAD_POST_INCREMENT (mem_mode)
4624 : || USE_STORE_POST_INCREMENT (mem_mode))
4625 : {
4626 : rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4627 :
4628 : if (memory_address_addr_space_p (mem_mode, addr, as))
4629 : data->costs[AINC_POST_INC]
4630 : = address_cost (addr, mem_mode, as, speed);
4631 : }
4632 : ainc_cost_data_list[idx] = data;
4633 : }
4634 :
4635 : poly_int64 msize = GET_MODE_SIZE (mem_mode);
4636 : if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4637 : return comp_cost (data->costs[AINC_POST_INC], 0);
4638 : if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4639 : return comp_cost (data->costs[AINC_POST_DEC], 0);
4640 : if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4641 : return comp_cost (data->costs[AINC_PRE_INC], 0);
4642 : if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4643 : return comp_cost (data->costs[AINC_PRE_DEC], 0);
4644 :
4645 : return infinite_cost;
4646 : }
4647 :
/* Return the cost of computing USE's address expression by using CAND.
   AFF_INV and AFF_VAR are the invariant and variant parts of the address
   expression, respectively; AFF_INV is modified in place as pieces of it
   are folded into the addressing mode.  If the remaining AFF_INV is
   simple, the loop invariant variables it depends on are stored in
   INV_VARS; if it is complicated, it is handled as a new invariant
   expression and recorded in INV_EXPR.  RATIO is the multiple between the
   steps of USE and CAND.  If CAN_AUTOINC is nonnull, *CAN_AUTOINC is set to
   true when the address can be an auto-increment address (in which case
   the auto-increment cost is returned directly).  SPEED selects the speed
   or size cost.  */

static comp_cost
get_address_cost (struct ivopts_data *data, struct iv_use *use,
                  struct iv_cand *cand, aff_tree *aff_inv,
                  aff_tree *aff_var, HOST_WIDE_INT ratio,
                  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
                  bool *can_autoinc, bool speed)
{
  rtx addr;
  bool simple_inv = true;
  tree comp_inv = NULL_TREE, type = aff_var->type;
  /* VAR_COST collects the cost of arithmetic that cannot be folded into the
     addressing mode; COST collects everything else.  */
  comp_cost var_cost = no_cost, cost = no_cost;
  /* Candidate address decomposition, probed against the target with
     valid_mem_ref_p as parts are added.  integer_one_node is used as a
     non-null placeholder operand.
     NOTE(review): the field order of mem_address is not visible here;
     judging by the later uses of parts.base, the initial integer_one_node
     presumably lands in the base field -- confirm.  */
  struct mem_address parts = {NULL_TREE, integer_one_node,
                              NULL_TREE, NULL_TREE, NULL_TREE};
  machine_mode addr_mode = TYPE_MODE (type);
  machine_mode mem_mode = TYPE_MODE (use->mem_type);
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
  /* Only true if ratio != 1.  */
  bool ok_with_ratio_p = false;
  bool ok_without_ratio_p = false;
  code_helper code = ERROR_MARK;

  /* For pointer uses the statement is an internal function call; pass its
     function code to the address validity checks.  */
  if (use->type == USE_PTR_ADDRESS)
    {
      gcall *call = as_a<gcall *> (use->stmt);
      gcc_assert (gimple_call_internal_p (call));
      code = gimple_call_internal_fn (call);
    }

  if (!aff_combination_const_p (aff_inv))
    {
      /* The invariant part is not a plain constant: try to place it in the
         addressing mode next to an index.  */
      parts.index = integer_one_node;
      /* Addressing mode "base + index".  */
      ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
      if (ratio != 1)
        {
          parts.step = wide_int_to_tree (type, ratio);
          /* Addressing mode "base + index << scale".  */
          ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
          if (!ok_with_ratio_p)
            parts.step = NULL_TREE;
        }
      if (ok_with_ratio_p || ok_without_ratio_p)
        {
          if (maybe_ne (aff_inv->offset, 0))
            {
              parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
              /* Addressing mode "base + index [<< scale] + offset".  */
              if (!valid_mem_ref_p (mem_mode, as, &parts, code))
                parts.offset = NULL_TREE;
              else
                /* The offset is now part of the address; drop it from the
                   invariant that still has to be computed.  */
                aff_inv->offset = 0;
            }

          move_fixed_address_to_symbol (&parts, aff_inv);
          /* Base is fixed address and is moved to symbol part.  */
          if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
            parts.base = NULL_TREE;

          /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
          if (parts.symbol != NULL_TREE
              && !valid_mem_ref_p (mem_mode, as, &parts, code))
            {
              /* The target rejects the symbol form: put the symbol back into
                 the invariant part.  */
              aff_combination_add_elt (aff_inv, parts.symbol, 1);
              parts.symbol = NULL_TREE;
              /* Reset SIMPLE_INV since symbol address needs to be computed
                 outside of address expression in this case.  */
              simple_inv = false;
              /* Symbol part is moved back to base part, it can't be NULL.  */
              parts.base = integer_one_node;
            }
        }
      else
        /* Neither indexed form is valid: no index in the address.  */
        parts.index = NULL_TREE;
    }
  else
    {
      /* The invariant part is a constant.  First see whether the access can
         be an auto-increment/decrement address; this is only attempted when
         the caller asked for it, RATIO is 1 and the candidate step is a
         compile-time ptrdiff value.  */
      poly_int64 ainc_step;
      if (can_autoinc
          && ratio == 1
          && ptrdiff_tree_p (cand->iv->step, &ainc_step))
        {
          poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();

          /* If the use comes after the candidate's increment, the offset is
             relative to the already incremented value.  */
          if (stmt_after_increment (data->current_loop, cand, use->stmt))
            ainc_offset += ainc_step;
          cost = get_address_cost_ainc (ainc_step, ainc_offset,
                                        addr_mode, mem_mode, as, speed);
          if (!cost.infinite_cost_p ())
            {
              *can_autoinc = true;
              return cost;
            }
          /* No auto-increment form applies; start over with zero cost.  */
          cost = no_cost;
        }
      if (!aff_combination_zero_p (aff_inv))
        {
          parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
          /* Addressing mode "base + offset".  */
          if (!valid_mem_ref_p (mem_mode, as, &parts, code))
            parts.offset = NULL_TREE;
          else
            aff_inv->offset = 0;
        }
    }

  /* Unless already demoted above, the invariant counts as simple when what
     is left of it is a constant or a single variable.  */
  if (simple_inv)
    simple_inv = (aff_inv == NULL
                  || aff_combination_const_p (aff_inv)
                  || aff_combination_singleton_var_p (aff_inv));
  /* Whatever remains in AFF_INV has to be computed into a variable.  */
  if (!aff_combination_zero_p (aff_inv))
    comp_inv = aff_combination_to_tree (aff_inv);
  if (comp_inv != NULL_TREE)
    cost = force_var_cost (data, comp_inv, inv_vars);
  /* The scale was not accepted by the addressing mode: pay for an explicit
     multiplication.  */
  if (ratio != 1 && parts.step == NULL_TREE)
    var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
  /* There is an invariant but no index slot for it: pay for an explicit
     addition.  */
  if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
    var_cost += add_cost (speed, addr_mode);

  if (comp_inv && inv_expr && !simple_inv)
    {
      *inv_expr = get_loop_invariant_expr (data, comp_inv);
      /* The invariant expression entry stands for the whole expression, so
         the individual invariant variables are no longer reported.  */
      if (*inv_expr != NULL && inv_vars && *inv_vars)
        bitmap_clear (*inv_vars);

      /* Cost of small invariant expression adjusted against loop niters
         is usually zero, which makes it difficult to be differentiated
         from candidate based on loop invariant variables.  Secondly, the
         generated invariant expression may not be hoisted out of loop by
         following pass.  We penalize the cost by rounding up in order to
         neutralize such effects.  */
      cost.cost = adjust_setup_cost (data, cost.cost, true);
      /* Record the setup cost in the scratch field as well.  */
      cost.scratch = cost.cost;
    }

  cost += var_cost;
  /* Build the RTL address from the final decomposition; it must be valid
     for the target at this point.  */
  addr = addr_for_mem_ref (&parts, as, false);
  gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
  cost += address_cost (addr, mem_mode, as, speed);

  /* Complexity grows by one for each component the address uses.  */
  if (parts.symbol != NULL_TREE)
    cost.complexity += 1;
  /* Don't increase the complexity of adding a scaled index if it's
     the only kind of index that the target allows.  */
  if (parts.step != NULL_TREE && ok_without_ratio_p)
    cost.complexity += 1;
  if (parts.base != NULL_TREE && parts.index != NULL_TREE)
    cost.complexity += 1;
  if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
    cost.complexity += 1;

  return cost;
}
4810 :
4811 : /* Scale (multiply) the computed COST (except scratch part that should be
4812 : hoisted out a loop) by header->frequency / AT->frequency, which makes
4813 : expected cost more accurate. */
4814 :
4815 : static comp_cost
4816 12812100 : get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4817 : {
4818 12812100 : if (data->speed
4819 12812100 : && data->current_loop->header->count.to_frequency (cfun) > 0)
4820 : {
4821 11235417 : basic_block bb = gimple_bb (at);
4822 11235417 : gcc_assert (cost.scratch <= cost.cost);
4823 11235417 : int scale_factor = (int)(intptr_t) bb->aux;
4824 11235417 : if (scale_factor == 1)
4825 10673867 : return cost;
4826 :
4827 561550 : int64_t scaled_cost
4828 561550 : = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4829 :
4830 561550 : if (dump_file && (dump_flags & TDF_DETAILS))
4831 93 : fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4832 : "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4833 : 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4834 :
4835 : cost.cost = scaled_cost;
4836 : }
4837 :
4838 2138233 : return cost;
4839 : }
4840 :
4841 : /* Determines the cost of the computation by that USE is expressed
4842 : from induction variable CAND. If ADDRESS_P is true, we just need
4843 : to create an address from it, otherwise we want to get it into
4844 : register. A set of invariants we depend on is stored in INV_VARS.
4845 : If CAN_AUTOINC is nonnull, use it to record whether autoinc
4846 : addressing is likely. If INV_EXPR is nonnull, record invariant
4847 : expr entry in it. */
4848 :
4849 : static comp_cost
4850 20151538 : get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4851 : struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4852 : bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4853 : {
4854 20151538 : gimple *at = use->stmt;
4855 20151538 : tree ubase = use->iv->base, cbase = cand->iv->base;
4856 20151538 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4857 20151538 : tree comp_inv = NULL_TREE;
4858 20151538 : HOST_WIDE_INT ratio, aratio;
4859 20151538 : comp_cost cost;
4860 20151538 : widest_int rat;
4861 40303076 : aff_tree aff_inv, aff_var;
4862 20151538 : bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4863 :
4864 20151538 : if (inv_vars)
4865 17703676 : *inv_vars = NULL;
4866 20151538 : if (can_autoinc)
4867 8731480 : *can_autoinc = false;
4868 20151538 : if (inv_expr)
4869 19734271 : *inv_expr = NULL;
4870 :
4871 : /* Check if we have enough precision to express the values of use. */
4872 20151538 : if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4873 3056698 : return infinite_cost;
4874 :
4875 17094840 : if (address_p
4876 17094840 : || (use->iv->base_object
4877 2114492 : && cand->iv->base_object
4878 1031994 : && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4879 1020565 : && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4880 : {
4881 : /* Do not try to express address of an object with computation based
4882 : on address of a different object. This may cause problems in rtl
4883 : level alias analysis (that does not expect this to be happening,
4884 : as this is illegal in C), and would be unlikely to be useful
4885 : anyway. */
4886 7913201 : if (use->iv->base_object
4887 7913201 : && cand->iv->base_object
4888 12158830 : && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4889 1453270 : return infinite_cost;
4890 : }
4891 :
4892 15641570 : if (!get_computation_aff_1 (data, at, use, cand, &aff_inv, &aff_var, &rat)
4893 15641570 : || !wi::fits_shwi_p (rat))
4894 2829470 : return infinite_cost;
4895 :
4896 12812100 : ratio = rat.to_shwi ();
4897 12812100 : if (address_p)
4898 : {
4899 5524252 : cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4900 : inv_vars, inv_expr, can_autoinc, speed);
4901 5524252 : cost = get_scaled_computation_cost_at (data, at, cost);
4902 : /* For doloop IV cand, add on the extra cost. */
4903 5524252 : cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4904 5524252 : return cost;
4905 : }
4906 :
4907 7287848 : bool simple_inv = (aff_combination_const_p (&aff_inv)
4908 2016286 : || aff_combination_singleton_var_p (&aff_inv));
4909 7287848 : tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4910 7287848 : aff_combination_convert (&aff_inv, signed_type);
4911 7287848 : if (!aff_combination_zero_p (&aff_inv))
4912 5244941 : comp_inv = aff_combination_to_tree (&aff_inv);
4913 :
4914 7287848 : cost = force_var_cost (data, comp_inv, inv_vars);
4915 7287848 : if (comp_inv && inv_expr && !simple_inv)
4916 : {
4917 1406667 : *inv_expr = get_loop_invariant_expr (data, comp_inv);
4918 : /* Clear depends on. */
4919 1406667 : if (*inv_expr != NULL && inv_vars && *inv_vars)
4920 876593 : bitmap_clear (*inv_vars);
4921 :
4922 1406667 : cost.cost = adjust_setup_cost (data, cost.cost);
4923 : /* Record setup cost in scratch field. */
4924 1406667 : cost.scratch = cost.cost;
4925 : }
4926 : /* Cost of constant integer can be covered when adding invariant part to
4927 : variant part. */
4928 5881181 : else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4929 3228630 : cost = no_cost;
4930 :
4931 : /* Need type narrowing to represent use with cand. */
4932 7287848 : if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4933 : {
4934 795194 : machine_mode outer_mode = TYPE_MODE (utype);
4935 795194 : machine_mode inner_mode = TYPE_MODE (ctype);
4936 795194 : cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4937 : }
4938 :
4939 : /* Turn a + i * (-c) into a - i * c. */
4940 7287848 : if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4941 1846288 : aratio = -ratio;
4942 : else
4943 : aratio = ratio;
4944 :
4945 7287848 : if (ratio != 1)
4946 2747593 : cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4947 :
4948 : /* TODO: We may also need to check if we can compute a + i * 4 in one
4949 : instruction. */
4950 : /* Need to add up the invariant and variant parts. */
4951 7287848 : if (comp_inv && !integer_zerop (comp_inv))
4952 10482966 : cost += add_cost (speed, TYPE_MODE (utype));
4953 :
4954 7287848 : cost = get_scaled_computation_cost_at (data, at, cost);
4955 :
4956 : /* For doloop IV cand, add on the extra cost. */
4957 7287848 : if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4958 0 : cost += targetm.doloop_cost_for_generic;
4959 :
4960 7287848 : return cost;
4961 20151538 : }
4962 :
4963 : /* Determines cost of computing the use in GROUP with CAND in a generic
4964 : expression. */
4965 :
4966 : static bool
4967 5550950 : determine_group_iv_cost_generic (struct ivopts_data *data,
4968 : struct iv_group *group, struct iv_cand *cand)
4969 : {
4970 5550950 : comp_cost cost;
4971 5550950 : iv_inv_expr_ent *inv_expr = NULL;
4972 5550950 : bitmap inv_vars = NULL, inv_exprs = NULL;
4973 5550950 : struct iv_use *use = group->vuses[0];
4974 :
4975 : /* The simple case first -- if we need to express value of the preserved
4976 : original biv, the cost is 0. This also prevents us from counting the
4977 : cost of increment twice -- once at this use and once in the cost of
4978 : the candidate. */
4979 5550950 : if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4980 57431 : cost = no_cost;
4981 : /* If the IV candidate involves undefined SSA values and is not the
4982 : same IV as on the USE avoid using that candidate here. */
4983 5493519 : else if (cand->involves_undefs
4984 5493519 : && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
4985 218 : return false;
4986 : else
4987 5493301 : cost = get_computation_cost (data, use, cand, false,
4988 : &inv_vars, NULL, &inv_expr);
4989 :
4990 5550732 : if (inv_expr)
4991 : {
4992 988498 : inv_exprs = BITMAP_ALLOC (NULL);
4993 988498 : bitmap_set_bit (inv_exprs, inv_expr->id);
4994 : }
4995 5550732 : set_group_iv_cost (data, group, cand, cost, inv_vars,
4996 : NULL_TREE, ERROR_MARK, inv_exprs);
4997 5550732 : return !cost.infinite_cost_p ();
4998 : }
4999 :
5000 : /* Determines cost of computing uses in GROUP with CAND in addresses. */
5001 :
5002 : static bool
5003 6283618 : determine_group_iv_cost_address (struct ivopts_data *data,
5004 : struct iv_group *group, struct iv_cand *cand)
5005 : {
5006 6283618 : unsigned i;
5007 6283618 : bitmap inv_vars = NULL, inv_exprs = NULL;
5008 6283618 : bool can_autoinc;
5009 6283618 : iv_inv_expr_ent *inv_expr = NULL;
5010 6283618 : struct iv_use *use = group->vuses[0];
5011 6283618 : comp_cost sum_cost = no_cost, cost;
5012 :
5013 6283618 : cost = get_computation_cost (data, use, cand, true,
5014 : &inv_vars, &can_autoinc, &inv_expr);
5015 :
5016 6283618 : if (inv_expr)
5017 : {
5018 461012 : inv_exprs = BITMAP_ALLOC (NULL);
5019 461012 : bitmap_set_bit (inv_exprs, inv_expr->id);
5020 : }
5021 6283618 : sum_cost = cost;
5022 6283618 : if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5023 : {
5024 0 : if (can_autoinc)
5025 0 : sum_cost -= cand->cost_step;
5026 : /* If we generated the candidate solely for exploiting autoincrement
5027 : opportunities, and it turns out it can't be used, set the cost to
5028 : infinity to make sure we ignore it. */
5029 0 : else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5030 0 : sum_cost = infinite_cost;
5031 : }
5032 :
5033 : /* Compute and add costs for rest uses of this group. */
5034 8314213 : for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5035 : {
5036 2030595 : struct iv_use *next = group->vuses[i];
5037 :
5038 : /* TODO: We could skip computing cost for sub iv_use when it has the
5039 : same cost as the first iv_use, but the cost really depends on the
5040 : offset and where the iv_use is. */
5041 2030595 : cost = get_computation_cost (data, next, cand, true,
5042 : NULL, &can_autoinc, &inv_expr);
5043 2030595 : if (inv_expr)
5044 : {
5045 287281 : if (!inv_exprs)
5046 82 : inv_exprs = BITMAP_ALLOC (NULL);
5047 :
5048 : /* Uses in a group can share setup code,
5049 : so only add setup cost once. */
5050 287281 : if (bitmap_bit_p (inv_exprs, inv_expr->id))
5051 286906 : cost -= cost.scratch;
5052 : else
5053 375 : bitmap_set_bit (inv_exprs, inv_expr->id);
5054 : }
5055 2030595 : sum_cost += cost;
5056 : }
5057 6283618 : set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5058 : NULL_TREE, ERROR_MARK, inv_exprs);
5059 :
5060 6283618 : return !sum_cost.infinite_cost_p ();
5061 : }
5062 :
5063 : /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5064 : and stores it to VAL. */
5065 :
5066 : static void
5067 3796615 : cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5068 : class tree_niter_desc *desc, aff_tree *val)
5069 : {
5070 11389845 : aff_tree step, delta, nit;
5071 3796615 : struct iv *iv = cand->iv;
5072 3796615 : tree type = TREE_TYPE (iv->base);
5073 3796615 : tree niter = desc->niter;
5074 3796615 : bool after_adjust = stmt_after_increment (loop, cand, at);
5075 3796615 : tree steptype;
5076 :
5077 3796615 : if (POINTER_TYPE_P (type))
5078 107755 : steptype = sizetype;
5079 : else
5080 3688860 : steptype = unsigned_type_for (type);
5081 :
5082 : /* If AFTER_ADJUST is required, the code below generates the equivalent
5083 : of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5084 : BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5085 : SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5086 : doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5087 : class for common idioms that we know are safe. */
5088 3796615 : if (after_adjust
5089 3603341 : && desc->control.no_overflow
5090 3595707 : && integer_onep (desc->control.step)
5091 954826 : && (desc->cmp == LT_EXPR
5092 39689 : || desc->cmp == NE_EXPR)
5093 4751441 : && TREE_CODE (desc->bound) == SSA_NAME)
5094 : {
5095 507099 : if (integer_onep (desc->control.base))
5096 : {
5097 373388 : niter = desc->bound;
5098 373388 : after_adjust = false;
5099 : }
5100 133711 : else if (TREE_CODE (niter) == MINUS_EXPR
5101 133711 : && integer_onep (TREE_OPERAND (niter, 1)))
5102 : {
5103 72368 : niter = TREE_OPERAND (niter, 0);
5104 72368 : after_adjust = false;
5105 : }
5106 : }
5107 :
5108 3796615 : tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5109 3796615 : aff_combination_convert (&step, steptype);
5110 3796615 : tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5111 3796615 : aff_combination_convert (&nit, steptype);
5112 3796615 : aff_combination_mult (&nit, &step, &delta);
5113 3796615 : if (after_adjust)
5114 3157585 : aff_combination_add (&delta, &step);
5115 :
5116 3796615 : tree_to_aff_combination (iv->base, type, val);
5117 3796615 : if (!POINTER_TYPE_P (type))
5118 3688860 : aff_combination_convert (val, steptype);
5119 3796615 : aff_combination_add (val, &delta);
5120 3796615 : }
5121 :
5122 : /* Returns period of induction variable iv. */
5123 :
5124 : static tree
5125 4068122 : iv_period (struct iv *iv)
5126 : {
5127 4068122 : tree step = iv->step, period, type;
5128 4068122 : tree pow2div;
5129 :
5130 4068122 : gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5131 :
5132 4068122 : type = unsigned_type_for (TREE_TYPE (step));
5133 : /* Period of the iv is lcm (step, type_range)/step -1,
5134 : i.e., N*type_range/step - 1. Since type range is power
5135 : of two, N == (step >> num_of_ending_zeros_binary (step),
5136 : so the final result is
5137 :
5138 : (type_range >> num_of_ending_zeros_binary (step)) - 1
5139 :
5140 : */
5141 4068122 : pow2div = num_ending_zeros (step);
5142 :
5143 12204366 : period = build_low_bits_mask (type,
5144 4068122 : (TYPE_PRECISION (type)
5145 4068122 : - tree_to_uhwi (pow2div)));
5146 :
5147 4068122 : return period;
5148 : }
5149 :
5150 : /* Returns the comparison operator used when eliminating the iv USE. */
5151 :
5152 : static enum tree_code
5153 3796615 : iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5154 : {
5155 3796615 : class loop *loop = data->current_loop;
5156 3796615 : basic_block ex_bb;
5157 3796615 : edge exit;
5158 :
5159 3796615 : ex_bb = gimple_bb (use->stmt);
5160 3796615 : exit = EDGE_SUCC (ex_bb, 0);
5161 3796615 : if (flow_bb_inside_loop_p (loop, exit->dest))
5162 2853612 : exit = EDGE_SUCC (ex_bb, 1);
5163 :
5164 3796615 : return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5165 : }
5166 :
5167 : /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5168 : we only detect the situation that BASE = SOMETHING + OFFSET, where the
5169 : calculation is performed in non-wrapping type.
5170 :
5171 : TODO: More generally, we could test for the situation that
5172 : BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5173 : This would require knowing the sign of OFFSET. */
5174 :
5175 : static bool
5176 477 : difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5177 : {
5178 477 : enum tree_code code;
5179 477 : tree e1, e2;
5180 1431 : aff_tree aff_e1, aff_e2, aff_offset;
5181 :
5182 477 : if (!nowrap_type_p (TREE_TYPE (base)))
5183 : return false;
5184 :
5185 477 : base = expand_simple_operations (base);
5186 :
5187 477 : if (TREE_CODE (base) == SSA_NAME)
5188 : {
5189 476 : gimple *stmt = SSA_NAME_DEF_STMT (base);
5190 :
5191 476 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
5192 : return false;
5193 :
5194 18 : code = gimple_assign_rhs_code (stmt);
5195 18 : if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5196 : return false;
5197 :
5198 5 : e1 = gimple_assign_rhs1 (stmt);
5199 5 : e2 = gimple_assign_rhs2 (stmt);
5200 : }
5201 : else
5202 : {
5203 1 : code = TREE_CODE (base);
5204 1 : if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5205 : return false;
5206 0 : e1 = TREE_OPERAND (base, 0);
5207 0 : e2 = TREE_OPERAND (base, 1);
5208 : }
5209 :
5210 : /* Use affine expansion as deeper inspection to prove the equality. */
5211 5 : tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5212 : &aff_e2, &data->name_expansion_cache);
5213 5 : tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5214 : &aff_offset, &data->name_expansion_cache);
5215 5 : aff_combination_scale (&aff_offset, -1);
5216 5 : switch (code)
5217 : {
5218 3 : case PLUS_EXPR:
5219 3 : aff_combination_add (&aff_e2, &aff_offset);
5220 3 : if (aff_combination_zero_p (&aff_e2))
5221 : return true;
5222 :
5223 1 : tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5224 : &aff_e1, &data->name_expansion_cache);
5225 1 : aff_combination_add (&aff_e1, &aff_offset);
5226 1 : return aff_combination_zero_p (&aff_e1);
5227 :
5228 2 : case POINTER_PLUS_EXPR:
5229 2 : aff_combination_add (&aff_e2, &aff_offset);
5230 2 : return aff_combination_zero_p (&aff_e2);
5231 :
5232 : default:
5233 : return false;
5234 : }
5235 477 : }
5236 :
5237 : /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5238 : comparison with CAND. NITER describes the number of iterations of
5239 : the loops. If successful, the comparison in COMP_P is altered accordingly.
5240 :
5241 : We aim to handle the following situation:
5242 :
5243 : sometype *base, *p;
5244 : int a, b, i;
5245 :
5246 : i = a;
5247 : p = p_0 = base + a;
5248 :
5249 : do
5250 : {
5251 : bla (*p);
5252 : p++;
5253 : i++;
5254 : }
5255 : while (i < b);
5256 :
5257 : Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5258 : We aim to optimize this to
5259 :
5260 : p = p_0 = base + a;
5261 : do
5262 : {
5263 : bla (*p);
5264 : p++;
5265 : }
5266 : while (p < p_0 - a + b);
5267 :
5268 : This preserves the correctness, since the pointer arithmetics does not
5269 : overflow. More precisely:
5270 :
5271 : 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5272 : overflow in computing it or the values of p.
5273 : 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5274 : overflow. To prove this, we use the fact that p_0 = base + a. */
5275 :
5276 : static bool
5277 203856 : iv_elimination_compare_lt (struct ivopts_data *data,
5278 : struct iv_cand *cand, enum tree_code *comp_p,
5279 : class tree_niter_desc *niter)
5280 : {
5281 203856 : tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5282 611568 : class aff_tree nit, tmpa, tmpb;
5283 203856 : enum tree_code comp;
5284 203856 : HOST_WIDE_INT step;
5285 :
5286 : /* We need to know that the candidate induction variable does not overflow.
5287 : While more complex analysis may be used to prove this, for now just
5288 : check that the variable appears in the original program and that it
5289 : is computed in a type that guarantees no overflows. */
5290 203856 : cand_type = TREE_TYPE (cand->iv->base);
5291 203856 : if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5292 182139 : return false;
5293 :
5294 : /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5295 : the calculation of the BOUND could overflow, making the comparison
5296 : invalid. */
5297 21717 : if (!data->loop_single_exit_p)
5298 : return false;
5299 :
5300 : /* We need to be able to decide whether candidate is increasing or decreasing
5301 : in order to choose the right comparison operator. */
5302 15169 : if (!cst_and_fits_in_hwi (cand->iv->step))
5303 : return false;
5304 15169 : step = int_cst_value (cand->iv->step);
5305 :
5306 : /* Check that the number of iterations matches the expected pattern:
5307 : a + 1 > b ? 0 : b - a - 1. */
5308 15169 : mbz = niter->may_be_zero;
5309 15169 : if (TREE_CODE (mbz) == GT_EXPR)
5310 : {
5311 : /* Handle a + 1 > b. */
5312 1715 : tree op0 = TREE_OPERAND (mbz, 0);
5313 1715 : if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5314 : {
5315 794 : a = TREE_OPERAND (op0, 0);
5316 794 : b = TREE_OPERAND (mbz, 1);
5317 : }
5318 : else
5319 921 : return false;
5320 : }
5321 13454 : else if (TREE_CODE (mbz) == LT_EXPR)
5322 : {
5323 4607 : tree op1 = TREE_OPERAND (mbz, 1);
5324 :
5325 : /* Handle b < a + 1. */
5326 4607 : if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5327 : {
5328 82 : a = TREE_OPERAND (op1, 0);
5329 82 : b = TREE_OPERAND (mbz, 0);
5330 : }
5331 : else
5332 4525 : return false;
5333 : }
5334 : else
5335 : return false;
5336 :
5337 : /* Expected number of iterations is B - A - 1. Check that it matches
5338 : the actual number, i.e., that B - A - NITER = 1. */
5339 876 : tree_to_aff_combination (niter->niter, nit_type, &nit);
5340 876 : tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5341 876 : tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5342 876 : aff_combination_scale (&nit, -1);
5343 876 : aff_combination_scale (&tmpa, -1);
5344 876 : aff_combination_add (&tmpb, &tmpa);
5345 876 : aff_combination_add (&tmpb, &nit);
5346 876 : if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5347 399 : return false;
5348 :
5349 : /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5350 : overflow. */
5351 477 : offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5352 : cand->iv->step,
5353 : fold_convert (TREE_TYPE (cand->iv->step), a));
5354 477 : if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5355 : return false;
5356 :
5357 : /* Determine the new comparison operator. */
5358 4 : comp = step < 0 ? GT_EXPR : LT_EXPR;
5359 4 : if (*comp_p == NE_EXPR)
5360 4 : *comp_p = comp;
5361 0 : else if (*comp_p == EQ_EXPR)
5362 0 : *comp_p = invert_tree_comparison (comp, false);
5363 : else
5364 0 : gcc_unreachable ();
5365 :
5366 : return true;
5367 203856 : }
5368 :
5369 : /* Check whether it is possible to express the condition in USE by comparison
5370 : of candidate CAND. If so, store the value to compare against to BOUND, the
5371 : comparison operator to COMP, and return true; otherwise return false. Note that BOUND and COMP may already have been written on some of the paths that return false. */
5372 :
5373 : static bool
5374 4912675 : may_eliminate_iv (struct ivopts_data *data,
5375 : struct iv_use *use, struct iv_cand *cand, tree *bound,
5376 : enum tree_code *comp)
5377 : {
5378 4912675 : basic_block ex_bb;
5379 4912675 : edge exit;
5380 4912675 : tree period;
5381 4912675 : class loop *loop = data->current_loop;
5382 4912675 : aff_tree bnd;
5383 4912675 : class tree_niter_desc *desc = NULL;
5384 :
5385 4912675 : if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5386 : return false;
5387 :
5388 : /* For now works only for exits that dominate the loop latch.
5389 : TODO: extend to other conditions inside loop body. */
5390 4718108 : ex_bb = gimple_bb (use->stmt);
5391 4718108 : if (use->stmt != last_nondebug_stmt (ex_bb)
5392 4612203 : || gimple_code (use->stmt) != GIMPLE_COND
5393 9328146 : || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5394 234767 : return false;
5395 :
5396 4483341 : exit = EDGE_SUCC (ex_bb, 0); /* Pick the successor edge that leaves the loop; give up if neither does. */
5397 4483341 : if (flow_bb_inside_loop_p (loop, exit->dest))
5398 3385941 : exit = EDGE_SUCC (ex_bb, 1);
5399 4483341 : if (flow_bb_inside_loop_p (loop, exit->dest))
5400 : return false;
5401 :
5402 4365614 : desc = niter_for_exit (data, exit);
5403 4365614 : if (!desc)
5404 : return false;
5405 :
5406 : /* Determine whether we can use the variable to test the exit condition.
5407 : This is the case iff the period of the induction variable is greater
5408 : than the number of iterations for which the exit condition is true. */
5409 4068122 : period = iv_period (cand->iv);
5410 :
5411 : /* If the number of iterations is constant, compare against it directly. */
5412 4068122 : if (TREE_CODE (desc->niter) == INTEGER_CST)
5413 : {
5414 : /* See cand_value_at. When USE->stmt is after the increment of CAND, the number of iterations must be strictly less than the period; otherwise it may also be equal to it. */
5415 2653316 : if (stmt_after_increment (loop, cand, use->stmt))
5416 : {
5417 2597622 : if (!tree_int_cst_lt (desc->niter, period))
5418 : return false;
5419 : }
5420 : else
5421 : {
5422 55694 : if (tree_int_cst_lt (period, desc->niter))
5423 : return false;
5424 : }
5425 : }
5426 :
5427 : /* If not, compare the period against the upper bound DESC->max instead.
5428 : If that fails and this is the only possible exit of the loop, retry with
5429 : the conservative estimate on the number of iterations of the entire loop. */
5430 : else
5431 : {
5432 1414806 : widest_int period_value, max_niter;
5433 :
5434 1414806 : max_niter = desc->max;
5435 1414806 : if (stmt_after_increment (loop, cand, use->stmt))
5436 1207739 : max_niter += 1;
5437 1414806 : period_value = wi::to_widest (period);
5438 1414806 : if (wi::gtu_p (max_niter, period_value))
5439 : {
5440 : /* See if we can take advantage of inferred loop bound
5441 : information. */
5442 475572 : if (data->loop_single_exit_p)
5443 : {
5444 282468 : if (!max_loop_iterations (loop, &max_niter))
5445 : return false;
5446 : /* The loop bound is already adjusted by adding 1. */
5447 282468 : if (wi::gtu_p (max_niter, period_value))
5448 : return false;
5449 : }
5450 : else
5451 : return false;
5452 : }
5453 1414806 : }
5454 :
5455 : /* For a doloop IV candidate, the bound is zero. This is safe whether
5456 : may_be_zero is set or not. */
5457 3796615 : if (cand->doloop_p)
5458 : {
5459 0 : *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5460 0 : *comp = iv_elimination_compare (data, use);
5461 0 : return true;
5462 : }
5463 :
5464 3796615 : cand_value_at (loop, cand, use->stmt, desc, &bnd);
5465 :
5466 3796615 : *bound = fold_convert (TREE_TYPE (cand->iv->base),
5467 : aff_combination_to_tree (&bnd));
5468 3796615 : *comp = iv_elimination_compare (data, use);
5469 :
5470 : /* It is unlikely that computing the number of iterations using division
5471 : would be more profitable than keeping the original induction variable. */
5472 3796615 : bool cond_overflow_p;
5473 3796615 : if (expression_expensive_p (*bound, &cond_overflow_p))
5474 : return false;
5475 :
5476 : /* Sometimes, it is possible to handle the situation where the number of
5477 : iterations may be zero (unless additional assumptions hold) by using <
5478 : instead of != in the exit condition.
5479 :
5480 : TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5481 : base the exit condition on it. However, that is often too
5482 : expensive. */
5483 3787740 : if (!integer_zerop (desc->may_be_zero))
5484 203856 : return iv_elimination_compare_lt (data, cand, comp, desc);
5485 :
5486 : return true;
5487 4912675 : }
5488 :
5489 : /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5490 : be copied, if it is used in the loop body and DATA->body_includes_call. Returns COSTS_N_INSNS (1) when BOUND, after STRIP_NOPS, is the default definition SSA name of a PARM_DECL and DATA->body_includes_call is set, and 0 otherwise. */
5491 :
5492 : static int
5493 8347916 : parm_decl_cost (struct ivopts_data *data, tree bound)
5494 : {
5495 8347916 : tree sbound = bound;
5496 8347916 : STRIP_NOPS (sbound);
5497 :
5498 8347916 : if (TREE_CODE (sbound) == SSA_NAME
5499 2912903 : && SSA_NAME_IS_DEFAULT_DEF (sbound)
5500 153001 : && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5501 8498877 : && data->body_includes_call)
5502 37680 : return COSTS_N_INSNS (1);
5503 :
5504 : return 0;
5505 : }
5506 :
5507 : /* Determines cost of computing the use in GROUP with CAND in a condition. Two alternatives are costed: eliminating the original iv by comparing CAND against a new bound, and expressing the original compared value from CAND. The cheaper one (elimination on a tie) is recorded with set_group_iv_cost. Returns false if the recorded cost is infinite. */
5508 :
5509 : static bool
5510 5926757 : determine_group_iv_cost_cond (struct ivopts_data *data,
5511 : struct iv_group *group, struct iv_cand *cand)
5512 : {
5513 5926757 : tree bound = NULL_TREE;
5514 5926757 : struct iv *cmp_iv;
5515 5926757 : bitmap inv_exprs = NULL;
5516 5926757 : bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5517 5926757 : comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5518 5926757 : enum comp_iv_rewrite rewrite_type;
5519 5926757 : iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5520 5926757 : tree *control_var, *bound_cst;
5521 5926757 : enum tree_code comp = ERROR_MARK;
5522 5926757 : struct iv_use *use = group->vuses[0];
5523 :
5524 : /* Extract condition operands. */
5525 5926757 : rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5526 : &bound_cst, NULL, &cmp_iv);
5527 5926757 : gcc_assert (rewrite_type != COMP_IV_NA);
5528 :
5529 : /* Try iv elimination. ELIM_COST stays infinite if it is not possible. */
5530 5926757 : if (rewrite_type == COMP_IV_ELIM
5531 5926757 : && may_eliminate_iv (data, use, cand, &bound, &comp))
5532 : {
5533 3583888 : elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5534 3583888 : if (elim_cost.cost == 0)
5535 2442974 : elim_cost.cost = parm_decl_cost (data, bound);
5536 1140914 : else if (TREE_CODE (bound) == INTEGER_CST)
5537 0 : elim_cost.cost = 0;
5538 : /* If we replace a loop condition 'i < n' with 'p < base + n',
5539 : inv_vars_elim will have 'base' and 'n' set, which implies that both
5540 : 'base' and 'n' will be live during the loop. More likely,
5541 : 'base + n' will be loop invariant, resulting in only one live value
5542 : during the loop. So in that case we clear inv_vars_elim and set
5543 : inv_expr_elim instead. */
5544 3583888 : if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5545 : {
5546 313118 : inv_expr_elim = get_loop_invariant_expr (data, bound);
5547 313118 : bitmap_clear (inv_vars_elim);
5548 : }
5549 : /* The bound is a loop invariant, so it will be only computed
5550 : once. */
5551 3583888 : elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5552 : }
5553 :
5554 : /* When the condition is a comparison of the candidate IV against
5555 : zero, prefer this IV.
5556 :
5557 : TODO: The constant that we're subtracting from the cost should
5558 : be target-dependent. This information should be added to the
5559 : target costs for each backend. */
5560 5926757 : if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5561 3583888 : && integer_zerop (*bound_cst)
5562 8514479 : && (operand_equal_p (*control_var, cand->var_after, 0)
5563 2339300 : || operand_equal_p (*control_var, cand->var_before, 0)))
5564 254151 : elim_cost -= 1;
5565 :
5566 5926757 : express_cost = get_computation_cost (data, use, cand, false,
5567 : &inv_vars_express, NULL,
5568 : &inv_expr_express);
5569 5926757 : if (cmp_iv != NULL)
5570 5002137 : find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5571 :
5572 : /* Count the cost of the original bound as well. */
5573 5926757 : bound_cost = force_var_cost (data, *bound_cst, NULL);
5574 5926757 : if (bound_cost.cost == 0)
5575 5904942 : bound_cost.cost = parm_decl_cost (data, *bound_cst);
5576 21815 : else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5577 0 : bound_cost.cost = 0;
5578 5926757 : express_cost += bound_cost;
5579 :
5580 : /* Choose the better approach, preferring the eliminated IV. The chosen invariant bitmap is handed over to set_group_iv_cost and its local pointer cleared, so that only the unused bitmap is freed below. */
5581 5926757 : if (elim_cost <= express_cost)
5582 : {
5583 4504096 : cost = elim_cost;
5584 4504096 : inv_vars = inv_vars_elim;
5585 4504096 : inv_vars_elim = NULL;
5586 4504096 : inv_expr = inv_expr_elim;
5587 : /* For doloop candidate/use pair, adjust to zero cost. */
5588 4504096 : if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5589 0 : cost = no_cost;
5590 : }
5591 : else
5592 : {
5593 1422661 : cost = express_cost;
5594 1422661 : inv_vars = inv_vars_express;
5595 1422661 : inv_vars_express = NULL;
5596 1422661 : bound = NULL_TREE;
5597 1422661 : comp = ERROR_MARK;
5598 1422661 : inv_expr = inv_expr_express;
5599 : }
5600 :
5601 5926757 : if (inv_expr)
5602 : {
5603 604808 : inv_exprs = BITMAP_ALLOC (NULL);
5604 604808 : bitmap_set_bit (inv_exprs, inv_expr->id);
5605 : }
5606 5926757 : set_group_iv_cost (data, group, cand, cost,
5607 : inv_vars, bound, comp, inv_exprs);
5608 :
5609 5926757 : if (inv_vars_elim)
5610 24547 : BITMAP_FREE (inv_vars_elim);
5611 5926757 : if (inv_vars_express)
5612 1255823 : BITMAP_FREE (inv_vars_express);
5613 :
5614 5926757 : return !cost.infinite_cost_p ();
5615 : }
5616 :
5617 : /* Determines cost of computing uses in GROUP with CAND by dispatching on
5618 : GROUP->type. Returns false if the uses in GROUP cannot be represented with CAND. */
5619 :
5620 : static bool
5621 17761325 : determine_group_iv_cost (struct ivopts_data *data,
5622 : struct iv_group *group, struct iv_cand *cand)
5623 : {
5624 17761325 : switch (group->type)
5625 : {
5626 5550950 : case USE_NONLINEAR_EXPR:
5627 5550950 : return determine_group_iv_cost_generic (data, group, cand);
5628 :
5629 6283618 : case USE_REF_ADDRESS:
5630 6283618 : case USE_PTR_ADDRESS:
5631 6283618 : return determine_group_iv_cost_address (data, group, cand);
5632 :
5633 5926757 : case USE_COMPARE:
5634 5926757 : return determine_group_iv_cost_cond (data, group, cand);
5635 :
5636 0 : default:
5637 0 : gcc_unreachable ();
5638 : }
5639 : }
5640 :
5641 : /* Return true if get_computation_cost indicates that autoincrement is
5642 : a possibility for the pair of USE and CAND, false otherwise. Uses that are not of an address type are rejected without querying the cost. */
5643 :
5644 : static bool
5645 1281413 : autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5646 : struct iv_cand *cand)
5647 : {
5648 1281413 : if (!address_p (use->type))
5649 : return false;
5650 :
5651 417267 : bool can_autoinc = false;
5652 417267 : get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5653 417267 : return can_autoinc;
5654 : }
5655 :
5656 : /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5657 : use that allows autoincrement, and set their AINC_USE if possible. Only the first use of each group is considered, and only if it is in the same basic block as the increment; the closest use before the increment is tried first, then the closest use after it. */
5658 :
5659 : static void
5660 503212 : set_autoinc_for_original_candidates (struct ivopts_data *data)
5661 : {
5662 503212 : unsigned i, j;
5663 :
5664 5123136 : for (i = 0; i < data->vcands.length (); i++)
5665 : {
5666 4619924 : struct iv_cand *cand = data->vcands[i];
5667 4619924 : struct iv_use *closest_before = NULL;
5668 4619924 : struct iv_use *closest_after = NULL;
5669 4619924 : if (cand->pos != IP_ORIGINAL)
5670 3752090 : continue;
5671 :
5672 3825792 : for (j = 0; j < data->vgroups.length (); j++)
5673 : {
5674 2957958 : struct iv_group *group = data->vgroups[j];
5675 2957958 : struct iv_use *use = group->vuses[0];
5676 2957958 : unsigned uid = gimple_uid (use->stmt);
5677 :
5678 2957958 : if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5679 1178579 : continue;
5680 :
5681 1779379 : if (uid < gimple_uid (cand->incremented_at)
5682 1779379 : && (closest_before == NULL
5683 373331 : || uid > gimple_uid (closest_before->stmt)))
5684 : closest_before = use;
5685 :
5686 1779379 : if (uid > gimple_uid (cand->incremented_at)
5687 1779379 : && (closest_after == NULL
5688 67204 : || uid < gimple_uid (closest_after->stmt)))
5689 : closest_after = use;
5690 : }
5691 :
5692 867834 : if (closest_before != NULL
5693 867834 : && autoinc_possible_for_pair (data, closest_before, cand))
5694 0 : cand->ainc_use = closest_before;
5695 867834 : else if (closest_after != NULL
5696 867834 : && autoinc_possible_for_pair (data, closest_after, cand))
5697 0 : cand->ainc_use = closest_after;
5698 : }
5699 503212 : }
5700 :
5701 : /* Relate compare uses with all candidates: for every group in DATA of type USE_COMPARE, set the bits of its related_cands bitmap for all DATA->vcands.length () candidates. */
5702 :
5703 : static void
5704 299 : relate_compare_use_with_all_cands (struct ivopts_data *data)
5705 : {
5706 299 : unsigned i, count = data->vcands.length ();
5707 9999 : for (i = 0; i < data->vgroups.length (); i++)
5708 : {
5709 9700 : struct iv_group *group = data->vgroups[i];
5710 :
5711 9700 : if (group->type == USE_COMPARE)
5712 2185 : bitmap_set_range (group->related_cands, 0, count);
5713 : }
5714 299 : }
5715 :
5716 : /* If PREFERRED_MODE is suitable and profitable, use the unsigned type for
5717 : PREFERRED_MODE to compute the doloop iv base from NITER: base = niter + 1. Otherwise (including when no type exists for PREFERRED_MODE) the base is computed in the type of NITER. ITERATIONS_MAX is compared against the maximal unsigned values of both precisions to decide which form is used. */
5718 :
5719 : static tree
5720 0 : compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5721 : const widest_int &iterations_max)
5722 : {
5723 0 : tree ntype = TREE_TYPE (niter);
5724 0 : tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5725 0 : if (!pref_type)
5726 0 : return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5727 : build_int_cst (ntype, 1));
5728 :
5729 0 : gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5730 :
5731 0 : int prec = TYPE_PRECISION (ntype);
5732 0 : int pref_prec = TYPE_PRECISION (pref_type);
5733 :
5734 0 : tree base;
5735 :
5736 : /* Check whether PREFERRED_MODE is able to represent niter. */
5737 0 : if (pref_prec > prec
5738 0 : || wi::ltu_p (iterations_max,
5739 0 : widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5740 : UNSIGNED)))
5741 : {
5742 : /* No wrap, it is safe to use preferred type after niter + 1. */
5743 0 : if (wi::ltu_p (iterations_max,
5744 0 : widest_int::from (wi::max_value (prec, UNSIGNED),
5745 : UNSIGNED)))
5746 : {
5747 : /* This could help to optimize "-1 +1" pair when niter looks
5748 : like "n-1": n is in original mode. "base = (n - 1) + 1"
5749 : in PREFERRED_MODE: it could be base = (PREFERRED_TYPE)n. */
5750 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5751 : build_int_cst (ntype, 1));
5752 0 : base = fold_convert (pref_type, base);
5753 : }
5754 :
5755 : /* To avoid wrap, convert niter to preferred type before plus 1. */
5756 : else
5757 : {
5758 0 : niter = fold_convert (pref_type, niter);
5759 0 : base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5760 : build_int_cst (pref_type, 1));
5761 : }
5762 : }
5763 : else
5764 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5765 : build_int_cst (ntype, 1));
5766 : return base;
5767 : }
5768 :
5769 : /* Add one doloop dedicated IV candidate:
5770 : - Base is (may_be_zero ? 1 : (niter + 1)).
5771 : - Step is -1. No candidate is added when may_be_zero is non-null, not integer zero and not a comparison. */
5772 :
5773 : static void
5774 0 : add_iv_candidate_for_doloop (struct ivopts_data *data)
5775 : {
5776 0 : tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5777 0 : gcc_assert (niter_desc && niter_desc->assumptions);
5778 :
5779 0 : tree niter = niter_desc->niter;
5780 0 : tree ntype = TREE_TYPE (niter);
5781 0 : gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5782 :
5783 0 : tree may_be_zero = niter_desc->may_be_zero;
5784 0 : if (may_be_zero && integer_zerop (may_be_zero))
5785 : may_be_zero = NULL_TREE;
5786 0 : if (may_be_zero)
5787 : {
5788 0 : if (COMPARISON_CLASS_P (may_be_zero))
5789 : {
5790 0 : niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5791 : build_int_cst (ntype, 0),
5792 : rewrite_to_non_trapping_overflow (niter));
5793 : }
5794 : /* Don't try to obtain the iteration count expression when may_be_zero is
5795 : integer_nonzerop (actually iteration count is one) or anything else that is not a comparison. */
5796 : else
5797 : return;
5798 : }
5799 :
5800 0 : machine_mode mode = TYPE_MODE (ntype);
5801 0 : machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5802 :
5803 0 : tree base;
5804 0 : if (mode != pref_mode)
5805 : {
5806 0 : base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5807 0 : ntype = TREE_TYPE (base);
5808 : }
5809 : else
5810 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5811 : build_int_cst (ntype, 1));
5812 :
5813 :
5814 0 : add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5815 : }
5816 :
5817 : /* Finds the candidates for the induction variables of the loop described by DATA, and dumps the important candidates and the per-group related candidates when detailed dumping is enabled. */
5818 :
5819 : static void
5820 503212 : find_iv_candidates (struct ivopts_data *data)
5821 : {
5822 : /* Add commonly used ivs. */
5823 503212 : add_standard_iv_candidates (data);
5824 :
5825 : /* Add doloop dedicated ivs. */
5826 503212 : if (data->doloop_use_p)
5827 0 : add_iv_candidate_for_doloop (data);
5828 :
5829 : /* Add old induction variables. */
5830 503212 : add_iv_candidate_for_bivs (data);
5831 :
5832 : /* Add induction variables derived from uses. */
5833 503212 : add_iv_candidate_for_groups (data);
5834 :
5835 503212 : set_autoinc_for_original_candidates (data);
5836 :
5837 : /* Record the important candidates. */
5838 503212 : record_important_candidates (data);
5839 :
5840 : /* Relate compare iv_use with all candidates. */
5841 503212 : if (!data->consider_all_candidates)
5842 299 : relate_compare_use_with_all_cands (data);
5843 :
5844 503212 : if (dump_file && (dump_flags & TDF_DETAILS))
5845 : {
5846 67 : unsigned i;
5847 :
5848 67 : fprintf (dump_file, "\n<Important Candidates>:\t");
5849 820 : for (i = 0; i < data->vcands.length (); i++)
5850 686 : if (data->vcands[i]->important)
5851 492 : fprintf (dump_file, " %d,", data->vcands[i]->id);
5852 67 : fprintf (dump_file, "\n");
5853 :
5854 67 : fprintf (dump_file, "\n<Group, Cand> Related:\n");
5855 287 : for (i = 0; i < data->vgroups.length (); i++)
5856 : {
5857 220 : struct iv_group *group = data->vgroups[i];
5858 :
5859 220 : if (group->related_cands)
5860 : {
5861 220 : fprintf (dump_file, " Group %d:\t", group->id);
5862 220 : dump_bitmap (dump_file, group->related_cands);
5863 : }
5864 : }
5865 67 : fprintf (dump_file, "\n");
5866 : }
5867 503212 : }
5868 :
5869 : /* Determines costs of computing each use group with the iv candidates: all candidates if DATA->consider_all_candidates, otherwise only the group's related candidates, dropping from that set those whose cost is infinite. Dumps invariants and the resulting cost table when detailed dumping is enabled. */
5870 :
5871 : static void
5872 503212 : determine_group_iv_costs (struct ivopts_data *data)
5873 : {
5874 503212 : unsigned i, j;
5875 503212 : struct iv_cand *cand;
5876 503212 : struct iv_group *group;
5877 503212 : bitmap to_clear = BITMAP_ALLOC (NULL);
5878 :
5879 503212 : alloc_use_cost_map (data);
5880 :
5881 2152246 : for (i = 0; i < data->vgroups.length (); i++)
5882 : {
5883 1649034 : group = data->vgroups[i];
5884 :
5885 1649034 : if (data->consider_all_candidates)
5886 : {
5887 19055931 : for (j = 0; j < data->vcands.length (); j++)
5888 : {
5889 17406897 : cand = data->vcands[j];
5890 17406897 : determine_group_iv_cost (data, group, cand);
5891 : }
5892 : }
5893 : else
5894 : {
5895 9700 : bitmap_iterator bi;
5896 :
5897 364128 : EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5898 : {
5899 354428 : cand = data->vcands[j];
5900 354428 : if (!determine_group_iv_cost (data, group, cand))
5901 210913 : bitmap_set_bit (to_clear, j);
5902 : }
5903 :
5904 : /* Remove the candidates for which the cost is infinite from
5905 : the list of related candidates. */
5906 9700 : bitmap_and_compl_into (group->related_cands, to_clear);
5907 9700 : bitmap_clear (to_clear);
5908 : }
5909 : }
5910 :
5911 503212 : BITMAP_FREE (to_clear);
5912 :
5913 503212 : if (dump_file && (dump_flags & TDF_DETAILS))
5914 : {
5915 67 : bitmap_iterator bi;
5916 :
5917 : /* Dump invariant variables. */
5918 67 : fprintf (dump_file, "\n<Invariant Vars>:\n");
5919 1041 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5920 : {
5921 974 : struct version_info *info = ver_info (data, i);
5922 974 : if (info->inv_id)
5923 : {
5924 222 : fprintf (dump_file, "Inv %d:\t", info->inv_id);
5925 222 : print_generic_expr (dump_file, info->name, TDF_SLIM);
5926 222 : fprintf (dump_file, "%s\n",
5927 222 : info->has_nonlin_use ? "" : "\t(eliminable)");
5928 : }
5929 : }
5930 :
5931 : /* Dump invariant expressions, sorted with sort_iv_inv_expr_ent. */
5932 67 : fprintf (dump_file, "\n<Invariant Expressions>:\n");
5933 67 : auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5934 :
5935 439 : for (hash_table<iv_inv_expr_hasher>::iterator it
5936 506 : = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5937 372 : ++it)
5938 372 : list.safe_push (*it);
5939 :
5940 67 : list.qsort (sort_iv_inv_expr_ent);
5941 :
5942 439 : for (i = 0; i < list.length (); ++i)
5943 : {
5944 372 : fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5945 372 : print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5946 372 : fprintf (dump_file, "\n");
5947 : }
5948 :
5949 67 : fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5950 :
5951 287 : for (i = 0; i < data->vgroups.length (); i++)
5952 : {
5953 220 : group = data->vgroups[i];
5954 :
5955 220 : fprintf (dump_file, "Group %d:\n", i);
5956 220 : fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5957 2982 : for (j = 0; j < group->n_map_members; j++)
5958 : {
5959 3856 : if (!group->cost_map[j].cand
5960 2762 : || group->cost_map[j].cost.infinite_cost_p ())
5961 1094 : continue;
5962 :
5963 1668 : fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5964 1668 : group->cost_map[j].cand->id,
5965 : group->cost_map[j].cost.cost,
5966 1668 : group->cost_map[j].cost.complexity);
5967 1668 : if (!group->cost_map[j].inv_exprs
5968 1668 : || bitmap_empty_p (group->cost_map[j].inv_exprs))
5969 1168 : fprintf (dump_file, "NIL;\t");
5970 : else
5971 500 : bitmap_print (dump_file,
5972 : group->cost_map[j].inv_exprs, "", ";\t");
5973 1668 : if (!group->cost_map[j].inv_vars
5974 1668 : || bitmap_empty_p (group->cost_map[j].inv_vars))
5975 1347 : fprintf (dump_file, "NIL;\n");
5976 : else
5977 321 : bitmap_print (dump_file,
5978 : group->cost_map[j].inv_vars, "", "\n");
5979 : }
5980 :
5981 220 : fprintf (dump_file, "\n");
5982 : }
5983 67 : fprintf (dump_file, "\n");
5984 67 : }
5985 503212 : }
5986 :
5987 : /* Determines cost of the candidate CAND and stores it in CAND->cost; the cost of the increment alone is stored in CAND->cost_step. */
5988 :
5989 : static void
5990 4619924 : determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5991 : {
5992 4619924 : comp_cost cost_base;
5993 4619924 : int64_t cost, cost_step;
5994 4619924 : tree base;
5995 :
5996 4619924 : gcc_assert (cand->iv != NULL);
5997 :
5998 : /* There are two costs associated with the candidate -- its increment
5999 : and its initialization. The second is almost negligible for any loop
6000 : that rolls enough, so we take it just very little into account. */
6001 :
6002 4619924 : base = cand->iv->base;
6003 4619924 : cost_base = force_var_cost (data, base, NULL);
6004 : /* It will be exceptional that the iv register happens to be initialized with
6005 : the proper value at no cost. In general, there will at least be a regcopy
6006 : or a const set. */
6007 4619924 : if (cost_base.cost == 0)
6008 3661977 : cost_base.cost = COSTS_N_INSNS (1);
6009 : /* Doloop decrement should be considered as zero cost. */
6010 4619924 : if (cand->doloop_p)
6011 : cost_step = 0;
6012 : else
6013 4619924 : cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6014 4619924 : cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6015 :
6016 : /* Prefer the original ivs unless we may gain something by replacing them.
6017 : The reason is to make debugging simpler; so this is not relevant for
6018 : artificial ivs created by other optimization passes. */
6019 4619924 : if ((cand->pos != IP_ORIGINAL
6020 867834 : || !SSA_NAME_VAR (cand->var_before)
6021 436642 : || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6022 : /* Prefer doloop as well. */
6023 5141236 : && !cand->doloop_p)
6024 4273402 : cost++;
6025 :
6026 : /* Prefer not to insert statements into latch unless there are some
6027 : already (so that we do not create unnecessary jumps). */
6028 4619924 : if (cand->pos == IP_END
6029 4619924 : && empty_block_p (ip_end_pos (data->current_loop)))
6030 1931 : cost++;
6031 :
6032 4619924 : cand->cost = cost;
6033 4619924 : cand->cost_step = cost_step;
6034 4619924 : }
6035 :
6036 : /* Determines costs of computation of all the candidates in DATA, dumping one line per candidate when detailed dumping is enabled. */
6037 :
6038 : static void
6039 503212 : determine_iv_costs (struct ivopts_data *data)
6040 : {
6041 503212 : unsigned i;
6042 :
6043 503212 : if (dump_file && (dump_flags & TDF_DETAILS))
6044 : {
6045 67 : fprintf (dump_file, "<Candidate Costs>:\n");
6046 67 : fprintf (dump_file, " cand\tcost\n");
6047 : }
6048 :
6049 5123136 : for (i = 0; i < data->vcands.length (); i++)
6050 : {
6051 4619924 : struct iv_cand *cand = data->vcands[i];
6052 :
6053 4619924 : determine_iv_cost (data, cand);
6054 :
6055 4619924 : if (dump_file && (dump_flags & TDF_DETAILS))
6056 686 : fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6057 : }
6058 :
6059 503212 : if (dump_file && (dump_flags & TDF_DETAILS))
6060 67 : fprintf (dump_file, "\n");
6061 503212 : }
6062 :
6063 : /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6064 : induction variables. Note N_INVS includes both invariant variables and
6065 : invariant expressions. The returned value is a cost, to which N_CANDS is always added. */
6066 :
6067 : static unsigned
6068 419960421 : ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6069 : unsigned n_cands)
6070 : {
6071 419960421 : unsigned cost;
6072 419960421 : unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6073 419960421 : unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6074 419960421 : bool speed = data->speed;
6075 :
6076 : /* If there is a call in the loop body, the call-clobbered registers
6077 : are not available for loop invariants. */
6078 419960421 : if (data->body_includes_call)
6079 93151750 : available_regs = available_regs - target_clobbered_regs;
6080 :
6081 : /* If we have enough registers. */
6082 419960421 : if (regs_needed + target_res_regs < available_regs)
6083 : cost = n_new;
6084 : /* If close to running out of registers, try to preserve them. */
6085 184340058 : else if (regs_needed <= available_regs)
6086 51639885 : cost = target_reg_cost [speed] * regs_needed;
6087 : /* If we run out of available registers but the candidates alone still
6088 : fit, we penalize extra registers using target_spill_cost. */
6089 132700173 : else if (n_cands <= available_regs)
6090 118630146 : cost = target_reg_cost [speed] * available_regs
6091 118630146 : + target_spill_cost [speed] * (regs_needed - available_regs);
6092 : /* If the number of candidates exceeds the available registers, we penalize
6093 : extra candidate registers using target_spill_cost * 2, because it is
6094 : more expensive to spill an induction variable than an invariant. */
6095 : else
6096 14070027 : cost = target_reg_cost [speed] * available_regs
6097 14070027 : + target_spill_cost [speed] * (n_cands - available_regs) * 2
6098 14070027 : + target_spill_cost [speed] * (regs_needed - n_cands);
6099 :
6100 : /* Finally, add the number of candidates, so that we prefer eliminating
6101 : induction variables if possible. */
6102 419960421 : return cost + n_cands;
6103 : }
6104 :
6105 : /* For each size of the induction variable set determine the penalty. In this function that amounts to computing DATA->regs_used (non-iv integer or pointer header PHI results plus invariants with a nonlinear use) and dumping the resulting register pressure estimates when detailed dumping is enabled. */
6106 :
6107 : static void
6108 503212 : determine_set_costs (struct ivopts_data *data)
6109 : {
6110 503212 : unsigned j, n;
6111 503212 : gphi *phi;
6112 503212 : gphi_iterator psi;
6113 503212 : tree op;
6114 503212 : class loop *loop = data->current_loop;
6115 503212 : bitmap_iterator bi;
6116 :
6117 503212 : if (dump_file && (dump_flags & TDF_DETAILS))
6118 : {
6119 67 : fprintf (dump_file, "<Global Costs>:\n");
6120 67 : fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6121 67 : fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6122 67 : fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6123 67 : fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6124 : }
6125 :
6126 503212 : n = 0;
6127 1956450 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6128 : {
6129 1453238 : phi = psi.phi ();
6130 1453238 : op = PHI_RESULT (phi);
6131 :
6132 2906476 : if (virtual_operand_p (op))
6133 308392 : continue;
6134 :
6135 1144846 : if (get_iv (data, op))
6136 873016 : continue;
6137 :
6138 502413 : if (!POINTER_TYPE_P (TREE_TYPE (op))
6139 502260 : && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6140 101839 : continue;
6141 :
6142 169991 : n++;
6143 : }
6144 :
6145 5533982 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6146 : {
6147 5030770 : struct version_info *info = ver_info (data, j);
6148 :
6149 5030770 : if (info->inv_id && info->has_nonlin_use)
6150 509983 : n++;
6151 : }
6152 :
6153 503212 : data->regs_used = n;
6154 503212 : if (dump_file && (dump_flags & TDF_DETAILS))
6155 67 : fprintf (dump_file, " regs_used %d\n", n);
6156 :
6157 503212 : if (dump_file && (dump_flags & TDF_DETAILS))
6158 : {
6159 67 : fprintf (dump_file, " cost for size:\n");
6160 67 : fprintf (dump_file, " ivs\tcost\n");
6161 2144 : for (j = 0; j <= 2 * target_avail_regs; j++)
6162 2077 : fprintf (dump_file, " %d\t%d\n", j,
6163 : ivopts_estimate_reg_pressure (data, 0, j));
6164 67 : fprintf (dump_file, "\n");
6165 : }
6166 503212 : }
6167 :
6168 : /* Returns true if A is a cheaper cost pair than B. A NULL A is never cheaper; a non-NULL A is cheaper than a NULL B. */
6169 :
6170 : static bool
6171 82960839 : cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6172 : {
6173 82960839 : if (!a)
6174 : return false;
6175 :
6176 77657684 : if (!b)
6177 : return true;
6178 :
6179 74449387 : if (a->cost < b->cost)
6180 : return true;
6181 :
6182 55074974 : if (b->cost < a->cost)
6183 : return false;
6184 :
6185 : /* In case the costs are the same, prefer the cheaper candidate. */
6186 31575396 : if (a->cand->cost < b->cand->cost)
6187 : return true;
6188 :
6189 : return false;
6190 : }
6191 :
6192 : /* Compare whether A is a more expensive cost pair than B. Return 1, 0 and -1
6193 : for more expensive, equal and cheaper respectively, as decided by cheaper_cost_pair. */
6194 :
6195 : static int
6196 29218999 : compare_cost_pair (class cost_pair *a, class cost_pair *b)
6197 : {
6198 29218999 : if (cheaper_cost_pair (a, b))
6199 : return -1;
6200 23041662 : if (cheaper_cost_pair (b, a))
6201 14913956 : return 1;
6202 :
6203 : return 0;
6204 : }
6205 :
6206 : /* Returns the cost pair (and thus the candidate) by which GROUP is expressed in IVS, as recorded in IVS->cand_for_group. */
6207 :
6208 : static class cost_pair *
6209 280142384 : iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6210 : {
6211 280142384 : return ivs->cand_for_group[group->id];
6212 : }
6213 :
6214 : /* Computes the cost field of IVS structure as the sum of the candidate use cost, the candidate cost and the estimated register pressure for IVS->n_invs invariants and IVS->n_cands candidates. */
6215 :
6216 : static void
6217 419958096 : iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6218 : {
6219 419958096 : comp_cost cost = ivs->cand_use_cost;
6220 :
6221 419958096 : cost += ivs->cand_cost;
6222 419958096 : cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6223 419958096 : ivs->cost = cost;
6224 419958096 : }
6225 :
6226 : /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6227 : and IVS. IVS->n_invs is decreased for each invariant whose counter drops to zero. Does nothing if INVS is NULL. */
6228 :
6229 : static void
6230 575208914 : iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6231 : {
6232 575208914 : bitmap_iterator bi;
6233 575208914 : unsigned iid;
6234 :
6235 575208914 : if (!invs)
6236 456636724 : return;
6237 :
6238 118572190 : gcc_assert (n_inv_uses != NULL);
6239 204539948 : EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6240 : {
6241 85967758 : n_inv_uses[iid]--;
6242 85967758 : if (n_inv_uses[iid] == 0)
6243 63532383 : ivs->n_invs--;
6244 : }
6245 : }
6246 :
6247 : /* Set GROUP not to be expressed by any candidate in IVS. Does nothing if GROUP has no cost pair in IVS; otherwise the use counts, candidate set, invariant counts and costs of IVS are updated and IVS->bad_groups is increased. */
6248 :
6249 : static void
6250 208331956 : iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6251 : struct iv_group *group)
6252 : {
6253 208331956 : unsigned gid = group->id, cid;
6254 208331956 : class cost_pair *cp;
6255 :
6256 208331956 : cp = ivs->cand_for_group[gid];
6257 208331956 : if (!cp)
6258 : return;
6259 208331956 : cid = cp->cand->id;
6260 :
6261 208331956 : ivs->bad_groups++;
6262 208331956 : ivs->cand_for_group[gid] = NULL;
6263 208331956 : ivs->n_cand_uses[cid]--;
6264 :
6265 208331956 : if (ivs->n_cand_uses[cid] == 0) /* That was the last group using the candidate: drop the candidate itself. */
6266 : {
6267 79272501 : bitmap_clear_bit (ivs->cands, cid);
6268 79272501 : if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p) /* n_cands is left untouched for a doloop candidate when targetm.have_count_reg_decr_p is set. */
6269 79272501 : ivs->n_cands--;
6270 79272501 : ivs->cand_cost -= cp->cand->cost;
6271 79272501 : iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6272 79272501 : iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6273 : }
6274 :
6275 208331956 : ivs->cand_use_cost -= cp->cost;
6276 208331956 : iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6277 208331956 : iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6278 208331956 : iv_ca_recount_cost (data, ivs);
6279 : }
6280 :
6281 : /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6282 : IVS. */
6283 :
6284 : static void
6285 584590160 : iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6286 : {
6287 584590160 : bitmap_iterator bi;
6288 584590160 : unsigned iid;
6289 :
6290 584590160 : if (!invs)
6291 464882237 : return;
6292 :
6293 119707923 : gcc_assert (n_inv_uses != NULL);
6294 206644583 : EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6295 : {
6296 86936660 : n_inv_uses[iid]++;
6297 86936660 : if (n_inv_uses[iid] == 1)
6298 64423573 : ivs->n_invs++;
6299 : }
6300 : }
6301 :
6302 : /* Set cost pair for GROUP in set IVS to CP. */
6303 :
6304 : static void
6305 224629061 : iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6306 : struct iv_group *group, class cost_pair *cp)
6307 : {
6308 224629061 : unsigned gid = group->id, cid;
6309 :
6310 224629061 : if (ivs->cand_for_group[gid] == cp)
6311 : return;
6312 :
6313 211626140 : if (ivs->cand_for_group[gid])
6314 196068975 : iv_ca_set_no_cp (data, ivs, group);
6315 :
6316 211626140 : if (cp)
6317 : {
6318 211626140 : cid = cp->cand->id;
6319 :
6320 211626140 : ivs->bad_groups--;
6321 211626140 : ivs->cand_for_group[gid] = cp;
6322 211626140 : ivs->n_cand_uses[cid]++;
6323 211626140 : if (ivs->n_cand_uses[cid] == 1)
6324 : {
6325 80668940 : bitmap_set_bit (ivs->cands, cid);
6326 80668940 : if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6327 80668940 : ivs->n_cands++;
6328 80668940 : ivs->cand_cost += cp->cand->cost;
6329 80668940 : iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6330 80668940 : iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6331 : }
6332 :
6333 211626140 : ivs->cand_use_cost += cp->cost;
6334 211626140 : iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6335 211626140 : iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6336 211626140 : iv_ca_recount_cost (data, ivs);
6337 : }
6338 : }
6339 :
6340 : /* Extend set IVS by expressing USE by some of the candidates in it
6341 : if possible. Consider all important candidates if candidates in
6342 : set IVS don't give any result. */
6343 :
6344 : static void
6345 3295422 : iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6346 : struct iv_group *group)
6347 : {
6348 3295422 : class cost_pair *best_cp = NULL, *cp;
6349 3295422 : bitmap_iterator bi;
6350 3295422 : unsigned i;
6351 3295422 : struct iv_cand *cand;
6352 :
6353 3295422 : gcc_assert (ivs->upto >= group->id);
6354 3295422 : ivs->upto++;
6355 3295422 : ivs->bad_groups++;
6356 :
6357 6211051 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6358 : {
6359 2915629 : cand = data->vcands[i];
6360 2915629 : cp = get_group_iv_cost (data, group, cand);
6361 2915629 : if (cheaper_cost_pair (cp, best_cp))
6362 2025388 : best_cp = cp;
6363 : }
6364 :
6365 3295422 : if (best_cp == NULL)
6366 : {
6367 11787653 : EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6368 : {
6369 10444966 : cand = data->vcands[i];
6370 10444966 : cp = get_group_iv_cost (data, group, cand);
6371 10444966 : if (cheaper_cost_pair (cp, best_cp))
6372 2408017 : best_cp = cp;
6373 : }
6374 : }
6375 :
6376 3295422 : iv_ca_set_cp (data, ivs, group, best_cp);
6377 3295422 : }
6378 :
6379 : /* Get cost for assignment IVS. */
6380 :
6381 : static comp_cost
6382 82608165 : iv_ca_cost (class iv_ca *ivs)
6383 : {
6384 : /* This was a conditional expression but it triggered a bug in
6385 : Sun C 5.5. */
6386 0 : if (ivs->bad_groups)
6387 87125 : return infinite_cost;
6388 : else
6389 82521040 : return ivs->cost;
6390 : }
6391 :
6392 : /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6393 : than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6394 : respectively. */
6395 :
6396 : static int
6397 38883570 : iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6398 : struct iv_group *group, class cost_pair *old_cp,
6399 : class cost_pair *new_cp)
6400 : {
6401 38883570 : gcc_assert (old_cp && new_cp && old_cp != new_cp);
6402 38883570 : unsigned old_n_invs = ivs->n_invs;
6403 38883570 : iv_ca_set_cp (data, ivs, group, new_cp);
6404 38883570 : unsigned new_n_invs = ivs->n_invs;
6405 38883570 : iv_ca_set_cp (data, ivs, group, old_cp);
6406 :
6407 38883570 : return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6408 : }
6409 :
6410 : /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6411 : it before NEXT. */
6412 :
6413 : static struct iv_ca_delta *
6414 48630725 : iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6415 : class cost_pair *new_cp, struct iv_ca_delta *next)
6416 : {
6417 0 : struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6418 :
6419 48630725 : change->group = group;
6420 48630725 : change->old_cp = old_cp;
6421 48630725 : change->new_cp = new_cp;
6422 48630725 : change->next = next;
6423 :
6424 48630725 : return change;
6425 : }
6426 :
6427 : /* Joins two lists of changes L1 and L2. Destructive -- old lists
6428 : are rewritten. */
6429 :
6430 : static struct iv_ca_delta *
6431 8155527 : iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6432 : {
6433 8155527 : struct iv_ca_delta *last;
6434 :
6435 0 : if (!l2)
6436 : return l1;
6437 :
6438 0 : if (!l1)
6439 : return l2;
6440 :
6441 3502470 : for (last = l1; last->next; last = last->next)
6442 1131156 : continue;
6443 2371314 : last->next = l2;
6444 :
6445 2371314 : return l1;
6446 1131156 : }
6447 :
6448 : /* Reverse the list of changes DELTA, forming the inverse to it. */
6449 :
6450 : static struct iv_ca_delta *
6451 0 : iv_ca_delta_reverse (struct iv_ca_delta *delta)
6452 : {
6453 0 : struct iv_ca_delta *act, *next, *prev = NULL;
6454 :
6455 161563838 : for (act = delta; act; act = next)
6456 : {
6457 91411018 : next = act->next;
6458 91411018 : act->next = prev;
6459 91411018 : prev = act;
6460 :
6461 91411018 : std::swap (act->old_cp, act->new_cp);
6462 : }
6463 :
6464 0 : return prev;
6465 : }
6466 :
6467 : /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6468 : reverted instead. */
6469 :
6470 : static void
6471 73933747 : iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6472 : struct iv_ca_delta *delta, bool forward)
6473 : {
6474 73933747 : class cost_pair *from, *to;
6475 73933747 : struct iv_ca_delta *act;
6476 :
6477 73933747 : if (!forward)
6478 73933747 : delta = iv_ca_delta_reverse (delta);
6479 :
6480 170236909 : for (act = delta; act; act = act->next)
6481 : {
6482 96303162 : from = act->old_cp;
6483 96303162 : to = act->new_cp;
6484 96303162 : gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6485 96303162 : iv_ca_set_cp (data, ivs, act->group, to);
6486 : }
6487 :
6488 73933747 : if (!forward)
6489 73933747 : iv_ca_delta_reverse (delta);
6490 73933747 : }
6491 :
6492 : /* Returns true if CAND is used in IVS. */
6493 :
6494 : static bool
6495 29104213 : iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6496 : {
6497 29104213 : return ivs->n_cand_uses[cand->id] > 0;
6498 : }
6499 :
6500 : /* Returns number of induction variable candidates in the set IVS. */
6501 :
6502 : static unsigned
6503 12676806 : iv_ca_n_cands (class iv_ca *ivs)
6504 : {
6505 12676806 : return ivs->n_cands;
6506 : }
6507 :
6508 : /* Free the list of changes DELTA. */
6509 :
6510 : static void
6511 43606006 : iv_ca_delta_free (struct iv_ca_delta **delta)
6512 : {
6513 43606006 : struct iv_ca_delta *act, *next;
6514 :
6515 92236731 : for (act = *delta; act; act = next)
6516 : {
6517 48630725 : next = act->next;
6518 48630725 : free (act);
6519 : }
6520 :
6521 43606006 : *delta = NULL;
6522 43606006 : }
6523 :
6524 : /* Allocates new iv candidates assignment. */
6525 :
6526 : static class iv_ca *
6527 1006424 : iv_ca_new (struct ivopts_data *data)
6528 : {
6529 1006424 : class iv_ca *nw = XNEW (class iv_ca);
6530 :
6531 1006424 : nw->upto = 0;
6532 1006424 : nw->bad_groups = 0;
6533 2012848 : nw->cand_for_group = XCNEWVEC (class cost_pair *,
6534 : data->vgroups.length ());
6535 2012848 : nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6536 1006424 : nw->cands = BITMAP_ALLOC (NULL);
6537 1006424 : nw->n_cands = 0;
6538 1006424 : nw->n_invs = 0;
6539 1006424 : nw->cand_use_cost = no_cost;
6540 1006424 : nw->cand_cost = 0;
6541 1006424 : nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6542 1006424 : nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6543 1006424 : nw->cost = no_cost;
6544 :
6545 1006424 : return nw;
6546 : }
6547 :
6548 : /* Free memory occupied by the set IVS. */
6549 :
6550 : static void
6551 1006424 : iv_ca_free (class iv_ca **ivs)
6552 : {
6553 1006424 : free ((*ivs)->cand_for_group);
6554 1006424 : free ((*ivs)->n_cand_uses);
6555 1006424 : BITMAP_FREE ((*ivs)->cands);
6556 1006424 : free ((*ivs)->n_inv_var_uses);
6557 1006424 : free ((*ivs)->n_inv_expr_uses);
6558 1006424 : free (*ivs);
6559 1006424 : *ivs = NULL;
6560 1006424 : }
6561 :
6562 : /* Dumps IVS to FILE. */
6563 :
6564 : static void
6565 248 : iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6566 : {
6567 248 : unsigned i;
6568 248 : comp_cost cost = iv_ca_cost (ivs);
6569 :
6570 248 : fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6571 : cost.complexity);
6572 248 : fprintf (file, " reg_cost: %d\n",
6573 : ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6574 248 : fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6575 : "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6576 : ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6577 248 : bitmap_print (file, ivs->cands, " candidates: ","\n");
6578 :
6579 1285 : for (i = 0; i < ivs->upto; i++)
6580 : {
6581 1037 : struct iv_group *group = data->vgroups[i];
6582 1037 : class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6583 1037 : if (cp)
6584 1037 : fprintf (file, " group:%d --> iv_cand:%d, cost=("
6585 1037 : "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6586 : cp->cost.cost, cp->cost.complexity);
6587 : else
6588 0 : fprintf (file, " group:%d --> ??\n", group->id);
6589 : }
6590 :
6591 248 : const char *pref = "";
6592 248 : fprintf (file, " invariant variables: ");
6593 1438 : for (i = 1; i <= data->max_inv_var_id; i++)
6594 942 : if (ivs->n_inv_var_uses[i])
6595 : {
6596 133 : fprintf (file, "%s%d", pref, i);
6597 133 : pref = ", ";
6598 : }
6599 :
6600 248 : pref = "";
6601 248 : fprintf (file, "\n invariant expressions: ");
6602 2486 : for (i = 1; i <= data->max_inv_expr_id; i++)
6603 1990 : if (ivs->n_inv_expr_uses[i])
6604 : {
6605 303 : fprintf (file, "%s%d", pref, i);
6606 303 : pref = ", ";
6607 : }
6608 :
6609 248 : fprintf (file, "\n\n");
6610 248 : }
6611 :
6612 : /* Try changing candidate in IVS to CAND for each use. Return cost of the
6613 : new set, and store differences in DELTA. Number of induction variables
6614 : in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6615 : the function will try to find a solution with mimimal iv candidates. */
6616 :
6617 : static comp_cost
6618 21731490 : iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6619 : struct iv_cand *cand, struct iv_ca_delta **delta,
6620 : unsigned *n_ivs, bool min_ncand)
6621 : {
6622 21731490 : unsigned i;
6623 21731490 : comp_cost cost;
6624 21731490 : struct iv_group *group;
6625 21731490 : class cost_pair *old_cp, *new_cp;
6626 :
6627 21731490 : *delta = NULL;
6628 121045440 : for (i = 0; i < ivs->upto; i++)
6629 : {
6630 99313950 : group = data->vgroups[i];
6631 99313950 : old_cp = iv_ca_cand_for_group (ivs, group);
6632 :
6633 99313950 : if (old_cp
6634 99313950 : && old_cp->cand == cand)
6635 9054684 : continue;
6636 :
6637 90259266 : new_cp = get_group_iv_cost (data, group, cand);
6638 90259266 : if (!new_cp)
6639 35009404 : continue;
6640 :
6641 55249862 : if (!min_ncand)
6642 : {
6643 38883570 : int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6644 : /* Skip if new_cp depends on more invariants. */
6645 38883570 : if (cmp_invs > 0)
6646 9664571 : continue;
6647 :
6648 29218999 : int cmp_cost = compare_cost_pair (new_cp, old_cp);
6649 : /* Skip if new_cp is not cheaper. */
6650 29218999 : if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6651 22645797 : continue;
6652 : }
6653 :
6654 22939494 : *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6655 : }
6656 :
6657 21731490 : iv_ca_delta_commit (data, ivs, *delta, true);
6658 21731490 : cost = iv_ca_cost (ivs);
6659 21731490 : if (n_ivs)
6660 12676806 : *n_ivs = iv_ca_n_cands (ivs);
6661 21731490 : iv_ca_delta_commit (data, ivs, *delta, false);
6662 :
6663 21731490 : return cost;
6664 : }
6665 :
6666 : /* Try narrowing set IVS by removing CAND. Return the cost of
6667 : the new set and store the differences in DELTA. START is
6668 : the candidate with which we start narrowing. */
6669 :
6670 : static comp_cost
6671 15114013 : iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6672 : struct iv_cand *cand, struct iv_cand *start,
6673 : struct iv_ca_delta **delta)
6674 : {
6675 15114013 : unsigned i, ci;
6676 15114013 : struct iv_group *group;
6677 15114013 : class cost_pair *old_cp, *new_cp, *cp;
6678 15114013 : bitmap_iterator bi;
6679 15114013 : struct iv_cand *cnd;
6680 15114013 : comp_cost cost, best_cost, acost;
6681 :
6682 15114013 : *delta = NULL;
6683 80466350 : for (i = 0; i < data->vgroups.length (); i++)
6684 : {
6685 75276957 : group = data->vgroups[i];
6686 :
6687 75276957 : old_cp = iv_ca_cand_for_group (ivs, group);
6688 75276957 : if (old_cp->cand != cand)
6689 53412460 : continue;
6690 :
6691 21864497 : best_cost = iv_ca_cost (ivs);
6692 : /* Start narrowing with START. */
6693 21864497 : new_cp = get_group_iv_cost (data, group, start);
6694 :
6695 21864497 : if (data->consider_all_candidates)
6696 : {
6697 92689386 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6698 : {
6699 72047444 : if (ci == cand->id || (start && ci == start->id))
6700 35518391 : continue;
6701 :
6702 36529053 : cnd = data->vcands[ci];
6703 :
6704 36529053 : cp = get_group_iv_cost (data, group, cnd);
6705 36529053 : if (!cp)
6706 21875499 : continue;
6707 :
6708 14653554 : iv_ca_set_cp (data, ivs, group, cp);
6709 14653554 : acost = iv_ca_cost (ivs);
6710 :
6711 14653554 : if (acost < best_cost)
6712 : {
6713 1949012 : best_cost = acost;
6714 1949012 : new_cp = cp;
6715 : }
6716 : }
6717 : }
6718 : else
6719 : {
6720 4982112 : EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6721 : {
6722 3759557 : if (ci == cand->id || (start && ci == start->id))
6723 2068955 : continue;
6724 :
6725 1690602 : cnd = data->vcands[ci];
6726 :
6727 1690602 : cp = get_group_iv_cost (data, group, cnd);
6728 1690602 : if (!cp)
6729 0 : continue;
6730 :
6731 1690602 : iv_ca_set_cp (data, ivs, group, cp);
6732 1690602 : acost = iv_ca_cost (ivs);
6733 :
6734 1690602 : if (acost < best_cost)
6735 : {
6736 43334 : best_cost = acost;
6737 43334 : new_cp = cp;
6738 : }
6739 : }
6740 : }
6741 : /* Restore to old cp for use. */
6742 21864497 : iv_ca_set_cp (data, ivs, group, old_cp);
6743 :
6744 21864497 : if (!new_cp)
6745 : {
6746 9924620 : iv_ca_delta_free (delta);
6747 9924620 : return infinite_cost;
6748 : }
6749 :
6750 11939877 : *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6751 : }
6752 :
6753 5189393 : iv_ca_delta_commit (data, ivs, *delta, true);
6754 5189393 : cost = iv_ca_cost (ivs);
6755 5189393 : iv_ca_delta_commit (data, ivs, *delta, false);
6756 :
6757 5189393 : return cost;
6758 : }
6759 :
6760 : /* Try optimizing the set of candidates IVS by removing candidates different
6761 : from to EXCEPT_CAND from it. Return cost of the new set, and store
6762 : differences in DELTA. */
6763 :
6764 : static comp_cost
6765 9191205 : iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6766 : struct iv_cand *except_cand, struct iv_ca_delta **delta)
6767 : {
6768 9191205 : bitmap_iterator bi;
6769 9191205 : struct iv_ca_delta *act_delta, *best_delta;
6770 9191205 : unsigned i;
6771 9191205 : comp_cost best_cost, acost;
6772 9191205 : struct iv_cand *cand;
6773 :
6774 9191205 : best_delta = NULL;
6775 9191205 : best_cost = iv_ca_cost (ivs);
6776 :
6777 30751475 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6778 : {
6779 21560270 : cand = data->vcands[i];
6780 :
6781 21560270 : if (cand == except_cand)
6782 6446257 : continue;
6783 :
6784 15114013 : acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6785 :
6786 15114013 : if (acost < best_cost)
6787 : {
6788 2552481 : best_cost = acost;
6789 2552481 : iv_ca_delta_free (&best_delta);
6790 2552481 : best_delta = act_delta;
6791 : }
6792 : else
6793 12561532 : iv_ca_delta_free (&act_delta);
6794 : }
6795 :
6796 9191205 : if (!best_delta)
6797 : {
6798 6818712 : *delta = NULL;
6799 6818712 : return best_cost;
6800 : }
6801 :
6802 : /* Recurse to possibly remove other unnecessary ivs. */
6803 2372493 : iv_ca_delta_commit (data, ivs, best_delta, true);
6804 2372493 : best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6805 2372493 : iv_ca_delta_commit (data, ivs, best_delta, false);
6806 2372493 : *delta = iv_ca_delta_join (best_delta, *delta);
6807 2372493 : return best_cost;
6808 : }
6809 :
6810 : /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6811 : cheaper local cost for GROUP than BEST_CP. Return pointer to
6812 : the corresponding cost_pair, otherwise just return BEST_CP. */
6813 :
6814 : static class cost_pair*
6815 29254824 : cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6816 : unsigned int cand_idx, struct iv_cand *old_cand,
6817 : class cost_pair *best_cp)
6818 : {
6819 29254824 : struct iv_cand *cand;
6820 29254824 : class cost_pair *cp;
6821 :
6822 29254824 : gcc_assert (old_cand != NULL && best_cp != NULL);
6823 29254824 : if (cand_idx == old_cand->id)
6824 : return best_cp;
6825 :
6826 26426492 : cand = data->vcands[cand_idx];
6827 26426492 : cp = get_group_iv_cost (data, group, cand);
6828 26426492 : if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6829 : return cp;
6830 :
6831 : return best_cp;
6832 : }
6833 :
6834 : /* Try breaking local optimal fixed-point for IVS by replacing candidates
6835 : which are used by more than one iv uses. For each of those candidates,
6836 : this function tries to represent iv uses under that candidate using
6837 : other ones with lower local cost, then tries to prune the new set.
6838 : If the new set has lower cost, It returns the new cost after recording
6839 : candidate replacement in list DELTA. */
6840 :
6841 : static comp_cost
6842 1005186 : iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6843 : struct iv_ca_delta **delta)
6844 : {
6845 1005186 : bitmap_iterator bi, bj;
6846 1005186 : unsigned int i, j, k;
6847 1005186 : struct iv_cand *cand;
6848 1005186 : comp_cost orig_cost, acost;
6849 1005186 : struct iv_ca_delta *act_delta, *tmp_delta;
6850 1005186 : class cost_pair *old_cp, *best_cp = NULL;
6851 :
6852 1005186 : *delta = NULL;
6853 1005186 : orig_cost = iv_ca_cost (ivs);
6854 :
6855 2337592 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6856 : {
6857 1361719 : if (ivs->n_cand_uses[i] == 1
6858 1025692 : || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6859 342034 : continue;
6860 :
6861 1019685 : cand = data->vcands[i];
6862 :
6863 1019685 : act_delta = NULL;
6864 : /* Represent uses under current candidate using other ones with
6865 : lower local cost. */
6866 5218118 : for (j = 0; j < ivs->upto; j++)
6867 : {
6868 4198433 : struct iv_group *group = data->vgroups[j];
6869 4198433 : old_cp = iv_ca_cand_for_group (ivs, group);
6870 :
6871 4198433 : if (old_cp->cand != cand)
6872 1370101 : continue;
6873 :
6874 2828332 : best_cp = old_cp;
6875 2828332 : if (data->consider_all_candidates)
6876 31983672 : for (k = 0; k < data->vcands.length (); k++)
6877 29162671 : best_cp = cheaper_cost_with_cand (data, group, k,
6878 : old_cp->cand, best_cp);
6879 : else
6880 99484 : EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6881 92153 : best_cp = cheaper_cost_with_cand (data, group, k,
6882 : old_cp->cand, best_cp);
6883 :
6884 2828332 : if (best_cp == old_cp)
6885 1339959 : continue;
6886 :
6887 1488373 : act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6888 : }
6889 : /* No need for further prune. */
6890 1019685 : if (!act_delta)
6891 234738 : continue;
6892 :
6893 : /* Prune the new candidate set. */
6894 784947 : iv_ca_delta_commit (data, ivs, act_delta, true);
6895 784947 : acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6896 784947 : iv_ca_delta_commit (data, ivs, act_delta, false);
6897 784947 : act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6898 :
6899 784947 : if (acost < orig_cost)
6900 : {
6901 29313 : *delta = act_delta;
6902 29313 : return acost;
6903 : }
6904 : else
6905 755634 : iv_ca_delta_free (&act_delta);
6906 : }
6907 :
6908 975873 : return orig_cost;
6909 : }
6910 :
6911 : /* Tries to extend the sets IVS in the best possible way in order to
6912 : express the GROUP. If ORIGINALP is true, prefer candidates from
6913 : the original set of IVs, otherwise favor important candidates not
6914 : based on any memory object. */
6915 :
6916 : static bool
6917 3295422 : try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6918 : struct iv_group *group, bool originalp)
6919 : {
6920 3295422 : comp_cost best_cost, act_cost;
6921 3295422 : unsigned i;
6922 3295422 : bitmap_iterator bi;
6923 3295422 : struct iv_cand *cand;
6924 3295422 : struct iv_ca_delta *best_delta = NULL, *act_delta;
6925 3295422 : class cost_pair *cp;
6926 :
6927 3295422 : iv_ca_add_group (data, ivs, group);
6928 3295422 : best_cost = iv_ca_cost (ivs);
6929 3295422 : cp = iv_ca_cand_for_group (ivs, group);
6930 3295422 : if (cp)
6931 : {
6932 3208297 : best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6933 3208297 : iv_ca_set_no_cp (data, ivs, group);
6934 : }
6935 :
6936 : /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6937 : first try important candidates not based on any memory object. Only if
6938 : this fails, try the specific ones. Rationale -- in loops with many
6939 : variables the best choice often is to use just one generic biv. If we
6940 : added here many ivs specific to the uses, the optimization algorithm later
6941 : would be likely to get stuck in a local minimum, thus causing us to create
6942 : too many ivs. The approach from few ivs to more seems more likely to be
6943 : successful -- starting from few ivs, replacing an expensive use by a
6944 : specific iv should always be a win. */
6945 30704074 : EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6946 : {
6947 27408652 : cand = data->vcands[i];
6948 :
6949 27408652 : if (originalp && cand->pos !=IP_ORIGINAL)
6950 10804657 : continue;
6951 :
6952 13704326 : if (!originalp && cand->iv->base_object != NULL_TREE)
6953 2510537 : continue;
6954 :
6955 14093458 : if (iv_ca_cand_used_p (ivs, cand))
6956 1521146 : continue;
6957 :
6958 12572312 : cp = get_group_iv_cost (data, group, cand);
6959 12572312 : if (!cp)
6960 3624651 : continue;
6961 :
6962 8947661 : iv_ca_set_cp (data, ivs, group, cp);
6963 8947661 : act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6964 : true);
6965 8947661 : iv_ca_set_no_cp (data, ivs, group);
6966 8947661 : act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6967 :
6968 8947661 : if (act_cost < best_cost)
6969 : {
6970 396488 : best_cost = act_cost;
6971 :
6972 396488 : iv_ca_delta_free (&best_delta);
6973 396488 : best_delta = act_delta;
6974 : }
6975 : else
6976 8551173 : iv_ca_delta_free (&act_delta);
6977 : }
6978 :
6979 3295422 : if (best_cost.infinite_cost_p ())
6980 : {
6981 692696 : for (i = 0; i < group->n_map_members; i++)
6982 : {
6983 629083 : cp = group->cost_map + i;
6984 629083 : cand = cp->cand;
6985 629083 : if (!cand)
6986 522060 : continue;
6987 :
6988 : /* Already tried this. */
6989 107023 : if (cand->important)
6990 : {
6991 0 : if (originalp && cand->pos == IP_ORIGINAL)
6992 0 : continue;
6993 0 : if (!originalp && cand->iv->base_object == NULL_TREE)
6994 0 : continue;
6995 : }
6996 :
6997 107023 : if (iv_ca_cand_used_p (ivs, cand))
6998 0 : continue;
6999 :
7000 107023 : act_delta = NULL;
7001 107023 : iv_ca_set_cp (data, ivs, group, cp);
7002 107023 : act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7003 107023 : iv_ca_set_no_cp (data, ivs, group);
7004 107023 : act_delta = iv_ca_delta_add (group,
7005 : iv_ca_cand_for_group (ivs, group),
7006 : cp, act_delta);
7007 :
7008 107023 : if (act_cost < best_cost)
7009 : {
7010 65111 : best_cost = act_cost;
7011 :
7012 65111 : if (best_delta)
7013 2736 : iv_ca_delta_free (&best_delta);
7014 65111 : best_delta = act_delta;
7015 : }
7016 : else
7017 41912 : iv_ca_delta_free (&act_delta);
7018 : }
7019 : }
7020 :
7021 3295422 : iv_ca_delta_commit (data, ivs, best_delta, true);
7022 3295422 : iv_ca_delta_free (&best_delta);
7023 :
7024 3295422 : return !best_cost.infinite_cost_p ();
7025 : }
7026 :
7027 : /* Finds an initial assignment of candidates to uses. */
7028 :
7029 : static class iv_ca *
7030 1006424 : get_initial_solution (struct ivopts_data *data, bool originalp)
7031 : {
7032 1006424 : unsigned i;
7033 1006424 : class iv_ca *ivs = iv_ca_new (data);
7034 :
7035 4300608 : for (i = 0; i < data->vgroups.length (); i++)
7036 3295422 : if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7037 : {
7038 1238 : iv_ca_free (&ivs);
7039 1238 : return NULL;
7040 : }
7041 :
7042 : return ivs;
7043 : }
7044 :
7045 : /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
7046 : points to a bool variable, this function tries to break local
7047 : optimal fixed-point by replacing candidates in IVS if it's true. */
7048 :
7049 : static bool
7050 1490691 : try_improve_iv_set (struct ivopts_data *data,
7051 : class iv_ca *ivs, bool *try_replace_p)
7052 : {
7053 1490691 : unsigned i, n_ivs;
7054 1490691 : comp_cost acost, best_cost = iv_ca_cost (ivs);
7055 1490691 : struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7056 1490691 : struct iv_cand *cand;
7057 :
7058 : /* Try extending the set of induction variables by one. */
7059 16394423 : for (i = 0; i < data->vcands.length (); i++)
7060 : {
7061 14903732 : cand = data->vcands[i];
7062 :
7063 14903732 : if (iv_ca_cand_used_p (ivs, cand))
7064 2226926 : continue;
7065 :
7066 12676806 : acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7067 12676806 : if (!act_delta)
7068 7638303 : continue;
7069 :
7070 : /* If we successfully added the candidate and the set is small enough,
7071 : try optimizing it by removing other candidates. */
7072 5038503 : if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7073 : {
7074 4998087 : iv_ca_delta_commit (data, ivs, act_delta, true);
7075 4998087 : acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7076 4998087 : iv_ca_delta_commit (data, ivs, act_delta, false);
7077 4998087 : act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7078 : }
7079 :
7080 5038503 : if (acost < best_cost)
7081 : {
7082 581250 : best_cost = acost;
7083 581250 : iv_ca_delta_free (&best_delta);
7084 581250 : best_delta = act_delta;
7085 : }
7086 : else
7087 4457253 : iv_ca_delta_free (&act_delta);
7088 : }
7089 :
7090 1490691 : if (!best_delta)
7091 : {
7092 : /* Try removing the candidates from the set instead. */
7093 1035678 : best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7094 :
7095 1035678 : if (!best_delta && *try_replace_p)
7096 : {
7097 1005186 : *try_replace_p = false;
7098 : /* So far candidate selecting algorithm tends to choose fewer IVs
7099 : so that it can handle cases in which loops have many variables
7100 : but the best choice is often to use only one general biv. One
7101 : weakness is it can't handle opposite cases, in which different
7102 : candidates should be chosen with respect to each use. To solve
7103 : the problem, we replace candidates in a manner described by the
7104 : comments of iv_ca_replace, thus give general algorithm a chance
7105 : to break local optimal fixed-point in these cases. */
7106 1005186 : best_cost = iv_ca_replace (data, ivs, &best_delta);
7107 : }
7108 :
7109 1035678 : if (!best_delta)
7110 : return false;
7111 : }
7112 :
7113 485505 : iv_ca_delta_commit (data, ivs, best_delta, true);
7114 485505 : iv_ca_delta_free (&best_delta);
7115 971010 : return best_cost == iv_ca_cost (ivs);
7116 : }
7117 :
7118 : /* Attempts to find the optimal set of induction variables. We do simple
7119 : greedy heuristic -- we try to replace at most one candidate in the selected
7120 : solution and remove the unused ivs while this improves the cost. */
7121 :
7122 : static class iv_ca *
7123 1006424 : find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7124 : {
7125 1006424 : class iv_ca *set;
7126 1006424 : bool try_replace_p = true;
7127 :
7128 : /* Get the initial solution. */
7129 1006424 : set = get_initial_solution (data, originalp);
7130 1006424 : if (!set)
7131 : {
7132 1238 : if (dump_file && (dump_flags & TDF_DETAILS))
7133 0 : fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7134 1238 : return NULL;
7135 : }
7136 :
7137 1005186 : if (dump_file && (dump_flags & TDF_DETAILS))
7138 : {
7139 134 : fprintf (dump_file, "Initial set of candidates:\n");
7140 134 : iv_ca_dump (data, dump_file, set);
7141 : }
7142 :
7143 1490691 : while (try_improve_iv_set (data, set, &try_replace_p))
7144 : {
7145 485505 : if (dump_file && (dump_flags & TDF_DETAILS))
7146 : {
7147 114 : fprintf (dump_file, "Improved to:\n");
7148 114 : iv_ca_dump (data, dump_file, set);
7149 : }
7150 : }
7151 :
7152 : /* If the set has infinite_cost, it can't be optimal. */
7153 2010372 : if (iv_ca_cost (set).infinite_cost_p ())
7154 : {
7155 0 : if (dump_file && (dump_flags & TDF_DETAILS))
7156 0 : fprintf (dump_file,
7157 : "Overflow to infinite cost in try_improve_iv_set.\n");
7158 0 : iv_ca_free (&set);
7159 : }
7160 1005186 : return set;
7161 : }
7162 :
/* Picks the IV set to use for the loop described by DATA.  Runs
   find_optimal_iv_set_1 twice, with ORIGINALP true and then false, keeps
   the result with the lower cost (the ORIGINALP == true result on a tie)
   and frees the other.  Stores the candidate chosen for every group in the
   group's SELECTED field.  Returns the kept set, or NULL when neither run
   produced one.  */
7163 : static class iv_ca *
7164 503212 : find_optimal_iv_set (struct ivopts_data *data)
7165 : {
7166 503212 : unsigned i;
7167 503212 : comp_cost cost, origcost;
7168 503212 : class iv_ca *set, *origset;
7169 :
7170 : /* Determine the cost based on a strategy that starts with original IVs,
7171 : and try again using a strategy that prefers candidates not based
7172 : on any IVs. */
7173 503212 : origset = find_optimal_iv_set_1 (data, true);
7174 503212 : set = find_optimal_iv_set_1 (data, false);
7175 :
7176 503212 : if (!origset && !set)
7177 : return NULL;
7178 :
        /* At least one set exists here; a missing one is given infinite_cost.  */
7179 502593 : origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7180 502593 : cost = set ? iv_ca_cost (set) : infinite_cost;
7181 :
7182 502593 : if (dump_file && (dump_flags & TDF_DETAILS))
7183 : {
7184 67 : fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7185 : origcost.cost, origcost.complexity);
7186 67 : fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7187 : cost.cost, cost.complexity);
7188 : }
7189 :
7190 : /* Choose the one with the best cost. */
7191 502593 : if (origcost <= cost)
7192 : {
7193 467288 : if (set)
7194 467288 : iv_ca_free (&set);
7195 467288 : set = origset;
7196 : }
7197 35305 : else if (origset)
7198 35305 : iv_ca_free (&origset);
7199 :
        /* Publish the per-group choice of the winning set.  */
7200 2148993 : for (i = 0; i < data->vgroups.length (); i++)
7201 : {
7202 1646400 : struct iv_group *group = data->vgroups[i];
7203 1646400 : group->selected = iv_ca_cand_for_group (set, group)->cand;
7204 : }
7205 :
7206 502593 : return set;
7207 : }
7208 :
7209 : /* Creates a new induction variable corresponding to CAND. */
/* DATA supplies the current loop.  CAND->iv must be non-NULL.  The position
   of the increment is taken from CAND->pos.  For IP_ORIGINAL no variable is
   created: the existing names are marked to be preserved and the function
   returns early.  Otherwise create_iv is called and CAND->var_before and
   CAND->var_after are passed to it as outputs.  */
7210 :
7211 : static void
7212 677681 : create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7213 : {
7214 677681 : gimple_stmt_iterator incr_pos;
7215 677681 : tree base;
7216 677681 : struct iv_use *use;
7217 677681 : struct iv_group *group;
7218 677681 : bool after = false;
7219 :
7220 677681 : gcc_assert (cand->iv != NULL);
7221 :
        /* Select where the increment goes (INCR_POS) and whether it is placed
           after that position (AFTER).  */
7222 677681 : switch (cand->pos)
7223 : {
7224 465216 : case IP_NORMAL:
7225 465216 : incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7226 465216 : break;
7227 :
7228 10193 : case IP_END:
7229 10193 : incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7230 10193 : after = true;
7231 10193 : gcc_assert (gsi_end_p (incr_pos) || !stmt_ends_bb_p (*incr_pos));
7232 : break;
7233 :
7234 0 : case IP_AFTER_USE:
7235 0 : after = true;
7236 : /* fall through */
7237 0 : case IP_BEFORE_USE:
7238 0 : incr_pos = gsi_for_stmt (cand->incremented_at);
7239 0 : break;
7240 :
7241 202272 : case IP_ORIGINAL:
7242 : /* Mark that the iv is preserved. */
7243 202272 : name_info (data, cand->var_before)->preserve_biv = true;
7244 202272 : name_info (data, cand->var_after)->preserve_biv = true;
7245 :
7246 : /* Rewrite the increment so that it uses var_before directly. */
7247 202272 : use = find_interesting_uses_op (data, cand->var_after);
7248 202272 : group = data->vgroups[use->group_id];
7249 202272 : group->selected = cand;
7250 202272 : return;
7251 : }
7252 :
7253 475409 : gimple_add_tmp_var (cand->var_before);
7254 :
7255 475409 : base = unshare_expr (cand->iv->base);
7256 :
7257 : /* The step computation could invoke UB when the loop does not iterate.
7258 : Avoid inserting it on the preheader in its native form but rewrite
7259 : it to a well-defined form. This also helps masking SCEV issues
7260 : which freely re-associates the IV computations when building up
7261 : CHRECs without much regard for signed overflow invoking UB. */
7262 475409 : gimple_seq stmts = NULL;
7263 475409 : tree step = force_gimple_operand (unshare_expr (cand->iv->step), &stmts,
7264 : true, NULL_TREE);
7265 475409 : if (stmts)
7266 : {
        /* Rewrite each statement that needs it, then insert the whole
           sequence on the preheader edge of the current loop.  */
7267 142284 : for (auto gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
7268 94408 : if (gimple_needing_rewrite_undefined (gsi_stmt (gsi)))
7269 11647 : rewrite_to_defined_unconditional (&gsi);
7270 47876 : gsi_insert_seq_on_edge_immediate
7271 47876 : (loop_preheader_edge (data->current_loop), stmts);
7272 : }
7273 :
7274 475409 : create_iv (base, PLUS_EXPR, step,
7275 : cand->var_before, data->current_loop,
7276 : &incr_pos, after, &cand->var_before, &cand->var_after);
7277 : }
7278 :
7279 : /* Creates new induction variables described in SET. */
/* Calls create_new_iv for every candidate whose index is set in SET->cands,
   looking the candidate up in DATA->vcands.  With TDF_DETAILS dumping
   enabled it then prints the loop number, the loop location when known, the
   average iteration count, the number of selected IVs and each selected
   candidate.  */
7280 :
7281 : static void
7282 502593 : create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7283 : {
7284 502593 : unsigned i;
7285 502593 : struct iv_cand *cand;
7286 502593 : bitmap_iterator bi;
7287 :
7288 1180274 : EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7289 : {
7290 677681 : cand = data->vcands[i];
7291 677681 : create_new_iv (data, cand);
7292 : }
7293 :
7294 502593 : if (dump_file && (dump_flags & TDF_DETAILS))
7295 : {
7296 67 : fprintf (dump_file, "Selected IV set for loop %d",
7297 67 : data->current_loop->num);
7298 67 : if (data->loop_loc != UNKNOWN_LOCATION)
7299 65 : fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7300 130 : LOCATION_LINE (data->loop_loc));
7301 67 : fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " avg niters",
7302 : avg_loop_niter (data->current_loop));
7303 67 : fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7304 178 : EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7305 : {
7306 111 : cand = data->vcands[i];
7307 111 : dump_cand (dump_file, cand);
7308 : }
7309 67 : fprintf (dump_file, "\n");
7310 : }
7311 502593 : }
7312 :
7313 : /* Rewrites USE (definition of iv used in a nonlinear expression)
7314 : using candidate CAND. */
/* USE->stmt must be a PHI or an assignment; any other statement code hits
   gcc_unreachable.  The function returns without changing anything when
   USE is the increment of an IP_ORIGINAL candidate that can be left as is,
   or when USE is a PHI whose result is marked preserve_biv.  Otherwise the
   value is recomputed from CAND: a PHI is replaced by an assignment and an
   assignment gets a new right-hand side, after which USE->stmt is refreshed
   from the iterator.  */
7315 :
7316 : static void
7317 627398 : rewrite_use_nonlinear_expr (struct ivopts_data *data,
7318 : struct iv_use *use, struct iv_cand *cand)
7319 : {
7320 627398 : gassign *ass;
7321 627398 : gimple_stmt_iterator bsi;
7322 627398 : tree comp, type = get_use_type (use), tgt;
7323 :
7324 : /* An important special case -- if we are asked to express value of
7325 : the original iv by itself, just exit; there is no need to
7326 : introduce a new computation (that might also need casting the
7327 : variable to unsigned and back). */
7328 627398 : if (cand->pos == IP_ORIGINAL
7329 336344 : && cand->incremented_at == use->stmt)
7330 : {
7331 202272 : tree op = NULL_TREE;
7332 202272 : enum tree_code stmt_code;
7333 :
7334 202272 : gcc_assert (is_gimple_assign (use->stmt));
7335 202272 : gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7336 :
7337 : /* Check whether we may leave the computation unchanged.
7338 : This is the case only if it does not rely on other
7339 : computations in the loop -- otherwise, the computation
7340 : we rely upon may be removed in remove_unused_ivs,
7341 : thus leading to ICE. */
7342 202272 : stmt_code = gimple_assign_rhs_code (use->stmt);
7343 202272 : if (stmt_code == PLUS_EXPR
7344 202272 : || stmt_code == MINUS_EXPR
7345 202272 : || stmt_code == POINTER_PLUS_EXPR)
7346 : {
        /* OP becomes the operand that is not CAND->var_before, if one of the
           two operands is CAND->var_before.  */
7347 198293 : if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7348 196452 : op = gimple_assign_rhs2 (use->stmt);
7349 1841 : else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7350 : op = gimple_assign_rhs1 (use->stmt);
7351 : }
7352 :
7353 196952 : if (op != NULL_TREE)
7354 : {
        /* Keep the statement when OP is loop invariant or is an iv with
           zero step.  */
7355 196952 : if (expr_invariant_in_loop_p (data->current_loop, op))
7356 282664 : return;
7357 181 : if (TREE_CODE (op) == SSA_NAME)
7358 : {
7359 181 : struct iv *iv = get_iv (data, op);
7360 181 : if (iv != NULL && integer_zerop (iv->step))
7361 : return;
7362 : }
7363 : }
7364 : }
7365 :
        /* Determine the name being defined (TGT) and the insertion point
           (BSI) for the replacement computation.  */
7366 430446 : switch (gimple_code (use->stmt))
7367 : {
7368 128124 : case GIMPLE_PHI:
7369 128124 : tgt = PHI_RESULT (use->stmt);
7370 :
7371 : /* If we should keep the biv, do not replace it. */
7372 128124 : if (name_info (data, tgt)->preserve_biv)
7373 : return;
7374 :
7375 42412 : bsi = gsi_after_labels (gimple_bb (use->stmt));
7376 42412 : break;
7377 :
7378 302322 : case GIMPLE_ASSIGN:
7379 302322 : tgt = gimple_assign_lhs (use->stmt);
7380 302322 : bsi = gsi_for_stmt (use->stmt);
7381 302322 : break;
7382 :
7383 0 : default:
7384 0 : gcc_unreachable ();
7385 : }
7386 :
        /* Express the use through CAND as two affine combinations; failure
           is treated as unreachable.  */
7387 1034202 : aff_tree aff_inv, aff_var;
7388 344734 : if (!get_computation_aff_1 (data, use->stmt, use, cand, &aff_inv, &aff_var))
7389 0 : gcc_unreachable ();
7390 :
7391 344734 : unshare_aff_combination (&aff_inv);
7392 344734 : unshare_aff_combination (&aff_var);
7393 : /* Prefer CSE opportunity than loop invariant by adding offset at last
7394 : so that iv_uses have different offsets can be CSEed. */
7395 689468 : poly_widest_int offset = aff_inv.offset;
7396 344734 : aff_inv.offset = 0;
7397 :
7398 344734 : gimple_seq stmt_list = NULL, seq = NULL;
7399 344734 : tree comp_op1 = aff_combination_to_tree (&aff_inv);
7400 344734 : tree comp_op2 = aff_combination_to_tree (&aff_var);
7401 344734 : gcc_assert (comp_op1 && comp_op2);
7402 :
7403 344734 : comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7404 344734 : gimple_seq_add_seq (&stmt_list, seq);
7405 344734 : comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7406 344734 : gimple_seq_add_seq (&stmt_list, seq);
7407 :
        /* Make sure a pointer-typed operand, if any, ends up in COMP_OP1 so
           the pointer-plus form below applies to it.  */
7408 344734 : if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7409 : std::swap (comp_op1, comp_op2);
7410 :
        /* Build COMP_OP1 + COMP_OP2 and add the saved OFFSET last.  */
7411 344734 : if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7412 : {
7413 0 : comp = fold_build_pointer_plus (comp_op1,
7414 : fold_convert (sizetype, comp_op2));
7415 0 : comp = fold_build_pointer_plus (comp,
7416 : wide_int_to_tree (sizetype, offset));
7417 : }
7418 : else
7419 : {
7420 344734 : comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7421 : fold_convert (TREE_TYPE (comp_op1), comp_op2));
7422 344734 : comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7423 : wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7424 : }
7425 :
7426 344734 : comp = fold_convert (type, comp);
7427 344734 : comp = force_gimple_operand (comp, &seq, false, NULL);
7428 344734 : gimple_seq_add_seq (&stmt_list, seq);
7429 344734 : if (gimple_code (use->stmt) != GIMPLE_PHI
7430 : /* We can't allow re-allocating the stmt as it might be pointed
7431 : to still. */
7432 344734 : && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7433 302322 : >= gimple_num_ops (gsi_stmt (bsi))))
7434 : {
7435 8491 : comp = force_gimple_operand (comp, &seq, true, NULL);
7436 8491 : gimple_seq_add_seq (&stmt_list, seq);
7437 8491 : if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7438 : {
7439 0 : duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7440 : /* As this isn't a plain copy we have to reset alignment
7441 : information. */
7442 0 : if (SSA_NAME_PTR_INFO (comp))
7443 0 : mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7444 : }
7445 : }
7446 :
        /* Emit the helper statements, then either replace the PHI by
           TGT = COMP or overwrite the right-hand side of the assignment.  */
7447 344734 : gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7448 344734 : if (gimple_code (use->stmt) == GIMPLE_PHI)
7449 : {
7450 42412 : ass = gimple_build_assign (tgt, comp);
7451 42412 : gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7452 :
7453 42412 : bsi = gsi_for_stmt (use->stmt);
7454 42412 : remove_phi_node (&bsi, false);
7455 : }
7456 : else
7457 : {
7458 302322 : gimple_assign_set_rhs_from_tree (&bsi, comp);
7459 302322 : use->stmt = gsi_stmt (bsi);
7460 : }
7461 : }
7462 :
7463 : /* Performs a peephole optimization to reorder the iv update statement with
7464 : a mem ref to enable instruction combining in later phases. The mem ref uses
7465 : the iv value before the update, so the reordering transformation requires
7466 : adjustment of the offset. CAND is the selected IV_CAND.
7467 :
7468 : Example:
7469 :
7470 : t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7471 : iv2 = iv1 + 1;
7472 :
7473 : if (t < val) (1)
7474 : goto L;
7475 : goto Head;
7476 :
7477 :
7478 : directly propagating t over to (1) will introduce overlapping live range
7479 : thus increase register pressure. This peephole transform it into:
7480 :
7481 :
7482 : iv2 = iv1 + 1;
7483 : t = MEM_REF (base, iv2, 8, 8);
7484 : if (t < val)
7485 : goto L;
7486 : goto Head;
7487 : */
/* The reordering is done only when all of the following hold: CAND->pos is
   IP_NORMAL; the last non-debug statement of the block holding the
   increment is a GIMPLE_COND; the increment is the non-debug statement
   immediately before it; the non-debug statement before the increment is
   USE->stmt; and USE->stmt is an assignment whose left-hand side is an SSA
   name.  On success the increment is moved in front of USE->stmt and CAND
   is switched to IP_BEFORE_USE with INCREMENTED_AT set to USE->stmt.  In
   every other case nothing is changed.  */
7488 :
7489 : static void
7490 857469 : adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7491 : {
7492 857469 : tree var_after;
7493 857469 : gimple *iv_update, *stmt;
7494 857469 : basic_block bb;
7495 857469 : gimple_stmt_iterator gsi, gsi_iv;
7496 :
7497 857469 : if (cand->pos != IP_NORMAL)
7498 855286 : return;
7499 :
7500 655024 : var_after = cand->var_after;
7501 655024 : iv_update = SSA_NAME_DEF_STMT (var_after);
7502 :
7503 655024 : bb = gimple_bb (iv_update);
7504 655024 : gsi = gsi_last_nondebug_bb (bb);
7505 655024 : stmt = gsi_stmt (gsi);
7506 :
7507 : /* Only handle conditional statement for now. */
7508 655024 : if (gimple_code (stmt) != GIMPLE_COND)
7509 : return;
7510 :
        /* The statement right before the condition must be the iv update.  */
7511 655024 : gsi_prev_nondebug (&gsi);
7512 655024 : stmt = gsi_stmt (gsi);
7513 655024 : if (stmt != iv_update)
7514 : return;
7515 :
        /* And the one before that must exist and be USE's assignment.  */
7516 529450 : gsi_prev_nondebug (&gsi);
7517 529450 : if (gsi_end_p (gsi))
7518 : return;
7519 :
7520 526310 : stmt = gsi_stmt (gsi);
7521 526310 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
7522 : return;
7523 :
7524 526127 : if (stmt != use->stmt)
7525 : return;
7526 :
7527 5029 : if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7528 : return;
7529 :
7530 2183 : if (dump_file && (dump_flags & TDF_DETAILS))
7531 : {
7532 0 : fprintf (dump_file, "Reordering \n");
7533 0 : print_gimple_stmt (dump_file, iv_update, 0);
7534 0 : print_gimple_stmt (dump_file, use->stmt, 0);
7535 0 : fprintf (dump_file, "\n");
7536 : }
7537 :
        /* Move the iv update in front of the use and record the new
           increment position in CAND.  */
7538 2183 : gsi = gsi_for_stmt (use->stmt);
7539 2183 : gsi_iv = gsi_for_stmt (iv_update);
7540 2183 : gsi_move_before (&gsi_iv, &gsi);
7541 :
7542 2183 : cand->pos = IP_BEFORE_USE;
7543 2183 : cand->incremented_at = use->stmt;
7544 : }
7545 :
7546 : /* Return the alias pointer type that should be used for a MEM_REF
7547 : associated with USE, which has type USE_PTR_ADDRESS. */
/* USE->stmt must be a call (as_a <gcall *>) to one of the internal load or
   store functions listed in the switch; any other internal function hits
   gcc_unreachable.  USE->op_p is asserted to be the address of call
   argument 0, and the result is the type of call argument 1.  */
7548 :
7549 : static tree
7550 795 : get_alias_ptr_type_for_ptr_address (iv_use *use)
7551 : {
7552 795 : gcall *call = as_a <gcall *> (use->stmt);
7553 795 : switch (gimple_call_internal_fn (call))
7554 : {
7555 795 : case IFN_MASK_LOAD:
7556 795 : case IFN_MASK_STORE:
7557 795 : case IFN_MASK_LOAD_LANES:
7558 795 : case IFN_MASK_STORE_LANES:
7559 795 : case IFN_MASK_LEN_LOAD_LANES:
7560 795 : case IFN_MASK_LEN_STORE_LANES:
7561 795 : case IFN_LEN_LOAD:
7562 795 : case IFN_LEN_STORE:
7563 795 : case IFN_MASK_LEN_LOAD:
7564 795 : case IFN_MASK_LEN_STORE:
7565 : /* The second argument contains the correct alias type. */
7566 795 : gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7567 795 : return TREE_TYPE (gimple_call_arg (call, 1));
7568 :
7569 0 : default:
7570 0 : gcc_unreachable ();
7571 : }
7572 : }
7573 :
7574 :
7575 : /* Rewrites USE (address that is an iv) using candidate CAND. */
/* First gives adjust_iv_update_pos a chance to move CAND's increment, then
   computes the affine form of USE in terms of CAND (asserting success),
   builds a memory reference from it with create_mem_ref and stores the
   result through USE->op_p.  For USE_PTR_ADDRESS uses the address of the
   reference, converted to the use type, is stored instead of the reference
   itself.  */
7576 :
7577 : static void
7578 857469 : rewrite_use_address (struct ivopts_data *data,
7579 : struct iv_use *use, struct iv_cand *cand)
7580 : {
7581 857469 : aff_tree aff;
7582 857469 : bool ok;
7583 :
7584 857469 : adjust_iv_update_pos (cand, use);
7585 857469 : ok = get_computation_aff (data, use->stmt, use, cand, &aff);
7586 857469 : gcc_assert (ok);
7587 857469 : unshare_aff_combination (&aff);
7588 :
7589 : /* To avoid undefined overflow problems, all IV candidates use unsigned
7590 : integer types. The drawback is that this makes it impossible for
7591 : create_mem_ref to distinguish an IV that is based on a memory object
7592 : from one that represents simply an offset.
7593 :
7594 : To work around this problem, we pass a hint to create_mem_ref that
7595 : indicates which variable (if any) in aff is an IV based on a memory
7596 : object. Note that we only consider the candidate. If this is not
7597 : based on an object, the base of the reference is in some subexpression
7598 : of the use -- but these will use pointer types, so they are recognized
7599 : by the create_mem_ref heuristics anyway. */
7600 857469 : tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7601 857469 : tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7602 857469 : gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7603 857469 : tree type = use->mem_type;
7604 857469 : tree alias_ptr_type;
7605 857469 : if (use->type == USE_PTR_ADDRESS)
7606 795 : alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7607 : else
7608 : {
        /* Use a variant of TYPE carrying the alignment of the original
           reference when that differs from the type's own alignment.  */
7609 856674 : gcc_assert (type == TREE_TYPE (*use->op_p));
7610 856674 : unsigned int align = get_object_alignment (*use->op_p);
7611 856674 : if (align != TYPE_ALIGN (type))
7612 34424 : type = build_aligned_type (type, align);
7613 856674 : alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7614 : }
7615 1714938 : tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7616 857469 : iv, base_hint, data->speed);
7617 :
7618 857469 : if (use->type == USE_PTR_ADDRESS)
7619 : {
        /* The use wants a pointer value: take the address of REF, convert it
           to the use type and gimplify it before the use statement.  */
7620 795 : ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7621 795 : ref = fold_convert (get_use_type (use), ref);
7622 795 : ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7623 : true, GSI_SAME_STMT);
7624 : }
7625 : else
7626 : {
7627 : /* When we end up confused enough and have no suitable base but
7628 : stuffed everything to index2 use a LEA for the address and
7629 : create a plain MEM_REF to avoid basing a memory reference
7630 : on address zero which create_mem_ref_raw does as fallback. */
7631 856674 : if (TREE_CODE (ref) == TARGET_MEM_REF
7632 856674 : && TMR_INDEX2 (ref) != NULL_TREE
7633 867217 : && integer_zerop (TREE_OPERAND (ref, 0)))
7634 : {
7635 20 : ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7636 20 : ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7637 : true, GSI_SAME_STMT);
7638 20 : ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7639 : }
7640 856674 : copy_ref_info (ref, *use->op_p);
7641 : }
7642 :
7643 857469 : *use->op_p = ref;
7644 857469 : }
7645 :
7646 : /* Rewrites USE (the condition such that one of the arguments is an iv) using
7647 : candidate CAND. */
/* The cost pair recorded for USE's group and CAND decides how.  When it
   carries a bound (CP->value), the condition is replaced by a comparison of
   CAND's variable at USE->stmt against that bound using code CP->comp; any
   statements needed to compute the bound are inserted on the preheader
   edge.  Otherwise the original operand is recomputed from CAND and stored
   through USE->op_p.  */
7648 :
7649 : static void
7650 599017 : rewrite_use_compare (struct ivopts_data *data,
7651 : struct iv_use *use, struct iv_cand *cand)
7652 : {
7653 599017 : tree comp, op, bound;
7654 599017 : gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7655 599017 : enum tree_code compare;
7656 599017 : struct iv_group *group = data->vgroups[use->group_id];
7657 599017 : class cost_pair *cp = get_group_iv_cost (data, group, cand);
7658 :
7659 599017 : bound = cp->value;
7660 599017 : if (bound)
7661 : {
7662 392242 : tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7663 392242 : tree var_type = TREE_TYPE (var);
7664 392242 : gimple_seq stmts;
7665 :
7666 392242 : if (dump_file && (dump_flags & TDF_DETAILS))
7667 : {
7668 58 : fprintf (dump_file, "Replacing exit test: ");
7669 58 : print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7670 : }
7671 392242 : compare = cp->comp;
        /* Convert the bound to the type of the candidate variable and
           compute it on the preheader edge if that needs statements.  */
7672 392242 : bound = unshare_expr (fold_convert (var_type, bound));
7673 392242 : op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7674 392242 : if (stmts)
7675 180039 : gsi_insert_seq_on_edge_immediate (
7676 180039 : loop_preheader_edge (data->current_loop),
7677 : stmts);
7678 :
7679 392242 : gcond *cond_stmt = as_a <gcond *> (use->stmt);
7680 392242 : gimple_cond_set_lhs (cond_stmt, var);
7681 392242 : gimple_cond_set_code (cond_stmt, compare);
7682 392242 : gimple_cond_set_rhs (cond_stmt, op);
7683 392242 : return;
7684 : }
7685 :
7686 : /* The induction variable elimination failed; just express the original
7687 : giv. */
7688 206775 : comp = get_computation_at (data, use->stmt, use, cand);
7689 206775 : gcc_assert (comp != NULL_TREE);
7690 206775 : gcc_assert (use->op_p != NULL);
7691 206775 : *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7692 206775 : SSA_NAME_VAR (*use->op_p),
7693 : true, GSI_SAME_STMT);
7694 : }
7695 :
7696 : /* Rewrite the groups using the selected induction variables. */
/* Every group in DATA->vgroups must already have a SELECTED candidate.
   Each use of a group is rewritten by the routine matching the group type
   (nonlinear expression, address, or compare; any other type trips the
   assertion) and update_stmt is called on the use's statement
   afterwards.  */
7697 :
7698 : static void
7699 502593 : rewrite_groups (struct ivopts_data *data)
7700 : {
7701 502593 : unsigned i, j;
7702 :
7703 2310706 : for (i = 0; i < data->vgroups.length (); i++)
7704 : {
7705 1808113 : struct iv_group *group = data->vgroups[i];
7706 1808113 : struct iv_cand *cand = group->selected;
7707 :
7708 1808113 : gcc_assert (cand);
7709 :
7710 1808113 : if (group->type == USE_NONLINEAR_EXPR)
7711 : {
7712 1254796 : for (j = 0; j < group->vuses.length (); j++)
7713 : {
7714 627398 : rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7715 627398 : update_stmt (group->vuses[j]->stmt);
7716 : }
7717 : }
7718 1180715 : else if (address_p (group->type))
7719 : {
7720 1439167 : for (j = 0; j < group->vuses.length (); j++)
7721 : {
7722 857469 : rewrite_use_address (data, group->vuses[j], cand);
7723 857469 : update_stmt (group->vuses[j]->stmt);
7724 : }
7725 : }
7726 : else
7727 : {
7728 599017 : gcc_assert (group->type == USE_COMPARE);
7729 :
7730 2407130 : for (j = 0; j < group->vuses.length (); j++)
7731 : {
7732 599017 : rewrite_use_compare (data, group->vuses[j], cand);
7733 599017 : update_stmt (group->vuses[j]->stmt);
7734 : }
7735 : }
7736 : }
7737 502593 : }
7738 :
7739 : /* Removes the ivs that are not used after rewriting. */
/* This function itself only records which names to remove: for every
   version in DATA->relevant that has an iv with non-zero step, no inv_id,
   no nonlinear use and is not marked preserve_biv, the SSA version of the
   iv's name is set in TOREMOVE.  When debug bind statements may exist, uses
   of such a name in debug binds are additionally rewritten to an expression
   based on one of the selected candidates, if one can be computed.  */
7740 :
7741 : static void
7742 502593 : remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7743 : {
7744 502593 : unsigned j;
7745 502593 : bitmap_iterator bi;
7746 :
7747 : /* Figure out an order in which to release SSA DEFs so that we don't
7748 : release something that we'd have to propagate into a debug stmt
7749 : afterwards. */
7750 5517439 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7751 : {
7752 5014846 : struct version_info *info;
7753 :
7754 5014846 : info = ver_info (data, j);
7755 5014846 : if (info->iv
7756 4871213 : && !integer_zerop (info->iv->step)
7757 3205282 : && !info->inv_id
7758 3205282 : && !info->iv->nonlin_use
7759 7592730 : && !info->preserve_biv)
7760 : {
7761 2461324 : bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7762 :
7763 2461324 : tree def = info->iv->ssa_name;
7764 :
7765 3197423 : if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7766 : {
7767 736099 : imm_use_iterator imm_iter;
7768 736099 : use_operand_p use_p;
7769 736099 : gimple *stmt;
7770 736099 : int count = 0;
7771 :
7772 2182501 : FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7773 : {
7774 738409 : if (!gimple_debug_bind_p (stmt))
7775 621178 : continue;
7776 :
7777 : /* We just want to determine whether to do nothing
7778 : (count == 0), to substitute the computed
7779 : expression into a single use of the SSA DEF by
7780 : itself (count == 1), or to use a debug temp
7781 : because the SSA DEF is used multiple times or as
7782 : part of a larger expression (count > 1). */
7783 117231 : count++;
7784 117231 : if (gimple_debug_bind_get_value (stmt) != def)
7785 7469 : count++;
7786 :
7787 117231 : if (count > 1)
7788 : break;
7789 736099 : }
7790 :
7791 736099 : if (!count)
7792 661041 : continue;
7793 :
7794 96372 : struct iv_use dummy_use;
7795 96372 : struct iv_cand *best_cand = NULL, *cand;
7796 96372 : unsigned i, best_pref = 0, cand_pref;
7797 96372 : tree comp = NULL_TREE;
7798 :
        /* Look for the selected candidate best suited to express DEF.  Only
           the first 64 groups are examined.  A candidate scores 4 for
           having the same step as the iv, 2 for a base whose type has the
           same mode as the iv's base type, and 1 for an INTEGER_CST base.
           A candidate replaces the current best only if its score is higher
           (or there is no best yet) and the debug computation succeeds.  */
7799 96372 : memset (&dummy_use, 0, sizeof (dummy_use));
7800 96372 : dummy_use.iv = info->iv;
7801 492861 : for (i = 0; i < data->vgroups.length () && i < 64; i++)
7802 : {
7803 396489 : cand = data->vgroups[i]->selected;
7804 396489 : if (cand == best_cand)
7805 164429 : continue;
7806 153858 : cand_pref = operand_equal_p (cand->iv->step,
7807 232060 : info->iv->step, 0)
7808 232060 : ? 4 : 0;
7809 232060 : cand_pref
7810 232060 : += TYPE_MODE (TREE_TYPE (cand->iv->base))
7811 232060 : == TYPE_MODE (TREE_TYPE (info->iv->base))
7812 232060 : ? 2 : 0;
7813 232060 : cand_pref
7814 464120 : += TREE_CODE (cand->iv->base) == INTEGER_CST
7815 232060 : ? 1 : 0;
7816 232060 : if (best_cand == NULL || best_pref < cand_pref)
7817 : {
7818 178199 : tree this_comp
7819 356398 : = get_debug_computation_at (data,
7820 178199 : SSA_NAME_DEF_STMT (def),
7821 : &dummy_use, cand);
7822 178199 : if (this_comp)
7823 : {
7824 396489 : best_cand = cand;
7825 396489 : best_pref = cand_pref;
7826 396489 : comp = this_comp;
7827 : }
7828 : }
7829 : }
7830 :
7831 96372 : if (!best_cand)
7832 21314 : continue;
7833 :
7834 75058 : comp = unshare_expr (comp);
7835 75058 : if (count > 1)
7836 : {
        /* Bind the expression to a debug temporary placed at DEF's
           definition (after the labels for a PHI) and substitute the
           temporary instead of the expression.  */
7837 23949 : tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7838 : /* FIXME: Is setting the mode really necessary? */
7839 23949 : if (SSA_NAME_VAR (def))
7840 14159 : SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7841 : else
7842 9790 : SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7843 23949 : gdebug *def_temp
7844 23949 : = gimple_build_debug_bind (vexpr, comp, NULL);
7845 23949 : gimple_stmt_iterator gsi;
7846 :
7847 23949 : if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7848 13473 : gsi = gsi_after_labels (gimple_bb
7849 13473 : (SSA_NAME_DEF_STMT (def)));
7850 : else
7851 10476 : gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7852 :
7853 23949 : gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7854 23949 : comp = vexpr;
7855 : }
7856 :
        /* Replace DEF by COMP in every debug bind that uses it.  */
7857 360994 : FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7858 : {
7859 210878 : if (!gimple_debug_bind_p (stmt))
7860 82497 : continue;
7861 :
7862 385215 : FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7863 128417 : SET_USE (use_p, comp);
7864 :
7865 128381 : update_stmt (stmt);
7866 75058 : }
7867 : }
7868 : }
7869 : }
7870 502593 : }
7871 :
7872 : /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7873 : for hash_map::traverse. */
/* A NULL VALUE is skipped.  Otherwise the destructor is run explicitly and
   the storage is released with free.  The edge key and the user-data
   argument are unused.  Always returns true.
   NOTE(review): returning true presumably tells the traversal to continue;
   confirm against hash_map::traverse.  */
7874 :
7875 : bool
7876 484081 : free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7877 : {
7878 484081 : if (value)
7879 : {
7880 443850 : value->~tree_niter_desc ();
7881 443850 : free (value);
7882 : }
7883 484081 : return true;
7884 : }
7885 :
7886 : /* Frees data allocated by the optimization of a single loop. */
/* Resets DATA so it can be reused: the niters map and its descriptors are
   freed, the version_info entries listed in DATA->relevant are cleared, all
   groups (with their uses and cost maps) and all candidates are freed, and
   the vectors, bitmaps and hash tables are emptied rather than
   destroyed.  */
7887 :
7888 : static void
7889 872629 : free_loop_data (struct ivopts_data *data)
7890 : {
7891 872629 : unsigned i, j;
7892 872629 : bitmap_iterator bi;
7893 872629 : tree obj;
7894 :
7895 872629 : if (data->niters)
7896 : {
7897 955980 : data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7898 943798 : delete data->niters;
7899 471899 : data->niters = NULL;
7900 : }
7901 :
        /* Only entries recorded in DATA->relevant are reset.  */
7902 5903409 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7903 : {
7904 5030780 : struct version_info *info;
7905 :
7906 5030780 : info = ver_info (data, i);
7907 5030780 : info->iv = NULL;
7908 5030780 : info->has_nonlin_use = false;
7909 5030780 : info->preserve_biv = false;
7910 5030780 : info->inv_id = 0;
7911 : }
7912 872629 : bitmap_clear (data->relevant);
7913 872629 : bitmap_clear (data->important_candidates);
7914 :
7915 2683380 : for (i = 0; i < data->vgroups.length (); i++)
7916 : {
7917 1810751 : struct iv_group *group = data->vgroups[i];
7918 :
7919 3897312 : for (j = 0; j < group->vuses.length (); j++)
7920 2086561 : free (group->vuses[j]);
7921 1810751 : group->vuses.release ();
7922 :
7923 1810751 : BITMAP_FREE (group->related_cands);
7924 19692216 : for (j = 0; j < group->n_map_members; j++)
7925 : {
7926 17881465 : if (group->cost_map[j].inv_vars)
7927 3747357 : BITMAP_FREE (group->cost_map[j].inv_vars);
7928 17881465 : if (group->cost_map[j].inv_exprs)
7929 2054400 : BITMAP_FREE (group->cost_map[j].inv_exprs);
7930 : }
7931 :
7932 1810751 : free (group->cost_map);
7933 1810751 : free (group);
7934 : }
7935 872629 : data->vgroups.truncate (0);
7936 :
7937 5492553 : for (i = 0; i < data->vcands.length (); i++)
7938 : {
7939 4619924 : struct iv_cand *cand = data->vcands[i];
7940 :
7941 4619924 : if (cand->inv_vars)
7942 74696 : BITMAP_FREE (cand->inv_vars);
7943 4619924 : if (cand->inv_exprs)
7944 100512 : BITMAP_FREE (cand->inv_exprs);
7945 4619924 : free (cand);
7946 : }
7947 872629 : data->vcands.truncate (0);
7948 :
        /* If more SSA names exist than the version_info array can hold,
           replace it by a zeroed array of twice the current number of SSA
           names.  The old contents are not copied.  */
7949 872629 : if (data->version_info_size < num_ssa_names)
7950 : {
7951 162 : data->version_info_size = 2 * num_ssa_names;
7952 162 : free (data->version_info);
7953 162 : data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7954 : }
7955 :
7956 872629 : data->max_inv_var_id = 0;
7957 872629 : data->max_inv_expr_id = 0;
7958 :
        /* Clear the RTL of every decl queued in decl_rtl_to_reset.  */
7959 872629 : FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7960 0 : SET_DECL_RTL (obj, NULL_RTX);
7961 :
7962 872629 : decl_rtl_to_reset.truncate (0);
7963 :
7964 872629 : data->inv_expr_tab->empty ();
7965 :
7966 872629 : data->iv_common_cand_tab->empty ();
7967 872629 : data->iv_common_cands.truncate (0);
7968 872629 : }
7969 :
7970 : /* Finalizes data structures used by the iv optimization pass. DATA is the
7971 : pass state whose per-loop and pass-wide allocations are released. */
/* Calls free_loop_data first and then destroys what that function only
   empties: the version_info array, the two bitmaps, the vectors, the hash
   tables, the name expansion cache, the base object map (when present) and
   the obstack.  */
7972 :
7973 : static void
7974 241428 : tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7975 : {
7976 241428 : free_loop_data (data);
7977 241428 : free (data->version_info);
7978 241428 : BITMAP_FREE (data->relevant);
7979 241428 : BITMAP_FREE (data->important_candidates);
7980 :
7981 241428 : decl_rtl_to_reset.release ();
7982 241428 : data->vgroups.release ();
7983 241428 : data->vcands.release ();
7984 241428 : delete data->inv_expr_tab;
7985 241428 : data->inv_expr_tab = NULL;
7986 241428 : free_affine_expand_cache (&data->name_expansion_cache);
7987 241428 : if (data->base_object_map)
7988 163273 : delete data->base_object_map;
7989 241428 : delete data->iv_common_cand_tab;
7990 241428 : data->iv_common_cand_tab = NULL;
7991 241428 : data->iv_common_cands.release ();
7992 241428 : obstack_free (&data->iv_obstack, NULL);
7993 241428 : }
7994 :
7995 : /* Returns true if the loop body BODY includes any function calls. */
/* BODY is an array of NUM_NODES basic blocks.  Not every call counts:
   internal calls and calls for which is_inexpensive_builtin holds are
   ignored.  The scan stops at the first call that does count.  */
7996 :
7997 : static bool
7998 631201 : loop_body_includes_call (basic_block *body, unsigned num_nodes)
7999 : {
8000 631201 : gimple_stmt_iterator gsi;
8001 631201 : unsigned i;
8002 :
8003 2839654 : for (i = 0; i < num_nodes; i++)
8004 23696143 : for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
8005 : {
8006 19068002 : gimple *stmt = gsi_stmt (gsi);
8007 19068002 : if (is_gimple_call (stmt)
8008 282372 : && !gimple_call_internal_p (stmt)
8009 19285103 : && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
8010 : return true;
8011 : }
8012 : return false;
8013 : }
8014 :
8015 : /* Determine cost scaling factor for basic blocks in loop. */
/* The factor of each block in BODY is stored as an integer in the block's
   AUX field.  Nothing is stored when DATA->speed is false or the frequency
   of the loop header is not positive.  Otherwise every block gets 1, and
   blocks whose frequency exceeds the header's get
   FACTOR * frequency / DIVISOR instead.  DIVISOR is the header frequency
   and FACTOR is 1, unless the largest ratio exceeds
   COST_SCALING_FACTOR_BOUND, in which case DIVISOR is the maximum block
   frequency and FACTOR is the bound.  */
8016 : #define COST_SCALING_FACTOR_BOUND (20)
8017 :
8018 : static void
8019 503212 : determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8020 : {
8021 503212 : int lfreq = data->current_loop->header->count.to_frequency (cfun);
8022 503212 : if (!data->speed || lfreq <= 0)
8023 : return;
8024 :
        /* Default every block to 1 and find the largest block frequency.  */
8025 : int max_freq = lfreq;
8026 2866410 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8027 : {
8028 2450153 : body[i]->aux = (void *)(intptr_t) 1;
8029 2450153 : if (max_freq < body[i]->count.to_frequency (cfun))
8030 102821 : max_freq = body[i]->count.to_frequency (cfun);
8031 : }
8032 416257 : if (max_freq > lfreq)
8033 : {
8034 65706 : int divisor, factor;
8035 : /* Check if scaling factor itself needs to be scaled by the bound. This
8036 : is to avoid overflow when scaling cost according to profile info. */
8037 65706 : if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8038 : {
8039 : divisor = max_freq;
8040 : factor = COST_SCALING_FACTOR_BOUND;
8041 : }
8042 : else
8043 : {
8044 49876 : divisor = lfreq;
8045 49876 : factor = 1;
8046 : }
8047 998005 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8048 : {
8049 932299 : int bfreq = body[i]->count.to_frequency (cfun);
        /* Blocks no hotter than the header keep the default of 1.  */
8050 932299 : if (bfreq <= lfreq)
8051 514439 : continue;
8052 :
8053 417860 : body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8054 : }
8055 : }
8056 : }
8057 :
8058 : /* Find doloop comparison use and set its doloop_p on if found. */
/* Scans the USE_COMPARE groups of DATA (each asserted to have exactly one
   use).  A group matches when the use's statement is a GIMPLE_COND, one of
   the true/false edges of its block leads to the latch of the current loop,
   and the latch is an empty block.  The first match gets doloop_p set and
   the function returns true; it returns false when no group matches.  */
8059 :
8060 : static bool
8061 0 : find_doloop_use (struct ivopts_data *data)
8062 : {
8063 0 : struct loop *loop = data->current_loop;
8064 :
8065 0 : for (unsigned i = 0; i < data->vgroups.length (); i++)
8066 : {
8067 0 : struct iv_group *group = data->vgroups[i];
8068 0 : if (group->type == USE_COMPARE)
8069 : {
8070 0 : gcc_assert (group->vuses.length () == 1);
8071 0 : struct iv_use *use = group->vuses[0];
8072 0 : gimple *stmt = use->stmt;
8073 0 : if (gimple_code (stmt) == GIMPLE_COND)
8074 : {
8075 0 : basic_block bb = gimple_bb (stmt);
8076 0 : edge true_edge, false_edge;
8077 0 : extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8078 : /* This comparison is used for loop latch. Require latch is empty
8079 : for now. */
8080 0 : if ((loop->latch == true_edge->dest
8081 0 : || loop->latch == false_edge->dest)
8082 0 : && empty_block_p (loop->latch))
8083 : {
8084 0 : group->doloop_p = true;
8085 0 : if (dump_file && (dump_flags & TDF_DETAILS))
8086 : {
8087 0 : fprintf (dump_file, "Doloop cmp iv use: ");
8088 0 : print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8089 : }
8090 0 : return true;
8091 : }
8092 : }
8093 : }
8094 : }
8095 :
8096 : return false;
8097 : }
8098 :
8099 : /* For the targets which support doloop, to predict whether later RTL doloop
8100 : transformation will perform on this loop, further detect the doloop use and
8101 : mark the flag doloop_use_p if predicted. */
8102 :
8103 : void
8104 503212 : analyze_and_mark_doloop_use (struct ivopts_data *data)
8105 : {
8106 503212 : data->doloop_use_p = false;
8107 :
8108 503212 : if (!flag_branch_on_count_reg)
8109 : return;
8110 :
8111 503212 : if (data->current_loop->unroll == USHRT_MAX)
8112 : return;
8113 :
8114 503212 : if (!generic_predict_doloop_p (data))
8115 : return;
8116 :
8117 0 : if (find_doloop_use (data))
8118 : {
8119 0 : data->doloop_use_p = true;
8120 0 : if (dump_file && (dump_flags & TDF_DETAILS))
8121 : {
8122 0 : struct loop *loop = data->current_loop;
8123 0 : fprintf (dump_file,
8124 : "Predict loop %d can perform"
8125 : " doloop optimization later.\n",
8126 : loop->num);
8127 0 : flow_loop_dump (loop, dump_file, NULL, 1);
8128 : }
8129 : }
8130 : }
8131 :
8132 : /* Optimizes the LOOP. Returns true if anything changed. */
8133 :
8134 : static bool
8135 631201 : tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8136 : bitmap toremove)
8137 : {
8138 631201 : bool changed = false;
8139 631201 : class iv_ca *iv_ca;
8140 631201 : edge exit = single_dom_exit (loop);
8141 631201 : basic_block *body;
8142 :
8143 631201 : gcc_assert (!data->niters);
8144 631201 : data->current_loop = loop;
8145 631201 : data->loop_loc = find_loop_location (loop).get_location_t ();
8146 631201 : data->speed = optimize_loop_for_speed_p (loop);
8147 :
8148 631201 : if (dump_file && (dump_flags & TDF_DETAILS))
8149 : {
8150 67 : fprintf (dump_file, "Processing loop %d", loop->num);
8151 67 : if (data->loop_loc != UNKNOWN_LOCATION)
8152 65 : fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8153 130 : LOCATION_LINE (data->loop_loc));
8154 67 : fprintf (dump_file, "\n");
8155 :
8156 67 : if (exit)
8157 : {
8158 57 : fprintf (dump_file, " single exit %d -> %d, exit condition ",
8159 57 : exit->src->index, exit->dest->index);
8160 114 : print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
8161 : 0, TDF_SLIM);
8162 57 : fprintf (dump_file, "\n");
8163 : }
8164 :
8165 67 : fprintf (dump_file, "\n");
8166 : }
8167 :
8168 631201 : body = get_loop_body (loop);
8169 631201 : data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8170 631201 : renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8171 :
8172 631201 : data->loop_single_exit_p
8173 631201 : = exit != NULL && loop_only_exit_p (loop, body, exit);
8174 :
8175 : /* For each ssa name determines whether it behaves as an induction variable
8176 : in some loop. */
8177 631201 : if (!find_induction_variables (data, body))
8178 127988 : goto finish;
8179 :
8180 : /* Finds interesting uses (item 1). */
8181 503213 : find_interesting_uses (data, body);
8182 503213 : if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8183 1 : goto finish;
8184 :
8185 : /* Determine cost scaling factor for basic blocks in loop. */
8186 503212 : determine_scaling_factor (data, body);
8187 :
8188 : /* Analyze doloop possibility and mark the doloop use if predicted. */
8189 503212 : analyze_and_mark_doloop_use (data);
8190 :
8191 : /* Finds candidates for the induction variables (item 2). */
8192 503212 : find_iv_candidates (data);
8193 :
8194 : /* Calculates the costs (item 3, part 1). */
8195 503212 : determine_iv_costs (data);
8196 503212 : determine_group_iv_costs (data);
8197 503212 : determine_set_costs (data);
8198 :
8199 : /* Find the optimal set of induction variables (item 3, part 2). */
8200 503212 : iv_ca = find_optimal_iv_set (data);
8201 : /* Cleanup basic block aux field. */
8202 3313146 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8203 2809934 : body[i]->aux = NULL;
8204 503212 : if (!iv_ca)
8205 619 : goto finish;
8206 502593 : changed = true;
8207 :
8208 : /* Create the new induction variables (item 4, part 1). */
8209 502593 : create_new_ivs (data, iv_ca);
8210 502593 : iv_ca_free (&iv_ca);
8211 :
8212 : /* Rewrite the uses (item 4, part 2). */
8213 502593 : rewrite_groups (data);
8214 :
8215 : /* Remove the ivs that are unused after rewriting. */
8216 502593 : remove_unused_ivs (data, toremove);
8217 :
8218 631201 : finish:
8219 631201 : free (body);
8220 631201 : free_loop_data (data);
8221 :
8222 631201 : return changed;
8223 : }
8224 :
8225 : /* Main entry point. Optimizes induction variables in loops. */
8226 :
8227 : void
8228 241428 : tree_ssa_iv_optimize (void)
8229 : {
8230 241428 : struct ivopts_data data;
8231 241428 : auto_bitmap toremove;
8232 :
8233 241428 : tree_ssa_iv_optimize_init (&data);
8234 241428 : mark_ssa_maybe_undefs ();
8235 :
8236 : /* Optimize the loops starting with the innermost ones. */
8237 1355485 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8238 : {
8239 631201 : if (!dbg_cnt (ivopts_loop))
8240 0 : continue;
8241 :
8242 631201 : if (dump_file && (dump_flags & TDF_DETAILS))
8243 67 : flow_loop_dump (loop, dump_file, NULL, 1);
8244 :
8245 631201 : tree_ssa_iv_optimize_loop (&data, loop, toremove);
8246 241428 : }
8247 :
8248 : /* Remove eliminated IV defs. */
8249 241428 : release_defs_bitset (toremove);
8250 :
8251 : /* We have changed the structure of induction variables; it might happen
8252 : that definitions in the scev database refer to some of them that were
8253 : eliminated. */
8254 241428 : scev_reset_htab ();
8255 : /* Likewise niter and control-IV information. */
8256 241428 : free_numbers_of_iterations_estimates (cfun);
8257 :
8258 241428 : tree_ssa_iv_optimize_finalize (&data);
8259 241428 : }
8260 :
8261 : #include "gt-tree-ssa-loop-ivopts.h"
|