Line data Source code
1 : /* Induction variable optimizations.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* This pass tries to find the optimal set of induction variables for the loop.
21 : It optimizes just the basic linear induction variables (although adding
22 : support for other types should not be too hard). It includes the
23 : optimizations commonly known as strength reduction, induction variable
24 : coalescing and induction variable elimination. It does it in the
25 : following steps:
26 :
27 : 1) The interesting uses of induction variables are found. This includes
28 :
29 : -- uses of induction variables in non-linear expressions
30 : -- addresses of arrays
31 : -- comparisons of induction variables
32 :
33 : Note that the interesting uses are categorized and handled in groups.
34 : Generally, address type uses are grouped together if their iv bases
35 : differ only in a constant offset.
36 :
37 : 2) Candidates for the induction variables are found. This includes
38 :
39 : -- old induction variables
40 : -- the variables defined by expressions derived from the "interesting
41 : groups/uses" above
42 :
43 : 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 : cost function assigns a cost to sets of induction variables and consists
45 : of three parts:
46 :
47 : -- The group/use costs. Each of the interesting groups/uses chooses
48 : the best induction variable in the set and adds its cost to the sum.
49 : The cost reflects the time spent on modifying the induction variables
50 : value to be usable for the given purpose (adding base and offset for
51 : arrays, etc.).
52 : -- The variable costs. Each of the variables has a cost assigned that
53 : reflects the costs associated with incrementing the value of the
54 : variable. The original variables are somewhat preferred.
55 : -- The set cost. Depending on the size of the set, extra cost may be
56 : added to reflect register pressure.
57 :
58 : All the costs are defined in a machine-specific way, using the target
59 : hooks and machine descriptions to determine them.
60 :
61 : 4) The trees are transformed to use the new variables, the dead code is
62 : removed.
63 :
64 : All of this is done loop by loop. Doing it globally is theoretically
65 : possible, it might give a better performance and it might enable us
66 : to decide costs more precisely, but getting all the interactions right
67 : would be complicated.
68 :
69 : For the targets supporting low-overhead loops, IVOPTs has to take care of
70 : the loops which will probably be transformed in RTL doloop optimization,
71 : to try to make selected IV candidate set optimal. The process of doloop
72 : support includes:
73 :
74 : 1) Analyze whether the current loop will be transformed to doloop, find and
75 : mark its compare type IV use as doloop use (iv_group field doloop_p), and
76 : set flag doloop_use_p of ivopts_data to notify subsequent processings on
77 : doloop. See analyze_and_mark_doloop_use and its callees for the details.
78 : The target hook predict_doloop_p can be used for target specific checks.
79 :
80 : 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 : set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
82 : like biv. For cost determination between doloop IV cand and IV use, the
83 : target hooks doloop_cost_for_generic and doloop_cost_for_address are
84 : provided to add on extra costs for generic type and address type IV use.
85 : Zero cost is assigned to the pair between doloop IV cand and doloop IV
86 : use, and bound zero is set for IV elimination.
87 :
88 : 3) With the cost setting in step 2), the current cost model based IV
89 : selection algorithm will process as usual, pick up doloop dedicated IV if
90 : profitable. */
91 :
92 : #include "config.h"
93 : #include "system.h"
94 : #include "coretypes.h"
95 : #include "backend.h"
96 : #include "rtl.h"
97 : #include "tree.h"
98 : #include "gimple.h"
99 : #include "cfghooks.h"
100 : #include "tree-pass.h"
101 : #include "memmodel.h"
102 : #include "tm_p.h"
103 : #include "ssa.h"
104 : #include "expmed.h"
105 : #include "insn-config.h"
106 : #include "emit-rtl.h"
107 : #include "recog.h"
108 : #include "cgraph.h"
109 : #include "gimple-pretty-print.h"
110 : #include "alias.h"
111 : #include "fold-const.h"
112 : #include "stor-layout.h"
113 : #include "tree-eh.h"
114 : #include "gimplify.h"
115 : #include "gimple-iterator.h"
116 : #include "gimplify-me.h"
117 : #include "tree-cfg.h"
118 : #include "tree-ssa-loop-ivopts.h"
119 : #include "tree-ssa-loop-manip.h"
120 : #include "tree-ssa-loop-niter.h"
121 : #include "tree-ssa-loop.h"
122 : #include "explow.h"
123 : #include "expr.h"
124 : #include "tree-dfa.h"
125 : #include "tree-ssa.h"
126 : #include "cfgloop.h"
127 : #include "tree-scalar-evolution.h"
128 : #include "tree-affine.h"
129 : #include "tree-ssa-propagate.h"
130 : #include "tree-ssa-address.h"
131 : #include "builtins.h"
132 : #include "tree-vectorizer.h"
133 : #include "dbgcnt.h"
134 : #include "cfganal.h"
135 : #include "gimple-fold.h"
136 :
137 : /* For lang_hooks.types.type_for_mode. */
138 : #include "langhooks.h"
139 :
140 : /* FIXME: Expressions are expanded to RTL in this pass to determine the
141 : cost of different addressing modes. This should be moved to a TBD
142 : interface between the GIMPLE and RTL worlds. */
143 :
144 : /* The infinite cost. */
145 : #define INFTY 1000000000
146 :
147 : /* Returns the expected number of loop iterations for LOOP.
148 : The average trip count is computed from profile data if it
149 : exists. */
150 :
151 : static inline unsigned HOST_WIDE_INT
152 9360963 : avg_loop_niter (class loop *loop)
153 : {
154 9360963 : HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
155 9360963 : if (niter == -1)
156 : {
157 5490462 : niter = likely_max_stmt_executions_int (loop);
158 :
159 5490462 : if (niter == -1 || niter > param_avg_loop_niter)
160 4668836 : return param_avg_loop_niter;
161 : }
162 :
163 4692127 : return niter;
164 : }
165 :
166 : struct iv_use;
167 :
168 : /* Representation of the induction variable. */
169 : struct iv
170 : {
171 : tree base; /* Initial value of the iv. */
172 : tree base_object; /* A memory object to that the induction variable points. */
173 : tree step; /* Step of the iv (constant only). */
174 : tree ssa_name; /* The ssa name with the value. */
175 : struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */
176 : bool biv_p; /* Is it a biv? */
177 : bool no_overflow; /* True if the iv doesn't overflow. */
178 : bool have_address_use;/* For biv, indicate if it's used in any address
179 : type use. */
180 : };
181 :
182 : /* Per-ssa version information (induction variable descriptions, etc.). */
183 : struct version_info
184 : {
185 : tree name; /* The ssa name. */
186 : struct iv *iv; /* Induction variable description. */
187 : bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
188 : an expression that is not an induction variable. */
189 : bool preserve_biv; /* For the original biv, whether to preserve it. */
190 : unsigned inv_id; /* Id of an invariant. */
191 : };
192 :
/* The kinds of induction variable uses.  */
enum use_type
{
  /* Use in a nonlinear expression.  */
  USE_NONLINEAR_EXPR,

  /* Use is an address for an explicit memory reference.  */
  USE_REF_ADDRESS,

  /* Use is a pointer argument to a function in cases where the
     expansion of the function will turn the argument into a normal
     address.  */
  USE_PTR_ADDRESS,

  /* Use is a compare.  */
  USE_COMPARE
};
204 :
205 : /* Cost of a computation. */
206 : class comp_cost
207 : {
208 : public:
209 139836140 : comp_cost (): cost (0), complexity (0), scratch (0)
210 : {}
211 :
212 27143340 : comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
213 15825678 : : cost (cost), complexity (complexity), scratch (scratch)
214 15002931 : {}
215 :
216 : /* Returns true if COST is infinite. */
217 : bool infinite_cost_p ();
218 :
219 : /* Adds costs COST1 and COST2. */
220 : friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221 :
222 : /* Adds COST to the comp_cost. */
223 : comp_cost operator+= (comp_cost cost);
224 :
225 : /* Adds constant C to this comp_cost. */
226 : comp_cost operator+= (HOST_WIDE_INT c);
227 :
228 : /* Subtracts constant C to this comp_cost. */
229 : comp_cost operator-= (HOST_WIDE_INT c);
230 :
231 : /* Divide the comp_cost by constant C. */
232 : comp_cost operator/= (HOST_WIDE_INT c);
233 :
234 : /* Multiply the comp_cost by constant C. */
235 : comp_cost operator*= (HOST_WIDE_INT c);
236 :
237 : /* Subtracts costs COST1 and COST2. */
238 : friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239 :
240 : /* Subtracts COST from this comp_cost. */
241 : comp_cost operator-= (comp_cost cost);
242 :
243 : /* Returns true if COST1 is smaller than COST2. */
244 : friend bool operator< (comp_cost cost1, comp_cost cost2);
245 :
246 : /* Returns true if COST1 and COST2 are equal. */
247 : friend bool operator== (comp_cost cost1, comp_cost cost2);
248 :
249 : /* Returns true if COST1 is smaller or equal than COST2. */
250 : friend bool operator<= (comp_cost cost1, comp_cost cost2);
251 :
252 : int64_t cost; /* The runtime cost. */
253 : unsigned complexity; /* The estimate of the complexity of the code for
254 : the computation (in no concrete units --
255 : complexity field should be larger for more
256 : complex expressions and addressing modes). */
257 : int64_t scratch; /* Scratch used during cost computation. */
258 : };
259 :
260 : static const comp_cost no_cost;
261 : static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 :
263 : bool
264 1959761037 : comp_cost::infinite_cost_p ()
265 : {
266 1959761037 : return cost == INFTY;
267 : }
268 :
269 : comp_cost
270 262727600 : operator+ (comp_cost cost1, comp_cost cost2)
271 : {
272 262727600 : if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
273 2109078 : return infinite_cost;
274 :
275 260618522 : gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
276 260618522 : cost1.cost += cost2.cost;
277 260618522 : cost1.complexity += cost2.complexity;
278 :
279 260618522 : return cost1;
280 : }
281 :
282 : comp_cost
283 223813029 : operator- (comp_cost cost1, comp_cost cost2)
284 : {
285 223813029 : if (cost1.infinite_cost_p ())
286 0 : return infinite_cost;
287 :
288 223813029 : gcc_assert (!cost2.infinite_cost_p ());
289 223813029 : gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 :
291 223813029 : cost1.cost -= cost2.cost;
292 223813029 : cost1.complexity -= cost2.complexity;
293 :
294 223813029 : return cost1;
295 : }
296 :
297 : comp_cost
298 262727600 : comp_cost::operator+= (comp_cost cost)
299 : {
300 262727600 : *this = *this + cost;
301 262727600 : return *this;
302 : }
303 :
304 : comp_cost
305 924272759 : comp_cost::operator+= (HOST_WIDE_INT c)
306 : {
307 924272759 : if (c >= INFTY)
308 0 : this->cost = INFTY;
309 :
310 924272759 : if (infinite_cost_p ())
311 0 : return *this;
312 :
313 924272759 : gcc_assert (this->cost + c < infinite_cost.cost);
314 924272759 : this->cost += c;
315 :
316 924272759 : return *this;
317 : }
318 :
319 : comp_cost
320 569483 : comp_cost::operator-= (HOST_WIDE_INT c)
321 : {
322 569483 : if (infinite_cost_p ())
323 0 : return *this;
324 :
325 569483 : gcc_assert (this->cost - c < infinite_cost.cost);
326 569483 : this->cost -= c;
327 :
328 569483 : return *this;
329 : }
330 :
331 : comp_cost
332 0 : comp_cost::operator/= (HOST_WIDE_INT c)
333 : {
334 0 : gcc_assert (c != 0);
335 0 : if (infinite_cost_p ())
336 0 : return *this;
337 :
338 0 : this->cost /= c;
339 :
340 0 : return *this;
341 : }
342 :
343 : comp_cost
344 0 : comp_cost::operator*= (HOST_WIDE_INT c)
345 : {
346 0 : if (infinite_cost_p ())
347 0 : return *this;
348 :
349 0 : gcc_assert (this->cost * c < infinite_cost.cost);
350 0 : this->cost *= c;
351 :
352 0 : return *this;
353 : }
354 :
355 : comp_cost
356 223813029 : comp_cost::operator-= (comp_cost cost)
357 : {
358 223813029 : *this = *this - cost;
359 223813029 : return *this;
360 : }
361 :
362 : bool
363 199656791 : operator< (comp_cost cost1, comp_cost cost2)
364 : {
365 199656791 : if (cost1.cost == cost2.cost)
366 89168978 : return cost1.complexity < cost2.complexity;
367 :
368 110487813 : return cost1.cost < cost2.cost;
369 : }
370 :
371 : bool
372 4346549 : operator== (comp_cost cost1, comp_cost cost2)
373 : {
374 4346549 : return cost1.cost == cost2.cost
375 4346549 : && cost1.complexity == cost2.complexity;
376 : }
377 :
378 : bool
379 7013015 : operator<= (comp_cost cost1, comp_cost cost2)
380 : {
381 7013015 : return cost1 < cost2 || cost1 == cost2;
382 : }
383 :
384 : struct iv_inv_expr_ent;
385 :
386 : /* The candidate - cost pair. */
387 : class cost_pair
388 : {
389 : public:
390 : struct iv_cand *cand; /* The candidate. */
391 : comp_cost cost; /* The cost. */
392 : enum tree_code comp; /* For iv elimination, the comparison. */
393 : bitmap inv_vars; /* The list of invariant ssa_vars that have to be
394 : preserved when representing iv_use with iv_cand. */
395 : bitmap inv_exprs; /* The list of newly created invariant expressions
396 : when representing iv_use with iv_cand. */
397 : tree value; /* For final value elimination, the expression for
398 : the final value of the iv. For iv elimination,
399 : the new bound to compare with. */
400 : };
401 :
402 : /* Use. */
403 : struct iv_use
404 : {
405 : unsigned id; /* The id of the use. */
406 : unsigned group_id; /* The group id the use belongs to. */
407 : enum use_type type; /* Type of the use. */
408 : tree mem_type; /* The memory type to use when testing whether an
409 : address is legitimate, and what the address's
410 : cost is. */
411 : struct iv *iv; /* The induction variable it is based on. */
412 : gimple *stmt; /* Statement in that it occurs. */
413 : tree *op_p; /* The place where it occurs. */
414 :
415 : tree addr_base; /* Base address with const offset stripped. */
416 : poly_uint64 addr_offset;
417 : /* Const offset stripped from base address. */
418 : };
419 :
420 : /* Group of uses. */
421 : struct iv_group
422 : {
423 : /* The id of the group. */
424 : unsigned id;
425 : /* Uses of the group are of the same type. */
426 : enum use_type type;
427 : /* The set of "related" IV candidates, plus the important ones. */
428 : bitmap related_cands;
429 : /* Number of IV candidates in the cost_map. */
430 : unsigned n_map_members;
431 : /* The costs wrto the iv candidates. */
432 : class cost_pair *cost_map;
433 : /* The selected candidate for the group. */
434 : struct iv_cand *selected;
435 : /* To indicate this is a doloop use group. */
436 : bool doloop_p;
437 : /* Uses in the group. */
438 : vec<struct iv_use *> vuses;
439 : };
440 :
/* Where in the loop an iv is computed.  */
enum iv_position
{
  /* At the end, just before the exit condition.  */
  IP_NORMAL,

  /* At the end of the latch block.  */
  IP_END,

  /* Immediately before a specific use.  */
  IP_BEFORE_USE,

  /* Immediately after a specific use.  */
  IP_AFTER_USE,

  /* The original biv.  */
  IP_ORIGINAL
};
450 :
451 : /* The induction variable candidate. */
452 : struct iv_cand
453 : {
454 : unsigned id; /* The number of the candidate. */
455 : bool important; /* Whether this is an "important" candidate, i.e. such
456 : that it should be considered by all uses. */
457 : bool involves_undefs; /* Whether the IV involves undefined values. */
458 : ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
459 : gimple *incremented_at;/* For original biv, the statement where it is
460 : incremented. */
461 : tree var_before; /* The variable used for it before increment. */
462 : tree var_after; /* The variable used for it after increment. */
463 : struct iv *iv; /* The value of the candidate. NULL for
464 : "pseudocandidate" used to indicate the possibility
465 : to replace the final value of an iv by direct
466 : computation of the value. */
467 : unsigned cost; /* Cost of the candidate. */
468 : unsigned cost_step; /* Cost of the candidate's increment operation. */
469 : struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
470 : where it is incremented. */
471 : bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
472 : iv_cand. */
473 : bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
474 : handle it as a new invariant expression which will
475 : be hoisted out of loop. */
476 : struct iv *orig_iv; /* The original iv if this cand is added from biv with
477 : smaller type. */
478 : bool doloop_p; /* Whether this is a doloop candidate. */
479 : };
480 :
481 : /* Hashtable entry for common candidate derived from iv uses. */
482 2896686 : class iv_common_cand
483 : {
484 : public:
485 : tree base;
486 : tree step;
487 : /* IV uses from which this common candidate is derived. */
488 : auto_vec<struct iv_use *> uses;
489 : hashval_t hash;
490 : };
491 :
492 : /* Hashtable helpers. */
493 :
494 : struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495 : {
496 : static inline hashval_t hash (const iv_common_cand *);
497 : static inline bool equal (const iv_common_cand *, const iv_common_cand *);
498 : };
499 :
500 : /* Hash function for possible common candidates. */
501 :
502 : inline hashval_t
503 11450817 : iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 : {
505 11450817 : return ccand->hash;
506 : }
507 :
508 : /* Hash table equality function for common candidates. */
509 :
510 : inline bool
511 12979972 : iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
512 : const iv_common_cand *ccand2)
513 : {
514 12979972 : return (ccand1->hash == ccand2->hash
515 1890701 : && operand_equal_p (ccand1->base, ccand2->base, 0)
516 1868800 : && operand_equal_p (ccand1->step, ccand2->step, 0)
517 14841550 : && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
518 1861578 : == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
519 : }
520 :
521 : /* Loop invariant expression hashtable entry. */
522 :
523 : struct iv_inv_expr_ent
524 : {
525 : /* Tree expression of the entry. */
526 : tree expr;
527 : /* Unique identifier. */
528 : int id;
529 : /* Hash value. */
530 : hashval_t hash;
531 : };
532 :
533 : /* Sort iv_inv_expr_ent pair A and B by id field. */
534 :
535 : static int
536 5984 : sort_iv_inv_expr_ent (const void *a, const void *b)
537 : {
538 5984 : const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
539 5984 : const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 :
541 5984 : unsigned id1 = (*e1)->id;
542 5984 : unsigned id2 = (*e2)->id;
543 :
544 5984 : if (id1 < id2)
545 : return -1;
546 2784 : else if (id1 > id2)
547 : return 1;
548 : else
549 0 : return 0;
550 : }
551 :
552 : /* Hashtable helpers. */
553 :
554 : struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555 : {
556 : static inline hashval_t hash (const iv_inv_expr_ent *);
557 : static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
558 : };
559 :
560 : /* Return true if uses of type TYPE represent some form of address. */
561 :
562 : inline bool
563 8944832 : address_p (use_type type)
564 : {
565 8944832 : return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
566 : }
567 :
568 : /* Hash function for loop invariant expressions. */
569 :
570 : inline hashval_t
571 7538930 : iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 : {
573 7538930 : return expr->hash;
574 : }
575 :
576 : /* Hash table equality function for expressions. */
577 :
578 : inline bool
579 9105592 : iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
580 : const iv_inv_expr_ent *expr2)
581 : {
582 9105592 : return expr1->hash == expr2->hash
583 9105592 : && operand_equal_p (expr1->expr, expr2->expr, 0);
584 : }
585 :
586 : struct ivopts_data
587 : {
588 : /* The currently optimized loop. */
589 : class loop *current_loop;
590 : location_t loop_loc;
591 :
592 : /* Numbers of iterations for all exits of the current loop. */
593 : hash_map<edge, tree_niter_desc *> *niters;
594 :
595 : /* Number of registers used in it. */
596 : unsigned regs_used;
597 :
598 : /* The size of version_info array allocated. */
599 : unsigned version_info_size;
600 :
601 : /* The array of information for the ssa names. */
602 : struct version_info *version_info;
603 :
604 : /* The hashtable of loop invariant expressions created
605 : by ivopt. */
606 : hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607 :
608 : /* The bitmap of indices in version_info whose value was changed. */
609 : bitmap relevant;
610 :
611 : /* The uses of induction variables. */
612 : vec<iv_group *> vgroups;
613 :
614 : /* The candidates. */
615 : vec<iv_cand *> vcands;
616 :
617 : /* A bitmap of important candidates. */
618 : bitmap important_candidates;
619 :
620 : /* Cache used by tree_to_aff_combination_expand. */
621 : hash_map<tree, name_expansion *> *name_expansion_cache;
622 :
623 : /* The hashtable of common candidates derived from iv uses. */
624 : hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625 :
626 : /* The common candidates. */
627 : vec<iv_common_cand *> iv_common_cands;
628 :
629 : /* Hash map recording base object information of tree exp. */
630 : hash_map<tree, tree> *base_object_map;
631 :
632 : /* The maximum invariant variable id. */
633 : unsigned max_inv_var_id;
634 :
635 : /* The maximum invariant expression id. */
636 : unsigned max_inv_expr_id;
637 :
638 : /* Number of no_overflow BIVs which are not used in memory address. */
639 : unsigned bivs_not_used_in_addr;
640 :
641 : /* Obstack for iv structure. */
642 : struct obstack iv_obstack;
643 :
644 : /* Whether to consider just related and important candidates when replacing a
645 : use. */
646 : bool consider_all_candidates;
647 :
648 : /* Are we optimizing for speed? */
649 : bool speed;
650 :
651 : /* Whether the loop body includes any function calls. */
652 : bool body_includes_call;
653 :
654 : /* Whether the loop body can only be exited via single exit. */
655 : bool loop_single_exit_p;
656 :
657 : /* Whether the loop has doloop comparison use. */
658 : bool doloop_use_p;
659 : };
660 :
661 : /* An assignment of iv candidates to uses. */
662 :
663 : class iv_ca
664 : {
665 : public:
666 : /* The number of uses covered by the assignment. */
667 : unsigned upto;
668 :
669 : /* Number of uses that cannot be expressed by the candidates in the set. */
670 : unsigned bad_groups;
671 :
672 : /* Candidate assigned to a use, together with the related costs. */
673 : class cost_pair **cand_for_group;
674 :
675 : /* Number of times each candidate is used. */
676 : unsigned *n_cand_uses;
677 :
678 : /* The candidates used. */
679 : bitmap cands;
680 :
681 : /* The number of candidates in the set. */
682 : unsigned n_cands;
683 :
684 : /* The number of invariants needed, including both invariant variants and
685 : invariant expressions. */
686 : unsigned n_invs;
687 :
688 : /* Total cost of expressing uses. */
689 : comp_cost cand_use_cost;
690 :
691 : /* Total cost of candidates. */
692 : int64_t cand_cost;
693 :
694 : /* Number of times each invariant variable is used. */
695 : unsigned *n_inv_var_uses;
696 :
697 : /* Number of times each invariant expression is used. */
698 : unsigned *n_inv_expr_uses;
699 :
700 : /* Total cost of the assignment. */
701 : comp_cost cost;
702 : };
703 :
/* One element of a linked list describing the difference between two
   iv candidate assignments.  */

struct iv_ca_delta
{
  struct iv_group *group;	/* Changed group.  */
  class cost_pair *old_cp;	/* An old assignment (for rollback
				   purposes).  */
  class cost_pair *new_cp;	/* A new assignment.  */
  struct iv_ca_delta *next;	/* Next change in the list.  */
};
720 :
721 : /* Bound on number of candidates below which all candidates are considered. */
722 :
723 : #define CONSIDER_ALL_CANDIDATES_BOUND \
724 : ((unsigned) param_iv_consider_all_candidates_bound)
725 :
726 : /* If there are more iv occurrences, we just give up (it is quite unlikely that
727 : optimizing such a loop would help, and it would take ages). */
728 :
729 : #define MAX_CONSIDERED_GROUPS \
730 : ((unsigned) param_iv_max_considered_uses)
731 :
732 : /* If there are at most this number of ivs in the set, try removing unnecessary
733 : ivs from the set always. */
734 :
735 : #define ALWAYS_PRUNE_CAND_SET_BOUND \
736 : ((unsigned) param_iv_always_prune_cand_set_bound)
737 :
738 : /* The list of trees for which the decl_rtl field must be reset is stored
739 : here. */
740 :
741 : static vec<tree> decl_rtl_to_reset;
742 :
743 : static comp_cost force_expr_to_var_cost (tree, bool);
744 :
745 : /* The single loop exit if it dominates the latch, NULL otherwise. */
746 :
747 : edge
748 696462 : single_dom_exit (class loop *loop)
749 : {
750 696462 : edge exit = single_exit (loop);
751 :
752 696462 : if (!exit)
753 : return NULL;
754 :
755 464748 : if (!just_once_each_iteration_p (loop, exit->src))
756 : return NULL;
757 :
758 : return exit;
759 : }
760 :
761 : /* Dumps information about the induction variable IV to FILE. Don't dump
762 : variable's name if DUMP_NAME is FALSE. The information is dumped with
763 : preceding spaces indicated by INDENT_LEVEL. */
764 :
765 : void
766 1627 : dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767 : {
768 1627 : const char *p;
769 1627 : const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770 :
771 1627 : if (indent_level > 4)
772 : indent_level = 4;
773 1627 : p = spaces + 8 - (indent_level << 1);
774 :
775 1627 : fprintf (file, "%sIV struct:\n", p);
776 1627 : if (iv->ssa_name && dump_name)
777 : {
778 541 : fprintf (file, "%s SSA_NAME:\t", p);
779 541 : print_generic_expr (file, iv->ssa_name, TDF_SLIM);
780 541 : fprintf (file, "\n");
781 : }
782 :
783 1627 : fprintf (file, "%s Type:\t", p);
784 1627 : print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
785 1627 : fprintf (file, "\n");
786 :
787 1627 : fprintf (file, "%s Base:\t", p);
788 1627 : print_generic_expr (file, iv->base, TDF_SLIM);
789 1627 : fprintf (file, "\n");
790 :
791 1627 : fprintf (file, "%s Step:\t", p);
792 1627 : print_generic_expr (file, iv->step, TDF_SLIM);
793 1627 : fprintf (file, "\n");
794 :
795 1627 : if (iv->base_object)
796 : {
797 522 : fprintf (file, "%s Object:\t", p);
798 522 : print_generic_expr (file, iv->base_object, TDF_SLIM);
799 522 : fprintf (file, "\n");
800 : }
801 :
802 2947 : fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803 :
804 1627 : fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
805 1627 : p, iv->no_overflow ? "No-overflow" : "Overflow");
806 1627 : }
807 :
808 : /* Dumps information about the USE to FILE. */
809 :
810 : void
811 250 : dump_use (FILE *file, struct iv_use *use)
812 : {
813 250 : fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
814 250 : fprintf (file, " At stmt:\t");
815 250 : print_gimple_stmt (file, use->stmt, 0);
816 250 : fprintf (file, " At pos:\t");
817 250 : if (use->op_p)
818 160 : print_generic_expr (file, *use->op_p, TDF_SLIM);
819 250 : fprintf (file, "\n");
820 250 : dump_iv (file, use->iv, false, 2);
821 250 : }
822 :
823 : /* Dumps information about the uses to FILE. */
824 :
825 : void
826 67 : dump_groups (FILE *file, struct ivopts_data *data)
827 : {
828 67 : unsigned i, j;
829 67 : struct iv_group *group;
830 :
831 287 : for (i = 0; i < data->vgroups.length (); i++)
832 : {
833 220 : group = data->vgroups[i];
834 220 : fprintf (file, "Group %d:\n", group->id);
835 220 : if (group->type == USE_NONLINEAR_EXPR)
836 90 : fprintf (file, " Type:\tGENERIC\n");
837 130 : else if (group->type == USE_REF_ADDRESS)
838 56 : fprintf (file, " Type:\tREFERENCE ADDRESS\n");
839 74 : else if (group->type == USE_PTR_ADDRESS)
840 0 : fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
841 : else
842 : {
843 74 : gcc_assert (group->type == USE_COMPARE);
844 74 : fprintf (file, " Type:\tCOMPARE\n");
845 : }
846 470 : for (j = 0; j < group->vuses.length (); j++)
847 250 : dump_use (file, group->vuses[j]);
848 : }
849 67 : }
850 :
851 : /* Dumps information about induction variable candidate CAND to FILE. */
852 :
853 : void
854 836 : dump_cand (FILE *file, struct iv_cand *cand)
855 : {
856 836 : struct iv *iv = cand->iv;
857 :
858 836 : fprintf (file, "Candidate %d:\n", cand->id);
859 836 : if (cand->inv_vars)
860 : {
861 30 : fprintf (file, " Depend on inv.vars: ");
862 30 : dump_bitmap (file, cand->inv_vars);
863 : }
864 836 : if (cand->inv_exprs)
865 : {
866 0 : fprintf (file, " Depend on inv.exprs: ");
867 0 : dump_bitmap (file, cand->inv_exprs);
868 : }
869 :
870 836 : if (cand->var_before)
871 : {
872 726 : fprintf (file, " Var before: ");
873 726 : print_generic_expr (file, cand->var_before, TDF_SLIM);
874 726 : fprintf (file, "\n");
875 : }
876 836 : if (cand->var_after)
877 : {
878 726 : fprintf (file, " Var after: ");
879 726 : print_generic_expr (file, cand->var_after, TDF_SLIM);
880 726 : fprintf (file, "\n");
881 : }
882 :
883 836 : switch (cand->pos)
884 : {
885 692 : case IP_NORMAL:
886 692 : fprintf (file, " Incr POS: before exit test\n");
887 692 : break;
888 :
889 0 : case IP_BEFORE_USE:
890 0 : fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
891 0 : break;
892 :
893 0 : case IP_AFTER_USE:
894 0 : fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
895 0 : break;
896 :
897 0 : case IP_END:
898 0 : fprintf (file, " Incr POS: at end\n");
899 0 : break;
900 :
901 144 : case IP_ORIGINAL:
902 144 : fprintf (file, " Incr POS: orig biv\n");
903 144 : break;
904 : }
905 :
906 836 : dump_iv (file, iv, false, 1);
907 836 : }
908 :
909 : /* Returns the info for ssa version VER. */
910 :
911 : static inline struct version_info *
912 119672140 : ver_info (struct ivopts_data *data, unsigned ver)
913 : {
914 119672140 : return data->version_info + ver;
915 : }
916 :
917 : /* Returns the info for ssa name NAME. */
918 :
919 : static inline struct version_info *
920 97323891 : name_info (struct ivopts_data *data, tree name)
921 : {
922 97323891 : return ver_info (data, SSA_NAME_VERSION (name));
923 : }
924 :
925 : /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
926 : emitted in LOOP. */
927 :
928 : static bool
929 36713896 : stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 : {
931 36713896 : basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 :
933 36713896 : gcc_assert (bb);
934 :
935 36713896 : if (sbb == loop->latch)
936 : return true;
937 :
938 36596209 : if (sbb != bb)
939 : return false;
940 :
941 20985236 : return stmt == last_nondebug_stmt (bb);
942 : }
943 :
944 : /* Returns true if STMT if after the place where the original induction
945 : variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
946 : if the positions are identical. */
947 :
948 : static bool
949 7833335 : stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 : {
951 7833335 : basic_block cand_bb = gimple_bb (cand->incremented_at);
952 7833335 : basic_block stmt_bb = gimple_bb (stmt);
953 :
954 7833335 : if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
955 : return false;
956 :
957 5439498 : if (stmt_bb != cand_bb)
958 : return true;
959 :
960 5183273 : if (true_if_equal
961 5183273 : && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
962 : return true;
963 5176781 : return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
964 : }
965 :
966 : /* Returns true if STMT if after the place where the induction variable
967 : CAND is incremented in LOOP. */
968 :
969 : static bool
970 45964537 : stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 : {
972 45964537 : switch (cand->pos)
973 : {
974 : case IP_END:
975 : return false;
976 :
977 36713896 : case IP_NORMAL:
978 36713896 : return stmt_after_ip_normal_pos (loop, stmt);
979 :
980 7823351 : case IP_ORIGINAL:
981 7823351 : case IP_AFTER_USE:
982 7823351 : return stmt_after_inc_pos (cand, stmt, false);
983 :
984 9984 : case IP_BEFORE_USE:
985 9984 : return stmt_after_inc_pos (cand, stmt, true);
986 :
987 0 : default:
988 0 : gcc_unreachable ();
989 : }
990 : }
991 :
992 : /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 :
994 : static tree
995 14854424 : contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 : {
997 14854424 : if (TREE_CODE (*tp) == SSA_NAME
998 14854424 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
999 : return *tp;
1000 :
1001 14854407 : if (!EXPR_P (*tp))
1002 10143449 : *walk_subtrees = 0;
1003 :
1004 : return NULL_TREE;
1005 : }
1006 :
1007 : /* Returns true if EXPR contains a ssa name that occurs in an
1008 : abnormal phi node. */
1009 :
1010 : bool
1011 7946822 : contains_abnormal_ssa_name_p (tree expr)
1012 : {
1013 7946822 : return walk_tree_without_duplicates
1014 7946822 : (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1015 : }
1016 :
1017 : /* Returns the structure describing number of iterations determined from
1018 : EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 :
1020 : static class tree_niter_desc *
1021 4618827 : niter_for_exit (struct ivopts_data *data, edge exit)
1022 : {
1023 4618827 : class tree_niter_desc *desc;
1024 4618827 : tree_niter_desc **slot;
1025 :
1026 4618827 : if (!data->niters)
1027 : {
1028 470559 : data->niters = new hash_map<edge, tree_niter_desc *>;
1029 470559 : slot = NULL;
1030 : }
1031 : else
1032 4148268 : slot = data->niters->get (exit);
1033 :
1034 4618827 : if (!slot)
1035 : {
1036 : /* Try to determine number of iterations. We cannot safely work with ssa
1037 : names that appear in phi nodes on abnormal edges, so that we do not
1038 : create overlapping life ranges for them (PR 27283). */
1039 482672 : desc = XNEW (class tree_niter_desc);
1040 482672 : ::new (static_cast<void*> (desc)) tree_niter_desc ();
1041 482672 : if (!number_of_iterations_exit (data->current_loop,
1042 : exit, desc, true)
1043 482672 : || contains_abnormal_ssa_name_p (desc->niter))
1044 : {
1045 39416 : desc->~tree_niter_desc ();
1046 39416 : XDELETE (desc);
1047 39416 : desc = NULL;
1048 : }
1049 482672 : data->niters->put (exit, desc);
1050 : }
1051 : else
1052 4136155 : desc = *slot;
1053 :
1054 4618827 : return desc;
1055 : }
1056 :
1057 : /* Returns the structure describing number of iterations determined from
1058 : single dominating exit of DATA->current_loop, or NULL if something
1059 : goes wrong. */
1060 :
1061 : static class tree_niter_desc *
1062 67 : niter_for_single_dom_exit (struct ivopts_data *data)
1063 : {
1064 67 : edge exit = single_dom_exit (data->current_loop);
1065 :
1066 67 : if (!exit)
1067 : return NULL;
1068 :
1069 57 : return niter_for_exit (data, exit);
1070 : }
1071 :
1072 : /* Initializes data structures used by the iv optimization pass, stored
1073 : in DATA. */
1074 :
1075 : static void
1076 240894 : tree_ssa_iv_optimize_init (struct ivopts_data *data)
1077 : {
1078 240894 : data->version_info_size = 2 * num_ssa_names;
1079 240894 : data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1080 240894 : data->relevant = BITMAP_ALLOC (NULL);
1081 240894 : data->important_candidates = BITMAP_ALLOC (NULL);
1082 240894 : data->max_inv_var_id = 0;
1083 240894 : data->max_inv_expr_id = 0;
1084 240894 : data->niters = NULL;
1085 240894 : data->vgroups.create (20);
1086 240894 : data->vcands.create (20);
1087 240894 : data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1088 240894 : data->name_expansion_cache = NULL;
1089 240894 : data->base_object_map = NULL;
1090 240894 : data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1091 240894 : data->iv_common_cands.create (20);
1092 240894 : decl_rtl_to_reset.create (20);
1093 240894 : gcc_obstack_init (&data->iv_obstack);
1094 240894 : }
1095 :
1096 : /* walk_tree callback for determine_base_object. */
1097 :
1098 : static tree
1099 17692118 : determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1100 : {
1101 17692118 : tree_code code = TREE_CODE (*tp);
1102 17692118 : tree obj = NULL_TREE;
1103 17692118 : if (code == ADDR_EXPR)
1104 : {
1105 1028820 : tree base = get_base_address (TREE_OPERAND (*tp, 0));
1106 1028820 : if (!base)
1107 0 : obj = *tp;
1108 1028820 : else if (TREE_CODE (base) != MEM_REF)
1109 1028820 : obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1110 : }
1111 16663298 : else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1112 1755894 : obj = fold_convert (ptr_type_node, *tp);
1113 :
1114 2784714 : if (!obj)
1115 : {
1116 14907404 : if (!EXPR_P (*tp))
1117 7679274 : *walk_subtrees = 0;
1118 :
1119 14907404 : return NULL_TREE;
1120 : }
1121 : /* Record special node for multiple base objects and stop. */
1122 2784714 : if (*static_cast<tree *> (wdata))
1123 : {
1124 6874 : *static_cast<tree *> (wdata) = integer_zero_node;
1125 6874 : return integer_zero_node;
1126 : }
1127 : /* Record the base object and continue looking. */
1128 2777840 : *static_cast<tree *> (wdata) = obj;
1129 2777840 : return NULL_TREE;
1130 : }
1131 :
1132 : /* Returns a memory object to that EXPR points with caching. Return NULL if we
1133 : are able to determine that it does not point to any such object; specially
1134 : return integer_zero_node if EXPR contains multiple base objects. */
1135 :
1136 : static tree
1137 10678333 : determine_base_object (struct ivopts_data *data, tree expr)
1138 : {
1139 10678333 : tree *slot, obj = NULL_TREE;
1140 10678333 : if (data->base_object_map)
1141 : {
1142 10515441 : if ((slot = data->base_object_map->get(expr)) != NULL)
1143 5012487 : return *slot;
1144 : }
1145 : else
1146 162892 : data->base_object_map = new hash_map<tree, tree>;
1147 :
1148 5665846 : (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1149 5665846 : data->base_object_map->put (expr, obj);
1150 5665846 : return obj;
1151 : }
1152 :
1153 : /* Allocates an induction variable with given initial value BASE and step STEP
1154 : for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1155 :
1156 : static struct iv *
1157 10678333 : alloc_iv (struct ivopts_data *data, tree base, tree step,
1158 : bool no_overflow = false)
1159 : {
1160 10678333 : tree expr = base;
1161 10678333 : struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1162 : sizeof (struct iv));
1163 10678333 : gcc_assert (step != NULL_TREE);
1164 :
1165 : /* Canonicalize the address expression in base.
1166 : That leads to more equalities being detected and results in:
1167 :
1168 : 1) More accurate cost can be computed for address expressions;
1169 : 2) Duplicate candidates won't be created for bases in different
1170 : forms, like &a[0] and &a.
1171 : 3) Duplicate candidates won't be created for IV expressions that differ
1172 : only in their sign. */
1173 10678333 : aff_tree comb;
1174 10678333 : tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1175 10678333 : base = aff_combination_to_tree (&comb);
1176 :
1177 10678333 : iv->base = base;
1178 10678333 : iv->base_object = determine_base_object (data, base);
1179 10678333 : iv->step = step;
1180 10678333 : iv->biv_p = false;
1181 10678333 : iv->nonlin_use = NULL;
1182 10678333 : iv->ssa_name = NULL_TREE;
1183 10678333 : if (!no_overflow
1184 10678333 : && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1185 : base, step))
1186 : no_overflow = true;
1187 10678333 : iv->no_overflow = no_overflow;
1188 10678333 : iv->have_address_use = false;
1189 :
1190 21356666 : return iv;
1191 10678333 : }
1192 :
1193 : /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1194 : doesn't overflow. */
1195 :
1196 : static void
1197 4921598 : set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1198 : bool no_overflow)
1199 : {
1200 4921598 : struct version_info *info = name_info (data, iv);
1201 :
1202 4921598 : gcc_assert (!info->iv);
1203 :
1204 4921598 : bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1205 4921598 : info->iv = alloc_iv (data, base, step, no_overflow);
1206 4921598 : info->iv->ssa_name = iv;
1207 4921598 : }
1208 :
1209 : /* Finds induction variable declaration for VAR. */
1210 :
1211 : static struct iv *
1212 45238474 : get_iv (struct ivopts_data *data, tree var)
1213 : {
1214 45238474 : basic_block bb;
1215 45238474 : tree type = TREE_TYPE (var);
1216 :
1217 45238474 : if (!POINTER_TYPE_P (type)
1218 35921347 : && !INTEGRAL_TYPE_P (type))
1219 : return NULL;
1220 :
1221 39531869 : if (!name_info (data, var)->iv)
1222 : {
1223 18053538 : bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1224 :
1225 18053538 : if (!bb
1226 18053538 : || !flow_bb_inside_loop_p (data->current_loop, bb))
1227 : {
1228 794251 : if (POINTER_TYPE_P (type))
1229 316286 : type = sizetype;
1230 794251 : set_iv (data, var, var, build_int_cst (type, 0), true);
1231 : }
1232 : }
1233 :
1234 39531869 : return name_info (data, var)->iv;
1235 : }
1236 :
1237 : /* Return the first non-invariant ssa var found in EXPR. */
1238 :
1239 : static tree
1240 4128695 : extract_single_var_from_expr (tree expr)
1241 : {
1242 4128695 : int i, n;
1243 4128695 : tree tmp;
1244 4128695 : enum tree_code code;
1245 :
1246 4128695 : if (!expr || is_gimple_min_invariant (expr))
1247 3415907 : return NULL;
1248 :
1249 712788 : code = TREE_CODE (expr);
1250 712788 : if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1251 : {
1252 395535 : n = TREE_OPERAND_LENGTH (expr);
1253 791129 : for (i = 0; i < n; i++)
1254 : {
1255 395594 : tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1256 :
1257 395594 : if (tmp)
1258 : return tmp;
1259 : }
1260 : }
1261 317253 : return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1262 : }
1263 :
1264 : /* Finds basic ivs. */
1265 :
1266 : static bool
1267 626235 : find_bivs (struct ivopts_data *data)
1268 : {
1269 626235 : gphi *phi;
1270 626235 : affine_iv iv;
1271 626235 : tree step, type, base, stop;
1272 626235 : bool found = false;
1273 626235 : class loop *loop = data->current_loop;
1274 626235 : gphi_iterator psi;
1275 :
1276 2339345 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1277 : {
1278 1713110 : phi = psi.phi ();
1279 :
1280 1713110 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1281 239 : continue;
1282 :
1283 1712871 : if (virtual_operand_p (PHI_RESULT (phi)))
1284 410652 : continue;
1285 :
1286 1302219 : if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1287 430381 : continue;
1288 :
1289 871838 : if (integer_zerop (iv.step))
1290 0 : continue;
1291 :
1292 871838 : step = iv.step;
1293 871838 : base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1294 : /* Stop expanding iv base at the first ssa var referred by iv step.
1295 : Ideally we should stop at any ssa var, because that's expensive
1296 : and unusual to happen, we just do it on the first one.
1297 :
1298 : See PR64705 for the rationale. */
1299 871838 : stop = extract_single_var_from_expr (step);
1300 871838 : base = expand_simple_operations (base, stop);
1301 871838 : if (contains_abnormal_ssa_name_p (base)
1302 871838 : || contains_abnormal_ssa_name_p (step))
1303 10 : continue;
1304 :
1305 871828 : type = TREE_TYPE (PHI_RESULT (phi));
1306 871828 : base = fold_convert (type, base);
1307 871828 : if (step)
1308 : {
1309 871828 : if (POINTER_TYPE_P (type))
1310 163938 : step = convert_to_ptrofftype (step);
1311 : else
1312 707890 : step = fold_convert (type, step);
1313 : }
1314 :
1315 871828 : set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1316 871828 : found = true;
1317 : }
1318 :
1319 626235 : return found;
1320 : }
1321 :
1322 : /* Marks basic ivs. */
1323 :
1324 : static void
1325 500624 : mark_bivs (struct ivopts_data *data)
1326 : {
1327 500624 : gphi *phi;
1328 500624 : gimple *def;
1329 500624 : tree var;
1330 500624 : struct iv *iv, *incr_iv;
1331 500624 : class loop *loop = data->current_loop;
1332 500624 : basic_block incr_bb;
1333 500624 : gphi_iterator psi;
1334 :
1335 500624 : data->bivs_not_used_in_addr = 0;
1336 1950085 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1337 : {
1338 1449461 : phi = psi.phi ();
1339 :
1340 1449461 : iv = get_iv (data, PHI_RESULT (phi));
1341 1449461 : if (!iv)
1342 577633 : continue;
1343 :
1344 871828 : var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1345 871828 : def = SSA_NAME_DEF_STMT (var);
1346 : /* Don't mark iv peeled from other one as biv. */
1347 873440 : if (def
1348 871828 : && gimple_code (def) == GIMPLE_PHI
1349 874572 : && gimple_bb (def) == loop->header)
1350 1612 : continue;
1351 :
1352 870216 : incr_iv = get_iv (data, var);
1353 870216 : if (!incr_iv)
1354 1143 : continue;
1355 :
1356 : /* If the increment is in the subloop, ignore it. */
1357 869073 : incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1358 869073 : if (incr_bb->loop_father != data->current_loop
1359 869073 : || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1360 0 : continue;
1361 :
1362 869073 : iv->biv_p = true;
1363 869073 : incr_iv->biv_p = true;
1364 869073 : if (iv->no_overflow)
1365 581633 : data->bivs_not_used_in_addr++;
1366 869073 : if (incr_iv->no_overflow)
1367 573682 : data->bivs_not_used_in_addr++;
1368 : }
1369 500624 : }
1370 :
1371 : /* Checks whether STMT defines a linear induction variable and stores its
1372 : parameters to IV. */
1373 :
1374 : static bool
1375 12531053 : find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1376 : {
1377 12531053 : tree lhs, stop;
1378 12531053 : class loop *loop = data->current_loop;
1379 :
1380 12531053 : iv->base = NULL_TREE;
1381 12531053 : iv->step = NULL_TREE;
1382 :
1383 12531053 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
1384 : return false;
1385 :
1386 10507266 : lhs = gimple_assign_lhs (stmt);
1387 10507266 : if (TREE_CODE (lhs) != SSA_NAME)
1388 : return false;
1389 :
1390 18799766 : if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1391 : return false;
1392 :
1393 : /* Stop expanding iv base at the first ssa var referred by iv step.
1394 : Ideally we should stop at any ssa var, because that's expensive
1395 : and unusual to happen, we just do it on the first one.
1396 :
1397 : See PR64705 for the rationale. */
1398 2861263 : stop = extract_single_var_from_expr (iv->step);
1399 2861263 : iv->base = expand_simple_operations (iv->base, stop);
1400 2861263 : if (contains_abnormal_ssa_name_p (iv->base)
1401 2861263 : || contains_abnormal_ssa_name_p (iv->step))
1402 6 : return false;
1403 :
1404 : /* If STMT could throw, then do not consider STMT as defining a GIV.
1405 : While this will suppress optimizations, we cannot safely delete this
1406 : GIV and associated statements, even if it appears it is not used. */
1407 2861257 : if (stmt_could_throw_p (cfun, stmt))
1408 : return false;
1409 :
1410 : return true;
1411 : }
1412 :
1413 : /* Finds general ivs in statement STMT. */
1414 :
1415 : static void
1416 12531053 : find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1417 : {
1418 12531053 : affine_iv iv;
1419 :
1420 12531053 : if (!find_givs_in_stmt_scev (data, stmt, &iv))
1421 9669804 : return;
1422 :
1423 2861249 : set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1424 : }
1425 :
1426 : /* Finds general ivs in basic block BB. */
1427 :
1428 : static void
1429 2800840 : find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1430 : {
1431 2800840 : gimple_stmt_iterator bsi;
1432 :
1433 27428419 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1434 21826739 : if (!is_gimple_debug (gsi_stmt (bsi)))
1435 12531053 : find_givs_in_stmt (data, gsi_stmt (bsi));
1436 2800840 : }
1437 :
1438 : /* Finds general ivs. */
1439 :
1440 : static void
1441 500624 : find_givs (struct ivopts_data *data, basic_block *body)
1442 : {
1443 500624 : class loop *loop = data->current_loop;
1444 500624 : unsigned i;
1445 :
1446 3301464 : for (i = 0; i < loop->num_nodes; i++)
1447 2800840 : find_givs_in_bb (data, body[i]);
1448 500624 : }
1449 :
1450 : /* For each ssa name defined in LOOP determines whether it is an induction
1451 : variable and if so, its initial value and step. */
1452 :
1453 : static bool
1454 626235 : find_induction_variables (struct ivopts_data *data, basic_block *body)
1455 : {
1456 626235 : unsigned i;
1457 626235 : bitmap_iterator bi;
1458 :
1459 626235 : if (!find_bivs (data))
1460 : return false;
1461 :
1462 500624 : find_givs (data, body);
1463 500624 : mark_bivs (data);
1464 :
1465 500624 : if (dump_file && (dump_flags & TDF_DETAILS))
1466 : {
1467 67 : class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1468 :
1469 67 : if (niter)
1470 : {
1471 51 : fprintf (dump_file, " number of iterations ");
1472 51 : print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1473 51 : if (!integer_zerop (niter->may_be_zero))
1474 : {
1475 1 : fprintf (dump_file, "; zero if ");
1476 1 : print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1477 : }
1478 51 : fprintf (dump_file, "\n");
1479 67 : };
1480 :
1481 67 : fprintf (dump_file, "\n<Induction Vars>:\n");
1482 801 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1483 : {
1484 734 : struct version_info *info = ver_info (data, i);
1485 734 : if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1486 541 : dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1487 : }
1488 : }
1489 :
1490 : return true;
1491 : }
1492 :
1493 : /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1494 : For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1495 : is the const offset stripped from IV base and MEM_TYPE is the type
1496 : of the memory being addressed. For uses of other types, ADDR_BASE
1497 : and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1498 :
1499 : static struct iv_use *
1500 2073591 : record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1501 : gimple *stmt, enum use_type type, tree mem_type,
1502 : tree addr_base, poly_uint64 addr_offset)
1503 : {
1504 2073591 : struct iv_use *use = XCNEW (struct iv_use);
1505 :
1506 2073591 : use->id = group->vuses.length ();
1507 2073591 : use->group_id = group->id;
1508 2073591 : use->type = type;
1509 2073591 : use->mem_type = mem_type;
1510 2073591 : use->iv = iv;
1511 2073591 : use->stmt = stmt;
1512 2073591 : use->op_p = use_p;
1513 2073591 : use->addr_base = addr_base;
1514 2073591 : use->addr_offset = addr_offset;
1515 :
1516 2073591 : group->vuses.safe_push (use);
1517 2073591 : return use;
1518 : }
1519 :
1520 : /* Checks whether OP is a loop-level invariant and if so, records it.
1521 : NONLINEAR_USE is true if the invariant is used in a way we do not
1522 : handle specially. */
1523 :
1524 : static void
1525 22614623 : record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1526 : {
1527 22614623 : basic_block bb;
1528 22614623 : struct version_info *info;
1529 :
1530 22614623 : if (TREE_CODE (op) != SSA_NAME
1531 22614623 : || virtual_operand_p (op))
1532 : return;
1533 :
1534 21430256 : bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1535 21430256 : if (bb
1536 21430256 : && flow_bb_inside_loop_p (data->current_loop, bb))
1537 : return;
1538 :
1539 3857892 : info = name_info (data, op);
1540 3857892 : info->name = op;
1541 3857892 : info->has_nonlin_use |= nonlinear_use;
1542 3857892 : if (!info->inv_id)
1543 1333614 : info->inv_id = ++data->max_inv_var_id;
1544 3857892 : bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1545 : }
1546 :
1547 : /* Record a group of TYPE. */
1548 :
1549 : static struct iv_group *
1550 1799773 : record_group (struct ivopts_data *data, enum use_type type)
1551 : {
1552 1799773 : struct iv_group *group = XCNEW (struct iv_group);
1553 :
1554 1799773 : group->id = data->vgroups.length ();
1555 1799773 : group->type = type;
1556 1799773 : group->related_cands = BITMAP_ALLOC (NULL);
1557 1799773 : group->vuses.create (1);
1558 1799773 : group->doloop_p = false;
1559 :
1560 1799773 : data->vgroups.safe_push (group);
1561 1799773 : return group;
1562 : }
1563 :
1564 : /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1565 : New group will be created if there is no existing group for the use.
1566 : MEM_TYPE is the type of memory being addressed, or NULL if this
1567 : isn't an address reference. */
1568 :
1569 : static struct iv_use *
1570 2073591 : record_group_use (struct ivopts_data *data, tree *use_p,
1571 : struct iv *iv, gimple *stmt, enum use_type type,
1572 : tree mem_type)
1573 : {
1574 2073591 : tree addr_base = NULL;
1575 2073591 : struct iv_group *group = NULL;
1576 2073591 : poly_uint64 addr_offset = 0;
1577 :
1578 : /* Record non address type use in a new group. */
1579 2073591 : if (address_p (type))
1580 : {
1581 855754 : unsigned int i;
1582 :
1583 855754 : gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1584 855754 : tree addr_toffset;
1585 855754 : split_constant_offset (iv->base, &addr_base, &addr_toffset);
1586 855754 : addr_offset = int_cst_value (addr_toffset);
1587 1611362 : for (i = 0; i < data->vgroups.length (); i++)
1588 : {
1589 1083101 : struct iv_use *use;
1590 :
1591 1083101 : group = data->vgroups[i];
1592 1083101 : use = group->vuses[0];
1593 1083101 : if (!address_p (use->type))
1594 333890 : continue;
1595 :
1596 : /* Check if it has the same stripped base and step. */
1597 749211 : if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1598 393795 : && operand_equal_p (iv->step, use->iv->step, OEP_ASSUME_WRAPV)
1599 1139907 : && operand_equal_p (addr_base, use->addr_base, OEP_ASSUME_WRAPV))
1600 : break;
1601 : }
1602 1711508 : if (i == data->vgroups.length ())
1603 528261 : group = NULL;
1604 : }
1605 :
1606 855754 : if (!group)
1607 1746098 : group = record_group (data, type);
1608 :
1609 2073591 : return record_use (group, use_p, iv, stmt, type, mem_type,
1610 2073591 : addr_base, addr_offset);
1611 : }
1612 :
1613 : /* Checks whether the use OP is interesting and if so, records it. */
1614 :
1615 : static struct iv_use *
1616 7244979 : find_interesting_uses_op (struct ivopts_data *data, tree op)
1617 : {
1618 7244979 : struct iv *iv;
1619 7244979 : gimple *stmt;
1620 7244979 : struct iv_use *use;
1621 :
1622 7244979 : if (TREE_CODE (op) != SSA_NAME)
1623 : return NULL;
1624 :
1625 5839273 : iv = get_iv (data, op);
1626 5839273 : if (!iv)
1627 : return NULL;
1628 :
1629 2550130 : if (iv->nonlin_use)
1630 : {
1631 195312 : gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1632 : return iv->nonlin_use;
1633 : }
1634 :
1635 2354818 : if (integer_zerop (iv->step))
1636 : {
1637 1734696 : record_invariant (data, op, true);
1638 1734696 : return NULL;
1639 : }
1640 :
1641 620122 : stmt = SSA_NAME_DEF_STMT (op);
1642 620122 : gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1643 :
1644 620122 : use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1645 620122 : iv->nonlin_use = use;
1646 620122 : return use;
1647 : }
1648 :
/* Describes the ways in which a compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  /* The comparison is not handled as a compare type iv_use.  */
  COMP_IV_NA,

  /* We may rewrite compare type iv_use by expressing value of the
     iv_use.  */
  COMP_IV_EXPR,

  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,

  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1662 :
1663 : /* Given a condition in statement STMT, checks whether it is a compare
1664 : of an induction variable and an invariant. If this is the case,
1665 : CONTROL_VAR is set to location of the iv, BOUND to the location of
1666 : the invariant, IV_VAR and IV_BOUND are set to the corresponding
1667 : induction variable descriptions, and true is returned. If this is not
1668 : the case, CONTROL_VAR and BOUND are set to the arguments of the
1669 : condition and false is returned. */
1670 :
1671 : static enum comp_iv_rewrite
1672 8048491 : extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1673 : tree **control_var, tree **bound,
1674 : struct iv **iv_var, struct iv **iv_bound)
1675 : {
1676 : /* The objects returned when COND has constant operands. */
1677 8048491 : static struct iv const_iv;
1678 8048491 : static tree zero;
1679 8048491 : tree *op0 = &zero, *op1 = &zero;
1680 8048491 : struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1681 8048491 : enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1682 :
1683 8048491 : if (gimple_code (stmt) == GIMPLE_COND)
1684 : {
1685 7737317 : gcond *cond_stmt = as_a <gcond *> (stmt);
1686 7737317 : op0 = gimple_cond_lhs_ptr (cond_stmt);
1687 7737317 : op1 = gimple_cond_rhs_ptr (cond_stmt);
1688 : }
1689 : else
1690 : {
1691 311174 : op0 = gimple_assign_rhs1_ptr (stmt);
1692 311174 : op1 = gimple_assign_rhs2_ptr (stmt);
1693 : }
1694 :
1695 8048491 : zero = integer_zero_node;
1696 8048491 : const_iv.step = integer_zero_node;
1697 :
1698 8048491 : if (TREE_CODE (*op0) == SSA_NAME)
1699 8048334 : iv0 = get_iv (data, *op0);
1700 8048491 : if (TREE_CODE (*op1) == SSA_NAME)
1701 3824866 : iv1 = get_iv (data, *op1);
1702 :
1703 : /* If both sides of comparison are IVs. We can express ivs on both end. */
1704 8048491 : if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1705 : {
1706 129085 : rewrite_type = COMP_IV_EXPR_2;
1707 129085 : goto end;
1708 : }
1709 :
1710 : /* If none side of comparison is IV. */
1711 6225750 : if ((!iv0 || integer_zerop (iv0->step))
1712 9415229 : && (!iv1 || integer_zerop (iv1->step)))
1713 939750 : goto end;
1714 :
1715 : /* Control variable may be on the other side. */
1716 6979656 : if (!iv0 || integer_zerop (iv0->step))
1717 : {
1718 : std::swap (op0, op1);
1719 : std::swap (iv0, iv1);
1720 : }
1721 : /* If one side is IV and the other side isn't loop invariant. */
1722 6979656 : if (!iv1)
1723 : rewrite_type = COMP_IV_EXPR;
1724 : /* If one side is IV and the other side is loop invariant. */
1725 5779734 : else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1726 : rewrite_type = COMP_IV_ELIM;
1727 :
1728 8048491 : end:
1729 8048491 : if (control_var)
1730 8048491 : *control_var = op0;
1731 8048491 : if (iv_var)
1732 1535450 : *iv_var = iv0;
1733 8048491 : if (bound)
1734 8048491 : *bound = op1;
1735 8048491 : if (iv_bound)
1736 8048491 : *iv_bound = iv1;
1737 :
1738 8048491 : return rewrite_type;
1739 : }
1740 :
1741 : /* Checks whether the condition in STMT is interesting and if so,
1742 : records it. */
1743 :
1744 : static void
1745 1535450 : find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1746 : {
1747 1535450 : tree *var_p, *bound_p;
1748 1535450 : struct iv *var_iv, *bound_iv;
1749 1535450 : enum comp_iv_rewrite ret;
1750 :
1751 1535450 : ret = extract_cond_operands (data, stmt,
1752 : &var_p, &bound_p, &var_iv, &bound_iv);
1753 1535450 : if (ret == COMP_IV_NA)
1754 : {
1755 939750 : find_interesting_uses_op (data, *var_p);
1756 939750 : find_interesting_uses_op (data, *bound_p);
1757 939750 : return;
1758 : }
1759 :
1760 595700 : record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1761 : /* Record compare type iv_use for iv on the other side of comparison. */
1762 595700 : if (ret == COMP_IV_EXPR_2)
1763 2015 : record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1764 : }
1765 :
1766 : /* Returns the outermost loop EXPR is obviously invariant in
1767 : relative to the loop LOOP, i.e. if all its operands are defined
1768 : outside of the returned loop. Returns NULL if EXPR is not
1769 : even obviously invariant in LOOP. */
1770 :
1771 : class loop *
1772 367440 : outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1773 : {
1774 367440 : basic_block def_bb;
1775 367440 : unsigned i, len;
1776 :
1777 367440 : if (is_gimple_min_invariant (expr))
1778 49008 : return current_loops->tree_root;
1779 :
1780 318432 : if (TREE_CODE (expr) == SSA_NAME)
1781 : {
1782 186251 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1783 186251 : if (def_bb)
1784 : {
1785 113847 : if (flow_bb_inside_loop_p (loop, def_bb))
1786 : return NULL;
1787 227678 : return superloop_at_depth (loop,
1788 156940 : loop_depth (def_bb->loop_father) + 1);
1789 : }
1790 :
1791 72404 : return current_loops->tree_root;
1792 : }
1793 :
1794 132181 : if (!EXPR_P (expr))
1795 : return NULL;
1796 :
1797 132181 : unsigned maxdepth = 0;
1798 132181 : len = TREE_OPERAND_LENGTH (expr);
1799 344484 : for (i = 0; i < len; i++)
1800 : {
1801 212327 : class loop *ivloop;
1802 212327 : if (!TREE_OPERAND (expr, i))
1803 0 : continue;
1804 :
1805 212327 : ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1806 212327 : if (!ivloop)
1807 : return NULL;
1808 376506 : maxdepth = MAX (maxdepth, loop_depth (ivloop));
1809 : }
1810 :
1811 132157 : return superloop_at_depth (loop, maxdepth);
1812 : }
1813 :
1814 : /* Returns true if expression EXPR is obviously invariant in LOOP,
1815 : i.e. if all its operands are defined outside of the LOOP. LOOP
1816 : should not be the function body. */
1817 :
1818 : bool
1819 12065345 : expr_invariant_in_loop_p (class loop *loop, tree expr)
1820 : {
1821 12065345 : basic_block def_bb;
1822 12065345 : unsigned i, len;
1823 :
1824 12065345 : gcc_assert (loop_depth (loop) > 0);
1825 :
1826 12065345 : if (is_gimple_min_invariant (expr))
1827 : return true;
1828 :
1829 8379425 : if (TREE_CODE (expr) == SSA_NAME)
1830 : {
1831 7966239 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1832 7966239 : if (def_bb
1833 7966239 : && flow_bb_inside_loop_p (loop, def_bb))
1834 : return false;
1835 :
1836 4082779 : return true;
1837 : }
1838 :
1839 413186 : if (!EXPR_P (expr))
1840 : return false;
1841 :
1842 413183 : len = TREE_OPERAND_LENGTH (expr);
1843 889341 : for (i = 0; i < len; i++)
1844 525310 : if (TREE_OPERAND (expr, i)
1845 525310 : && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1846 : return false;
1847 :
1848 : return true;
1849 : }
1850 :
1851 : /* Given expression EXPR which computes inductive values with respect
1852 : to loop recorded in DATA, this function returns biv from which EXPR
1853 : is derived by tracing definition chains of ssa variables in EXPR. */
1854 :
1855 : static struct iv*
1856 728697 : find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1857 : {
1858 1177935 : struct iv *iv;
1859 1177935 : unsigned i, n;
1860 1177935 : tree e2, e1;
1861 1177935 : enum tree_code code;
1862 1177935 : gimple *stmt;
1863 :
1864 1177935 : if (expr == NULL_TREE)
1865 : return NULL;
1866 :
1867 1177800 : if (is_gimple_min_invariant (expr))
1868 : return NULL;
1869 :
1870 938143 : code = TREE_CODE (expr);
1871 938143 : if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1872 : {
1873 18085 : n = TREE_OPERAND_LENGTH (expr);
1874 19420 : for (i = 0; i < n; i++)
1875 : {
1876 19104 : iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1877 19104 : if (iv)
1878 : return iv;
1879 : }
1880 : }
1881 :
1882 : /* Stop if it's not ssa name. */
1883 920374 : if (code != SSA_NAME)
1884 : return NULL;
1885 :
1886 919602 : iv = get_iv (data, expr);
1887 919602 : if (!iv || integer_zerop (iv->step))
1888 34941 : return NULL;
1889 884661 : else if (iv->biv_p)
1890 : return iv;
1891 :
1892 645653 : stmt = SSA_NAME_DEF_STMT (expr);
1893 645653 : if (gphi *phi = dyn_cast <gphi *> (stmt))
1894 : {
1895 1878 : ssa_op_iter iter;
1896 1878 : use_operand_p use_p;
1897 1878 : basic_block phi_bb = gimple_bb (phi);
1898 :
1899 : /* Skip loop header PHI that doesn't define biv. */
1900 1878 : if (phi_bb->loop_father == data->current_loop)
1901 : return NULL;
1902 :
1903 0 : if (virtual_operand_p (gimple_phi_result (phi)))
1904 : return NULL;
1905 :
1906 0 : FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1907 : {
1908 0 : tree use = USE_FROM_PTR (use_p);
1909 0 : iv = find_deriving_biv_for_expr (data, use);
1910 0 : if (iv)
1911 : return iv;
1912 : }
1913 : return NULL;
1914 : }
1915 643775 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
1916 : return NULL;
1917 :
1918 643775 : e1 = gimple_assign_rhs1 (stmt);
1919 643775 : code = gimple_assign_rhs_code (stmt);
1920 643775 : if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1921 : return find_deriving_biv_for_expr (data, e1);
1922 :
1923 635174 : switch (code)
1924 : {
1925 468443 : case MULT_EXPR:
1926 468443 : case PLUS_EXPR:
1927 468443 : case MINUS_EXPR:
1928 468443 : case POINTER_PLUS_EXPR:
1929 : /* Increments, decrements and multiplications by a constant
1930 : are simple. */
1931 468443 : e2 = gimple_assign_rhs2 (stmt);
1932 468443 : iv = find_deriving_biv_for_expr (data, e2);
1933 468443 : if (iv)
1934 : return iv;
1935 440637 : gcc_fallthrough ();
1936 :
1937 440637 : CASE_CONVERT:
1938 : /* Casts are simple. */
1939 440637 : return find_deriving_biv_for_expr (data, e1);
1940 :
1941 : default:
1942 : break;
1943 : }
1944 :
1945 : return NULL;
1946 : }
1947 :
1948 : /* Record BIV, its predecessor and successor that they are used in
1949 : address type uses. */
1950 :
1951 : static void
1952 554774 : record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1953 : {
1954 554774 : unsigned i;
1955 554774 : tree type, base_1, base_2;
1956 554774 : bitmap_iterator bi;
1957 :
1958 552632 : if (!biv || !biv->biv_p || integer_zerop (biv->step)
1959 1107406 : || biv->have_address_use || !biv->no_overflow)
1960 288910 : return;
1961 :
1962 525071 : type = TREE_TYPE (biv->base);
1963 525071 : if (!INTEGRAL_TYPE_P (type))
1964 : return;
1965 :
1966 265864 : biv->have_address_use = true;
1967 265864 : data->bivs_not_used_in_addr--;
1968 265864 : base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1969 2447028 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1970 : {
1971 2181164 : struct iv *iv = ver_info (data, i)->iv;
1972 :
1973 1966832 : if (!iv || !iv->biv_p || integer_zerop (iv->step)
1974 3085785 : || iv->have_address_use || !iv->no_overflow)
1975 1888778 : continue;
1976 :
1977 292386 : if (type != TREE_TYPE (iv->base)
1978 292386 : || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1979 31176 : continue;
1980 :
1981 261210 : if (!operand_equal_p (biv->step, iv->step, 0))
1982 5844 : continue;
1983 :
1984 255366 : base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1985 255366 : if (operand_equal_p (base_1, iv->base, 0)
1986 255366 : || operand_equal_p (base_2, biv->base, 0))
1987 : {
1988 249599 : iv->have_address_use = true;
1989 249599 : data->bivs_not_used_in_addr--;
1990 : }
1991 : }
1992 : }
1993 :
/* Data handed through for_each_index to the callback idx_find_step
   defined below.  That callback cumulates the steps of indices into STEP
   and replaces their values with the initial ones; it returns false when
   the value of the index cannot be determined.  */

struct ifs_ivopts_data
{
  /* The data of the ivopts pass; supplies the current loop and the iv
     lookup used by the callback.  */
  struct ivopts_data *ivopts_data;

  /* The statement containing the memory reference being walked.  */
  gimple *stmt;

  /* The step accumulated so far.  The caller initializes it to
     size_zero_node and the callback adds each index's contribution.  */
  tree step;
};
2004 :
2005 : static bool
2006 2219976 : idx_find_step (tree base, tree *idx, void *data)
2007 : {
2008 2219976 : struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2009 2219976 : struct iv *iv;
2010 2219976 : bool use_overflow_semantics = false;
2011 2219976 : tree step, iv_base, iv_step, lbound, off;
2012 2219976 : class loop *loop = dta->ivopts_data->current_loop;
2013 :
2014 : /* If base is a component ref, require that the offset of the reference
2015 : be invariant. */
2016 2219976 : if (TREE_CODE (base) == COMPONENT_REF)
2017 : {
2018 78 : off = component_ref_field_offset (base);
2019 78 : return expr_invariant_in_loop_p (loop, off);
2020 : }
2021 :
2022 : /* If base is array, first check whether we will be able to move the
2023 : reference out of the loop (in order to take its address in strength
2024 : reduction). In order for this to work we need both lower bound
2025 : and step to be loop invariants. */
2026 2219898 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2027 : {
2028 : /* Moreover, for a range, the size needs to be invariant as well. */
2029 521680 : if (TREE_CODE (base) == ARRAY_RANGE_REF
2030 521680 : && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2031 : return false;
2032 :
2033 521680 : step = array_ref_element_size (base);
2034 521680 : lbound = array_ref_low_bound (base);
2035 :
2036 521680 : if (!expr_invariant_in_loop_p (loop, step)
2037 521680 : || !expr_invariant_in_loop_p (loop, lbound))
2038 3150 : return false;
2039 : }
2040 :
2041 2216748 : if (TREE_CODE (*idx) != SSA_NAME)
2042 : return true;
2043 :
2044 1803013 : iv = get_iv (dta->ivopts_data, *idx);
2045 1803013 : if (!iv)
2046 : return false;
2047 :
2048 : /* XXX We produce for a base of *D42 with iv->base being &x[0]
2049 : *&x[0], which is not folded and does not trigger the
2050 : ARRAY_REF path below. */
2051 1167856 : *idx = iv->base;
2052 :
2053 1167856 : if (integer_zerop (iv->step))
2054 : return true;
2055 :
2056 872452 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2057 : {
2058 308414 : step = array_ref_element_size (base);
2059 :
2060 : /* We only handle addresses whose step is an integer constant. */
2061 308414 : if (TREE_CODE (step) != INTEGER_CST)
2062 : return false;
2063 : }
2064 : else
2065 : /* The step for pointer arithmetics already is 1 byte. */
2066 564038 : step = size_one_node;
2067 :
2068 872435 : iv_base = iv->base;
2069 872435 : iv_step = iv->step;
2070 872435 : if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2071 : use_overflow_semantics = true;
2072 :
2073 872435 : if (!convert_affine_scev (dta->ivopts_data->current_loop,
2074 : sizetype, &iv_base, &iv_step, dta->stmt,
2075 : use_overflow_semantics))
2076 : {
2077 : /* The index might wrap. */
2078 : return false;
2079 : }
2080 :
2081 869143 : step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2082 869143 : dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2083 :
2084 869143 : if (dta->ivopts_data->bivs_not_used_in_addr)
2085 : {
2086 554774 : if (!iv->biv_p)
2087 241150 : iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2088 :
2089 554774 : record_biv_for_address_use (dta->ivopts_data, iv);
2090 : }
2091 : return true;
2092 : }
2093 :
2094 : /* Records use in index IDX. Callback for for_each_index. Ivopts data
2095 : object is passed to it in DATA. */
2096 :
2097 : static bool
2098 1811291 : idx_record_use (tree base, tree *idx,
2099 : void *vdata)
2100 : {
2101 1811291 : struct ivopts_data *data = (struct ivopts_data *) vdata;
2102 1811291 : find_interesting_uses_op (data, *idx);
2103 1811291 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2104 : {
2105 230483 : if (TREE_OPERAND (base, 2))
2106 5498 : find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2107 230483 : if (TREE_OPERAND (base, 3))
2108 16794 : find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2109 : }
2110 1811291 : return true;
2111 : }
2112 :
2113 : /* If we can prove that TOP = cst * BOT for some constant cst,
2114 : store cst to MUL and return true. Otherwise return false.
2115 : The returned value is always sign-extended, regardless of the
2116 : signedness of TOP and BOT. */
2117 :
2118 : static bool
2119 18830521 : constant_multiple_of (tree top, tree bot, widest_int *mul,
2120 : struct ivopts_data *data)
2121 : {
2122 37661042 : aff_tree aff_top, aff_bot;
2123 18830521 : tree_to_aff_combination_expand (top, TREE_TYPE (top), &aff_top,
2124 : &data->name_expansion_cache);
2125 18830521 : tree_to_aff_combination_expand (bot, TREE_TYPE (bot), &aff_bot,
2126 : &data->name_expansion_cache);
2127 :
2128 18830521 : poly_widest_int poly_mul;
2129 18830521 : if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul)
2130 18830521 : && poly_mul.is_constant (mul))
2131 15527553 : return true;
2132 :
2133 : return false;
2134 18830521 : }
2135 :
2136 : /* Return true if memory reference REF with step STEP may be unaligned. */
2137 :
2138 : static bool
2139 0 : may_be_unaligned_p (tree ref, tree step)
2140 : {
2141 : /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2142 : thus they are not misaligned. */
2143 0 : if (TREE_CODE (ref) == TARGET_MEM_REF)
2144 : return false;
2145 :
2146 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2147 0 : if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2148 0 : align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2149 :
2150 0 : unsigned HOST_WIDE_INT bitpos;
2151 0 : unsigned int ref_align;
2152 0 : get_object_alignment_1 (ref, &ref_align, &bitpos);
2153 0 : if (ref_align < align
2154 0 : || (bitpos % align) != 0
2155 0 : || (bitpos % BITS_PER_UNIT) != 0)
2156 : return true;
2157 :
2158 0 : unsigned int trailing_zeros = tree_ctz (step);
2159 0 : if (trailing_zeros < HOST_BITS_PER_INT
2160 0 : && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2161 : return true;
2162 :
2163 : return false;
2164 : }
2165 :
2166 : /* Return true if EXPR may be non-addressable. */
2167 :
2168 : bool
2169 12998488 : may_be_nonaddressable_p (tree expr)
2170 : {
2171 13882067 : switch (TREE_CODE (expr))
2172 : {
2173 9246513 : case VAR_DECL:
2174 : /* Check if it's a register variable. */
2175 9246513 : return DECL_HARD_REGISTER (expr);
2176 :
2177 : case TARGET_MEM_REF:
2178 : /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2179 : target, thus they are always addressable. */
2180 : return false;
2181 :
2182 1941681 : case MEM_REF:
2183 : /* Likewise for MEM_REFs, modulo the storage order. */
2184 1941681 : return REF_REVERSE_STORAGE_ORDER (expr);
2185 :
2186 76 : case BIT_FIELD_REF:
2187 76 : if (REF_REVERSE_STORAGE_ORDER (expr))
2188 : return true;
2189 76 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2190 :
2191 1231978 : case COMPONENT_REF:
2192 1231978 : if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2193 : return true;
2194 1231978 : return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2195 1231978 : || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2196 :
2197 861161 : case ARRAY_REF:
2198 861161 : case ARRAY_RANGE_REF:
2199 861161 : if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2200 : return true;
2201 861161 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2202 :
2203 22352 : case VIEW_CONVERT_EXPR:
2204 : /* This kind of view-conversions may wrap non-addressable objects
2205 : and make them look addressable. After some processing the
2206 : non-addressability may be uncovered again, causing ADDR_EXPRs
2207 : of inappropriate objects to be built. */
2208 22352 : if (is_gimple_reg (TREE_OPERAND (expr, 0))
2209 22352 : || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2210 : return true;
2211 22342 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2212 :
2213 : CASE_CONVERT:
2214 : return true;
2215 :
2216 : default:
2217 : break;
2218 : }
2219 :
2220 : return false;
2221 : }
2222 :
/* Finds addresses in *OP_P inside STMT.  *OP_P is a memory reference.
   If its address can be expressed as an iv (a base with the ivs in its
   indices replaced by their initial values, plus a nonzero step), record
   it in DATA as a USE_REF_ADDRESS use.  Otherwise fall back to recording
   the uses of the individual indices of *OP_P.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
                               tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  /* Work on a private copy: the code below substitutes iv bases into
     BASE, and *OP_P must stay intact for the failure path.  */
  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      /* Replace each SSA name component of the TARGET_MEM_REF by the base
         of its iv, collecting the step; give up if a component has no
         iv.  */
      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          /* NOTE(review): this overwrites rather than adds to the step
             taken from TMR_BASE above -- confirm this is intended.  */
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          /* The index step is scaled by TMR_STEP, if present, and added
             to the step collected so far.  */
          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      /* An address that does not change is not an interesting use.  */
      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      /* Let idx_find_step substitute the initial values of the ivs into
         the indices of BASE and cumulate the overall step.  */
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          /* Find the innermost reference and, if it is a MEM_REF, try to
             fold it.  */
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (data, base, step);
  /* Fail if base object of this memory reference is unknown.  */
  if (civ->base_object == NULL_TREE)
    goto fail;

  record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
  return;

fail:
  /* The address as a whole is not usable; record the uses found in the
     indices of the original, unmodified reference instead.  */
  for_each_index (op_p, idx_record_use, data);
}
2344 :
2345 : /* Finds and records invariants used in STMT. */
2346 :
2347 : static void
2348 15384229 : find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2349 : {
2350 15384229 : ssa_op_iter iter;
2351 15384229 : use_operand_p use_p;
2352 15384229 : tree op;
2353 :
2354 51254115 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2355 : {
2356 20485657 : op = USE_FROM_PTR (use_p);
2357 20485657 : record_invariant (data, op, false);
2358 : }
2359 15384229 : }
2360 :
2361 : /* CALL calls an internal function. If operand *OP_P will become an
2362 : address when the call is expanded, return the type of the memory
2363 : being addressed, otherwise return null. */
2364 :
2365 : static tree
2366 2242 : get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2367 : {
2368 2242 : switch (gimple_call_internal_fn (call))
2369 : {
2370 372 : case IFN_MASK_LOAD:
2371 372 : case IFN_MASK_LOAD_LANES:
2372 372 : case IFN_MASK_LEN_LOAD_LANES:
2373 372 : case IFN_LEN_LOAD:
2374 372 : case IFN_MASK_LEN_LOAD:
2375 372 : if (op_p == gimple_call_arg_ptr (call, 0))
2376 372 : return TREE_TYPE (gimple_call_lhs (call));
2377 : return NULL_TREE;
2378 :
2379 427 : case IFN_MASK_STORE:
2380 427 : case IFN_MASK_STORE_LANES:
2381 427 : case IFN_MASK_LEN_STORE_LANES:
2382 427 : case IFN_LEN_STORE:
2383 427 : case IFN_MASK_LEN_STORE:
2384 427 : {
2385 427 : if (op_p == gimple_call_arg_ptr (call, 0))
2386 : {
2387 427 : internal_fn ifn = gimple_call_internal_fn (call);
2388 427 : int index = internal_fn_stored_value_index (ifn);
2389 427 : return TREE_TYPE (gimple_call_arg (call, index));
2390 : }
2391 : return NULL_TREE;
2392 : }
2393 :
2394 : default:
2395 : return NULL_TREE;
2396 : }
2397 : }
2398 :
2399 : /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2400 : Return true if the operand will become an address when STMT
2401 : is expanded and record the associated address use if so. */
2402 :
2403 : static bool
2404 1742084 : find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2405 : struct iv *iv)
2406 : {
2407 : /* Fail if base object of this memory reference is unknown. */
2408 1742084 : if (iv->base_object == NULL_TREE)
2409 : return false;
2410 :
2411 643032 : tree mem_type = NULL_TREE;
2412 643032 : if (gcall *call = dyn_cast <gcall *> (stmt))
2413 123286 : if (gimple_call_internal_p (call))
2414 2242 : mem_type = get_mem_type_for_internal_fn (call, op_p);
2415 2242 : if (mem_type)
2416 : {
2417 799 : iv = alloc_iv (data, iv->base, iv->step);
2418 799 : record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2419 799 : return true;
2420 : }
2421 : return false;
2422 : }
2423 :
2424 : /* Finds interesting uses of induction variables in the statement STMT. */
2425 :
2426 : static void
2427 15384229 : find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2428 : {
2429 15384229 : struct iv *iv;
2430 15384229 : tree op, *lhs, *rhs;
2431 15384229 : ssa_op_iter iter;
2432 15384229 : use_operand_p use_p;
2433 15384229 : enum tree_code code;
2434 :
2435 15384229 : find_invariants_stmt (data, stmt);
2436 :
2437 15384229 : if (gimple_code (stmt) == GIMPLE_COND)
2438 : {
2439 1451270 : find_interesting_uses_cond (data, stmt);
2440 8996943 : return;
2441 : }
2442 :
2443 13932959 : if (is_gimple_assign (stmt))
2444 : {
2445 10507266 : lhs = gimple_assign_lhs_ptr (stmt);
2446 10507266 : rhs = gimple_assign_rhs1_ptr (stmt);
2447 :
2448 10507266 : if (TREE_CODE (*lhs) == SSA_NAME)
2449 : {
2450 : /* If the statement defines an induction variable, the uses are not
2451 : interesting by themselves. */
2452 :
2453 9399883 : iv = get_iv (data, *lhs);
2454 :
2455 9399883 : if (iv && !integer_zerop (iv->step))
2456 : return;
2457 : }
2458 :
2459 8162697 : code = gimple_assign_rhs_code (stmt);
2460 8162697 : if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2461 8162697 : && (REFERENCE_CLASS_P (*rhs)
2462 1255056 : || is_gimple_val (*rhs)))
2463 : {
2464 2793826 : if (REFERENCE_CLASS_P (*rhs))
2465 1733402 : find_interesting_uses_address (data, stmt, rhs);
2466 : else
2467 1060424 : find_interesting_uses_op (data, *rhs);
2468 :
2469 2793826 : if (REFERENCE_CLASS_P (*lhs))
2470 959957 : find_interesting_uses_address (data, stmt, lhs);
2471 2793826 : return;
2472 : }
2473 5368871 : else if (TREE_CODE_CLASS (code) == tcc_comparison)
2474 : {
2475 84180 : find_interesting_uses_cond (data, stmt);
2476 84180 : return;
2477 : }
2478 :
2479 : /* TODO -- we should also handle address uses of type
2480 :
2481 : memory = call (whatever);
2482 :
2483 : and
2484 :
2485 : call (memory). */
2486 : }
2487 :
2488 8710384 : if (gimple_code (stmt) == GIMPLE_PHI
2489 8710384 : && gimple_bb (stmt) == data->current_loop->header)
2490 : {
2491 1449461 : iv = get_iv (data, PHI_RESULT (stmt));
2492 :
2493 1449461 : if (iv && !integer_zerop (iv->step))
2494 : return;
2495 : }
2496 :
2497 26334465 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2498 : {
2499 10657353 : op = USE_FROM_PTR (use_p);
2500 :
2501 10657353 : if (TREE_CODE (op) != SSA_NAME)
2502 522590 : continue;
2503 :
2504 10134763 : iv = get_iv (data, op);
2505 10134763 : if (!iv)
2506 8392679 : continue;
2507 :
2508 1742084 : if (!find_address_like_use (data, stmt, use_p->use, iv))
2509 1741285 : find_interesting_uses_op (data, op);
2510 : }
2511 : }
2512 :
2513 : /* Finds interesting uses of induction variables outside of loops
2514 : on loop exit edge EXIT. */
2515 :
2516 : static void
2517 883106 : find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2518 : {
2519 883106 : gphi *phi;
2520 883106 : gphi_iterator psi;
2521 883106 : tree def;
2522 :
2523 1970108 : for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2524 : {
2525 1087002 : phi = psi.phi ();
2526 1087002 : def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2527 2083572 : if (!virtual_operand_p (def))
2528 532324 : find_interesting_uses_op (data, def);
2529 : }
2530 883106 : }
2531 :
2532 : /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2533 : mode for memory reference represented by USE. */
2534 :
2535 : static GTY (()) vec<rtx, va_gc> *addr_list;
2536 :
2537 : static bool
2538 216367 : addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2539 : {
2540 216367 : rtx reg, addr;
2541 216367 : unsigned list_index;
2542 216367 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2543 216367 : machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2544 :
2545 216367 : list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2546 216367 : if (list_index >= vec_safe_length (addr_list))
2547 10214 : vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2548 :
2549 216367 : addr = (*addr_list)[list_index];
2550 216367 : if (!addr)
2551 : {
2552 13317 : addr_mode = targetm.addr_space.address_mode (as);
2553 13317 : reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2554 13317 : addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2555 13317 : (*addr_list)[list_index] = addr;
2556 : }
2557 : else
2558 203050 : addr_mode = GET_MODE (addr);
2559 :
2560 216367 : XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2561 216367 : return (memory_address_addr_space_p (mem_mode, addr, as));
2562 : }
2563 :
2564 : /* Comparison function to sort group in ascending order of addr_offset. */
2565 :
2566 : static int
2567 3121086 : group_compare_offset (const void *a, const void *b)
2568 : {
2569 3121086 : const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2570 3121086 : const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2571 :
2572 3121086 : return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2573 : }
2574 :
2575 : /* Check if small groups should be split. Return true if no group
2576 : contains more than two uses with distinct addr_offsets. Return
2577 : false otherwise. We want to split such groups because:
2578 :
2579 : 1) Small groups don't have much benefit and may interfere with
2580 : general candidate selection.
2581 : 2) Size for problem with only small groups is usually small and
2582 : general algorithm can handle it well.
2583 :
2584 : TODO -- Above claim may not hold when we want to merge memory
2585 : accesses with conseuctive addresses. */
2586 :
2587 : static bool
2588 500624 : split_small_address_groups_p (struct ivopts_data *data)
2589 : {
2590 500624 : unsigned int i, j, distinct = 1;
2591 500624 : struct iv_use *pre;
2592 500624 : struct iv_group *group;
2593 :
2594 2089053 : for (i = 0; i < data->vgroups.length (); i++)
2595 : {
2596 1588429 : group = data->vgroups[i];
2597 1588429 : if (group->vuses.length () == 1)
2598 1450114 : continue;
2599 :
2600 138315 : gcc_assert (address_p (group->type));
2601 138315 : if (group->vuses.length () == 2)
2602 : {
2603 79977 : if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2604 79977 : group->vuses[1]->addr_offset) > 0)
2605 19627 : std::swap (group->vuses[0], group->vuses[1]);
2606 : }
2607 : else
2608 58338 : group->vuses.qsort (group_compare_offset);
2609 :
2610 138315 : if (distinct > 2)
2611 13788 : continue;
2612 :
2613 124527 : distinct = 1;
2614 1778275 : for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2615 : {
2616 189846 : if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2617 : {
2618 132694 : pre = group->vuses[j];
2619 132694 : distinct++;
2620 : }
2621 :
2622 189846 : if (distinct > 2)
2623 : break;
2624 : }
2625 : }
2626 :
2627 500624 : return (distinct <= 2);
2628 : }
2629 :
2630 : /* For each group of address type uses, this function further groups
2631 : these uses according to the maximum offset supported by target's
2632 : [base + offset] addressing mode. */
2633 :
2634 : static void
2635 500624 : split_address_groups (struct ivopts_data *data)
2636 : {
2637 500624 : unsigned int i, j;
2638 : /* Always split group. */
2639 500624 : bool split_p = split_small_address_groups_p (data);
2640 :
2641 2142728 : for (i = 0; i < data->vgroups.length (); i++)
2642 : {
2643 1642104 : struct iv_group *new_group = NULL;
2644 1642104 : struct iv_group *group = data->vgroups[i];
2645 1642104 : struct iv_use *use = group->vuses[0];
2646 :
2647 1642104 : use->id = 0;
2648 1642104 : use->group_id = group->id;
2649 1642104 : if (group->vuses.length () == 1)
2650 1498214 : continue;
2651 :
2652 143890 : gcc_assert (address_p (use->type));
2653 :
2654 1975621 : for (j = 1; j < group->vuses.length ();)
2655 : {
2656 333517 : struct iv_use *next = group->vuses[j];
2657 333517 : poly_int64 offset = next->addr_offset - use->addr_offset;
2658 :
2659 : /* Split group if asked to, or the offset against the first
2660 : use can't fit in offset part of addressing mode. IV uses
2661 : having the same offset are still kept in one group. */
2662 393216 : if (maybe_ne (offset, 0)
2663 333517 : && (split_p || !addr_offset_valid_p (use, offset)))
2664 : {
2665 59699 : if (!new_group)
2666 53675 : new_group = record_group (data, group->type);
2667 59699 : group->vuses.ordered_remove (j);
2668 59699 : new_group->vuses.safe_push (next);
2669 59699 : continue;
2670 : }
2671 :
2672 273818 : next->id = j;
2673 273818 : next->group_id = group->id;
2674 273818 : j++;
2675 : }
2676 : }
2677 500624 : }
2678 :
2679 : /* Finds uses of the induction variables that are interesting. */
2680 :
2681 : static void
2682 500624 : find_interesting_uses (struct ivopts_data *data, basic_block *body)
2683 : {
2684 500624 : basic_block bb;
2685 500624 : gimple_stmt_iterator bsi;
2686 500624 : unsigned i;
2687 500624 : edge e;
2688 :
2689 3301464 : for (i = 0; i < data->current_loop->num_nodes; i++)
2690 : {
2691 2800840 : edge_iterator ei;
2692 2800840 : bb = body[i];
2693 :
2694 7146644 : FOR_EACH_EDGE (e, ei, bb->succs)
2695 4345804 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2696 4345804 : && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2697 883106 : find_interesting_uses_outside (data, e);
2698 :
2699 5654016 : for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2700 2853176 : find_interesting_uses_stmt (data, gsi_stmt (bsi));
2701 27428419 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2702 21826739 : if (!is_gimple_debug (gsi_stmt (bsi)))
2703 12531053 : find_interesting_uses_stmt (data, gsi_stmt (bsi));
2704 : }
2705 :
2706 500624 : split_address_groups (data);
2707 :
2708 500624 : if (dump_file && (dump_flags & TDF_DETAILS))
2709 : {
2710 67 : fprintf (dump_file, "\n<IV Groups>:\n");
2711 67 : dump_groups (dump_file, data);
2712 67 : fprintf (dump_file, "\n");
2713 : }
2714 500624 : }
2715 :
2716 : /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2717 : is true, assume we are inside an address. If TOP_COMPREF is true, assume
2718 : we are at the top-level of the processed address. */
2719 :
2720 : static tree
2721 3381005 : strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2722 : poly_int64 *offset)
2723 : {
2724 3381005 : tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2725 3381005 : enum tree_code code;
2726 3381005 : tree type, orig_type = TREE_TYPE (expr);
2727 3381005 : poly_int64 off0, off1;
2728 3381005 : HOST_WIDE_INT st;
2729 3381005 : tree orig_expr = expr;
2730 :
2731 3381005 : STRIP_NOPS (expr);
2732 :
2733 3381005 : type = TREE_TYPE (expr);
2734 3381005 : code = TREE_CODE (expr);
2735 3381005 : *offset = 0;
2736 :
2737 3381005 : switch (code)
2738 : {
2739 620165 : case POINTER_PLUS_EXPR:
2740 620165 : case PLUS_EXPR:
2741 620165 : case MINUS_EXPR:
2742 620165 : op0 = TREE_OPERAND (expr, 0);
2743 620165 : op1 = TREE_OPERAND (expr, 1);
2744 :
2745 620165 : op0 = strip_offset_1 (op0, false, false, &off0);
2746 620165 : op1 = strip_offset_1 (op1, false, false, &off1);
2747 :
2748 620165 : *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2749 620165 : if (op0 == TREE_OPERAND (expr, 0)
2750 620165 : && op1 == TREE_OPERAND (expr, 1))
2751 : return orig_expr;
2752 :
2753 424502 : if (integer_zerop (op1))
2754 : expr = op0;
2755 46944 : else if (integer_zerop (op0))
2756 : {
2757 637 : if (code == MINUS_EXPR)
2758 : {
2759 637 : if (TYPE_OVERFLOW_UNDEFINED (type))
2760 : {
2761 380 : type = unsigned_type_for (type);
2762 380 : op1 = fold_convert (type, op1);
2763 : }
2764 637 : expr = fold_build1 (NEGATE_EXPR, type, op1);
2765 : }
2766 : else
2767 : expr = op1;
2768 : }
2769 : else
2770 : {
2771 46307 : if (TYPE_OVERFLOW_UNDEFINED (type))
2772 : {
2773 45895 : type = unsigned_type_for (type);
2774 45895 : if (code == POINTER_PLUS_EXPR)
2775 45881 : code = PLUS_EXPR;
2776 45895 : op0 = fold_convert (type, op0);
2777 45895 : op1 = fold_convert (type, op1);
2778 : }
2779 46307 : expr = fold_build2 (code, type, op0, op1);
2780 : }
2781 :
2782 424502 : return fold_convert (orig_type, expr);
2783 :
2784 228285 : case MULT_EXPR:
2785 228285 : op1 = TREE_OPERAND (expr, 1);
2786 228285 : if (!cst_and_fits_in_hwi (op1))
2787 : return orig_expr;
2788 :
2789 184854 : op0 = TREE_OPERAND (expr, 0);
2790 184854 : op0 = strip_offset_1 (op0, false, false, &off0);
2791 184854 : if (op0 == TREE_OPERAND (expr, 0))
2792 : return orig_expr;
2793 :
2794 35816 : *offset = off0 * int_cst_value (op1);
2795 35816 : if (integer_zerop (op0))
2796 : expr = op0;
2797 : else
2798 : {
2799 35816 : if (TYPE_OVERFLOW_UNDEFINED (type))
2800 : {
2801 3236 : type = unsigned_type_for (type);
2802 3236 : op0 = fold_convert (type, op0);
2803 3236 : op1 = fold_convert (type, op1);
2804 : }
2805 35816 : expr = fold_build2 (MULT_EXPR, type, op0, op1);
2806 : }
2807 :
2808 35816 : return fold_convert (orig_type, expr);
2809 :
2810 0 : case ARRAY_REF:
2811 0 : case ARRAY_RANGE_REF:
2812 0 : if (!inside_addr)
2813 : return orig_expr;
2814 :
2815 0 : step = array_ref_element_size (expr);
2816 0 : if (!cst_and_fits_in_hwi (step))
2817 : break;
2818 :
2819 0 : st = int_cst_value (step);
2820 0 : op1 = TREE_OPERAND (expr, 1);
2821 0 : op1 = strip_offset_1 (op1, false, false, &off1);
2822 0 : *offset = off1 * st;
2823 :
2824 0 : if (top_compref
2825 0 : && integer_zerop (op1))
2826 : {
2827 : /* Strip the component reference completely. */
2828 0 : op0 = TREE_OPERAND (expr, 0);
2829 0 : op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2830 0 : *offset += off0;
2831 0 : return op0;
2832 : }
2833 : break;
2834 :
2835 0 : case COMPONENT_REF:
2836 0 : {
2837 0 : tree field;
2838 :
2839 0 : if (!inside_addr)
2840 : return orig_expr;
2841 :
2842 0 : tmp = component_ref_field_offset (expr);
2843 0 : field = TREE_OPERAND (expr, 1);
2844 0 : if (top_compref
2845 0 : && cst_and_fits_in_hwi (tmp)
2846 0 : && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2847 : {
2848 0 : HOST_WIDE_INT boffset, abs_off;
2849 :
2850 : /* Strip the component reference completely. */
2851 0 : op0 = TREE_OPERAND (expr, 0);
2852 0 : op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2853 0 : boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2854 0 : abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2855 0 : if (boffset < 0)
2856 0 : abs_off = -abs_off;
2857 :
2858 0 : *offset = off0 + int_cst_value (tmp) + abs_off;
2859 0 : return op0;
2860 : }
2861 : }
2862 : break;
2863 :
2864 313960 : case ADDR_EXPR:
2865 313960 : op0 = TREE_OPERAND (expr, 0);
2866 313960 : op0 = strip_offset_1 (op0, true, true, &off0);
2867 313960 : *offset += off0;
2868 :
2869 313960 : if (op0 == TREE_OPERAND (expr, 0))
2870 : return orig_expr;
2871 :
2872 0 : expr = build_fold_addr_expr (op0);
2873 0 : return fold_convert (orig_type, expr);
2874 :
2875 : case MEM_REF:
2876 : /* ??? Offset operand? */
2877 : inside_addr = false;
2878 : break;
2879 :
2880 2218595 : default:
2881 2218595 : if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2882 863401 : return build_int_cst (orig_type, 0);
2883 : return orig_expr;
2884 : }
2885 :
2886 : /* Default handling of expressions for that we want to recurse into
2887 : the first operand. */
2888 0 : op0 = TREE_OPERAND (expr, 0);
2889 0 : op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2890 0 : *offset += off0;
2891 :
2892 0 : if (op0 == TREE_OPERAND (expr, 0)
2893 0 : && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2894 : return orig_expr;
2895 :
2896 0 : expr = copy_node (expr);
2897 0 : TREE_OPERAND (expr, 0) = op0;
2898 0 : if (op1)
2899 0 : TREE_OPERAND (expr, 1) = op1;
2900 :
2901 : /* Inside address, we might strip the top level component references,
2902 : thus changing type of the expression. Handling of ADDR_EXPR
2903 : will fix that. */
2904 0 : expr = fold_convert (orig_type, expr);
2905 :
2906 0 : return expr;
2907 : }
2908 :
2909 : /* Strips constant offsets from EXPR and stores them to OFFSET. */
2910 :
2911 : static tree
2912 1641861 : strip_offset (tree expr, poly_uint64 *offset)
2913 : {
2914 1641861 : poly_int64 off;
2915 1641861 : tree core = strip_offset_1 (expr, false, false, &off);
2916 1641861 : *offset = off;
2917 1641861 : return core;
2918 : }
2919 :
2920 : /* Returns variant of TYPE that can be used as base for different uses.
2921 : We return unsigned type with the same precision, which avoids problems
2922 : with overflows. */
2923 :
2924 : static tree
2925 8117545 : generic_type_for (tree type)
2926 : {
2927 8117545 : if (POINTER_TYPE_P (type))
2928 1407866 : return unsigned_type_for (type);
2929 :
2930 6709679 : if (TYPE_UNSIGNED (type))
2931 : return type;
2932 :
2933 3100493 : return unsigned_type_for (type);
2934 : }
2935 :
2936 : /* Private data for walk_tree. */
2937 :
2938 : struct walk_tree_data
2939 : {
2940 : bitmap *inv_vars;
2941 : struct ivopts_data *idata;
2942 : };
2943 :
2944 : /* Callback function for walk_tree, it records invariants and symbol
2945 : reference in *EXPR_P. DATA is the structure storing result info. */
2946 :
2947 : static tree
2948 37482751 : find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2949 : {
2950 37482751 : tree op = *expr_p;
2951 37482751 : struct version_info *info;
2952 37482751 : struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2953 :
2954 37482751 : if (TREE_CODE (op) != SSA_NAME)
2955 : return NULL_TREE;
2956 :
2957 8960775 : info = name_info (wdata->idata, op);
2958 : /* Because we expand simple operations when finding IVs, loop invariant
2959 : variable that isn't referred by the original loop could be used now.
2960 : Record such invariant variables here. */
2961 8960775 : if (!info->iv)
2962 : {
2963 394270 : struct ivopts_data *idata = wdata->idata;
2964 394270 : basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2965 :
2966 394270 : if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2967 : {
2968 394270 : tree steptype = TREE_TYPE (op);
2969 394270 : if (POINTER_TYPE_P (steptype))
2970 195032 : steptype = sizetype;
2971 394270 : set_iv (idata, op, op, build_int_cst (steptype, 0), true);
2972 394270 : record_invariant (idata, op, false);
2973 : }
2974 : }
2975 8960775 : if (!info->inv_id || info->has_nonlin_use)
2976 : return NULL_TREE;
2977 :
2978 7327959 : if (!*wdata->inv_vars)
2979 5730134 : *wdata->inv_vars = BITMAP_ALLOC (NULL);
2980 7327959 : bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2981 :
2982 7327959 : return NULL_TREE;
2983 : }
2984 :
2985 : /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
2986 : store it. */
2987 :
2988 : static inline void
2989 30007048 : find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2990 : {
2991 30007048 : struct walk_tree_data wdata;
2992 :
2993 30007048 : if (!inv_vars)
2994 12804660 : return;
2995 :
2996 17202388 : wdata.idata = data;
2997 17202388 : wdata.inv_vars = inv_vars;
2998 17202388 : walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2999 : }
3000 :
3001 : /* Get entry from invariant expr hash table for INV_EXPR. New entry
3002 : will be recorded if it doesn't exist yet. Given below two exprs:
3003 : inv_expr + cst1, inv_expr + cst2
3004 : It's hard to make decision whether constant part should be stripped
3005 : or not. We choose to not strip based on below facts:
3006 : 1) We need to count ADD cost for constant part if it's stripped,
3007 : which isn't always trivial where this functions is called.
3008 : 2) Stripping constant away may be conflict with following loop
3009 : invariant hoisting pass.
3010 : 3) Not stripping constant away results in more invariant exprs,
3011 : which usually leads to decision preferring lower reg pressure. */
3012 :
3013 : static iv_inv_expr_ent *
3014 2922552 : get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3015 : {
3016 2922552 : STRIP_NOPS (inv_expr);
3017 :
3018 2922552 : if (poly_int_tree_p (inv_expr)
3019 2922552 : || TREE_CODE (inv_expr) == SSA_NAME)
3020 : return NULL;
3021 :
3022 : /* Don't strip constant part away as we used to. */
3023 :
3024 : /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3025 2806009 : struct iv_inv_expr_ent ent;
3026 2806009 : ent.expr = inv_expr;
3027 2806009 : ent.hash = iterative_hash_expr (inv_expr, 0);
3028 2806009 : struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3029 :
3030 2806009 : if (!*slot)
3031 : {
3032 1186370 : *slot = XNEW (struct iv_inv_expr_ent);
3033 1186370 : (*slot)->expr = inv_expr;
3034 1186370 : (*slot)->hash = ent.hash;
3035 1186370 : (*slot)->id = ++data->max_inv_expr_id;
3036 : }
3037 :
3038 2806009 : return *slot;
3039 : }
3040 :
3041 :
3042 : /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3043 : unsuitable as ivopts candidates for potentially involving undefined
3044 : behavior. */
3045 :
3046 : static tree
3047 13555017 : find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3048 : {
3049 13555017 : basic_block bb = (basic_block) bb_;
3050 13555017 : if (TREE_CODE (*tp) == SSA_NAME
3051 2284608 : && ssa_name_maybe_undef_p (*tp)
3052 13564119 : && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3053 3215 : return *tp;
3054 13551802 : if (!EXPR_P (*tp))
3055 10519995 : *walk_subtrees = 0;
3056 : return NULL;
3057 : }
3058 :
3059 : /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3060 : position to POS. If USE is not NULL, the candidate is set as related to
3061 : it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3062 : replacement of the final value of the iv by a direct computation. */
3063 :
3064 : static struct iv_cand *
3065 9093143 : add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3066 : enum iv_position pos, struct iv_use *use,
3067 : gimple *incremented_at, struct iv *orig_iv = NULL,
3068 : bool doloop = false)
3069 : {
3070 9093143 : unsigned i;
3071 9093143 : struct iv_cand *cand = NULL;
3072 9093143 : tree type, orig_type;
3073 :
3074 9093143 : gcc_assert (base && step);
3075 :
3076 : /* -fkeep-gc-roots-live means that we have to keep a real pointer
3077 : live, but the ivopts code may replace a real pointer with one
3078 : pointing before or after the memory block that is then adjusted
3079 : into the memory block during the loop. FIXME: It would likely be
3080 : better to actually force the pointer live and still use ivopts;
3081 : for example, it would be enough to write the pointer into memory
3082 : and keep it there until after the loop. */
3083 9093143 : if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3084 : return NULL;
3085 :
3086 : /* If BASE contains undefined SSA names make sure we only record
3087 : the original IV. */
3088 8987138 : bool involves_undefs = false;
3089 8987138 : if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3090 : {
3091 3215 : if (pos != IP_ORIGINAL)
3092 : return NULL;
3093 : important = false;
3094 : involves_undefs = true;
3095 : }
3096 :
3097 : /* For non-original variables, make sure their values are computed in a type
3098 : that does not invoke undefined behavior on overflows (since in general,
3099 : we cannot prove that these induction variables are non-wrapping). */
3100 8983923 : if (pos != IP_ORIGINAL)
3101 : {
3102 8117545 : orig_type = TREE_TYPE (base);
3103 8117545 : type = generic_type_for (orig_type);
3104 8117545 : if (type != orig_type)
3105 : {
3106 4508359 : base = fold_convert (type, base);
3107 4508359 : step = fold_convert (type, step);
3108 : }
3109 : }
3110 :
3111 47665520 : for (i = 0; i < data->vcands.length (); i++)
3112 : {
3113 42775997 : cand = data->vcands[i];
3114 :
3115 42775997 : if (cand->pos != pos)
3116 10305042 : continue;
3117 :
3118 32470955 : if (cand->incremented_at != incremented_at
3119 31978771 : || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3120 0 : && cand->ainc_use != use))
3121 492184 : continue;
3122 :
3123 31978771 : if (operand_equal_p (base, cand->iv->base, 0)
3124 9295319 : && operand_equal_p (step, cand->iv->step, 0)
3125 37456556 : && (TYPE_PRECISION (TREE_TYPE (base))
3126 5477785 : == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3127 : break;
3128 : }
3129 :
3130 17968598 : if (i == data->vcands.length ())
3131 : {
3132 4889523 : cand = XCNEW (struct iv_cand);
3133 4889523 : cand->id = i;
3134 4889523 : cand->iv = alloc_iv (data, base, step);
3135 4889523 : cand->pos = pos;
3136 4889523 : if (pos != IP_ORIGINAL)
3137 : {
3138 4022929 : if (doloop)
3139 0 : cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3140 : else
3141 4022929 : cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3142 4022929 : cand->var_after = cand->var_before;
3143 : }
3144 4889523 : cand->important = important;
3145 4889523 : cand->involves_undefs = involves_undefs;
3146 4889523 : cand->incremented_at = incremented_at;
3147 4889523 : cand->doloop_p = doloop;
3148 4889523 : data->vcands.safe_push (cand);
3149 :
3150 4889523 : if (!poly_int_tree_p (step))
3151 : {
3152 209136 : find_inv_vars (data, &step, &cand->inv_vars);
3153 :
3154 209136 : iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3155 : /* Share bitmap between inv_vars and inv_exprs for cand. */
3156 209136 : if (inv_expr != NULL)
3157 : {
3158 120981 : cand->inv_exprs = cand->inv_vars;
3159 120981 : cand->inv_vars = NULL;
3160 120981 : if (cand->inv_exprs)
3161 98090 : bitmap_clear (cand->inv_exprs);
3162 : else
3163 22891 : cand->inv_exprs = BITMAP_ALLOC (NULL);
3164 :
3165 120981 : bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3166 : }
3167 : }
3168 :
3169 4889523 : if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3170 0 : cand->ainc_use = use;
3171 : else
3172 4889523 : cand->ainc_use = NULL;
3173 :
3174 4889523 : cand->orig_iv = orig_iv;
3175 4889523 : if (dump_file && (dump_flags & TDF_DETAILS))
3176 725 : dump_cand (dump_file, cand);
3177 : }
3178 :
3179 8984299 : cand->important |= important;
3180 8984299 : cand->doloop_p |= doloop;
3181 :
3182 : /* Relate candidate to the group for which it is added. */
3183 8984299 : if (use)
3184 2489977 : bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3185 :
3186 : return cand;
3187 : }
3188 :
3189 : /* Returns true if incrementing the induction variable at the end of the LOOP
3190 : is allowed.
3191 :
3192 : The purpose is to avoid splitting latch edge with a biv increment, thus
3193 : creating a jump, possibly confusing other optimization passes and leaving
3194 : less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3195 : available (so we do not have a better alternative), or if the latch edge
3196 : is already nonempty. */
3197 :
3198 : static bool
3199 7998159 : allow_ip_end_pos_p (class loop *loop)
3200 : {
3201 : /* Do not allow IP_END when creating the IV would need to split the
3202 : latch edge as that makes all IP_NORMAL invalid. */
3203 7998159 : auto pos = gsi_last_bb (ip_end_pos (loop));
3204 7998159 : if (!gsi_end_p (pos) && stmt_ends_bb_p (*pos))
3205 : return false;
3206 :
3207 7998159 : if (!ip_normal_pos (loop))
3208 : return true;
3209 :
3210 7896234 : if (!empty_block_p (ip_end_pos (loop)))
3211 : return true;
3212 :
3213 : return false;
3214 : }
3215 :
3216 : /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3217 : Important field is set to IMPORTANT. */
3218 :
3219 : static void
3220 581933 : add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3221 : bool important, struct iv_use *use)
3222 : {
3223 581933 : basic_block use_bb = gimple_bb (use->stmt);
3224 581933 : machine_mode mem_mode;
3225 581933 : unsigned HOST_WIDE_INT cstepi;
3226 :
3227 : /* If we insert the increment in any position other than the standard
3228 : ones, we must ensure that it is incremented once per iteration.
3229 : It must not be in an inner nested loop, or one side of an if
3230 : statement. */
3231 581933 : if (use_bb->loop_father != data->current_loop
3232 580549 : || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3233 554050 : || stmt_can_throw_internal (cfun, use->stmt)
3234 1132162 : || !cst_and_fits_in_hwi (step))
3235 62150 : return;
3236 :
3237 519783 : cstepi = int_cst_value (step);
3238 :
3239 519783 : mem_mode = TYPE_MODE (use->mem_type);
3240 : if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3241 : || USE_STORE_PRE_INCREMENT (mem_mode))
3242 : && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3243 : || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3244 : || USE_STORE_PRE_DECREMENT (mem_mode))
3245 : && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3246 : {
3247 : enum tree_code code = MINUS_EXPR;
3248 : tree new_base;
3249 : tree new_step = step;
3250 :
3251 : if (POINTER_TYPE_P (TREE_TYPE (base)))
3252 : {
3253 : new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3254 : code = POINTER_PLUS_EXPR;
3255 : }
3256 : else
3257 : new_step = fold_convert (TREE_TYPE (base), new_step);
3258 : new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3259 : add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3260 : use->stmt);
3261 : }
3262 : if (((USE_LOAD_POST_INCREMENT (mem_mode)
3263 : || USE_STORE_POST_INCREMENT (mem_mode))
3264 : && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3265 : || ((USE_LOAD_POST_DECREMENT (mem_mode)
3266 : || USE_STORE_POST_DECREMENT (mem_mode))
3267 : && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3268 : {
3269 : add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3270 : use->stmt);
3271 : }
3272 : }
3273 :
3274 : /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3275 : position to POS. If USE is not NULL, the candidate is set as related to
3276 : it. The candidate computation is scheduled before exit condition and at
3277 : the end of loop. */
3278 :
3279 : static void
3280 6963310 : add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3281 : struct iv_use *use, struct iv *orig_iv = NULL,
3282 : bool doloop = false)
3283 : {
3284 6963310 : if (ip_normal_pos (data->current_loop))
3285 6878442 : add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3286 : doloop);
3287 : /* Exclude doloop candidate here since it requires decrement then comparison
3288 : and jump, the IP_END position doesn't match. */
3289 6963310 : if (!doloop && ip_end_pos (data->current_loop)
3290 13926620 : && allow_ip_end_pos_p (data->current_loop))
3291 278255 : add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3292 6963310 : }
3293 :
3294 : /* Adds standard iv candidates. */
3295 :
3296 : static void
3297 500623 : add_standard_iv_candidates (struct ivopts_data *data)
3298 : {
3299 500623 : add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3300 :
3301 : /* The same for a double-integer type if it is still fast enough. */
3302 500623 : if (TYPE_PRECISION
3303 500623 : (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3304 500623 : && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3305 452496 : add_candidate (data, build_int_cst (long_integer_type_node, 0),
3306 : build_int_cst (long_integer_type_node, 1), true, NULL);
3307 :
3308 : /* The same for a double-integer type if it is still fast enough. */
3309 500623 : if (TYPE_PRECISION
3310 500623 : (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3311 548738 : && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3312 12 : add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3313 : build_int_cst (long_long_integer_type_node, 1), true, NULL);
3314 500623 : }
3315 :
3316 :
3317 : /* Adds candidates bases on the old induction variable IV. */
3318 :
3319 : static void
3320 1737980 : add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3321 : {
3322 1737980 : gimple *phi;
3323 1737980 : tree def;
3324 1737980 : struct iv_cand *cand;
3325 :
3326 : /* Check if this biv is used in address type use. */
3327 1155149 : if (iv->no_overflow && iv->have_address_use
3328 515463 : && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3329 2253443 : && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3330 : {
3331 287873 : tree base = fold_convert (sizetype, iv->base);
3332 287873 : tree step = fold_convert (sizetype, iv->step);
3333 :
3334 : /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3335 287873 : add_candidate (data, base, step, true, NULL, iv);
3336 : /* Add iv cand of the original type only if it has nonlinear use. */
3337 287873 : if (iv->nonlin_use)
3338 28719 : add_candidate (data, iv->base, iv->step, true, NULL);
3339 : }
3340 : else
3341 1450107 : add_candidate (data, iv->base, iv->step, true, NULL);
3342 :
3343 : /* The same, but with initial value zero. */
3344 1737980 : if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3345 324592 : add_candidate (data, size_int (0), iv->step, true, NULL);
3346 : else
3347 1413388 : add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3348 : iv->step, true, NULL);
3349 :
3350 1737980 : phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3351 1737980 : if (gimple_code (phi) == GIMPLE_PHI)
3352 : {
3353 : /* Additionally record the possibility of leaving the original iv
3354 : untouched. */
3355 869070 : def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3356 : /* Don't add candidate if it's from another PHI node because
3357 : it's an affine iv appearing in the form of PEELED_CHREC. */
3358 869070 : phi = SSA_NAME_DEF_STMT (def);
3359 869070 : if (gimple_code (phi) != GIMPLE_PHI)
3360 : {
3361 1738140 : cand = add_candidate_1 (data,
3362 : iv->base, iv->step, true, IP_ORIGINAL, NULL,
3363 869070 : SSA_NAME_DEF_STMT (def));
3364 869070 : if (cand)
3365 : {
3366 866754 : cand->var_before = iv->ssa_name;
3367 866754 : cand->var_after = def;
3368 : }
3369 : }
3370 : else
3371 0 : gcc_assert (gimple_bb (phi) == data->current_loop->header);
3372 : }
3373 1737980 : }
3374 :
3375 : /* Adds candidates based on the old induction variables. */
3376 :
3377 : static void
3378 500623 : add_iv_candidate_for_bivs (struct ivopts_data *data)
3379 : {
3380 500623 : unsigned i;
3381 500623 : struct iv *iv;
3382 500623 : bitmap_iterator bi;
3383 :
3384 5481191 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3385 : {
3386 4980568 : iv = ver_info (data, i)->iv;
3387 4980568 : if (iv && iv->biv_p && !integer_zerop (iv->step))
3388 1737980 : add_iv_candidate_for_biv (data, iv);
3389 : }
3390 500623 : }
3391 :
3392 : /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3393 :
3394 : static void
3395 4704954 : record_common_cand (struct ivopts_data *data, tree base,
3396 : tree step, struct iv_use *use)
3397 : {
3398 4704954 : class iv_common_cand ent;
3399 4704954 : class iv_common_cand **slot;
3400 :
3401 4704954 : ent.base = base;
3402 4704954 : ent.step = step;
3403 4704954 : ent.hash = iterative_hash_expr (base, 0);
3404 4704954 : ent.hash = iterative_hash_expr (step, ent.hash);
3405 :
3406 4704954 : slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3407 4704954 : if (*slot == NULL)
3408 : {
3409 2896686 : *slot = new iv_common_cand ();
3410 2896686 : (*slot)->base = base;
3411 2896686 : (*slot)->step = step;
3412 2896686 : (*slot)->uses.create (8);
3413 2896686 : (*slot)->hash = ent.hash;
3414 2896686 : data->iv_common_cands.safe_push ((*slot));
3415 : }
3416 :
3417 4704954 : gcc_assert (use != NULL);
3418 4704954 : (*slot)->uses.safe_push (use);
3419 4704954 : return;
3420 4704954 : }
3421 :
3422 : /* Comparison function used to sort common candidates. */
3423 :
3424 : static int
3425 24194197 : common_cand_cmp (const void *p1, const void *p2)
3426 : {
3427 24194197 : unsigned n1, n2;
3428 24194197 : const class iv_common_cand *const *const ccand1
3429 : = (const class iv_common_cand *const *)p1;
3430 24194197 : const class iv_common_cand *const *const ccand2
3431 : = (const class iv_common_cand *const *)p2;
3432 :
3433 24194197 : n1 = (*ccand1)->uses.length ();
3434 24194197 : n2 = (*ccand2)->uses.length ();
3435 24194197 : return n2 - n1;
3436 : }
3437 :
3438 : /* Adds IV candidates based on common candidated recorded. */
3439 :
3440 : static void
3441 500623 : add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3442 : {
3443 500623 : unsigned i, j;
3444 500623 : struct iv_cand *cand_1, *cand_2;
3445 :
3446 500623 : data->iv_common_cands.qsort (common_cand_cmp);
3447 1535472 : for (i = 0; i < data->iv_common_cands.length (); i++)
3448 : {
3449 1519591 : class iv_common_cand *ptr = data->iv_common_cands[i];
3450 :
3451 : /* Only add IV candidate if it's derived from multiple uses. */
3452 1519591 : if (ptr->uses.length () <= 1)
3453 : break;
3454 :
3455 1034849 : cand_1 = NULL;
3456 1034849 : cand_2 = NULL;
3457 1034849 : if (ip_normal_pos (data->current_loop))
3458 1017792 : cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3459 : false, IP_NORMAL, NULL, NULL);
3460 :
3461 1034849 : if (ip_end_pos (data->current_loop)
3462 1034849 : && allow_ip_end_pos_p (data->current_loop))
3463 49584 : cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3464 : false, IP_END, NULL, NULL);
3465 :
3466 : /* Bind deriving uses and the new candidates. */
3467 3877966 : for (j = 0; j < ptr->uses.length (); j++)
3468 : {
3469 2843117 : struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3470 2843117 : if (cand_1)
3471 2761472 : bitmap_set_bit (group->related_cands, cand_1->id);
3472 2843117 : if (cand_2)
3473 154976 : bitmap_set_bit (group->related_cands, cand_2->id);
3474 : }
3475 : }
3476 :
3477 : /* Release data since it is useless from this point. */
3478 500623 : data->iv_common_cand_tab->empty ();
3479 500623 : data->iv_common_cands.truncate (0);
3480 500623 : }
3481 :
3482 : /* Adds candidates based on the value of USE's iv. */
3483 :
3484 : static void
3485 1642100 : add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3486 : {
3487 1642100 : poly_uint64 offset;
3488 1642100 : tree base;
3489 1642100 : struct iv *iv = use->iv;
3490 1642100 : tree basetype = TREE_TYPE (iv->base);
3491 :
3492 : /* Don't add candidate for iv_use with non integer, pointer or non-mode
3493 : precision types, instead, add candidate for the corresponding scev in
3494 : unsigned type with the same precision. See PR93674 for more info. */
3495 767341 : if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3496 2409213 : || !type_has_mode_precision_p (basetype))
3497 : {
3498 239 : basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3499 239 : TYPE_UNSIGNED (basetype));
3500 239 : add_candidate (data, fold_convert (basetype, iv->base),
3501 : fold_convert (basetype, iv->step), false, NULL);
3502 239 : return;
3503 : }
3504 :
3505 1641861 : add_candidate (data, iv->base, iv->step, false, use);
3506 :
3507 : /* Record common candidate for use in case it can be shared by others. */
3508 1641861 : record_common_cand (data, iv->base, iv->step, use);
3509 :
3510 : /* Record common candidate with initial value zero. */
3511 1641861 : basetype = TREE_TYPE (iv->base);
3512 1641861 : if (POINTER_TYPE_P (basetype))
3513 767113 : basetype = sizetype;
3514 1641861 : record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3515 :
3516 : /* Compare the cost of an address with an unscaled index with the cost of
3517 : an address with a scaled index and add candidate if useful. */
3518 1641861 : poly_int64 step;
3519 1641861 : if (use != NULL
3520 1641861 : && poly_int_tree_p (iv->step, &step)
3521 1404089 : && address_p (use->type))
3522 : {
3523 530937 : poly_int64 new_step;
3524 530937 : unsigned int fact = preferred_mem_scale_factor
3525 530937 : (use->iv->base,
3526 530937 : TYPE_MODE (use->mem_type),
3527 530937 : optimize_loop_for_speed_p (data->current_loop));
3528 :
3529 530937 : if (fact != 1
3530 530937 : && multiple_p (step, fact, &new_step))
3531 0 : add_candidate (data, size_int (0),
3532 0 : wide_int_to_tree (sizetype, new_step),
3533 : true, NULL);
3534 : }
3535 :
3536 : /* Record common candidate with constant offset stripped in base.
3537 : Like the use itself, we also add candidate directly for it. */
3538 1641861 : base = strip_offset (iv->base, &offset);
3539 1641861 : if (maybe_ne (offset, 0U) || base != iv->base)
3540 : {
3541 863400 : record_common_cand (data, base, iv->step, use);
3542 863400 : add_candidate (data, base, iv->step, false, use);
3543 : }
3544 :
3545 : /* Record common candidate with base_object removed in base. */
3546 1641861 : base = iv->base;
3547 1641861 : STRIP_NOPS (base);
3548 1641861 : if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3549 : {
3550 317481 : tree step = iv->step;
3551 :
3552 317481 : STRIP_NOPS (step);
3553 317481 : base = TREE_OPERAND (base, 1);
3554 317481 : step = fold_convert (sizetype, step);
3555 317481 : record_common_cand (data, base, step, use);
3556 : /* Also record common candidate with offset stripped. */
3557 317481 : tree alt_base, alt_offset;
3558 317481 : split_constant_offset (base, &alt_base, &alt_offset);
3559 317481 : if (!integer_zerop (alt_offset))
3560 240351 : record_common_cand (data, alt_base, step, use);
3561 : }
3562 :
3563 : /* At last, add auto-incremental candidates. Make such variables
3564 : important since other iv uses with same base object may be based
3565 : on it. */
3566 1641861 : if (use != NULL && address_p (use->type))
3567 581933 : add_autoinc_candidates (data, iv->base, iv->step, true, use);
3568 : }
3569 :
3570 : /* Adds candidates based on the uses. */
3571 :
3572 : static void
3573 500623 : add_iv_candidate_for_groups (struct ivopts_data *data)
3574 : {
3575 500623 : unsigned i;
3576 :
3577 : /* Only add candidate for the first use in group. */
3578 2142723 : for (i = 0; i < data->vgroups.length (); i++)
3579 : {
3580 1642100 : struct iv_group *group = data->vgroups[i];
3581 :
3582 1642100 : gcc_assert (group->vuses[0] != NULL);
3583 1642100 : add_iv_candidate_for_use (data, group->vuses[0]);
3584 : }
3585 500623 : add_iv_candidate_derived_from_uses (data);
3586 500623 : }
3587 :
3588 : /* Record important candidates and add them to related_cands bitmaps. */
3589 :
3590 : static void
3591 500623 : record_important_candidates (struct ivopts_data *data)
3592 : {
3593 500623 : unsigned i;
3594 500623 : struct iv_group *group;
3595 :
3596 5390146 : for (i = 0; i < data->vcands.length (); i++)
3597 : {
3598 4889523 : struct iv_cand *cand = data->vcands[i];
3599 :
3600 4889523 : if (cand->important)
3601 3699049 : bitmap_set_bit (data->important_candidates, i);
3602 : }
3603 :
3604 500623 : data->consider_all_candidates = (data->vcands.length ()
3605 500623 : <= CONSIDER_ALL_CANDIDATES_BOUND);
3606 :
3607 : /* Add important candidates to groups' related_cands bitmaps. */
3608 2142723 : for (i = 0; i < data->vgroups.length (); i++)
3609 : {
3610 1642100 : group = data->vgroups[i];
3611 1642100 : bitmap_ior_into (group->related_cands, data->important_candidates);
3612 : }
3613 500623 : }
3614 :
3615 : /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3616 : If consider_all_candidates is true, we use a two-dimensional array, otherwise
3617 : we allocate a simple list to every use. */
3618 :
3619 : static void
3620 500623 : alloc_use_cost_map (struct ivopts_data *data)
3621 : {
3622 500623 : unsigned i, size, s;
3623 :
3624 2142723 : for (i = 0; i < data->vgroups.length (); i++)
3625 : {
3626 1642100 : struct iv_group *group = data->vgroups[i];
3627 :
3628 1642100 : if (data->consider_all_candidates)
3629 1618464 : size = data->vcands.length ();
3630 : else
3631 : {
3632 23636 : s = bitmap_count_bits (group->related_cands);
3633 :
3634 : /* Round up to the power of two, so that moduling by it is fast. */
3635 47272 : size = s ? (1 << ceil_log2 (s)) : 1;
3636 : }
3637 :
3638 1642100 : group->n_map_members = size;
3639 1642100 : group->cost_map = XCNEWVEC (class cost_pair, size);
3640 : }
3641 500623 : }
3642 :
3643 : /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3644 : on invariants INV_VARS and that the value used in expressing it is
3645 : VALUE, and in case of iv elimination the comparison operator is COMP. */
3646 :
3647 : static void
3648 19163490 : set_group_iv_cost (struct ivopts_data *data,
3649 : struct iv_group *group, struct iv_cand *cand,
3650 : comp_cost cost, bitmap inv_vars, tree value,
3651 : enum tree_code comp, bitmap inv_exprs)
3652 : {
3653 19163490 : unsigned i, s;
3654 :
3655 19163490 : if (cost.infinite_cost_p ())
3656 : {
3657 6464940 : BITMAP_FREE (inv_vars);
3658 6464940 : BITMAP_FREE (inv_exprs);
3659 6464940 : return;
3660 : }
3661 :
3662 12698550 : if (data->consider_all_candidates)
3663 : {
3664 12419304 : group->cost_map[cand->id].cand = cand;
3665 12419304 : group->cost_map[cand->id].cost = cost;
3666 12419304 : group->cost_map[cand->id].inv_vars = inv_vars;
3667 12419304 : group->cost_map[cand->id].inv_exprs = inv_exprs;
3668 12419304 : group->cost_map[cand->id].value = value;
3669 12419304 : group->cost_map[cand->id].comp = comp;
3670 12419304 : return;
3671 : }
3672 :
3673 : /* n_map_members is a power of two, so this computes modulo. */
3674 279246 : s = cand->id & (group->n_map_members - 1);
3675 317874 : for (i = s; i < group->n_map_members; i++)
3676 317679 : if (!group->cost_map[i].cand)
3677 279051 : goto found;
3678 447 : for (i = 0; i < s; i++)
3679 447 : if (!group->cost_map[i].cand)
3680 195 : goto found;
3681 :
3682 0 : gcc_unreachable ();
3683 :
3684 279246 : found:
3685 279246 : group->cost_map[i].cand = cand;
3686 279246 : group->cost_map[i].cost = cost;
3687 279246 : group->cost_map[i].inv_vars = inv_vars;
3688 279246 : group->cost_map[i].inv_exprs = inv_exprs;
3689 279246 : group->cost_map[i].value = value;
3690 279246 : group->cost_map[i].comp = comp;
3691 : }
3692 :
3693 : /* Gets cost of (GROUP, CAND) pair. */
3694 :
3695 : static class cost_pair *
3696 218324978 : get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3697 : struct iv_cand *cand)
3698 : {
3699 218324978 : unsigned i, s;
3700 218324978 : class cost_pair *ret;
3701 :
3702 218324978 : if (!cand)
3703 : return NULL;
3704 :
3705 212515545 : if (data->consider_all_candidates)
3706 : {
3707 187555136 : ret = group->cost_map + cand->id;
3708 187555136 : if (!ret->cand)
3709 : return NULL;
3710 :
3711 114764295 : return ret;
3712 : }
3713 :
3714 : /* n_map_members is a power of two, so this computes modulo. */
3715 24960409 : s = cand->id & (group->n_map_members - 1);
3716 37687459 : for (i = s; i < group->n_map_members; i++)
3717 37452641 : if (group->cost_map[i].cand == cand)
3718 : return group->cost_map + i;
3719 27003888 : else if (group->cost_map[i].cand == NULL)
3720 : return NULL;
3721 574409 : for (i = 0; i < s; i++)
3722 549043 : if (group->cost_map[i].cand == cand)
3723 : return group->cost_map + i;
3724 546392 : else if (group->cost_map[i].cand == NULL)
3725 : return NULL;
3726 :
3727 : return NULL;
3728 : }
3729 :
3730 : /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3731 : static rtx
3732 41732 : produce_memory_decl_rtl (tree obj, int *regno)
3733 : {
3734 41732 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3735 41732 : machine_mode address_mode = targetm.addr_space.address_mode (as);
3736 41732 : rtx x;
3737 :
3738 41732 : gcc_assert (obj);
3739 41732 : if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3740 : {
3741 41732 : const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3742 41732 : x = gen_rtx_SYMBOL_REF (address_mode, name);
3743 41732 : SET_SYMBOL_REF_DECL (x, obj);
3744 41732 : x = gen_rtx_MEM (DECL_MODE (obj), x);
3745 41732 : set_mem_addr_space (x, as);
3746 41732 : targetm.encode_section_info (obj, x, true);
3747 : }
3748 : else
3749 : {
3750 0 : x = gen_raw_REG (address_mode, (*regno)++);
3751 0 : x = gen_rtx_MEM (DECL_MODE (obj), x);
3752 0 : set_mem_addr_space (x, as);
3753 : }
3754 :
3755 41732 : return x;
3756 : }
3757 :
3758 : /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3759 : walk_tree. DATA contains the actual fake register number. */
3760 :
3761 : static tree
3762 584248 : prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3763 : {
3764 584248 : tree obj = NULL_TREE;
3765 584248 : rtx x = NULL_RTX;
3766 584248 : int *regno = (int *) data;
3767 :
3768 584248 : switch (TREE_CODE (*expr_p))
3769 : {
3770 166928 : case ADDR_EXPR:
3771 166928 : for (expr_p = &TREE_OPERAND (*expr_p, 0);
3772 166928 : handled_component_p (*expr_p);
3773 0 : expr_p = &TREE_OPERAND (*expr_p, 0))
3774 0 : continue;
3775 166928 : obj = *expr_p;
3776 166928 : if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3777 0 : x = produce_memory_decl_rtl (obj, regno);
3778 : break;
3779 :
3780 0 : case SSA_NAME:
3781 0 : *ws = 0;
3782 0 : obj = SSA_NAME_VAR (*expr_p);
3783 : /* Defer handling of anonymous SSA_NAMEs to the expander. */
3784 0 : if (!obj)
3785 : return NULL_TREE;
3786 0 : if (!DECL_RTL_SET_P (obj))
3787 0 : x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3788 : break;
3789 :
3790 166928 : case VAR_DECL:
3791 166928 : case PARM_DECL:
3792 166928 : case RESULT_DECL:
3793 166928 : *ws = 0;
3794 166928 : obj = *expr_p;
3795 :
3796 166928 : if (DECL_RTL_SET_P (obj))
3797 : break;
3798 :
3799 0 : if (DECL_MODE (obj) == BLKmode)
3800 0 : x = produce_memory_decl_rtl (obj, regno);
3801 : else
3802 0 : x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3803 :
3804 : break;
3805 :
3806 : default:
3807 : break;
3808 : }
3809 :
3810 0 : if (x)
3811 : {
3812 0 : decl_rtl_to_reset.safe_push (obj);
3813 0 : SET_DECL_RTL (obj, x);
3814 : }
3815 :
3816 : return NULL_TREE;
3817 : }
3818 :
3819 : /* Predict whether the given loop will be transformed in the RTL
3820 : doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3821 : This is only for target independent checks, see targetm.predict_doloop_p
3822 : for the target dependent ones.
3823 :
3824 : Note that according to some initial investigation, some checks like costly
3825 : niter check and invalid stmt scanning don't have much gains among general
3826 : cases, so keep this as simple as possible first.
3827 :
3828 : Some RTL specific checks seems unable to be checked in gimple, if any new
3829 : checks or easy checks _are_ missing here, please add them. */
3830 :
3831 : static bool
3832 500623 : generic_predict_doloop_p (struct ivopts_data *data)
3833 : {
3834 500623 : class loop *loop = data->current_loop;
3835 :
3836 : /* Call target hook for target dependent checks. */
3837 500623 : if (!targetm.predict_doloop_p (loop))
3838 : {
3839 500623 : if (dump_file && (dump_flags & TDF_DETAILS))
3840 67 : fprintf (dump_file, "Predict doloop failure due to"
3841 : " target specific checks.\n");
3842 500623 : return false;
3843 : }
3844 :
3845 : /* Similar to doloop_optimize, check iteration description to know it's
3846 : suitable or not. Keep it as simple as possible, feel free to extend it
3847 : if you find any multiple exits cases matter. */
3848 0 : edge exit = single_dom_exit (loop);
3849 0 : class tree_niter_desc *niter_desc;
3850 0 : if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3851 : {
3852 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3853 0 : fprintf (dump_file, "Predict doloop failure due to"
3854 : " unexpected niters.\n");
3855 0 : return false;
3856 : }
3857 :
3858 : /* Similar to doloop_optimize, check whether iteration count too small
3859 : and not profitable. */
3860 0 : HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3861 0 : if (est_niter == -1)
3862 0 : est_niter = get_likely_max_loop_iterations_int (loop);
3863 0 : if (est_niter >= 0 && est_niter < 3)
3864 : {
3865 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3866 0 : fprintf (dump_file,
3867 : "Predict doloop failure due to"
3868 : " too few iterations (%u).\n",
3869 : (unsigned int) est_niter);
3870 0 : return false;
3871 : }
3872 :
3873 : return true;
3874 : }
3875 :
3876 : /* Determines cost of the computation of EXPR. */
3877 :
3878 : static unsigned
3879 250392 : computation_cost (tree expr, bool speed)
3880 : {
3881 250392 : rtx_insn *seq;
3882 250392 : rtx rslt;
3883 250392 : tree type = TREE_TYPE (expr);
3884 250392 : unsigned cost;
3885 : /* Avoid using hard regs in ways which may be unsupported. */
3886 250392 : int regno = LAST_VIRTUAL_REGISTER + 1;
3887 250392 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
3888 250392 : enum node_frequency real_frequency = node->frequency;
3889 :
3890 250392 : node->frequency = NODE_FREQUENCY_NORMAL;
3891 250392 : crtl->maybe_hot_insn_p = speed;
3892 250392 : walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
3893 250392 : start_sequence ();
3894 250392 : rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3895 250392 : seq = end_sequence ();
3896 250392 : default_rtl_profile ();
3897 250392 : node->frequency = real_frequency;
3898 :
3899 250392 : cost = seq_cost (seq, speed);
3900 250392 : if (MEM_P (rslt))
3901 0 : cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3902 0 : TYPE_ADDR_SPACE (type), speed);
3903 250392 : else if (!REG_P (rslt))
3904 500784 : cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3905 :
3906 250392 : return cost;
3907 : }
3908 :
3909 : /* Returns variable containing the value of candidate CAND at statement AT. */
3910 :
3911 : static tree
3912 20082105 : var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3913 : {
3914 20082105 : if (stmt_after_increment (loop, cand, stmt))
3915 4974175 : return cand->var_after;
3916 : else
3917 15107930 : return cand->var_before;
3918 : }
3919 :
3920 : /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3921 : same precision that is at least as wide as the precision of TYPE, stores
3922 : BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3923 : type of A and B. */
3924 :
3925 : static tree
3926 15525300 : determine_common_wider_type (tree *a, tree *b)
3927 : {
3928 15525300 : tree wider_type = NULL;
3929 15525300 : tree suba, subb;
3930 15525300 : tree atype = TREE_TYPE (*a);
3931 :
3932 15525300 : if (CONVERT_EXPR_P (*a))
3933 : {
3934 351183 : suba = TREE_OPERAND (*a, 0);
3935 351183 : wider_type = TREE_TYPE (suba);
3936 351183 : if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3937 : return atype;
3938 : }
3939 : else
3940 : return atype;
3941 :
3942 330620 : if (CONVERT_EXPR_P (*b))
3943 : {
3944 80803 : subb = TREE_OPERAND (*b, 0);
3945 80803 : if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3946 : return atype;
3947 : }
3948 : else
3949 : return atype;
3950 :
3951 74890 : *a = suba;
3952 74890 : *b = subb;
3953 74890 : return wider_type;
3954 : }
3955 :
3956 : /* Determines the expression by that USE is expressed from induction variable
3957 : CAND at statement AT in DATA's current loop. The expression is stored in
3958 : two parts in a decomposed form. The invariant part is stored in AFF_INV;
3959 : while variant part in AFF_VAR. Store ratio of CAND.step over USE.step in
3960 : PRAT if it's non-null. Returns false if USE cannot be expressed using
3961 : CAND. */
3962 :
3963 : static bool
3964 18828028 : get_computation_aff_1 (struct ivopts_data *data, gimple *at, struct iv_use *use,
3965 : struct iv_cand *cand, class aff_tree *aff_inv,
3966 : class aff_tree *aff_var, widest_int *prat = NULL)
3967 : {
3968 18828028 : tree ubase = use->iv->base, ustep = use->iv->step;
3969 18828028 : tree cbase = cand->iv->base, cstep = cand->iv->step;
3970 18828028 : tree common_type, uutype, var, cstep_common;
3971 18828028 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3972 18828028 : aff_tree aff_cbase;
3973 18828028 : widest_int rat;
3974 :
3975 : /* We must have a precision to express the values of use. */
3976 18828028 : if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3977 : return false;
3978 :
3979 18827176 : var = var_at_stmt (data->current_loop, cand, at);
3980 18827176 : uutype = unsigned_type_for (utype);
3981 :
3982 : /* If the conversion is not noop, perform it. */
3983 18827176 : if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3984 : {
3985 276948 : if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3986 1703540 : && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3987 : {
3988 24429 : tree inner_base, inner_step, inner_type;
3989 24429 : inner_base = TREE_OPERAND (cbase, 0);
3990 24429 : if (CONVERT_EXPR_P (cstep))
3991 3117 : inner_step = TREE_OPERAND (cstep, 0);
3992 : else
3993 : inner_step = cstep;
3994 :
3995 24429 : inner_type = TREE_TYPE (inner_base);
3996 : /* If candidate is added from a biv whose type is smaller than
3997 : ctype, we know both candidate and the biv won't overflow.
3998 : In this case, it's safe to skip the conversion in candidate.
3999 : As an example, (unsigned short)((unsigned long)A) equals to
4000 : (unsigned short)A, if A has a type no larger than short. */
4001 24429 : if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4002 : {
4003 24155 : cbase = inner_base;
4004 24155 : cstep = inner_step;
4005 : }
4006 : }
4007 1679111 : cbase = fold_convert (uutype, cbase);
4008 1679111 : cstep = fold_convert (uutype, cstep);
4009 1679111 : var = fold_convert (uutype, var);
4010 : }
4011 :
4012 : /* Ratio is 1 when computing the value of biv cand by itself.
4013 : We can't rely on constant_multiple_of in this case because the
4014 : use is created after the original biv is selected. The call
4015 : could fail because of inconsistent fold behavior. See PR68021
4016 : for more information. */
4017 18827176 : if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4018 : {
4019 5369 : gcc_assert (is_gimple_assign (use->stmt));
4020 5369 : gcc_assert (use->iv->ssa_name == cand->var_after);
4021 5369 : gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4022 5369 : rat = 1;
4023 : }
4024 18821807 : else if (!constant_multiple_of (ustep, cstep, &rat, data))
4025 : return false;
4026 :
4027 15525300 : if (prat)
4028 14050969 : *prat = rat;
4029 :
4030 : /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4031 : type, we achieve better folding by computing their difference in this
4032 : wider type, and cast the result to UUTYPE. We do not need to worry about
4033 : overflows, as all the arithmetics will in the end be performed in UUTYPE
4034 : anyway. */
4035 15525300 : common_type = determine_common_wider_type (&ubase, &cbase);
4036 :
4037 : /* use = ubase - ratio * cbase + ratio * var. */
4038 15525300 : tree_to_aff_combination (ubase, common_type, aff_inv);
4039 15525300 : tree_to_aff_combination (cbase, common_type, &aff_cbase);
4040 15525300 : tree_to_aff_combination (var, uutype, aff_var);
4041 :
4042 : /* We need to shift the value if we are after the increment. */
4043 15525300 : if (stmt_after_increment (data->current_loop, cand, at))
4044 : {
4045 3357960 : aff_tree cstep_aff;
4046 :
4047 3357960 : if (common_type != uutype)
4048 1129631 : cstep_common = fold_convert (common_type, cstep);
4049 : else
4050 : cstep_common = cstep;
4051 :
4052 3357960 : tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4053 3357960 : aff_combination_add (&aff_cbase, &cstep_aff);
4054 3357960 : }
4055 :
4056 15525300 : aff_combination_scale (&aff_cbase, -rat);
4057 15525300 : aff_combination_add (aff_inv, &aff_cbase);
4058 15525300 : if (common_type != uutype)
4059 11671939 : aff_combination_convert (aff_inv, uutype);
4060 :
4061 15525300 : aff_combination_scale (aff_var, rat);
4062 15525300 : return true;
4063 18828028 : }
4064 :
4065 : /* Determines the expression by that USE is expressed from induction variable
4066 : CAND at statement AT in DATA's current loop. The expression is stored in a
4067 : decomposed form into AFF. Returns false if USE cannot be expressed using
4068 : CAND. */
4069 :
4070 : static bool
4071 1250592 : get_computation_aff (struct ivopts_data *data, gimple *at, struct iv_use *use,
4072 : struct iv_cand *cand, class aff_tree *aff)
4073 : {
4074 1250592 : aff_tree aff_var;
4075 :
4076 1250592 : if (!get_computation_aff_1 (data, at, use, cand, aff, &aff_var))
4077 : return false;
4078 :
4079 1129976 : aff_combination_add (aff, &aff_var);
4080 1129976 : return true;
4081 1250592 : }
4082 :
4083 : /* Return the type of USE. */
4084 :
4085 : static tree
4086 1014499 : get_use_type (struct iv_use *use)
4087 : {
4088 1014499 : tree base_type = TREE_TYPE (use->iv->base);
4089 1014499 : tree type;
4090 :
4091 1014499 : if (use->type == USE_REF_ADDRESS)
4092 : {
4093 : /* The base_type may be a void pointer. Create a pointer type based on
4094 : the mem_ref instead. */
4095 0 : type = build_pointer_type (TREE_TYPE (*use->op_p));
4096 0 : gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4097 : == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4098 : }
4099 : else
4100 : type = base_type;
4101 :
4102 1014499 : return type;
4103 : }
4104 :
4105 : /* Determines the expression by that USE is expressed from induction variable
4106 : CAND at statement AT in DATA's current loop. The computation is
4107 : unshared. */
4108 :
4109 : static tree
4110 395099 : get_computation_at (struct ivopts_data *data, gimple *at,
4111 : struct iv_use *use, struct iv_cand *cand)
4112 : {
4113 395099 : aff_tree aff;
4114 395099 : tree type = get_use_type (use);
4115 :
4116 395099 : if (!get_computation_aff (data, at, use, cand, &aff))
4117 : return NULL_TREE;
4118 274483 : unshare_aff_combination (&aff);
4119 274483 : return fold_convert (type, aff_combination_to_tree (&aff));
4120 395099 : }
4121 :
4122 : /* Like get_computation_at, but try harder, even if the computation
4123 : is more expensive. Intended for debug stmts. */
4124 :
4125 : static tree
4126 190882 : get_debug_computation_at (struct ivopts_data *data, gimple *at,
4127 : struct iv_use *use, struct iv_cand *cand)
4128 : {
4129 190882 : if (tree ret = get_computation_at (data, at, use, cand))
4130 : return ret;
4131 :
4132 120616 : tree ubase = use->iv->base, ustep = use->iv->step;
4133 120616 : tree cbase = cand->iv->base, cstep = cand->iv->step;
4134 120616 : tree var;
4135 120616 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4136 120616 : widest_int rat;
4137 :
4138 : /* We must have a precision to express the values of use. */
4139 120616 : if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4140 : return NULL_TREE;
4141 :
4142 : /* Try to handle the case that get_computation_at doesn't,
4143 : try to express
4144 : use = ubase + (var - cbase) / ratio. */
4145 8714 : if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4146 : &rat, data))
4147 : return NULL_TREE;
4148 :
4149 7622 : bool neg_p = false;
4150 7622 : if (wi::neg_p (rat))
4151 : {
4152 825 : if (TYPE_UNSIGNED (ctype))
4153 : return NULL_TREE;
4154 0 : neg_p = true;
4155 0 : rat = wi::neg (rat);
4156 : }
4157 :
4158 : /* If both IVs can wrap around and CAND doesn't have a power of two step,
4159 : it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4160 : the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4161 : uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4162 : ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4163 6797 : if (!use->iv->no_overflow
4164 61 : && !cand->iv->no_overflow
4165 6845 : && !integer_pow2p (cstep))
4166 : return NULL_TREE;
4167 :
4168 6787 : int bits = wi::exact_log2 (rat);
4169 6787 : if (bits == -1)
4170 653 : bits = wi::floor_log2 (rat) + 1;
4171 6787 : if (!cand->iv->no_overflow
4172 6787 : && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4173 : return NULL_TREE;
4174 :
4175 6787 : var = var_at_stmt (data->current_loop, cand, at);
4176 :
4177 6787 : if (POINTER_TYPE_P (ctype))
4178 : {
4179 126 : ctype = unsigned_type_for (ctype);
4180 126 : cbase = fold_convert (ctype, cbase);
4181 126 : cstep = fold_convert (ctype, cstep);
4182 126 : var = fold_convert (ctype, var);
4183 : }
4184 :
4185 6787 : if (stmt_after_increment (data->current_loop, cand, at))
4186 74 : var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4187 : unshare_expr (cstep));
4188 :
4189 6787 : var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4190 6787 : var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4191 : wide_int_to_tree (TREE_TYPE (var), rat));
4192 6787 : if (POINTER_TYPE_P (utype))
4193 : {
4194 0 : var = fold_convert (sizetype, var);
4195 0 : if (neg_p)
4196 0 : var = fold_build1 (NEGATE_EXPR, sizetype, var);
4197 0 : var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4198 : }
4199 : else
4200 : {
4201 6787 : var = fold_convert (utype, var);
4202 13574 : var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4203 : ubase, var);
4204 : }
4205 : return var;
4206 120616 : }
4207 :
4208 : /* Adjust the cost COST for being in loop setup rather than loop body.
4209 : If we're optimizing for space, the loop setup overhead is constant;
4210 : if we're optimizing for speed, amortize it over the per-iteration cost.
4211 : If ROUND_UP_P is true, the result is round up rather than to zero when
4212 : optimizing for speed. */
4213 : static int64_t
4214 11014632 : adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4215 : bool round_up_p = false)
4216 : {
4217 11014632 : if (cost == INFTY)
4218 : return cost;
4219 11014632 : else if (optimize_loop_for_speed_p (data->current_loop))
4220 : {
4221 9360896 : uint64_t niters = avg_loop_niter (data->current_loop);
4222 9360896 : if (niters > (uint64_t) cost)
4223 14255485 : return (round_up_p && cost != 0) ? 1 : 0;
4224 2024263 : return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4225 : }
4226 : else
4227 : return cost;
4228 : }
4229 :
4230 : /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4231 : EXPR operand holding the shift. COST0 and COST1 are the costs for
4232 : calculating the operands of EXPR. Returns true if successful, and returns
4233 : the cost in COST. */
4234 :
4235 : static bool
4236 1633840 : get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4237 : comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4238 : {
4239 1633840 : comp_cost res;
4240 1633840 : tree op1 = TREE_OPERAND (expr, 1);
4241 1633840 : tree cst = TREE_OPERAND (mult, 1);
4242 1633840 : tree multop = TREE_OPERAND (mult, 0);
4243 1633840 : int m = exact_log2 (int_cst_value (cst));
4244 4901157 : int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4245 1633840 : int as_cost, sa_cost;
4246 1633840 : bool mult_in_op1;
4247 :
4248 1633840 : if (!(m >= 0 && m < maxm))
4249 : return false;
4250 :
4251 1099479 : STRIP_NOPS (op1);
4252 1099479 : mult_in_op1 = operand_equal_p (op1, mult, 0);
4253 :
4254 1099479 : as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4255 :
4256 : /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4257 : use that in preference to a shift insn followed by an add insn. */
4258 1099479 : sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4259 1099479 : ? shiftadd_cost (speed, mode, m)
4260 : : (mult_in_op1
4261 183342 : ? shiftsub1_cost (speed, mode, m)
4262 32468 : : shiftsub0_cost (speed, mode, m)));
4263 :
4264 1099479 : res = comp_cost (MIN (as_cost, sa_cost), 0);
4265 1977584 : res += (mult_in_op1 ? cost0 : cost1);
4266 :
4267 1099479 : STRIP_NOPS (multop);
4268 1099479 : if (!is_gimple_val (multop))
4269 532608 : res += force_expr_to_var_cost (multop, speed);
4270 :
4271 1099479 : *cost = res;
4272 1099479 : return true;
4273 : }
4274 :
4275 : /* Estimates cost of forcing expression EXPR into a variable. */
4276 :
4277 : static comp_cost
4278 31327652 : force_expr_to_var_cost (tree expr, bool speed)
4279 : {
4280 31327652 : static bool costs_initialized = false;
4281 31327652 : static unsigned integer_cost [2];
4282 31327652 : static unsigned symbol_cost [2];
4283 31327652 : static unsigned address_cost [2];
4284 31327652 : tree op0, op1;
4285 31327652 : comp_cost cost0, cost1, cost;
4286 31327652 : machine_mode mode;
4287 31327652 : scalar_int_mode int_mode;
4288 :
4289 31327652 : if (!costs_initialized)
4290 : {
4291 41732 : tree type = build_pointer_type (integer_type_node);
4292 41732 : tree var, addr;
4293 41732 : rtx x;
4294 41732 : int i;
4295 :
4296 41732 : var = create_tmp_var_raw (integer_type_node, "test_var");
4297 41732 : TREE_STATIC (var) = 1;
4298 41732 : x = produce_memory_decl_rtl (var, NULL);
4299 41732 : SET_DECL_RTL (var, x);
4300 :
4301 41732 : addr = build1 (ADDR_EXPR, type, var);
4302 :
4303 :
4304 166928 : for (i = 0; i < 2; i++)
4305 : {
4306 83464 : integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4307 : 2000), i);
4308 :
4309 83464 : symbol_cost[i] = computation_cost (addr, i) + 1;
4310 :
4311 83464 : address_cost[i]
4312 83464 : = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4313 83464 : if (dump_file && (dump_flags & TDF_DETAILS))
4314 : {
4315 105 : fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4316 70 : fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4317 70 : fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4318 70 : fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4319 70 : fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4320 70 : fprintf (dump_file, "\n");
4321 : }
4322 : }
4323 :
4324 41732 : costs_initialized = true;
4325 : }
4326 :
4327 31327652 : STRIP_NOPS (expr);
4328 :
4329 31327652 : if (SSA_VAR_P (expr))
4330 6106538 : return no_cost;
4331 :
4332 25221114 : if (is_gimple_min_invariant (expr))
4333 : {
4334 14520015 : if (poly_int_tree_p (expr))
4335 12344044 : return comp_cost (integer_cost [speed], 0);
4336 :
4337 2175971 : if (TREE_CODE (expr) == ADDR_EXPR)
4338 : {
4339 2175971 : tree obj = TREE_OPERAND (expr, 0);
4340 :
4341 2175971 : if (VAR_P (obj)
4342 : || TREE_CODE (obj) == PARM_DECL
4343 : || TREE_CODE (obj) == RESULT_DECL)
4344 2102449 : return comp_cost (symbol_cost [speed], 0);
4345 : }
4346 :
4347 73522 : return comp_cost (address_cost [speed], 0);
4348 : }
4349 :
4350 10701099 : switch (TREE_CODE (expr))
4351 : {
4352 9122391 : case POINTER_PLUS_EXPR:
4353 9122391 : case PLUS_EXPR:
4354 9122391 : case MINUS_EXPR:
4355 9122391 : case MULT_EXPR:
4356 9122391 : case EXACT_DIV_EXPR:
4357 9122391 : case TRUNC_DIV_EXPR:
4358 9122391 : case BIT_AND_EXPR:
4359 9122391 : case BIT_IOR_EXPR:
4360 9122391 : case LSHIFT_EXPR:
4361 9122391 : case RSHIFT_EXPR:
4362 9122391 : op0 = TREE_OPERAND (expr, 0);
4363 9122391 : op1 = TREE_OPERAND (expr, 1);
4364 9122391 : STRIP_NOPS (op0);
4365 9122391 : STRIP_NOPS (op1);
4366 9122391 : break;
4367 :
4368 1578708 : CASE_CONVERT:
4369 1578708 : case NEGATE_EXPR:
4370 1578708 : case BIT_NOT_EXPR:
4371 1578708 : op0 = TREE_OPERAND (expr, 0);
4372 1578708 : STRIP_NOPS (op0);
4373 1578708 : op1 = NULL_TREE;
4374 1578708 : break;
4375 : /* See add_iv_candidate_for_doloop, for doloop may_be_zero case, we
4376 : introduce COND_EXPR for IV base, need to support better cost estimation
4377 : for this COND_EXPR and tcc_comparison. */
4378 0 : case COND_EXPR:
4379 0 : op0 = TREE_OPERAND (expr, 1);
4380 0 : STRIP_NOPS (op0);
4381 0 : op1 = TREE_OPERAND (expr, 2);
4382 0 : STRIP_NOPS (op1);
4383 0 : break;
4384 0 : case LT_EXPR:
4385 0 : case LE_EXPR:
4386 0 : case GT_EXPR:
4387 0 : case GE_EXPR:
4388 0 : case EQ_EXPR:
4389 0 : case NE_EXPR:
4390 0 : case UNORDERED_EXPR:
4391 0 : case ORDERED_EXPR:
4392 0 : case UNLT_EXPR:
4393 0 : case UNLE_EXPR:
4394 0 : case UNGT_EXPR:
4395 0 : case UNGE_EXPR:
4396 0 : case UNEQ_EXPR:
4397 0 : case LTGT_EXPR:
4398 0 : case MAX_EXPR:
4399 0 : case MIN_EXPR:
4400 0 : op0 = TREE_OPERAND (expr, 0);
4401 0 : STRIP_NOPS (op0);
4402 0 : op1 = TREE_OPERAND (expr, 1);
4403 0 : STRIP_NOPS (op1);
4404 0 : break;
4405 :
4406 0 : default:
4407 : /* Just an arbitrary value, FIXME. */
4408 0 : return comp_cost (target_spill_cost[speed], 0);
4409 : }
4410 :
4411 10701099 : if (op0 == NULL_TREE
4412 10701099 : || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4413 5169814 : cost0 = no_cost;
4414 : else
4415 5531285 : cost0 = force_expr_to_var_cost (op0, speed);
4416 :
4417 10701099 : if (op1 == NULL_TREE
4418 9122391 : || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4419 9845660 : cost1 = no_cost;
4420 : else
4421 855439 : cost1 = force_expr_to_var_cost (op1, speed);
4422 :
4423 10701099 : mode = TYPE_MODE (TREE_TYPE (expr));
4424 10701099 : switch (TREE_CODE (expr))
4425 : {
4426 6394200 : case POINTER_PLUS_EXPR:
4427 6394200 : case PLUS_EXPR:
4428 6394200 : case MINUS_EXPR:
4429 6394200 : case NEGATE_EXPR:
4430 6394200 : cost = comp_cost (add_cost (speed, mode), 0);
4431 6394200 : if (TREE_CODE (expr) != NEGATE_EXPR)
4432 : {
4433 6222354 : tree mult = NULL_TREE;
4434 6222354 : comp_cost sa_cost;
4435 6222354 : if (TREE_CODE (op1) == MULT_EXPR)
4436 : mult = op1;
4437 5782419 : else if (TREE_CODE (op0) == MULT_EXPR)
4438 : mult = op0;
4439 :
4440 : if (mult != NULL_TREE
4441 5122875 : && is_a <scalar_int_mode> (mode, &int_mode)
4442 1849105 : && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4443 1633840 : && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4444 : speed, &sa_cost))
4445 1099479 : return sa_cost;
4446 : }
4447 : break;
4448 :
4449 1391504 : CASE_CONVERT:
4450 1391504 : {
4451 1391504 : tree inner_mode, outer_mode;
4452 1391504 : outer_mode = TREE_TYPE (expr);
4453 1391504 : inner_mode = TREE_TYPE (op0);
4454 1391504 : cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4455 1391504 : TYPE_MODE (inner_mode), speed), 0);
4456 : }
4457 1391504 : break;
4458 :
4459 2786083 : case MULT_EXPR:
4460 2786083 : if (cst_and_fits_in_hwi (op0))
4461 0 : cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4462 0 : mode, speed), 0);
4463 2786083 : else if (cst_and_fits_in_hwi (op1))
4464 2303167 : cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4465 2303167 : mode, speed), 0);
4466 : else
4467 482916 : return comp_cost (target_spill_cost [speed], 0);
4468 : break;
4469 :
4470 58189 : case EXACT_DIV_EXPR:
4471 58189 : case TRUNC_DIV_EXPR:
4472 : /* Division by power of two is usually cheap, so we allow it. Forbid
4473 : anything else. */
4474 58189 : if (integer_pow2p (TREE_OPERAND (expr, 1)))
4475 58189 : cost = comp_cost (add_cost (speed, mode), 0);
4476 : else
4477 0 : cost = comp_cost (target_spill_cost[speed], 0);
4478 : break;
4479 :
4480 71123 : case BIT_AND_EXPR:
4481 71123 : case BIT_IOR_EXPR:
4482 71123 : case BIT_NOT_EXPR:
4483 71123 : case LSHIFT_EXPR:
4484 71123 : case RSHIFT_EXPR:
4485 71123 : cost = comp_cost (add_cost (speed, mode), 0);
4486 71123 : break;
4487 0 : case COND_EXPR:
4488 0 : op0 = TREE_OPERAND (expr, 0);
4489 0 : STRIP_NOPS (op0);
4490 0 : if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4491 0 : || CONSTANT_CLASS_P (op0))
4492 0 : cost = no_cost;
4493 : else
4494 0 : cost = force_expr_to_var_cost (op0, speed);
4495 : break;
4496 0 : case LT_EXPR:
4497 0 : case LE_EXPR:
4498 0 : case GT_EXPR:
4499 0 : case GE_EXPR:
4500 0 : case EQ_EXPR:
4501 0 : case NE_EXPR:
4502 0 : case UNORDERED_EXPR:
4503 0 : case ORDERED_EXPR:
4504 0 : case UNLT_EXPR:
4505 0 : case UNLE_EXPR:
4506 0 : case UNGT_EXPR:
4507 0 : case UNGE_EXPR:
4508 0 : case UNEQ_EXPR:
4509 0 : case LTGT_EXPR:
4510 0 : case MAX_EXPR:
4511 0 : case MIN_EXPR:
4512 : /* Simply use add cost for now, FIXME if there is some more accurate cost
4513 : evaluation way. */
4514 0 : cost = comp_cost (add_cost (speed, mode), 0);
4515 0 : break;
4516 :
4517 0 : default:
4518 0 : gcc_unreachable ();
4519 : }
4520 :
4521 9118704 : cost += cost0;
4522 9118704 : cost += cost1;
4523 9118704 : return cost;
4524 : }
4525 :
4526 : /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4527 : invariants the computation depends on. */
4528 :
4529 : static comp_cost
4530 26640143 : force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4531 : {
4532 26640143 : if (!expr)
4533 2231823 : return no_cost;
4534 :
4535 24408320 : find_inv_vars (data, &expr, inv_vars);
4536 24408320 : return force_expr_to_var_cost (expr, data->speed);
4537 : }
4538 :
4539 : /* Returns cost of auto-modifying address expression in shape base + offset.
4540 : AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4541 : address expression. The address expression has ADDR_MODE in addr space
4542 : AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4543 : speed or size. */
4544 :
/* Kinds of auto-modifying address expressions.  The values are consecutive
   from zero so that they can index an array of AINC_NONE elements.  */
enum ainc_type
{
  AINC_PRE_INC = 0,   /* Pre increment.  */
  AINC_PRE_DEC = 1,   /* Pre decrement.  */
  AINC_POST_INC = 2,  /* Post increment.  */
  AINC_POST_DEC = 3,  /* Post decrement.  */
  AINC_NONE = 4       /* Also the number of auto increment types.  */
};
4553 :
4554 : struct ainc_cost_data
4555 : {
4556 : int64_t costs[AINC_NONE];
4557 : };
4558 :
/* Cost of an auto-modifying address of the form base + offset; the
   parameters are described in the comment that precedes enum ainc_type.

   Returns infinite_cost when none of the USE_{LOAD,STORE}_{PRE,POST}_
   {INCREMENT,DECREMENT} macros holds for MEM_MODE, or when the
   AINC_OFFSET / AINC_STEP pair matches none of the four forms tested at
   the end.  Otherwise returns a comp_cost built from the cached cost of
   the matching form; that cached value is still INFTY if the form was not
   accepted when the cache entry was filled. */
4559 : static comp_cost
4560 2041890 : get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4561 : machine_mode addr_mode, machine_mode mem_mode,
4562 : addr_space_t as, bool speed)
4563 : {
/* Quick exit: no auto-modifying form is usable for MEM_MODE at all. */
4564 2041890 : if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4565 : && !USE_STORE_PRE_DECREMENT (mem_mode)
4566 : && !USE_LOAD_POST_DECREMENT (mem_mode)
4567 : && !USE_STORE_POST_DECREMENT (mem_mode)
4568 : && !USE_LOAD_PRE_INCREMENT (mem_mode)
4569 : && !USE_STORE_PRE_INCREMENT (mem_mode)
4570 : && !USE_LOAD_POST_INCREMENT (mem_mode)
4571 : && !USE_STORE_POST_INCREMENT (mem_mode))
4572 2041890 : return infinite_cost;
4573 :
/* Function-local cache with one slot per (address space, memory mode)
   pair.  The slot index does not involve ADDR_MODE or SPEED, so an entry
   keeps the costs computed from the arguments of the call that first
   filled it. */
4574 : static vec<ainc_cost_data *> ainc_cost_data_list;
4575 : unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4576 : if (idx >= ainc_cost_data_list.length ())
4577 : {
/* Grow far enough to hold every mode of address space AS. */
4578 : unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4579 :
4580 : gcc_assert (nsize > idx);
4581 : ainc_cost_data_list.safe_grow_cleared (nsize, true);
4582 : }
4583 :
4584 : ainc_cost_data *data = ainc_cost_data_list[idx];
4585 : if (data == NULL)
4586 : {
/* First request for this slot: build the entry.  REG serves as the base
   register of the probe addresses generated below. */
4587 : rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4588 :
/* Every form starts out at INFTY and is overwritten only when the
   corresponding macro holds and the probe address is accepted by
   memory_address_addr_space_p. */
4589 : data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4590 : data->costs[AINC_PRE_DEC] = INFTY;
4591 : data->costs[AINC_POST_DEC] = INFTY;
4592 : data->costs[AINC_PRE_INC] = INFTY;
4593 : data->costs[AINC_POST_INC] = INFTY;
4594 : if (USE_LOAD_PRE_DECREMENT (mem_mode)
4595 : || USE_STORE_PRE_DECREMENT (mem_mode))
4596 : {
4597 : rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4598 :
4599 : if (memory_address_addr_space_p (mem_mode, addr, as))
4600 : data->costs[AINC_PRE_DEC]
4601 : = address_cost (addr, mem_mode, as, speed);
4602 : }
4603 : if (USE_LOAD_POST_DECREMENT (mem_mode)
4604 : || USE_STORE_POST_DECREMENT (mem_mode))
4605 : {
4606 : rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4607 :
4608 : if (memory_address_addr_space_p (mem_mode, addr, as))
4609 : data->costs[AINC_POST_DEC]
4610 : = address_cost (addr, mem_mode, as, speed);
4611 : }
4612 : if (USE_LOAD_PRE_INCREMENT (mem_mode)
4613 : || USE_STORE_PRE_INCREMENT (mem_mode))
4614 : {
4615 : rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4616 :
4617 : if (memory_address_addr_space_p (mem_mode, addr, as))
4618 : data->costs[AINC_PRE_INC]
4619 : = address_cost (addr, mem_mode, as, speed);
4620 : }
4621 : if (USE_LOAD_POST_INCREMENT (mem_mode)
4622 : || USE_STORE_POST_INCREMENT (mem_mode))
4623 : {
4624 : rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4625 :
4626 : if (memory_address_addr_space_p (mem_mode, addr, as))
4627 : data->costs[AINC_POST_INC]
4628 : = address_cost (addr, mem_mode, as, speed);
4629 : }
4630 : ainc_cost_data_list[idx] = data;
4631 : }
4632 :
/* Pick the form from how the offset and step relate to the access size:
   offset 0 with step +size/-size selects post-increment/post-decrement,
   offset and step both +size selects pre-increment, and offset and step
   both -size selects pre-decrement. */
4633 : poly_int64 msize = GET_MODE_SIZE (mem_mode);
4634 : if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4635 : return comp_cost (data->costs[AINC_POST_INC], 0);
4636 : if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4637 : return comp_cost (data->costs[AINC_POST_DEC], 0);
4638 : if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4639 : return comp_cost (data->costs[AINC_PRE_INC], 0);
4640 : if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4641 : return comp_cost (data->costs[AINC_PRE_DEC], 0);
4642 :
4643 : return infinite_cost;
4644 : }
4645 :
4646 : /* Return cost of computing USE's address expression by using CAND.
4647 : AFF_INV and AFF_VAR represent invariant and variant parts of the
4648 : address expression, respectively. If AFF_INV is simple, store
4649 : the loop invariant variables it depends on in INV_VARS;
4650 : if AFF_INV is complicated, handle it as a new invariant expression
4651 : and record it in INV_EXPR. RATIO is the multiple between the
4652 : steps of USE and CAND. If CAN_AUTOINC is non-NULL, store a boolean
4653 : value to it indicating if this is an auto-increment address.

   This function only ever writes true to *CAN_AUTOINC (on the early
   auto-increment return); it never clears it.  SPEED is forwarded to the
   cost helpers.  AFF_INV is modified in place: parts that are folded
   into the address are removed from it. */
4654 :
4655 : static comp_cost
4656 6130202 : get_address_cost (struct ivopts_data *data, struct iv_use *use,
4657 : struct iv_cand *cand, aff_tree *aff_inv,
4658 : aff_tree *aff_var, HOST_WIDE_INT ratio,
4659 : bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4660 : bool *can_autoinc, bool speed)
4661 : {
4662 6130202 : rtx addr;
4663 6130202 : bool simple_inv = true;
4664 6130202 : tree comp_inv = NULL_TREE, type = aff_var->type;
4665 6130202 : comp_cost var_cost = no_cost, cost = no_cost;
4666 6130202 : struct mem_address parts = {NULL_TREE, integer_one_node,
4667 6130202 : NULL_TREE, NULL_TREE, NULL_TREE};
4668 6130202 : machine_mode addr_mode = TYPE_MODE (type);
4669 6130202 : machine_mode mem_mode = TYPE_MODE (use->mem_type);
4670 6130202 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4671 : /* Only true if ratio != 1. */
4672 6130202 : bool ok_with_ratio_p = false;
4673 6130202 : bool ok_without_ratio_p = false;
4674 6130202 : code_helper code = ERROR_MARK;
4675 :
/* For USE_PTR_ADDRESS uses the statement must be an internal-function
   call; its function code is handed to valid_mem_ref_p below. */
4676 6130202 : if (use->type == USE_PTR_ADDRESS)
4677 : {
4678 5038 : gcall *call = as_a<gcall *> (use->stmt);
4679 5038 : gcc_assert (gimple_call_internal_p (call));
4680 5038 : code = gimple_call_internal_fn (call);
4681 : }
4682 :
/* Invariant part is not a plain constant: probe step by step which of
   the index, scaled-index, offset and symbol components
   valid_mem_ref_p accepts for this mode and address space. */
4683 6130202 : if (!aff_combination_const_p (aff_inv))
4684 : {
4685 4013376 : parts.index = integer_one_node;
4686 : /* Addressing mode "base + index". */
4687 4013376 : ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4688 4013376 : if (ratio != 1)
4689 : {
4690 2872306 : parts.step = wide_int_to_tree (type, ratio);
4691 : /* Addressing mode "base + index << scale". */
4692 2872306 : ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4693 2872306 : if (!ok_with_ratio_p)
4694 1736927 : parts.step = NULL_TREE;
4695 : }
4696 2877997 : if (ok_with_ratio_p || ok_without_ratio_p)
4697 : {
4698 4013376 : if (maybe_ne (aff_inv->offset, 0))
4699 : {
4700 2637614 : parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4701 : /* Addressing mode "base + index [<< scale] + offset". */
4702 2637614 : if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4703 489 : parts.offset = NULL_TREE;
4704 : else
4705 2637125 : aff_inv->offset = 0;
4706 : }
4707 :
4708 4013376 : move_fixed_address_to_symbol (&parts, aff_inv);
4709 : /* Base is fixed address and is moved to symbol part. */
4710 4013376 : if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4711 465849 : parts.base = NULL_TREE;
4712 :
4713 : /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4714 4013376 : if (parts.symbol != NULL_TREE
4715 4013376 : && !valid_mem_ref_p (mem_mode, as, &parts, code))
4716 : {
4717 6649 : aff_combination_add_elt (aff_inv, parts.symbol, 1);
4718 6649 : parts.symbol = NULL_TREE;
4719 : /* Reset SIMPLE_INV since symbol address needs to be computed
4720 : outside of address expression in this case. */
4721 6649 : simple_inv = false;
4722 : /* Symbol part is moved back to base part, it can't be NULL. */
4723 6649 : parts.base = integer_one_node;
4724 : }
4725 : }
4726 : else
4727 0 : parts.index = NULL_TREE;
4728 : }
4729 : else
4730 : {
/* Invariant part is a plain constant: first see whether an
   auto-modifying address can be used, then fall back to
   "base + offset". */
4731 2116826 : poly_int64 ainc_step;
4732 2116826 : if (can_autoinc
4733 2116826 : && ratio == 1
4734 4233644 : && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4735 : {
4736 2041890 : poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4737 :
/* A use placed after the candidate's increment sees the offset moved
   by one step. */
4738 2041890 : if (stmt_after_increment (data->current_loop, cand, use->stmt))
4739 : ainc_offset += ainc_step;
4740 2041890 : cost = get_address_cost_ainc (ainc_step, ainc_offset,
4741 : addr_mode, mem_mode, as, speed);
4742 2041890 : if (!cost.infinite_cost_p ())
4743 : {
4744 0 : *can_autoinc = true;
4745 0 : return cost;
4746 : }
4747 2041890 : cost = no_cost;
4748 : }
4749 2116826 : if (!aff_combination_zero_p (aff_inv))
4750 : {
4751 1233564 : parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4752 : /* Addressing mode "base + offset". */
4753 1233564 : if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4754 60 : parts.offset = NULL_TREE;
4755 : else
4756 1233504 : aff_inv->offset = 0;
4757 : }
4758 : }
4759 :
/* NOTE(review): AFF_INV has already been passed to
   aff_combination_const_p above (and dereferenced on some paths), so
   the NULL test below looks redundant -- confirm before relying on it. */
4760 2123475 : if (simple_inv)
4761 6123553 : simple_inv = (aff_inv == NULL
4762 9677025 : || aff_combination_const_p (aff_inv)
4763 9670376 : || aff_combination_singleton_var_p (aff_inv));
/* Whatever is left in AFF_INV has to be computed into a variable; a
   multiplication by RATIO is charged when no scaled index is used, and
   an addition when there is no index slot to hold the invariant. */
4764 6130202 : if (!aff_combination_zero_p (aff_inv))
4765 3553568 : comp_inv = aff_combination_to_tree (aff_inv);
4766 3553568 : if (comp_inv != NULL_TREE)
4767 3553568 : cost = force_var_cost (data, comp_inv, inv_vars);
4768 6130202 : if (ratio != 1 && parts.step == NULL_TREE)
4769 1736935 : var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4770 6130202 : if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4771 60 : var_cost += add_cost (speed, addr_mode);
4772 :
4773 6130202 : if (comp_inv && inv_expr && !simple_inv)
4774 : {
4775 814795 : *inv_expr = get_loop_invariant_expr (data, comp_inv);
4776 : /* Clear depends on. */
4777 814795 : if (*inv_expr != NULL && inv_vars && *inv_vars)
4778 470178 : bitmap_clear (*inv_vars);
4779 :
4780 : /* Cost of small invariant expression adjusted against loop niters
4781 : is usually zero, which makes it difficult to be differentiated
4782 : from candidate based on loop invariant variables. Secondly, the
4783 : generated invariant expression may not be hoisted out of loop by
4784 : following pass. We penalize the cost by rounding up in order to
4785 : neutralize such effects. */
4786 814795 : cost.cost = adjust_setup_cost (data, cost.cost, true);
4787 814795 : cost.scratch = cost.cost;
4788 : }
4789 :
/* Add the variable-part cost and the cost of the final address, which
   must be a valid memory address at this point. */
4790 6130202 : cost += var_cost;
4791 6130202 : addr = addr_for_mem_ref (&parts, as, false);
4792 6130202 : gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4793 6130202 : cost += address_cost (addr, mem_mode, as, speed);
4794 :
/* Complexity grows by one for each of: a symbol, a scaled index (see
   the exception below), base together with index, and a nonzero
   offset. */
4795 6130202 : if (parts.symbol != NULL_TREE)
4796 520813 : cost.complexity += 1;
4797 : /* Don't increase the complexity of adding a scaled index if it's
4798 : the only kind of index that the target allows. */
4799 6130202 : if (parts.step != NULL_TREE && ok_without_ratio_p)
4800 1135379 : cost.complexity += 1;
4801 6130202 : if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4802 3553508 : cost.complexity += 1;
4803 6130202 : if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4804 3870629 : cost.complexity += 1;
4805 :
4806 : return cost;
4807 : }
4808 :
4809 : /* Scale (multiply) the computed COST (except scratch part that should be
4810 : hoisted out of the loop) by header->frequency / AT->frequency, which makes
4811 : expected cost more accurate.

   In the code the factor is the integer stored in the aux field of AT's
   basic block.  NOTE(review): presumably that field holds the frequency
   ratio named above -- confirm where bb->aux is set.  Scaling is applied
   only when DATA->speed is set and the loop header's count converts to a
   positive frequency; otherwise, or when the factor is 1, COST is
   returned unchanged. */
4812 :
4813 : static comp_cost
4814 14050969 : get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4815 : {
4816 14050969 : if (data->speed
4817 14050969 : && data->current_loop->header->count.to_frequency (cfun) > 0)
4818 : {
4819 12397575 : basic_block bb = gimple_bb (at);
4820 12397575 : gcc_assert (cost.scratch <= cost.cost);
4821 12397575 : int scale_factor = (int)(intptr_t) bb->aux;
4822 12397575 : if (scale_factor == 1)
4823 11805594 : return cost;
4824 :
/* Only the part of the cost above the scratch (setup) amount is
   multiplied. */
4825 591981 : int64_t scaled_cost
4826 591981 : = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4827 :
4828 591981 : if (dump_file && (dump_flags & TDF_DETAILS))
4829 93 : fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4830 : "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4831 : 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4832 :
4833 : cost.cost = scaled_cost;
4834 : }
4835 :
4836 2245375 : return cost;
4837 : }
4838 :
4839 : /* Determines the cost of the computation by which USE is expressed
4840 : from induction variable CAND. If ADDRESS_P is true, we just need
4841 : to create an address from it, otherwise we want to get it into
4842 : register. A set of invariants we depend on is stored in INV_VARS.
4843 : If CAN_AUTOINC is nonnull, use it to record whether autoinc
4844 : addressing is likely. If INV_EXPR is nonnull, record invariant
4845 : expr entry in it.

   The three output arguments, when nonnull, are reset on entry
   (*INV_VARS and *INV_EXPR to NULL, *CAN_AUTOINC to false).
   infinite_cost is returned when USE's type is wider than CAND's, when
   the base objects of USE and CAND are both known but differ (checked
   for addresses and for pointer-typed base objects), or when
   get_computation_aff_1 fails or the step ratio does not fit a signed
   HOST_WIDE_INT. */
4846 :
4847 : static comp_cost
4848 21823332 : get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4849 : struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4850 : bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4851 : {
4852 21823332 : gimple *at = use->stmt;
4853 21823332 : tree ubase = use->iv->base, cbase = cand->iv->base;
4854 21823332 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4855 21823332 : tree comp_inv = NULL_TREE;
4856 21823332 : HOST_WIDE_INT ratio, aratio;
4857 21823332 : comp_cost cost;
4858 21823332 : widest_int rat;
4859 43646664 : aff_tree aff_inv, aff_var;
4860 21823332 : bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4861 :
4862 21823332 : if (inv_vars)
4863 19106554 : *inv_vars = NULL;
4864 21823332 : if (can_autoinc)
4865 9462203 : *can_autoinc = false;
4866 21823332 : if (inv_expr)
4867 21405282 : *inv_expr = NULL;
4868 :
4869 : /* Check if we have enough precision to express the values of use. */
4870 21823332 : if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4871 3065054 : return infinite_cost;
4872 :
4873 18758278 : if (address_p
4874 18758278 : || (use->iv->base_object
4875 2406547 : && cand->iv->base_object
4876 1274581 : && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4877 1250786 : && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4878 : {
4879 : /* Do not try to express address of an object with computation based
4880 : on address of a different object. This may cause problems in rtl
4881 : level alias analysis (that does not expect this to be happening,
4882 : as this is illegal in C), and would be unlikely to be useful
4883 : anyway. */
4884 8854199 : if (use->iv->base_object
4885 8854199 : && cand->iv->base_object
4886 13636133 : && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4887 1525197 : return infinite_cost;
4888 : }
4889 :
/* Split the use into invariant and variant affine parts and obtain the
   step ratio; give up if that fails or the ratio is out of range. */
4890 17233081 : if (!get_computation_aff_1 (data, at, use, cand, &aff_inv, &aff_var, &rat)
4891 17233081 : || !wi::fits_shwi_p (rat))
4892 3182112 : return infinite_cost;
4893 :
4894 14050969 : ratio = rat.to_shwi ();
/* Address uses are costed by get_address_cost and return from here. */
4895 14050969 : if (address_p)
4896 : {
4897 6130202 : cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4898 : inv_vars, inv_expr, can_autoinc, speed);
4899 6130202 : cost = get_scaled_computation_cost_at (data, at, cost);
4900 : /* For doloop IV cand, add on the extra cost. */
4901 6130202 : cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4902 6130202 : return cost;
4903 : }
4904 :
/* Generic (non-address) use.  SIMPLE_INV is evaluated before the
   invariant part is converted to the signed type. */
4905 7920767 : bool simple_inv = (aff_combination_const_p (&aff_inv)
4906 2202678 : || aff_combination_singleton_var_p (&aff_inv));
4907 7920767 : tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4908 7920767 : aff_combination_convert (&aff_inv, signed_type);
4909 7920767 : if (!aff_combination_zero_p (&aff_inv))
4910 5688944 : comp_inv = aff_combination_to_tree (&aff_inv);
4911 :
4912 7920767 : cost = force_var_cost (data, comp_inv, inv_vars);
4913 7920767 : if (comp_inv && inv_expr && !simple_inv)
4914 : {
4915 1547070 : *inv_expr = get_loop_invariant_expr (data, comp_inv);
4916 : /* Clear depends on. */
4917 1547070 : if (*inv_expr != NULL && inv_vars && *inv_vars)
4918 912465 : bitmap_clear (*inv_vars);
4919 :
4920 1547070 : cost.cost = adjust_setup_cost (data, cost.cost);
4921 : /* Record setup cost in scratch field. */
4922 1547070 : cost.scratch = cost.cost;
4923 : }
4924 : /* Cost of constant integer can be covered when adding invariant part to
4925 : variant part. */
4926 6373697 : else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4927 3486241 : cost = no_cost;
4928 :
4929 : /* Need type narrowing to represent use with cand. */
4930 7920767 : if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4931 : {
4932 822747 : machine_mode outer_mode = TYPE_MODE (utype);
4933 822747 : machine_mode inner_mode = TYPE_MODE (ctype);
4934 822747 : cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4935 : }
4936 :
4937 : /* Turn a + i * (-c) into a - i * c. */
4938 7920767 : if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4939 1876687 : aratio = -ratio;
4940 : else
4941 : aratio = ratio;
4942 :
/* The test uses RATIO while the multiplication is costed with ARATIO. */
4943 7920767 : if (ratio != 1)
4944 2785260 : cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4945 :
4946 : /* TODO: We may also need to check if we can compute a + i * 4 in one
4947 : instruction. */
4948 : /* Need to add up the invariant and variant parts. */
4949 7920767 : if (comp_inv && !integer_zerop (comp_inv))
4950 11354536 : cost += add_cost (speed, TYPE_MODE (utype));
4951 :
4952 7920767 : cost = get_scaled_computation_cost_at (data, at, cost);
4953 :
4954 : /* For doloop IV cand, add on the extra cost. */
4955 7920767 : if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4956 0 : cost += targetm.doloop_cost_for_generic;
4957 :
4958 7920767 : return cost;
4959 21823332 : }
4960 :
4961 : /* Determines cost of computing the use in GROUP with CAND in a generic
4962 : expression.

   Only the first use of GROUP is examined.  The resulting cost is
   recorded with set_group_iv_cost and the function returns true iff that
   cost is not infinite.  When CAND involves undefined values and is not
   the same IV as the use, false is returned and nothing is recorded. */
4963 :
4964 : static bool
4965 5905236 : determine_group_iv_cost_generic (struct ivopts_data *data,
4966 : struct iv_group *group, struct iv_cand *cand)
4967 : {
4968 5905236 : comp_cost cost;
4969 5905236 : iv_inv_expr_ent *inv_expr = NULL;
4970 5905236 : bitmap inv_vars = NULL, inv_exprs = NULL;
4971 5905236 : struct iv_use *use = group->vuses[0];
4972 :
4973 : /* The simple case first -- if we need to express value of the preserved
4974 : original biv, the cost is 0. This also prevents us from counting the
4975 : cost of increment twice -- once at this use and once in the cost of
4976 : the candidate. */
4977 5905236 : if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4978 56936 : cost = no_cost;
4979 : /* If the IV candidate involves undefined SSA values and is not the
4980 : same IV as on the USE avoid using that candidate here. */
4981 5848300 : else if (cand->involves_undefs
4982 5848300 : && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
4983 212 : return false;
4984 : else
4985 5848088 : cost = get_computation_cost (data, use, cand, false,
4986 : &inv_vars, NULL, &inv_expr);
4987 :
/* A recorded invariant expression is handed on as a one-bit bitmap
   holding its id. */
4988 5905024 : if (inv_expr)
4989 : {
4990 1040003 : inv_exprs = BITMAP_ALLOC (NULL);
4991 1040003 : bitmap_set_bit (inv_exprs, inv_expr->id);
4992 : }
4993 5905024 : set_group_iv_cost (data, group, cand, cost, inv_vars,
4994 : NULL_TREE, ERROR_MARK, inv_exprs);
4995 5905024 : return !cost.infinite_cost_p ();
4996 : }
4997 :
4998 : /* Determines cost of computing uses in GROUP with CAND in addresses.

   The costs of all uses in GROUP are summed, stopping early once the sum
   is infinite, and the sum is recorded with set_group_iv_cost.  Returns
   true iff the recorded sum is not infinite.  The invariant-variable set
   is collected for the first use only (NULL is passed for the rest). */
4999 :
5000 : static bool
5001 6745425 : determine_group_iv_cost_address (struct ivopts_data *data,
5002 : struct iv_group *group, struct iv_cand *cand)
5003 : {
5004 6745425 : unsigned i;
5005 6745425 : bitmap inv_vars = NULL, inv_exprs = NULL;
5006 6745425 : bool can_autoinc;
5007 6745425 : iv_inv_expr_ent *inv_expr = NULL;
5008 6745425 : struct iv_use *use = group->vuses[0];
5009 6745425 : comp_cost sum_cost = no_cost, cost;
5010 :
5011 6745425 : cost = get_computation_cost (data, use, cand, true,
5012 : &inv_vars, &can_autoinc, &inv_expr);
5013 :
5014 6745425 : if (inv_expr)
5015 : {
5016 499167 : inv_exprs = BITMAP_ALLOC (NULL);
5017 499167 : bitmap_set_bit (inv_exprs, inv_expr->id);
5018 : }
5019 6745425 : sum_cost = cost;
/* CAN_AUTOINC read here is the value produced for the first use. */
5020 6745425 : if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5021 : {
5022 0 : if (can_autoinc)
5023 0 : sum_cost -= cand->cost_step;
5024 : /* If we generated the candidate solely for exploiting autoincrement
5025 : opportunities, and it turns out it can't be used, set the cost to
5026 : infinity to make sure we ignore it. */
5027 0 : else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5028 0 : sum_cost = infinite_cost;
5029 : }
5030 :
5031 : /* Compute and add costs for the remaining uses of this group. */
5032 9044153 : for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5033 : {
5034 2298728 : struct iv_use *next = group->vuses[i];
5035 :
5036 : /* TODO: We could skip computing cost for sub iv_use when it has the
5037 : same cost as the first iv_use, but the cost really depends on the
5038 : offset and where the iv_use is. */
5039 2298728 : cost = get_computation_cost (data, next, cand, true,
5040 : NULL, &can_autoinc, &inv_expr);
5041 2298728 : if (inv_expr)
5042 : {
5043 315218 : if (!inv_exprs)
5044 122 : inv_exprs = BITMAP_ALLOC (NULL);
5045 :
5046 : /* Uses in a group can share setup code,
5047 : so only add setup cost once. */
5048 315218 : if (bitmap_bit_p (inv_exprs, inv_expr->id))
5049 314823 : cost -= cost.scratch;
5050 : else
5051 395 : bitmap_set_bit (inv_exprs, inv_expr->id);
5052 : }
5053 2298728 : sum_cost += cost;
5054 : }
5055 6745425 : set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5056 : NULL_TREE, ERROR_MARK, inv_exprs);
5057 :
5058 6745425 : return !sum_cost.infinite_cost_p ();
5059 : }
5060 :
5061 : /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5062 : and stores it to VAL.

   LOOP is passed to stmt_after_increment to decide whether AT comes
   after CAND's increment.  The value is built as an affine combination
   BASE + NITER * STEP, plus one more STEP when the after-increment
   adjustment is still needed.  The step and iteration count are
   converted to sizetype for pointer-typed candidates and to the unsigned
   variant of the candidate type otherwise. */
5063 :
5064 : static void
5065 4039690 : cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5066 : class tree_niter_desc *desc, aff_tree *val)
5067 : {
5068 12119070 : aff_tree step, delta, nit;
5069 4039690 : struct iv *iv = cand->iv;
5070 4039690 : tree type = TREE_TYPE (iv->base);
5071 4039690 : tree niter = desc->niter;
5072 4039690 : bool after_adjust = stmt_after_increment (loop, cand, at);
5073 4039690 : tree steptype;
5074 :
5075 4039690 : if (POINTER_TYPE_P (type))
5076 107433 : steptype = sizetype;
5077 : else
5078 3932257 : steptype = unsigned_type_for (type);
5079 :
5080 : /* If AFTER_ADJUST is required, the code below generates the equivalent
5081 : of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5082 : BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5083 : SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5084 : doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5085 : class for common idioms that we know are safe. */
5086 4039690 : if (after_adjust
5087 3771554 : && desc->control.no_overflow
5088 3763007 : && integer_onep (desc->control.step)
5089 1056943 : && (desc->cmp == LT_EXPR
5090 47228 : || desc->cmp == NE_EXPR)
5091 5096633 : && TREE_CODE (desc->bound) == SSA_NAME)
5092 : {
/* Control IV starts at 1: the bound itself is used as NITER + 1. */
5093 540581 : if (integer_onep (desc->control.base))
5094 : {
5095 387506 : niter = desc->bound;
5096 387506 : after_adjust = false;
5097 : }
/* NITER has the form X - 1: use X as NITER + 1. */
5098 153075 : else if (TREE_CODE (niter) == MINUS_EXPR
5099 153075 : && integer_onep (TREE_OPERAND (niter, 1)))
5100 : {
5101 84267 : niter = TREE_OPERAND (niter, 0);
5102 84267 : after_adjust = false;
5103 : }
5104 : }
5105 :
/* DELTA = NITER * STEP, plus STEP if the adjustment was not folded into
   NITER above. */
5106 4039690 : tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5107 4039690 : aff_combination_convert (&step, steptype);
5108 4039690 : tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5109 4039690 : aff_combination_convert (&nit, steptype);
5110 4039690 : aff_combination_mult (&nit, &step, &delta);
5111 4039690 : if (after_adjust)
5112 3299781 : aff_combination_add (&delta, &step);
5113 :
/* VAL = BASE + DELTA; a pointer-typed base is left in its own type. */
5114 4039690 : tree_to_aff_combination (iv->base, type, val);
5115 4039690 : if (!POINTER_TYPE_P (type))
5116 3932257 : aff_combination_convert (val, steptype);
5117 4039690 : aff_combination_add (val, &delta);
5118 4039690 : }
5119 :
5120 : /* Returns period of induction variable iv.

   The step of IV must be an INTEGER_CST (asserted).  The result is a
   low-bits mask in the unsigned variant of the step's type, with
   precision minus the number of trailing zero bits of the step as its
   width. */
5121 :
5122 : static tree
5123 4268765 : iv_period (struct iv *iv)
5124 : {
5125 4268765 : tree step = iv->step, period, type;
5126 4268765 : tree pow2div;
5127 :
5128 4268765 : gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5129 :
5130 4268765 : type = unsigned_type_for (TREE_TYPE (step));
5131 : /* Period of the iv is lcm (step, type_range)/step -1,
5132 : i.e., N*type_range/step - 1. Since type range is power
5133 : of two, N == (step >> num_of_ending_zeros_binary (step)),
5134 : so the final result is
5135 :
5136 : (type_range >> num_of_ending_zeros_binary (step)) - 1
5137 :
5138 : */
5139 4268765 : pow2div = num_ending_zeros (step);
5140 :
5141 12806295 : period = build_low_bits_mask (type,
5142 4268765 : (TYPE_PRECISION (type)
5143 4268765 : - tree_to_uhwi (pow2div)));
5144 :
5145 4268765 : return period;
5146 : }
5147 :
5148 : /* Returns the comparison operator used when eliminating the iv USE.

   The block of USE's statement is inspected: of its first two successor
   edges, the one whose destination is outside DATA->current_loop is
   taken as the exit (the second edge is used whenever the first stays
   inside the loop).  EQ_EXPR is returned if that edge carries
   EDGE_TRUE_VALUE, NE_EXPR otherwise. */
5149 :
5150 : static enum tree_code
5151 4039690 : iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5152 : {
5153 4039690 : class loop *loop = data->current_loop;
5154 4039690 : basic_block ex_bb;
5155 4039690 : edge exit;
5156 :
5157 4039690 : ex_bb = gimple_bb (use->stmt);
5158 4039690 : exit = EDGE_SUCC (ex_bb, 0);
5159 4039690 : if (flow_bb_inside_loop_p (loop, exit->dest))
5160 3001086 : exit = EDGE_SUCC (ex_bb, 1);
5161 :
5162 4039690 : return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5163 : }
5164 :
5165 : /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5166 : we only detect the situation that BASE = SOMETHING + OFFSET, where the
5167 : calculation is performed in non-wrapping type.
5168 :
5169 : TODO: More generally, we could test for the situation that
5170 : BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5171 : This would require knowing the sign of OFFSET.

   DATA supplies the name expansion cache used for the affine
   expansions.  False is returned whenever BASE cannot be decomposed
   into a binary operation, or that operation is neither PLUS_EXPR nor
   POINTER_PLUS_EXPR. */
5172 :
5173 : static bool
5174 477 : difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5175 : {
5176 477 : enum tree_code code;
5177 477 : tree e1, e2;
5178 1431 : aff_tree aff_e1, aff_e2, aff_offset;
5179 :
5180 477 : if (!nowrap_type_p (TREE_TYPE (base)))
5181 : return false;
5182 :
5183 477 : base = expand_simple_operations (base);
5184 :
/* Obtain the operation code and the two operands, either from the
   statement defining an SSA name or from BASE itself. */
5185 477 : if (TREE_CODE (base) == SSA_NAME)
5186 : {
5187 476 : gimple *stmt = SSA_NAME_DEF_STMT (base);
5188 :
5189 476 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
5190 : return false;
5191 :
5192 18 : code = gimple_assign_rhs_code (stmt);
5193 18 : if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5194 : return false;
5195 :
5196 5 : e1 = gimple_assign_rhs1 (stmt);
5197 5 : e2 = gimple_assign_rhs2 (stmt);
5198 : }
5199 : else
5200 : {
5201 1 : code = TREE_CODE (base);
5202 1 : if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5203 : return false;
5204 1 : e1 = TREE_OPERAND (base, 0);
5205 1 : e2 = TREE_OPERAND (base, 1);
5206 : }
5207 :
5208 : /* Use affine expansion as deeper inspection to prove the equality. */
5209 6 : tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5210 : &aff_e2, &data->name_expansion_cache);
5211 6 : tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5212 : &aff_offset, &data->name_expansion_cache);
/* AFF_OFFSET now holds -OFFSET, so adding it to an operand and testing
   for zero checks that the operand equals OFFSET. */
5213 6 : aff_combination_scale (&aff_offset, -1);
5214 6 : switch (code)
5215 : {
/* For PLUS_EXPR either operand may be the one equal to OFFSET; the
   second operand is tried first. */
5216 4 : case PLUS_EXPR:
5217 4 : aff_combination_add (&aff_e2, &aff_offset);
5218 4 : if (aff_combination_zero_p (&aff_e2))
5219 : return true;
5220 :
5221 2 : tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5222 : &aff_e1, &data->name_expansion_cache);
5223 2 : aff_combination_add (&aff_e1, &aff_offset);
5224 2 : return aff_combination_zero_p (&aff_e1);
5225 :
/* For POINTER_PLUS_EXPR only the second operand is compared. */
5226 2 : case POINTER_PLUS_EXPR:
5227 2 : aff_combination_add (&aff_e2, &aff_offset);
5228 2 : return aff_combination_zero_p (&aff_e2);
5229 :
5230 : default:
5231 : return false;
5232 : }
5233 477 : }
5234 :
5235 : /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5236 : comparison with CAND. NITER describes the number of iterations of
5237 : the loop. If successful, the comparison in COMP_P is altered accordingly.
5238 :
5239 : We aim to handle the following situation:
5240 :
5241 : sometype *base, *p;
5242 : int a, b, i;
5243 :
5244 : i = a;
5245 : p = p_0 = base + a;
5246 :
5247 : do
5248 : {
5249 : bla (*p);
5250 : p++;
5251 : i++;
5252 : }
5253 : while (i < b);
5254 :
5255 : Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5256 : We aim to optimize this to
5257 :
5258 : p = p_0 = base + a;
5259 : do
5260 : {
5261 : bla (*p);
5262 : p++;
5263 : }
5264 : while (p < p_0 - a + b);
5265 :
5266 : This preserves the correctness, since the pointer arithmetic does not
5267 : overflow. More precisely:
5268 :
5269 : 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5270 : overflow in computing it or the values of p.
5271 : 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5272 : overflow. To prove this, we use the fact that p_0 = base + a.

   Returns true on success and false, leaving *COMP_P untouched,
   otherwise.  On entry *COMP_P must be NE_EXPR or EQ_EXPR when the
   final step is reached; any other value hits gcc_unreachable. */
5273 :
5274 : static bool
5275 264522 : iv_elimination_compare_lt (struct ivopts_data *data,
5276 : struct iv_cand *cand, enum tree_code *comp_p,
5277 : class tree_niter_desc *niter)
5278 : {
5279 264522 : tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5280 793566 : class aff_tree nit, tmpa, tmpb;
5281 264522 : enum tree_code comp;
5282 264522 : HOST_WIDE_INT step;
5283 :
5284 : /* We need to know that the candidate induction variable does not overflow.
5285 : While more complex analysis may be used to prove this, for now just
5286 : check that the variable appears in the original program and that it
5287 : is computed in a type that guarantees no overflows. */
5288 264522 : cand_type = TREE_TYPE (cand->iv->base);
5289 264522 : if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5290 241417 : return false;
5291 :
5292 : /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5293 : the calculation of the BOUND could overflow, making the comparison
5294 : invalid. */
5295 23105 : if (!data->loop_single_exit_p)
5296 : return false;
5297 :
5298 : /* We need to be able to decide whether candidate is increasing or decreasing
5299 : in order to choose the right comparison operator. */
5300 16223 : if (!cst_and_fits_in_hwi (cand->iv->step))
5301 : return false;
5302 16223 : step = int_cst_value (cand->iv->step);
5303 :
5304 : /* Check that the number of iterations matches the expected pattern:
5305 : a + 1 > b ? 0 : b - a - 1. */
5306 16223 : mbz = niter->may_be_zero;
5307 16223 : if (TREE_CODE (mbz) == GT_EXPR)
5308 : {
5309 : /* Handle a + 1 > b. */
5310 1713 : tree op0 = TREE_OPERAND (mbz, 0);
5311 1713 : if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5312 : {
5313 794 : a = TREE_OPERAND (op0, 0);
5314 794 : b = TREE_OPERAND (mbz, 1);
5315 : }
5316 : else
5317 919 : return false;
5318 : }
5319 14510 : else if (TREE_CODE (mbz) == LT_EXPR)
5320 : {
5321 4751 : tree op1 = TREE_OPERAND (mbz, 1);
5322 :
5323 : /* Handle b < a + 1. */
5324 4751 : if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5325 : {
5326 82 : a = TREE_OPERAND (op1, 0);
5327 82 : b = TREE_OPERAND (mbz, 0);
5328 : }
5329 : else
5330 4669 : return false;
5331 : }
5332 : else
5333 : return false;
5334 :
5335 : /* Expected number of iterations is B - A - 1. Check that it matches
5336 : the actual number, i.e., that B - A - NITER = 1. */
5337 876 : tree_to_aff_combination (niter->niter, nit_type, &nit);
5338 876 : tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5339 876 : tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5340 876 : aff_combination_scale (&nit, -1);
5341 876 : aff_combination_scale (&tmpa, -1);
5342 876 : aff_combination_add (&tmpb, &tmpa);
5343 876 : aff_combination_add (&tmpb, &nit);
/* TMPB now holds B - A - NITER; it must reduce to the constant 1 with
   no remaining elements. */
5344 876 : if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5345 399 : return false;
5346 :
5347 : /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5348 : overflow. */
5349 477 : offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5350 : cand->iv->step,
5351 : fold_convert (TREE_TYPE (cand->iv->step), a));
5352 477 : if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5353 : return false;
5354 :
5355 : /* Determine the new comparison operator. */
/* A decreasing candidate uses GT_EXPR, otherwise LT_EXPR; an EQ_EXPR
   exit test receives the inverted comparison. */
5356 5 : comp = step < 0 ? GT_EXPR : LT_EXPR;
5357 5 : if (*comp_p == NE_EXPR)
5358 5 : *comp_p = comp;
5359 0 : else if (*comp_p == EQ_EXPR)
5360 0 : *comp_p = invert_tree_comparison (comp, false);
5361 : else
5362 0 : gcc_unreachable ();
5363 :
5364 : return true;
5365 264522 : }
5366 :
5367 : /* Check whether it is possible to express the condition in USE by comparison
5368 : of candidate CAND. If so, store the value compared with to BOUND, and the
5369 : comparison operator to COMP. */
5370 :
5371 : static bool
5372 5262522 : may_eliminate_iv (struct ivopts_data *data,
5373 : struct iv_use *use, struct iv_cand *cand, tree *bound,
5374 : enum tree_code *comp)
5375 : {
5376 5262522 : basic_block ex_bb;
5377 5262522 : edge exit;
5378 5262522 : tree period;
5379 5262522 : class loop *loop = data->current_loop;
5380 5262522 : aff_tree bnd;
5381 5262522 : class tree_niter_desc *desc = NULL;
5382 :
5383 : /* If the IV candidate involves undefs do not attempt to use it to
5384 : express a condition. */
5385 5262522 : if (cand->involves_undefs)
5386 : return false;
5387 :
5388 5262170 : if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5389 : return false;
5390 :
5391 : /* For now works only for exits that dominate the loop latch.
5392 : TODO: extend to other conditions inside loop body. */
5393 5035415 : ex_bb = gimple_bb (use->stmt);
5394 5035415 : if (use->stmt != last_nondebug_stmt (ex_bb)
5395 4907595 : || gimple_code (use->stmt) != GIMPLE_COND
5396 9940703 : || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5397 281324 : return false;
5398 :
5399 4754091 : exit = EDGE_SUCC (ex_bb, 0);
5400 4754091 : if (flow_bb_inside_loop_p (loop, exit->dest))
5401 3584275 : exit = EDGE_SUCC (ex_bb, 1);
5402 4754091 : if (flow_bb_inside_loop_p (loop, exit->dest))
5403 : return false;
5404 :
5405 4618770 : desc = niter_for_exit (data, exit);
5406 4618770 : if (!desc)
5407 : return false;
5408 :
5409 : /* Determine whether we can use the variable to test the exit condition.
5410 : This is the case iff the period of the induction variable is greater
5411 : than the number of iterations for which the exit condition is true. */
5412 4268765 : period = iv_period (cand->iv);
5413 :
5414 : /* If the number of iterations is constant, compare against it directly. */
5415 4268765 : if (TREE_CODE (desc->niter) == INTEGER_CST)
5416 : {
5417 : /* See cand_value_at. */
5418 2693844 : if (stmt_after_increment (loop, cand, use->stmt))
5419 : {
5420 2635001 : if (!tree_int_cst_lt (desc->niter, period))
5421 : return false;
5422 : }
5423 : else
5424 : {
5425 58843 : if (tree_int_cst_lt (period, desc->niter))
5426 : return false;
5427 : }
5428 : }
5429 :
5430 : /* If not, and if this is the only possible exit of the loop, see whether
5431 : we can get a conservative estimate on the number of iterations of the
5432 : entire loop and compare against that instead. */
5433 : else
5434 : {
5435 1574921 : widest_int period_value, max_niter;
5436 :
5437 1574921 : max_niter = desc->max;
5438 1574921 : if (stmt_after_increment (loop, cand, use->stmt))
5439 1325147 : max_niter += 1;
5440 1574921 : period_value = wi::to_widest (period);
5441 1574921 : if (wi::gtu_p (max_niter, period_value))
5442 : {
5443 : /* See if we can take advantage of inferred loop bound
5444 : information. */
5445 396786 : if (data->loop_single_exit_p)
5446 : {
5447 248005 : if (!max_loop_iterations (loop, &max_niter))
5448 : return false;
5449 : /* The loop bound is already adjusted by adding 1. */
5450 248005 : if (wi::gtu_p (max_niter, period_value))
5451 : return false;
5452 : }
5453 : else
5454 : return false;
5455 : }
5456 1574921 : }
5457 :
5458 : /* For doloop IV cand, the bound would be zero. It's safe whether
5459 : may_be_zero set or not. */
5460 4039690 : if (cand->doloop_p)
5461 : {
5462 0 : *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5463 0 : *comp = iv_elimination_compare (data, use);
5464 0 : return true;
5465 : }
5466 :
5467 4039690 : cand_value_at (loop, cand, use->stmt, desc, &bnd);
5468 :
5469 4039690 : *bound = fold_convert (TREE_TYPE (cand->iv->base),
5470 : aff_combination_to_tree (&bnd));
5471 4039690 : *comp = iv_elimination_compare (data, use);
5472 :
5473 : /* It is unlikely that computing the number of iterations using division
5474 : would be more profitable than keeping the original induction variable. */
5475 4039690 : bool cond_overflow_p;
5476 4039690 : if (expression_expensive_p (*bound, &cond_overflow_p))
5477 : return false;
5478 :
5479 : /* Sometimes, it is possible to handle the situation that the number of
5480 : iterations may be zero unless additional assumptions by using <
5481 : instead of != in the exit condition.
5482 :
5483 : TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5484 : base the exit condition on it. However, that is often too
5485 : expensive. */
5486 4027761 : if (!integer_zerop (desc->may_be_zero))
5487 264522 : return iv_elimination_compare_lt (data, cand, comp, desc);
5488 :
5489 : return true;
5490 5262522 : }
5491 :
5492 : /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5493 : be copied, if it is used in the loop body and DATA->body_includes_call. */
5494 :
5495 : static int
5496 8973105 : parm_decl_cost (struct ivopts_data *data, tree bound)
5497 : {
5498 8973105 : tree sbound = bound;
5499 8973105 : STRIP_NOPS (sbound);
5500 :
5501 8973105 : if (TREE_CODE (sbound) == SSA_NAME
5502 3367594 : && SSA_NAME_IS_DEFAULT_DEF (sbound)
5503 166659 : && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5504 9137554 : && data->body_includes_call)
5505 42188 : return COSTS_N_INSNS (1);
5506 :
5507 : return 0;
5508 : }
5509 :
5510 : /* Determines cost of computing the use in GROUP with CAND in a condition. */
5511 :
5512 : static bool
5513 6513041 : determine_group_iv_cost_cond (struct ivopts_data *data,
5514 : struct iv_group *group, struct iv_cand *cand)
5515 : {
5516 6513041 : tree bound = NULL_TREE;
5517 6513041 : struct iv *cmp_iv;
5518 6513041 : bitmap inv_exprs = NULL;
5519 6513041 : bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5520 6513041 : comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5521 6513041 : enum comp_iv_rewrite rewrite_type;
5522 6513041 : iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5523 6513041 : tree *control_var, *bound_cst;
5524 6513041 : enum tree_code comp = ERROR_MARK;
5525 6513041 : struct iv_use *use = group->vuses[0];
5526 :
5527 : /* Extract condition operands. */
5528 6513041 : rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5529 : &bound_cst, NULL, &cmp_iv);
5530 6513041 : gcc_assert (rewrite_type != COMP_IV_NA);
5531 :
5532 : /* Try iv elimination. */
5533 6513041 : if (rewrite_type == COMP_IV_ELIM
5534 6513041 : && may_eliminate_iv (data, use, cand, &bound, &comp))
5535 : {
5536 3763244 : elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5537 3763244 : if (elim_cost.cost == 0)
5538 2489580 : elim_cost.cost = parm_decl_cost (data, bound);
5539 1273664 : else if (TREE_CODE (bound) == INTEGER_CST)
5540 0 : elim_cost.cost = 0;
5541 : /* If we replace a loop condition 'i < n' with 'p < base + n',
5542 : inv_vars_elim will have 'base' and 'n' set, which implies that both
5543 : 'base' and 'n' will be live during the loop. More likely,
5544 : 'base + n' will be loop invariant, resulting in only one live value
5545 : during the loop. So in that case we clear inv_vars_elim and set
5546 : inv_expr_elim instead. */
5547 3763244 : if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5548 : {
5549 351551 : inv_expr_elim = get_loop_invariant_expr (data, bound);
5550 351551 : bitmap_clear (inv_vars_elim);
5551 : }
5552 : /* The bound is a loop invariant, so it will be only computed
5553 : once. */
5554 3763244 : elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5555 : }
5556 :
5557 : /* When the condition is a comparison of the candidate IV against
5558 : zero, prefer this IV.
5559 :
5560 : TODO: The constant that we're subtracting from the cost should
5561 : be target-dependent. This information should be added to the
5562 : target costs for each backend. */
5563 6513041 : if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5564 3763244 : && integer_zerop (*bound_cst)
5565 9155864 : && (operand_equal_p (*control_var, cand->var_after, 0)
5566 2394421 : || operand_equal_p (*control_var, cand->var_before, 0)))
5567 254660 : elim_cost -= 1;
5568 :
5569 6513041 : express_cost = get_computation_cost (data, use, cand, false,
5570 : &inv_vars_express, NULL,
5571 : &inv_expr_express);
5572 6513041 : if (cmp_iv != NULL)
5573 5389592 : find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5574 :
5575 : /* Count the cost of the original bound as well. */
5576 6513041 : bound_cost = force_var_cost (data, *bound_cst, NULL);
5577 6513041 : if (bound_cost.cost == 0)
5578 6483525 : bound_cost.cost = parm_decl_cost (data, *bound_cst);
5579 29516 : else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5580 0 : bound_cost.cost = 0;
5581 6513041 : express_cost += bound_cost;
5582 :
5583 : /* Choose the better approach, preferring the eliminated IV. */
5584 6513041 : if (elim_cost <= express_cost)
5585 : {
5586 4814336 : cost = elim_cost;
5587 4814336 : inv_vars = inv_vars_elim;
5588 4814336 : inv_vars_elim = NULL;
5589 4814336 : inv_expr = inv_expr_elim;
5590 : /* For doloop candidate/use pair, adjust to zero cost. */
5591 4814336 : if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5592 0 : cost = no_cost;
5593 : }
5594 : else
5595 : {
5596 1698705 : cost = express_cost;
5597 1698705 : inv_vars = inv_vars_express;
5598 1698705 : inv_vars_express = NULL;
5599 1698705 : bound = NULL_TREE;
5600 1698705 : comp = ERROR_MARK;
5601 1698705 : inv_expr = inv_expr_express;
5602 : }
5603 :
5604 6513041 : if (inv_expr)
5605 : {
5606 698527 : inv_exprs = BITMAP_ALLOC (NULL);
5607 698527 : bitmap_set_bit (inv_exprs, inv_expr->id);
5608 : }
5609 6513041 : set_group_iv_cost (data, group, cand, cost,
5610 : inv_vars, bound, comp, inv_exprs);
5611 :
5612 6513041 : if (inv_vars_elim)
5613 26263 : BITMAP_FREE (inv_vars_elim);
5614 6513041 : if (inv_vars_express)
5615 1395905 : BITMAP_FREE (inv_vars_express);
5616 :
5617 6513041 : return !cost.infinite_cost_p ();
5618 : }
5619 :
5620 : /* Determines cost of computing uses in GROUP with CAND. Returns false
5621 : if USE cannot be represented with CAND. */
5622 :
5623 : static bool
5624 19163702 : determine_group_iv_cost (struct ivopts_data *data,
5625 : struct iv_group *group, struct iv_cand *cand)
5626 : {
5627 19163702 : switch (group->type)
5628 : {
5629 5905236 : case USE_NONLINEAR_EXPR:
5630 5905236 : return determine_group_iv_cost_generic (data, group, cand);
5631 :
5632 6745425 : case USE_REF_ADDRESS:
5633 6745425 : case USE_PTR_ADDRESS:
5634 6745425 : return determine_group_iv_cost_address (data, group, cand);
5635 :
5636 6513041 : case USE_COMPARE:
5637 6513041 : return determine_group_iv_cost_cond (data, group, cand);
5638 :
5639 0 : default:
5640 0 : gcc_unreachable ();
5641 : }
5642 : }
5643 :
5644 : /* Return true if get_computation_cost indicates that autoincrement is
5645 : a possibility for the pair of USE and CAND, false otherwise. */
5646 :
5647 : static bool
5648 1281405 : autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5649 : struct iv_cand *cand)
5650 : {
5651 1281405 : if (!address_p (use->type))
5652 : return false;
5653 :
5654 418050 : bool can_autoinc = false;
5655 418050 : get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5656 418050 : return can_autoinc;
5657 : }
5658 :
5659 : /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5660 : use that allows autoincrement, and set their AINC_USE if possible. */
5661 :
5662 : static void
5663 500623 : set_autoinc_for_original_candidates (struct ivopts_data *data)
5664 : {
5665 500623 : unsigned i, j;
5666 :
5667 5390146 : for (i = 0; i < data->vcands.length (); i++)
5668 : {
5669 4889523 : struct iv_cand *cand = data->vcands[i];
5670 4889523 : struct iv_use *closest_before = NULL;
5671 4889523 : struct iv_use *closest_after = NULL;
5672 4889523 : if (cand->pos != IP_ORIGINAL)
5673 4022929 : continue;
5674 :
5675 3816738 : for (j = 0; j < data->vgroups.length (); j++)
5676 : {
5677 2950144 : struct iv_group *group = data->vgroups[j];
5678 2950144 : struct iv_use *use = group->vuses[0];
5679 2950144 : unsigned uid = gimple_uid (use->stmt);
5680 :
5681 2950144 : if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5682 1165458 : continue;
5683 :
5684 1784686 : if (uid < gimple_uid (cand->incremented_at)
5685 1784686 : && (closest_before == NULL
5686 377162 : || uid > gimple_uid (closest_before->stmt)))
5687 : closest_before = use;
5688 :
5689 1784686 : if (uid > gimple_uid (cand->incremented_at)
5690 1784686 : && (closest_after == NULL
5691 69183 : || uid < gimple_uid (closest_after->stmt)))
5692 : closest_after = use;
5693 : }
5694 :
5695 866594 : if (closest_before != NULL
5696 866594 : && autoinc_possible_for_pair (data, closest_before, cand))
5697 0 : cand->ainc_use = closest_before;
5698 866594 : else if (closest_after != NULL
5699 866594 : && autoinc_possible_for_pair (data, closest_after, cand))
5700 0 : cand->ainc_use = closest_after;
5701 : }
5702 500623 : }
5703 :
5704 : /* Relate compare use with all candidates. */
5705 :
5706 : static void
5707 801 : relate_compare_use_with_all_cands (struct ivopts_data *data)
5708 : {
5709 801 : unsigned i, count = data->vcands.length ();
5710 24437 : for (i = 0; i < data->vgroups.length (); i++)
5711 : {
5712 23636 : struct iv_group *group = data->vgroups[i];
5713 :
5714 23636 : if (group->type == USE_COMPARE)
5715 3932 : bitmap_set_range (group->related_cands, 0, count);
5716 : }
5717 801 : }
5718 :
5719 : /* If PREFERRED_MODE is suitable and profitable, use the preferred
5720 : PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1. */
5721 :
5722 : static tree
5723 0 : compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5724 : const widest_int &iterations_max)
5725 : {
5726 0 : tree ntype = TREE_TYPE (niter);
5727 0 : tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5728 0 : if (!pref_type)
5729 0 : return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5730 : build_int_cst (ntype, 1));
5731 :
5732 0 : gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5733 :
5734 0 : int prec = TYPE_PRECISION (ntype);
5735 0 : int pref_prec = TYPE_PRECISION (pref_type);
5736 :
5737 0 : tree base;
5738 :
5739 : /* Check if the PREFERRED_MODED is able to present niter. */
5740 0 : if (pref_prec > prec
5741 0 : || wi::ltu_p (iterations_max,
5742 0 : widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5743 : UNSIGNED)))
5744 : {
5745 : /* No wrap, it is safe to use preferred type after niter + 1. */
5746 0 : if (wi::ltu_p (iterations_max,
5747 0 : widest_int::from (wi::max_value (prec, UNSIGNED),
5748 : UNSIGNED)))
5749 : {
5750 : /* This could help to optimize "-1 +1" pair when niter looks
5751 : like "n-1": n is in original mode. "base = (n - 1) + 1"
5752 : in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n. */
5753 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5754 : build_int_cst (ntype, 1));
5755 0 : base = fold_convert (pref_type, base);
5756 : }
5757 :
5758 : /* To avoid wrap, convert niter to preferred type before plus 1. */
5759 : else
5760 : {
5761 0 : niter = fold_convert (pref_type, niter);
5762 0 : base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5763 : build_int_cst (pref_type, 1));
5764 : }
5765 : }
5766 : else
5767 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5768 : build_int_cst (ntype, 1));
5769 : return base;
5770 : }
5771 :
5772 : /* Add one doloop dedicated IV candidate:
5773 : - Base is (may_be_zero ? 1 : (niter + 1)).
5774 : - Step is -1. */
5775 :
5776 : static void
5777 0 : add_iv_candidate_for_doloop (struct ivopts_data *data)
5778 : {
5779 0 : tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5780 0 : gcc_assert (niter_desc && niter_desc->assumptions);
5781 :
5782 0 : tree niter = niter_desc->niter;
5783 0 : tree ntype = TREE_TYPE (niter);
5784 0 : gcc_assert (INTEGRAL_NB_TYPE_P (ntype));
5785 :
5786 0 : tree may_be_zero = niter_desc->may_be_zero;
5787 0 : if (may_be_zero && integer_zerop (may_be_zero))
5788 : may_be_zero = NULL_TREE;
5789 0 : if (may_be_zero)
5790 : {
5791 0 : if (COMPARISON_CLASS_P (may_be_zero))
5792 : {
5793 0 : niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5794 : build_int_cst (ntype, 0),
5795 : rewrite_to_non_trapping_overflow (niter));
5796 : }
5797 : /* Don't try to obtain the iteration count expression when may_be_zero is
5798 : integer_nonzerop (actually iteration count is one) or else. */
5799 : else
5800 : return;
5801 : }
5802 :
5803 0 : machine_mode mode = TYPE_MODE (ntype);
5804 0 : machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5805 :
5806 0 : tree base;
5807 0 : if (mode != pref_mode)
5808 : {
5809 0 : base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5810 0 : ntype = TREE_TYPE (base);
5811 : }
5812 : else
5813 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5814 : build_int_cst (ntype, 1));
5815 :
5816 : /* For non integer types or non-mode precision types,
5817 : convert directly to an integer type. */
5818 0 : if (TREE_CODE (ntype) != INTEGER_TYPE
5819 0 : || !type_has_mode_precision_p (ntype))
5820 : {
5821 0 : ntype = lang_hooks.types.type_for_mode (TYPE_MODE (ntype),
5822 0 : TYPE_UNSIGNED (ntype));
5823 0 : base = fold_convert (ntype, base);
5824 : }
5825 :
5826 0 : add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5827 : }
5828 :
5829 : /* Finds the candidates for the induction variables. */
5830 :
5831 : static void
5832 500623 : find_iv_candidates (struct ivopts_data *data)
5833 : {
5834 : /* Add commonly used ivs. */
5835 500623 : add_standard_iv_candidates (data);
5836 :
5837 : /* Add doloop dedicated ivs. */
5838 500623 : if (data->doloop_use_p)
5839 0 : add_iv_candidate_for_doloop (data);
5840 :
5841 : /* Add old induction variables. */
5842 500623 : add_iv_candidate_for_bivs (data);
5843 :
5844 : /* Add induction variables derived from uses. */
5845 500623 : add_iv_candidate_for_groups (data);
5846 :
5847 500623 : set_autoinc_for_original_candidates (data);
5848 :
5849 : /* Record the important candidates. */
5850 500623 : record_important_candidates (data);
5851 :
5852 : /* Relate compare iv_use with all candidates. */
5853 500623 : if (!data->consider_all_candidates)
5854 801 : relate_compare_use_with_all_cands (data);
5855 :
5856 500623 : if (dump_file && (dump_flags & TDF_DETAILS))
5857 : {
5858 67 : unsigned i;
5859 :
5860 67 : fprintf (dump_file, "\n<Important Candidates>:\t");
5861 859 : for (i = 0; i < data->vcands.length (); i++)
5862 725 : if (data->vcands[i]->important)
5863 493 : fprintf (dump_file, " %d,", data->vcands[i]->id);
5864 67 : fprintf (dump_file, "\n");
5865 :
5866 67 : fprintf (dump_file, "\n<Group, Cand> Related:\n");
5867 287 : for (i = 0; i < data->vgroups.length (); i++)
5868 : {
5869 220 : struct iv_group *group = data->vgroups[i];
5870 :
5871 220 : if (group->related_cands)
5872 : {
5873 220 : fprintf (dump_file, " Group %d:\t", group->id);
5874 220 : dump_bitmap (dump_file, group->related_cands);
5875 : }
5876 : }
5877 67 : fprintf (dump_file, "\n");
5878 : }
5879 500623 : }
5880 :
5881 : /* Determines costs of computing use of iv with an iv candidate. */
5882 :
5883 : static void
5884 500623 : determine_group_iv_costs (struct ivopts_data *data)
5885 : {
5886 500623 : unsigned i, j;
5887 500623 : struct iv_cand *cand;
5888 500623 : struct iv_group *group;
5889 500623 : bitmap to_clear = BITMAP_ALLOC (NULL);
5890 :
5891 500623 : alloc_use_cost_map (data);
5892 :
5893 2142723 : for (i = 0; i < data->vgroups.length (); i++)
5894 : {
5895 1642100 : group = data->vgroups[i];
5896 :
5897 1642100 : if (data->consider_all_candidates)
5898 : {
5899 20133774 : for (j = 0; j < data->vcands.length (); j++)
5900 : {
5901 18491674 : cand = data->vcands[j];
5902 18491674 : determine_group_iv_cost (data, group, cand);
5903 : }
5904 : }
5905 : else
5906 : {
5907 23636 : bitmap_iterator bi;
5908 :
5909 695664 : EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5910 : {
5911 672028 : cand = data->vcands[j];
5912 672028 : if (!determine_group_iv_cost (data, group, cand))
5913 392782 : bitmap_set_bit (to_clear, j);
5914 : }
5915 :
5916 : /* Remove the candidates for that the cost is infinite from
5917 : the list of related candidates. */
5918 23636 : bitmap_and_compl_into (group->related_cands, to_clear);
5919 23636 : bitmap_clear (to_clear);
5920 : }
5921 : }
5922 :
5923 500623 : BITMAP_FREE (to_clear);
5924 :
5925 500623 : if (dump_file && (dump_flags & TDF_DETAILS))
5926 : {
5927 67 : bitmap_iterator bi;
5928 :
5929 : /* Dump invariant variables. */
5930 67 : fprintf (dump_file, "\n<Invariant Vars>:\n");
5931 1023 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5932 : {
5933 956 : struct version_info *info = ver_info (data, i);
5934 956 : if (info->inv_id)
5935 : {
5936 222 : fprintf (dump_file, "Inv %d:\t", info->inv_id);
5937 222 : print_generic_expr (dump_file, info->name, TDF_SLIM);
5938 222 : fprintf (dump_file, "%s\n",
5939 222 : info->has_nonlin_use ? "" : "\t(eliminable)");
5940 : }
5941 : }
5942 :
5943 : /* Dump invariant expressions. */
5944 67 : fprintf (dump_file, "\n<Invariant Expressions>:\n");
5945 67 : auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5946 :
5947 448 : for (hash_table<iv_inv_expr_hasher>::iterator it
5948 515 : = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5949 381 : ++it)
5950 381 : list.safe_push (*it);
5951 :
5952 67 : list.qsort (sort_iv_inv_expr_ent);
5953 :
5954 448 : for (i = 0; i < list.length (); ++i)
5955 : {
5956 381 : fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5957 381 : print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5958 381 : fprintf (dump_file, "\n");
5959 : }
5960 :
5961 67 : fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5962 :
5963 287 : for (i = 0; i < data->vgroups.length (); i++)
5964 : {
5965 220 : group = data->vgroups[i];
5966 :
5967 220 : fprintf (dump_file, "Group %d:\n", i);
5968 220 : fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5969 3089 : for (j = 0; j < group->n_map_members; j++)
5970 : {
5971 3976 : if (!group->cost_map[j].cand
5972 2869 : || group->cost_map[j].cost.infinite_cost_p ())
5973 1107 : continue;
5974 :
5975 1762 : fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5976 1762 : group->cost_map[j].cand->id,
5977 : group->cost_map[j].cost.cost,
5978 1762 : group->cost_map[j].cost.complexity);
5979 1762 : if (!group->cost_map[j].inv_exprs
5980 1762 : || bitmap_empty_p (group->cost_map[j].inv_exprs))
5981 1230 : fprintf (dump_file, "NIL;\t");
5982 : else
5983 532 : bitmap_print (dump_file,
5984 : group->cost_map[j].inv_exprs, "", ";\t");
5985 1762 : if (!group->cost_map[j].inv_vars
5986 1762 : || bitmap_empty_p (group->cost_map[j].inv_vars))
5987 1402 : fprintf (dump_file, "NIL;\n");
5988 : else
5989 360 : bitmap_print (dump_file,
5990 : group->cost_map[j].inv_vars, "", "\n");
5991 : }
5992 :
5993 220 : fprintf (dump_file, "\n");
5994 : }
5995 67 : fprintf (dump_file, "\n");
5996 67 : }
5997 500623 : }
5998 :
5999 : /* Determines cost of the candidate CAND. */
6000 :
6001 : static void
6002 4889523 : determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
6003 : {
6004 4889523 : comp_cost cost_base;
6005 4889523 : int64_t cost, cost_step;
6006 4889523 : tree base;
6007 :
6008 4889523 : gcc_assert (cand->iv != NULL);
6009 :
6010 : /* There are two costs associated with the candidate -- its increment
6011 : and its initialization. The second is almost negligible for any loop
6012 : that rolls enough, so we take it just very little into account. */
6013 :
6014 4889523 : base = cand->iv->base;
6015 4889523 : cost_base = force_var_cost (data, base, NULL);
6016 : /* It will be exceptional that the iv register happens to be initialized with
6017 : the proper value at no cost. In general, there will at least be a regcopy
6018 : or a const set. */
6019 4889523 : if (cost_base.cost == 0)
6020 3703245 : cost_base.cost = COSTS_N_INSNS (1);
6021 : /* Doloop decrement should be considered as zero cost. */
6022 4889523 : if (cand->doloop_p)
6023 : cost_step = 0;
6024 : else
6025 4889523 : cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6026 4889523 : cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6027 :
6028 : /* Prefer the original ivs unless we may gain something by replacing it.
6029 : The reason is to make debugging simpler; so this is not relevant for
6030 : artificial ivs created by other optimization passes. */
6031 4889523 : if ((cand->pos != IP_ORIGINAL
6032 866594 : || !SSA_NAME_VAR (cand->var_before)
6033 433487 : || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6034 : /* Prefer doloop as well. */
6035 5414541 : && !cand->doloop_p)
6036 4547947 : cost++;
6037 :
6038 : /* Prefer not to insert statements into latch unless there are some
6039 : already (so that we do not create unnecessary jumps). */
6040 4889523 : if (cand->pos == IP_END
6041 4889523 : && empty_block_p (ip_end_pos (data->current_loop)))
6042 2270 : cost++;
6043 :
6044 4889523 : cand->cost = cost;
6045 4889523 : cand->cost_step = cost_step;
6046 4889523 : }
6047 :
6048 : /* Determines costs of computation of the candidates. */
6049 :
6050 : static void
6051 500623 : determine_iv_costs (struct ivopts_data *data)
6052 : {
6053 500623 : unsigned i;
6054 :
6055 500623 : if (dump_file && (dump_flags & TDF_DETAILS))
6056 : {
6057 67 : fprintf (dump_file, "<Candidate Costs>:\n");
6058 67 : fprintf (dump_file, " cand\tcost\n");
6059 : }
6060 :
6061 5390146 : for (i = 0; i < data->vcands.length (); i++)
6062 : {
6063 4889523 : struct iv_cand *cand = data->vcands[i];
6064 :
6065 4889523 : determine_iv_cost (data, cand);
6066 :
6067 4889523 : if (dump_file && (dump_flags & TDF_DETAILS))
6068 725 : fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6069 : }
6070 :
6071 500623 : if (dump_file && (dump_flags & TDF_DETAILS))
6072 67 : fprintf (dump_file, "\n");
6073 500623 : }
6074 :
6075 : /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6076 : induction variables. Note N_INVS includes both invariant variables and
6077 : invariant expressions. */
6078 :
6079 : static unsigned
6080 450908742 : ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6081 : unsigned n_cands)
6082 : {
6083 450908742 : unsigned cost;
6084 450908742 : unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6085 450908742 : unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6086 450908742 : bool speed = data->speed;
6087 :
6088 : /* If there is a call in the loop body, the call-clobbered registers
6089 : are not available for loop invariants. */
6090 450908742 : if (data->body_includes_call)
6091 106158658 : available_regs = available_regs - target_clobbered_regs;
6092 :
6093 : /* If we have enough registers. */
6094 450908742 : if (regs_needed + target_res_regs < available_regs)
6095 : cost = n_new;
6096 : /* If close to running out of registers, try to preserve them. */
6097 201359239 : else if (regs_needed <= available_regs)
6098 58683967 : cost = target_reg_cost [speed] * regs_needed;
6099 : /* If we run out of available registers but the number of candidates
6100 : does not, we penalize extra registers using target_spill_cost. */
6101 142675272 : else if (n_cands <= available_regs)
6102 125412097 : cost = target_reg_cost [speed] * available_regs
6103 125412097 : + target_spill_cost [speed] * (regs_needed - available_regs);
6104 : /* If the number of candidates runs out available registers, we penalize
6105 : extra candidate registers using target_spill_cost * 2. Because it is
6106 : more expensive to spill induction variable than invariant. */
6107 : else
6108 17263175 : cost = target_reg_cost [speed] * available_regs
6109 17263175 : + target_spill_cost [speed] * (n_cands - available_regs) * 2
6110 17263175 : + target_spill_cost [speed] * (regs_needed - n_cands);
6111 :
6112 : /* Finally, add the number of candidates, so that we prefer eliminating
6113 : induction variables if possible. */
6114 450908742 : return cost + n_cands;
6115 : }
6116 :
6117 : /* For each size of the induction variable set determine the penalty. */
6118 :
6119 : static void
6120 500623 : determine_set_costs (struct ivopts_data *data)
6121 : {
6122 500623 : unsigned j, n;
6123 500623 : gphi *phi;
6124 500623 : gphi_iterator psi;
6125 500623 : tree op;
6126 500623 : class loop *loop = data->current_loop;
6127 500623 : bitmap_iterator bi;
6128 :
6129 500623 : if (dump_file && (dump_flags & TDF_DETAILS))
6130 : {
6131 67 : fprintf (dump_file, "<Global Costs>:\n");
6132 67 : fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6133 67 : fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6134 67 : fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6135 67 : fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6136 : }
6137 :
6138 500623 : n = 0;
6139 1950080 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6140 : {
6141 1449457 : phi = psi.phi ();
6142 1449457 : op = PHI_RESULT (phi);
6143 :
6144 2898914 : if (virtual_operand_p (op))
6145 306614 : continue;
6146 :
6147 1142843 : if (get_iv (data, op))
6148 871825 : continue;
6149 :
6150 500448 : if (!POINTER_TYPE_P (TREE_TYPE (op))
6151 500309 : && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6152 102129 : continue;
6153 :
6154 168889 : n++;
6155 : }
6156 :
6157 5567304 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6158 : {
6159 5066681 : struct version_info *info = ver_info (data, j);
6160 :
6161 5066681 : if (info->inv_id && info->has_nonlin_use)
6162 507766 : n++;
6163 : }
6164 :
6165 500623 : data->regs_used = n;
6166 500623 : if (dump_file && (dump_flags & TDF_DETAILS))
6167 67 : fprintf (dump_file, " regs_used %d\n", n);
6168 :
6169 500623 : if (dump_file && (dump_flags & TDF_DETAILS))
6170 : {
6171 67 : fprintf (dump_file, " cost for size:\n");
6172 67 : fprintf (dump_file, " ivs\tcost\n");
6173 2144 : for (j = 0; j <= 2 * target_avail_regs; j++)
6174 2077 : fprintf (dump_file, " %d\t%d\n", j,
6175 : ivopts_estimate_reg_pressure (data, 0, j));
6176 67 : fprintf (dump_file, "\n");
6177 : }
6178 500623 : }
6179 :
6180 : /* Returns true if A is a cheaper cost pair than B. */
6181 :
6182 : static bool
6183 91664515 : cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6184 : {
6185 91664515 : if (!a)
6186 : return false;
6187 :
6188 86186874 : if (!b)
6189 : return true;
6190 :
6191 82997930 : if (a->cost < b->cost)
6192 : return true;
6193 :
6194 61435865 : if (b->cost < a->cost)
6195 : return false;
6196 :
6197 : /* In case the costs are the same, prefer the cheaper candidate. */
6198 35077472 : if (a->cand->cost < b->cand->cost)
6199 : return true;
6200 :
6201 : return false;
6202 : }
6203 :
6204 : /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6205 : for more expensive, equal and cheaper respectively. */
6206 :
6207 : static int
6208 32856998 : compare_cost_pair (class cost_pair *a, class cost_pair *b)
6209 : {
6210 32856998 : if (cheaper_cost_pair (a, b))
6211 : return -1;
6212 25955481 : if (cheaper_cost_pair (b, a))
6213 16877888 : return 1;
6214 :
6215 : return 0;
6216 : }
6217 :
6218 : /* Returns candidate by that USE is expressed in IVS. */
6219 :
6220 : static class cost_pair *
6221 304027756 : iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6222 : {
6223 304027756 : return ivs->cand_for_group[group->id];
6224 : }
6225 :
6226 : /* Computes the cost field of IVS structure. */
6227 :
6228 : static void
6229 450906416 : iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6230 : {
6231 450906416 : comp_cost cost = ivs->cand_use_cost;
6232 :
6233 450906416 : cost += ivs->cand_cost;
6234 450906416 : cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6235 450906416 : ivs->cost = cost;
6236 450906416 : }
6237 :
6238 : /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6239 : and IVS. */
6240 :
6241 : static void
6242 618675910 : iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6243 : {
6244 618675910 : bitmap_iterator bi;
6245 618675910 : unsigned iid;
6246 :
6247 618675910 : if (!invs)
6248 498070616 : return;
6249 :
6250 120605294 : gcc_assert (n_inv_uses != NULL);
6251 211403668 : EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6252 : {
6253 90798374 : n_inv_uses[iid]--;
6254 90798374 : if (n_inv_uses[iid] == 0)
6255 67441477 : ivs->n_invs--;
6256 : }
6257 : }
6258 :
6259 : /* Set USE not to be expressed by any candidate in IVS. */
6260 :
6261 : static void
6262 223813029 : iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6263 : struct iv_group *group)
6264 : {
6265 223813029 : unsigned gid = group->id, cid;
6266 223813029 : class cost_pair *cp;
6267 :
6268 223813029 : cp = ivs->cand_for_group[gid];
6269 223813029 : if (!cp)
6270 : return;
6271 223813029 : cid = cp->cand->id;
6272 :
6273 223813029 : ivs->bad_groups++;
6274 223813029 : ivs->cand_for_group[gid] = NULL;
6275 223813029 : ivs->n_cand_uses[cid]--;
6276 :
6277 223813029 : if (ivs->n_cand_uses[cid] == 0)
6278 : {
6279 85524926 : bitmap_clear_bit (ivs->cands, cid);
6280 85524926 : if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6281 85524926 : ivs->n_cands--;
6282 85524926 : ivs->cand_cost -= cp->cand->cost;
6283 85524926 : iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6284 85524926 : iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6285 : }
6286 :
6287 223813029 : ivs->cand_use_cost -= cp->cost;
6288 223813029 : iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6289 223813029 : iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6290 223813029 : iv_ca_recount_cost (data, ivs);
6291 : }
6292 :
6293 : /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6294 : IVS. */
6295 :
6296 : static void
6297 628009272 : iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6298 : {
6299 628009272 : bitmap_iterator bi;
6300 628009272 : unsigned iid;
6301 :
6302 628009272 : if (!invs)
6303 506265636 : return;
6304 :
6305 121743636 : gcc_assert (n_inv_uses != NULL);
6306 213519112 : EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6307 : {
6308 91775476 : n_inv_uses[iid]++;
6309 91775476 : if (n_inv_uses[iid] == 1)
6310 68338181 : ivs->n_invs++;
6311 : }
6312 : }
6313 :
6314 : /* Set cost pair for GROUP in set IVS to CP. */
6315 :
6316 : static void
6317 241120328 : iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6318 : struct iv_group *group, class cost_pair *cp)
6319 : {
6320 241120328 : unsigned gid = group->id, cid;
6321 :
6322 241120328 : if (ivs->cand_for_group[gid] == cp)
6323 : return;
6324 :
6325 227093387 : if (ivs->cand_for_group[gid])
6326 211272186 : iv_ca_set_no_cp (data, ivs, group);
6327 :
6328 227093387 : if (cp)
6329 : {
6330 227093387 : cid = cp->cand->id;
6331 :
6332 227093387 : ivs->bad_groups--;
6333 227093387 : ivs->cand_for_group[gid] = cp;
6334 227093387 : ivs->n_cand_uses[cid]++;
6335 227093387 : if (ivs->n_cand_uses[cid] == 1)
6336 : {
6337 86911249 : bitmap_set_bit (ivs->cands, cid);
6338 86911249 : if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6339 86911249 : ivs->n_cands++;
6340 86911249 : ivs->cand_cost += cp->cand->cost;
6341 86911249 : iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6342 86911249 : iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6343 : }
6344 :
6345 227093387 : ivs->cand_use_cost += cp->cost;
6346 227093387 : iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6347 227093387 : iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6348 227093387 : iv_ca_recount_cost (data, ivs);
6349 : }
6350 : }
6351 :
6352 : /* Extend set IVS by expressing USE by some of the candidates in it
6353 : if possible. Consider all important candidates if candidates in
6354 : set IVS don't give any result. */
6355 :
6356 : static void
6357 3281656 : iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6358 : struct iv_group *group)
6359 : {
6360 3281656 : class cost_pair *best_cp = NULL, *cp;
6361 3281656 : bitmap_iterator bi;
6362 3281656 : unsigned i;
6363 3281656 : struct iv_cand *cand;
6364 :
6365 3281656 : gcc_assert (ivs->upto >= group->id);
6366 3281656 : ivs->upto++;
6367 3281656 : ivs->bad_groups++;
6368 :
6369 6283958 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6370 : {
6371 3002302 : cand = data->vcands[i];
6372 3002302 : cp = get_group_iv_cost (data, group, cand);
6373 3002302 : if (cheaper_cost_pair (cp, best_cp))
6374 2005749 : best_cp = cp;
6375 : }
6376 :
6377 3281656 : if (best_cp == NULL)
6378 : {
6379 11897565 : EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6380 : {
6381 10549325 : cand = data->vcands[i];
6382 10549325 : cp = get_group_iv_cost (data, group, cand);
6383 10549325 : if (cheaper_cost_pair (cp, best_cp))
6384 2401895 : best_cp = cp;
6385 : }
6386 : }
6387 :
6388 3281656 : iv_ca_set_cp (data, ivs, group, best_cp);
6389 3281656 : }
6390 :
6391 : /* Get cost for assignment IVS. */
6392 :
6393 : static comp_cost
6394 86357554 : iv_ca_cost (class iv_ca *ivs)
6395 : {
6396 : /* This was a conditional expression but it triggered a bug in
6397 : Sun C 5.5. */
6398 0 : if (ivs->bad_groups)
6399 92712 : return infinite_cost;
6400 : else
6401 86264842 : return ivs->cost;
6402 : }
6403 :
6404 : /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6405 : than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6406 : respectively. */
6407 :
6408 : static int
6409 43313259 : iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6410 : struct iv_group *group, class cost_pair *old_cp,
6411 : class cost_pair *new_cp)
6412 : {
6413 43313259 : gcc_assert (old_cp && new_cp && old_cp != new_cp);
6414 43313259 : unsigned old_n_invs = ivs->n_invs;
6415 43313259 : iv_ca_set_cp (data, ivs, group, new_cp);
6416 43313259 : unsigned new_n_invs = ivs->n_invs;
6417 43313259 : iv_ca_set_cp (data, ivs, group, old_cp);
6418 :
6419 43313259 : return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6420 : }
6421 :
6422 : /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6423 : it before NEXT. */
6424 :
6425 : static struct iv_ca_delta *
6426 50921680 : iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6427 : class cost_pair *new_cp, struct iv_ca_delta *next)
6428 : {
6429 0 : struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6430 :
6431 50921680 : change->group = group;
6432 50921680 : change->old_cp = old_cp;
6433 50921680 : change->new_cp = new_cp;
6434 50921680 : change->next = next;
6435 :
6436 50921680 : return change;
6437 : }
6438 :
6439 : /* Joins two lists of changes L1 and L2. Destructive -- old lists
6440 : are rewritten. */
6441 :
6442 : static struct iv_ca_delta *
6443 8656359 : iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6444 : {
6445 8656359 : struct iv_ca_delta *last;
6446 :
6447 0 : if (!l2)
6448 : return l1;
6449 :
6450 0 : if (!l1)
6451 : return l2;
6452 :
6453 3779604 : for (last = l1; last->next; last = last->next)
6454 1266368 : continue;
6455 2513236 : last->next = l2;
6456 :
6457 2513236 : return l1;
6458 1266368 : }
6459 :
6460 : /* Reverse the list of changes DELTA, forming the inverse to it. */
6461 :
6462 : static struct iv_ca_delta *
6463 0 : iv_ca_delta_reverse (struct iv_ca_delta *delta)
6464 : {
6465 0 : struct iv_ca_delta *act, *next, *prev = NULL;
6466 :
6467 171220778 : for (act = delta; act; act = next)
6468 : {
6469 97002358 : next = act->next;
6470 97002358 : act->next = prev;
6471 97002358 : prev = act;
6472 :
6473 97002358 : std::swap (act->old_cp, act->new_cp);
6474 : }
6475 :
6476 0 : return prev;
6477 : }
6478 :
6479 : /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6480 : reverted instead. */
6481 :
6482 : static void
6483 77983031 : iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6484 : struct iv_ca_delta *delta, bool forward)
6485 : {
6486 77983031 : class cost_pair *from, *to;
6487 77983031 : struct iv_ca_delta *act;
6488 :
6489 77983031 : if (!forward)
6490 77983031 : delta = iv_ca_delta_reverse (delta);
6491 :
6492 179871103 : for (act = delta; act; act = act->next)
6493 : {
6494 101888072 : from = act->old_cp;
6495 101888072 : to = act->new_cp;
6496 101888072 : gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6497 101888072 : iv_ca_set_cp (data, ivs, act->group, to);
6498 : }
6499 :
6500 77983031 : if (!forward)
6501 77983031 : iv_ca_delta_reverse (delta);
6502 77983031 : }
6503 :
6504 : /* Returns true if CAND is used in IVS. */
6505 :
6506 : static bool
6507 30256798 : iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6508 : {
6509 30256798 : return ivs->n_cand_uses[cand->id] > 0;
6510 : }
6511 :
6512 : /* Returns number of induction variable candidates in the set IVS. */
6513 :
6514 : static unsigned
6515 13616938 : iv_ca_n_cands (class iv_ca *ivs)
6516 : {
6517 13616938 : return ivs->n_cands;
6518 : }
6519 :
6520 : /* Free the list of changes DELTA. */
6521 :
6522 : static void
6523 45469126 : iv_ca_delta_free (struct iv_ca_delta **delta)
6524 : {
6525 45469126 : struct iv_ca_delta *act, *next;
6526 :
6527 96390806 : for (act = *delta; act; act = next)
6528 : {
6529 50921680 : next = act->next;
6530 50921680 : free (act);
6531 : }
6532 :
6533 45469126 : *delta = NULL;
6534 45469126 : }
6535 :
6536 : /* Allocates new iv candidates assignment. */
6537 :
6538 : static class iv_ca *
6539 1001246 : iv_ca_new (struct ivopts_data *data)
6540 : {
6541 1001246 : class iv_ca *nw = XNEW (class iv_ca);
6542 :
6543 1001246 : nw->upto = 0;
6544 1001246 : nw->bad_groups = 0;
6545 2002492 : nw->cand_for_group = XCNEWVEC (class cost_pair *,
6546 : data->vgroups.length ());
6547 2002492 : nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6548 1001246 : nw->cands = BITMAP_ALLOC (NULL);
6549 1001246 : nw->n_cands = 0;
6550 1001246 : nw->n_invs = 0;
6551 1001246 : nw->cand_use_cost = no_cost;
6552 1001246 : nw->cand_cost = 0;
6553 1001246 : nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6554 1001246 : nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6555 1001246 : nw->cost = no_cost;
6556 :
6557 1001246 : return nw;
6558 : }
6559 :
6560 : /* Free memory occupied by the set IVS. */
6561 :
6562 : static void
6563 1001246 : iv_ca_free (class iv_ca **ivs)
6564 : {
6565 1001246 : free ((*ivs)->cand_for_group);
6566 1001246 : free ((*ivs)->n_cand_uses);
6567 1001246 : BITMAP_FREE ((*ivs)->cands);
6568 1001246 : free ((*ivs)->n_inv_var_uses);
6569 1001246 : free ((*ivs)->n_inv_expr_uses);
6570 1001246 : free (*ivs);
6571 1001246 : *ivs = NULL;
6572 1001246 : }
6573 :
/* Dumps IVS to FILE.  The dump consists of the cost of the set (as returned
   by iv_ca_cost), the estimated register pressure cost, the candidate and
   group costs, the bitmap of used candidates, the cost pair chosen for each
   of the first IVS->upto groups, and the ids of the invariant variables and
   invariant expressions that have a nonzero use count in IVS.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
{
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
           cost.complexity);
  fprintf (file, " reg_cost: %d\n",
           ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
  fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
           "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
           ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, " candidates: ","\n");

  /* One line per group added to IVS so far; "??" marks a group that is not
     expressed by any candidate.  */
  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_group *group = data->vgroups[i];
      class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
      if (cp)
        fprintf (file, " group:%d --> iv_cand:%d, cost=("
                 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
                 cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, " group:%d --> ??\n", group->id);
    }

  /* PREF is the separator printed before each id: empty for the first one,
     ", " afterwards.  The scan starts at id 1.  */
  const char *pref = "";
  fprintf (file, " invariant variables: ");
  for (i = 1; i <= data->max_inv_var_id; i++)
    if (ivs->n_inv_var_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }

  /* Same listing for the invariant expressions.  */
  pref = "";
  fprintf (file, "\n invariant expressions: ");
  for (i = 1; i <= data->max_inv_expr_id; i++)
    if (ivs->n_inv_expr_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }

  fprintf (file, "\n\n");
}
6623 :
6624 : /* Try changing candidate in IVS to CAND for each use. Return cost of the
6625 : new set, and store differences in DELTA. Number of induction variables
6626 : in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6627 : the function will try to find a solution with minimal iv candidates. */
6628 :
6629 : static comp_cost
6630 22968837 : iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6631 : struct iv_cand *cand, struct iv_ca_delta **delta,
6632 : unsigned *n_ivs, bool min_ncand)
6633 : {
6634 22968837 : unsigned i;
6635 22968837 : comp_cost cost;
6636 22968837 : struct iv_group *group;
6637 22968837 : class cost_pair *old_cp, *new_cp;
6638 :
6639 22968837 : *delta = NULL;
6640 135573196 : for (i = 0; i < ivs->upto; i++)
6641 : {
6642 112604359 : group = data->vgroups[i];
6643 112604359 : old_cp = iv_ca_cand_for_group (ivs, group);
6644 :
6645 112604359 : if (old_cp
6646 112604359 : && old_cp->cand == cand)
6647 9351899 : continue;
6648 :
6649 103252460 : new_cp = get_group_iv_cost (data, group, cand);
6650 103252460 : if (!new_cp)
6651 43195317 : continue;
6652 :
6653 60057143 : if (!min_ncand)
6654 : {
6655 43313259 : int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6656 : /* Skip if new_cp depends on more invariants. */
6657 43313259 : if (cmp_invs > 0)
6658 10456261 : continue;
6659 :
6660 32856998 : int cmp_cost = compare_cost_pair (new_cp, old_cp);
6661 : /* Skip if new_cp is not cheaper. */
6662 32856998 : if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6663 25537663 : continue;
6664 : }
6665 :
6666 24063219 : *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6667 : }
6668 :
6669 22968837 : iv_ca_delta_commit (data, ivs, *delta, true);
6670 22968837 : cost = iv_ca_cost (ivs);
6671 22968837 : if (n_ivs)
6672 13616938 : *n_ivs = iv_ca_n_cands (ivs);
6673 22968837 : iv_ca_delta_commit (data, ivs, *delta, false);
6674 :
6675 22968837 : return cost;
6676 : }
6677 :
6678 : /* Try narrowing set IVS by removing CAND. Return the cost of
6679 : the new set and store the differences in DELTA. START is
6680 : the candidate with which we start narrowing. */
6681 :
6682 : static comp_cost
6683 15856857 : iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6684 : struct iv_cand *cand, struct iv_cand *start,
6685 : struct iv_ca_delta **delta)
6686 : {
6687 15856857 : unsigned i, ci;
6688 15856857 : struct iv_group *group;
6689 15856857 : class cost_pair *old_cp, *new_cp, *cp;
6690 15856857 : bitmap_iterator bi;
6691 15856857 : struct iv_cand *cnd;
6692 15856857 : comp_cost cost, best_cost, acost;
6693 :
6694 15856857 : *delta = NULL;
6695 85618472 : for (i = 0; i < data->vgroups.length (); i++)
6696 : {
6697 80134458 : group = data->vgroups[i];
6698 :
6699 80134458 : old_cp = iv_ca_cand_for_group (ivs, group);
6700 80134458 : if (old_cp->cand != cand)
6701 56947972 : continue;
6702 :
6703 23186486 : best_cost = iv_ca_cost (ivs);
6704 : /* Start narrowing with START. */
6705 23186486 : new_cp = get_group_iv_cost (data, group, start);
6706 :
6707 23186486 : if (data->consider_all_candidates)
6708 : {
6709 92026000 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6710 : {
6711 70770488 : if (ci == cand->id || (start && ci == start->id))
6712 36817936 : continue;
6713 :
6714 33952552 : cnd = data->vcands[ci];
6715 :
6716 33952552 : cp = get_group_iv_cost (data, group, cnd);
6717 33952552 : if (!cp)
6718 19428759 : continue;
6719 :
6720 14523793 : iv_ca_set_cp (data, ivs, group, cp);
6721 14523793 : acost = iv_ca_cost (ivs);
6722 :
6723 14523793 : if (acost < best_cost)
6724 : {
6725 1969380 : best_cost = acost;
6726 1969380 : new_cp = cp;
6727 : }
6728 : }
6729 : }
6730 : else
6731 : {
6732 7111563 : EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6733 : {
6734 5180589 : if (ci == cand->id || (start && ci == start->id))
6735 2918685 : continue;
6736 :
6737 2261904 : cnd = data->vcands[ci];
6738 :
6739 2261904 : cp = get_group_iv_cost (data, group, cnd);
6740 2261904 : if (!cp)
6741 0 : continue;
6742 :
6743 2261904 : iv_ca_set_cp (data, ivs, group, cp);
6744 2261904 : acost = iv_ca_cost (ivs);
6745 :
6746 2261904 : if (acost < best_cost)
6747 : {
6748 66819 : best_cost = acost;
6749 66819 : new_cp = cp;
6750 : }
6751 : }
6752 : }
6753 : /* Restore to old cp for use. */
6754 23186486 : iv_ca_set_cp (data, ivs, group, old_cp);
6755 :
6756 23186486 : if (!new_cp)
6757 : {
6758 10372843 : iv_ca_delta_free (delta);
6759 10372843 : return infinite_cost;
6760 : }
6761 :
6762 12813643 : *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6763 : }
6764 :
6765 5484014 : iv_ca_delta_commit (data, ivs, *delta, true);
6766 5484014 : cost = iv_ca_cost (ivs);
6767 5484014 : iv_ca_delta_commit (data, ivs, *delta, false);
6768 :
6769 5484014 : return cost;
6770 : }
6771 :
6772 : /* Try optimizing the set of candidates IVS by removing candidates different
6773 : from to EXCEPT_CAND from it. Return cost of the new set, and store
6774 : differences in DELTA. */
6775 :
6776 : static comp_cost
6777 9684913 : iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6778 : struct iv_cand *except_cand, struct iv_ca_delta **delta)
6779 : {
6780 9684913 : bitmap_iterator bi;
6781 9684913 : struct iv_ca_delta *act_delta, *best_delta;
6782 9684913 : unsigned i;
6783 9684913 : comp_cost best_cost, acost;
6784 9684913 : struct iv_cand *cand;
6785 :
6786 9684913 : best_delta = NULL;
6787 9684913 : best_cost = iv_ca_cost (ivs);
6788 :
6789 32492898 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6790 : {
6791 22807985 : cand = data->vcands[i];
6792 :
6793 22807985 : if (cand == except_cand)
6794 6951128 : continue;
6795 :
6796 15856857 : acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6797 :
6798 15856857 : if (acost < best_cost)
6799 : {
6800 2698182 : best_cost = acost;
6801 2698182 : iv_ca_delta_free (&best_delta);
6802 2698182 : best_delta = act_delta;
6803 : }
6804 : else
6805 13158675 : iv_ca_delta_free (&act_delta);
6806 : }
6807 :
6808 9684913 : if (!best_delta)
6809 : {
6810 7169754 : *delta = NULL;
6811 7169754 : return best_cost;
6812 : }
6813 :
6814 : /* Recurse to possibly remove other unnecessary ivs. */
6815 2515159 : iv_ca_delta_commit (data, ivs, best_delta, true);
6816 2515159 : best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6817 2515159 : iv_ca_delta_commit (data, ivs, best_delta, false);
6818 2515159 : *delta = iv_ca_delta_join (best_delta, *delta);
6819 2515159 : return best_cost;
6820 : }
6821 :
6822 : /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6823 : cheaper local cost for GROUP than BEST_CP. Return pointer to
6824 : the corresponding cost_pair, otherwise just return BEST_CP. */
6825 :
6826 : static class cost_pair*
6827 31534523 : cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6828 : unsigned int cand_idx, struct iv_cand *old_cand,
6829 : class cost_pair *best_cp)
6830 : {
6831 31534523 : struct iv_cand *cand;
6832 31534523 : class cost_pair *cp;
6833 :
6834 31534523 : gcc_assert (old_cand != NULL && best_cp != NULL);
6835 31534523 : if (cand_idx == old_cand->id)
6836 : return best_cp;
6837 :
6838 28710779 : cand = data->vcands[cand_idx];
6839 28710779 : cp = get_group_iv_cost (data, group, cand);
6840 28710779 : if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6841 : return cp;
6842 :
6843 : return best_cp;
6844 : }
6845 :
/* Try breaking local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv group (and by at most
   ALWAYS_PRUNE_CAND_SET_BOUND of them).  For each of those candidates,
   this function tries to represent the groups under that candidate using
   other ones with lower local cost, then tries to prune the new set.
   If the new set has lower cost, it returns the new cost after recording
   the candidate replacement in list DELTA.  Otherwise the original cost of
   IVS is returned and *DELTA is NULL.  IVS itself is unchanged on return.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
               struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  class cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      /* Skip candidates used by exactly one group and candidates used by
         more groups than ALWAYS_PRUNE_CAND_SET_BOUND.  */
      if (ivs->n_cand_uses[i] == 1
          || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
        continue;

      cand = data->vcands[i];

      act_delta = NULL;
      /* Represent uses under current candidate using other ones with
         lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
        {
          struct iv_group *group = data->vgroups[j];
          old_cp = iv_ca_cand_for_group (ivs, group);

          if (old_cp->cand != cand)
            continue;

          /* Search for the locally cheapest cost pair of GROUP among all
             candidates, or only among the related candidates of GROUP when
             not all candidates are being considered.  */
          best_cp = old_cp;
          if (data->consider_all_candidates)
            for (k = 0; k < data->vcands.length (); k++)
              best_cp = cheaper_cost_with_cand (data, group, k,
                                                old_cp->cand, best_cp);
          else
            EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
              best_cp = cheaper_cost_with_cand (data, group, k,
                                                old_cp->cand, best_cp);

          /* Nothing locally cheaper than the current pair.  */
          if (best_cp == old_cp)
            continue;

          act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
        }
      /* No need for further prune.  */
      if (!act_delta)
        continue;

      /* Prune the new candidate set.  The replacement is committed only for
         the duration of the pruning and reverted afterwards; the changes
         found by pruning are appended to the replacement.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      /* The first replacement that lowers the overall cost is taken.  */
      if (acost < orig_cost)
        {
          *delta = act_delta;
          return acost;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
6922 :
/* Tries to extend the sets IVS in the best possible way in order to
   express the GROUP.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  Returns true if the resulting cost of IVS
   is not infinite, i.e. if a way to express GROUP was found.  */

static bool
try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
                  struct iv_group *group, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  class cost_pair *cp;

  /* Start from the default choice made by iv_ca_add_group.  Remember it as
     the best change so far and detach GROUP again, so that the alternatives
     below are all evaluated from the same state.  */
  iv_ca_add_group (data, ivs, group);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_group (ivs, group);
  if (cp)
    {
      best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, group);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
    {
      cand = data->vcands[i];

      /* With ORIGINALP only candidates at IP_ORIGINAL are tried here.  */
      if (originalp && cand->pos !=IP_ORIGINAL)
        continue;

      /* Without ORIGINALP candidates whose iv has a base object are
         skipped here.  */
      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      /* Candidates already in IVS were considered by iv_ca_add_group.  */
      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_group_iv_cost (data, group, cand);
      if (!cp)
        continue;

      /* Tentatively express GROUP by CAND, measure the set obtained by
         switching the other groups to CAND as well, then undo.  The change
         for GROUP itself is put in front of the changes from
         iv_ca_extend.  */
      iv_ca_set_cp (data, ivs, group, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, group);
      act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);

      if (act_cost < best_cost)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  /* Nothing finite so far: fall back to every candidate that has an entry
     in the cost map of GROUP.  */
  if (best_cost.infinite_cost_p ())
    {
      for (i = 0; i < group->n_map_members; i++)
        {
          cp = group->cost_map + i;
          cand = cp->cand;
          /* Cost map entry without a candidate.  */
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          /* Same tentative evaluation as in the loop above.  GROUP has just
             been detached by iv_ca_set_no_cp when the change is recorded,
             so iv_ca_cand_for_group yields NULL as the old pair.  */
          act_delta = NULL;
          iv_ca_set_cp (data, ivs, group, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, group);
          act_delta = iv_ca_delta_add (group,
                                       iv_ca_cand_for_group (ivs, group),
                                       cp, act_delta);

          if (act_cost < best_cost)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  /* Make the best change found permanent; BEST_DELTA may be NULL, in which
     case GROUP stays unexpressed.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !best_cost.infinite_cost_p ();
}
7038 :
7039 : /* Finds an initial assignment of candidates to uses. */
7040 :
7041 : static class iv_ca *
7042 1001246 : get_initial_solution (struct ivopts_data *data, bool originalp)
7043 : {
7044 1001246 : unsigned i;
7045 1001246 : class iv_ca *ivs = iv_ca_new (data);
7046 :
7047 4281604 : for (i = 0; i < data->vgroups.length (); i++)
7048 3281656 : if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7049 : {
7050 1298 : iv_ca_free (&ivs);
7051 1298 : return NULL;
7052 : }
7053 :
7054 : return ivs;
7055 : }
7056 :
7057 : /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
7058 : points to a bool variable, this function tries to break local
7059 : optimal fixed-point by replacing candidates in IVS if it's true. */
7060 :
7061 : static bool
7062 1482903 : try_improve_iv_set (struct ivopts_data *data,
7063 : class iv_ca *ivs, bool *try_replace_p)
7064 : {
7065 1482903 : unsigned i, n_ivs;
7066 1482903 : comp_cost acost, best_cost = iv_ca_cost (ivs);
7067 1482903 : struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7068 1482903 : struct iv_cand *cand;
7069 :
7070 : /* Try extending the set of induction variables by one. */
7071 17321355 : for (i = 0; i < data->vcands.length (); i++)
7072 : {
7073 15838452 : cand = data->vcands[i];
7074 :
7075 15838452 : if (iv_ca_cand_used_p (ivs, cand))
7076 2221514 : continue;
7077 :
7078 13616938 : acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7079 13616938 : if (!act_delta)
7080 8185687 : continue;
7081 :
7082 : /* If we successfully added the candidate and the set is small enough,
7083 : try optimizing it by removing other candidates. */
7084 5431251 : if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7085 : {
7086 5356923 : iv_ca_delta_commit (data, ivs, act_delta, true);
7087 5356923 : acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7088 5356923 : iv_ca_delta_commit (data, ivs, act_delta, false);
7089 5356923 : act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7090 : }
7091 :
7092 5431251 : if (acost < best_cost)
7093 : {
7094 587623 : best_cost = acost;
7095 587623 : iv_ca_delta_free (&best_delta);
7096 587623 : best_delta = act_delta;
7097 : }
7098 : else
7099 4843628 : iv_ca_delta_free (&act_delta);
7100 : }
7101 :
7102 1482903 : if (!best_delta)
7103 : {
7104 : /* Try removing the candidates from the set instead. */
7105 1028554 : best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7106 :
7107 1028554 : if (!best_delta && *try_replace_p)
7108 : {
7109 999948 : *try_replace_p = false;
7110 : /* So far candidate selecting algorithm tends to choose fewer IVs
7111 : so that it can handle cases in which loops have many variables
7112 : but the best choice is often to use only one general biv. One
7113 : weakness is it can't handle opposite cases, in which different
7114 : candidates should be chosen with respect to each use. To solve
7115 : the problem, we replace candidates in a manner described by the
7116 : comments of iv_ca_replace, thus give general algorithm a chance
7117 : to break local optimal fixed-point in these cases. */
7118 999948 : best_cost = iv_ca_replace (data, ivs, &best_delta);
7119 : }
7120 :
7121 1028554 : if (!best_delta)
7122 : return false;
7123 : }
7124 :
7125 482955 : iv_ca_delta_commit (data, ivs, best_delta, true);
7126 482955 : iv_ca_delta_free (&best_delta);
7127 965910 : return best_cost == iv_ca_cost (ivs);
7128 : }
7129 :
7130 : /* Attempts to find the optimal set of induction variables. We do simple
7131 : greedy heuristic -- we try to replace at most one candidate in the selected
7132 : solution and remove the unused ivs while this improves the cost. */
7133 :
7134 : static class iv_ca *
7135 1001246 : find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7136 : {
7137 1001246 : class iv_ca *set;
7138 1001246 : bool try_replace_p = true;
7139 :
7140 : /* Get the initial solution. */
7141 1001246 : set = get_initial_solution (data, originalp);
7142 1001246 : if (!set)
7143 : {
7144 1298 : if (dump_file && (dump_flags & TDF_DETAILS))
7145 0 : fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7146 1298 : return NULL;
7147 : }
7148 :
7149 999948 : if (dump_file && (dump_flags & TDF_DETAILS))
7150 : {
7151 134 : fprintf (dump_file, "Initial set of candidates:\n");
7152 134 : iv_ca_dump (data, dump_file, set);
7153 : }
7154 :
7155 1482903 : while (try_improve_iv_set (data, set, &try_replace_p))
7156 : {
7157 482955 : if (dump_file && (dump_flags & TDF_DETAILS))
7158 : {
7159 115 : fprintf (dump_file, "Improved to:\n");
7160 115 : iv_ca_dump (data, dump_file, set);
7161 : }
7162 : }
7163 :
7164 : /* If the set has infinite_cost, it can't be optimal. */
7165 1999896 : if (iv_ca_cost (set).infinite_cost_p ())
7166 : {
7167 0 : if (dump_file && (dump_flags & TDF_DETAILS))
7168 0 : fprintf (dump_file,
7169 : "Overflow to infinite cost in try_improve_iv_set.\n");
7170 0 : iv_ca_free (&set);
7171 : }
7172 999948 : return set;
7173 : }
7174 :
7175 : static class iv_ca *
7176 500623 : find_optimal_iv_set (struct ivopts_data *data)
7177 : {
7178 500623 : unsigned i;
7179 500623 : comp_cost cost, origcost;
7180 500623 : class iv_ca *set, *origset;
7181 :
7182 : /* Determine the cost based on a strategy that starts with original IVs,
7183 : and try again using a strategy that prefers candidates not based
7184 : on any IVs. */
7185 500623 : origset = find_optimal_iv_set_1 (data, true);
7186 500623 : set = find_optimal_iv_set_1 (data, false);
7187 :
7188 500623 : if (!origset && !set)
7189 : return NULL;
7190 :
7191 499974 : origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7192 499974 : cost = set ? iv_ca_cost (set) : infinite_cost;
7193 :
7194 499974 : if (dump_file && (dump_flags & TDF_DETAILS))
7195 : {
7196 67 : fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7197 : origcost.cost, origcost.complexity);
7198 67 : fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7199 : cost.cost, cost.complexity);
7200 : }
7201 :
7202 : /* Choose the one with the best cost. */
7203 499974 : if (origcost <= cost)
7204 : {
7205 465965 : if (set)
7206 465965 : iv_ca_free (&set);
7207 465965 : set = origset;
7208 : }
7209 34009 : else if (origset)
7210 34009 : iv_ca_free (&origset);
7211 :
7212 2139486 : for (i = 0; i < data->vgroups.length (); i++)
7213 : {
7214 1639512 : struct iv_group *group = data->vgroups[i];
7215 1639512 : group->selected = iv_ca_cand_for_group (set, group)->cand;
7216 : }
7217 :
7218 499974 : return set;
7219 : }
7220 :
7221 : /* Creates a new induction variable corresponding to CAND. */
7222 :
7223 : static void
7224 673334 : create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7225 : {
7226 673334 : gimple_stmt_iterator incr_pos;
7227 673334 : tree base;
7228 673334 : struct iv_use *use;
7229 673334 : struct iv_group *group;
7230 673334 : bool after = false;
7231 :
7232 673334 : gcc_assert (cand->iv != NULL);
7233 :
7234 673334 : switch (cand->pos)
7235 : {
7236 465041 : case IP_NORMAL:
7237 465041 : incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7238 465041 : break;
7239 :
7240 10430 : case IP_END:
7241 10430 : incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7242 10430 : after = true;
7243 10430 : gcc_assert (gsi_end_p (incr_pos) || !stmt_ends_bb_p (*incr_pos));
7244 : break;
7245 :
7246 0 : case IP_AFTER_USE:
7247 0 : after = true;
7248 : /* fall through */
7249 0 : case IP_BEFORE_USE:
7250 0 : incr_pos = gsi_for_stmt (cand->incremented_at);
7251 0 : break;
7252 :
7253 197863 : case IP_ORIGINAL:
7254 : /* Mark that the iv is preserved. */
7255 197863 : name_info (data, cand->var_before)->preserve_biv = true;
7256 197863 : name_info (data, cand->var_after)->preserve_biv = true;
7257 :
7258 : /* Rewrite the increment so that it uses var_before directly. */
7259 197863 : use = find_interesting_uses_op (data, cand->var_after);
7260 197863 : group = data->vgroups[use->group_id];
7261 197863 : group->selected = cand;
7262 197863 : return;
7263 : }
7264 :
7265 475471 : gimple_add_tmp_var (cand->var_before);
7266 :
7267 475471 : base = unshare_expr (cand->iv->base);
7268 :
7269 : /* The step computation could invoke UB when the loop does not iterate.
7270 : Avoid inserting it on the preheader in its native form but rewrite
7271 : it to a well-defined form. This also helps masking SCEV issues
7272 : which freely re-associates the IV computations when building up
7273 : CHRECs without much regard for signed overflow invoking UB. */
7274 475471 : gimple_seq stmts = NULL;
7275 475471 : tree step = force_gimple_operand (unshare_expr (cand->iv->step), &stmts,
7276 : true, NULL_TREE);
7277 475471 : if (stmts)
7278 : {
7279 145070 : for (auto gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
7280 96193 : if (gimple_needing_rewrite_undefined (gsi_stmt (gsi)))
7281 10682 : rewrite_to_defined_unconditional (&gsi);
7282 48877 : gsi_insert_seq_on_edge_immediate
7283 48877 : (loop_preheader_edge (data->current_loop), stmts);
7284 : }
7285 :
7286 475471 : create_iv (base, PLUS_EXPR, step,
7287 : cand->var_before, data->current_loop,
7288 : &incr_pos, after, &cand->var_before, &cand->var_after);
7289 : }
7290 :
7291 : /* Creates new induction variables described in SET. */
7292 :
7293 : static void
7294 499974 : create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7295 : {
7296 499974 : unsigned i;
7297 499974 : struct iv_cand *cand;
7298 499974 : bitmap_iterator bi;
7299 :
7300 1173308 : EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7301 : {
7302 673334 : cand = data->vcands[i];
7303 673334 : create_new_iv (data, cand);
7304 : }
7305 :
7306 499974 : if (dump_file && (dump_flags & TDF_DETAILS))
7307 : {
7308 67 : fprintf (dump_file, "Selected IV set for loop %d",
7309 67 : data->current_loop->num);
7310 67 : if (data->loop_loc != UNKNOWN_LOCATION)
7311 65 : fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7312 130 : LOCATION_LINE (data->loop_loc));
7313 67 : fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " avg niters",
7314 : avg_loop_niter (data->current_loop));
7315 67 : fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7316 178 : EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7317 : {
7318 111 : cand = data->vcands[i];
7319 111 : dump_cand (dump_file, cand);
7320 : }
7321 67 : fprintf (dump_file, "\n");
7322 : }
7323 499974 : }
7324 :
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.

   USE->stmt must be a PHI node or an assignment.  Its value is recomputed
   from CAND and the statement is replaced: a PHI is removed and an
   assignment to its result is inserted after the labels of its block; an
   assignment gets a new right-hand side.  Nothing is changed when USE is
   the increment of an original biv that can be left alone, or when USE is
   a PHI whose result is flagged PRESERVE_BIV.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  gassign *ass;
  gimple_stmt_iterator bsi;
  tree comp, type = get_use_type (use), tgt;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree op = NULL_TREE;
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          /* OP becomes the operand that is not CAND->var_before; it stays
             NULL_TREE when neither operand is CAND->var_before.  */
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
        }

      if (op != NULL_TREE)
        {
          /* The increment may stay if the other operand is invariant in
             the loop ...  */
          if (expr_invariant_in_loop_p (data->current_loop, op))
            return;
          /* ... or is an SSA name recorded as an iv with zero step.  */
          if (TREE_CODE (op) == SSA_NAME)
            {
              struct iv *iv = get_iv (data, op);
              if (iv != NULL && integer_zerop (iv->step))
                return;
            }
        }
    }

  /* Determine the target of the rewritten value (TGT) and the position at
     which new statements are to be inserted (BSI).  */
  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  /* Express USE through CAND as two affine combinations.
     NOTE(review): going by the names, AFF_INV is presumably the
     loop-invariant part and AFF_VAR the part depending on the candidate
     -- confirm against get_computation_aff_1.  */
  aff_tree aff_inv, aff_var;
  if (!get_computation_aff_1 (data, use->stmt, use, cand, &aff_inv, &aff_var))
    gcc_unreachable ();

  unshare_aff_combination (&aff_inv);
  unshare_aff_combination (&aff_var);
  /* Prefer CSE opportunities over loop invariants: the constant offset is
     split off here and added last, so that iv uses differing only in
     their offsets can be CSEd.  */
  poly_widest_int offset = aff_inv.offset;
  aff_inv.offset = 0;

  /* STMT_LIST accumulates every statement produced while gimplifying the
     pieces; SEQ is the scratch sequence filled by each call.  */
  gimple_seq stmt_list = NULL, seq = NULL;
  tree comp_op1 = aff_combination_to_tree (&aff_inv);
  tree comp_op2 = aff_combination_to_tree (&aff_var);
  gcc_assert (comp_op1 && comp_op2);

  comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);
  comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);

  /* If one operand is a pointer, make it the first one so that the
     pointer-plus form below can be used.  */
  if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
    std::swap (comp_op1, comp_op2);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
    {
      /* Pointer arithmetic: add the other operand and then the constant
         offset, both converted to sizetype.  */
      comp = fold_build_pointer_plus (comp_op1,
                                      fold_convert (sizetype, comp_op2));
      comp = fold_build_pointer_plus (comp,
                                      wide_int_to_tree (sizetype, offset));
    }
  else
    {
      /* Integer arithmetic in the type of the first operand, again with
         the constant offset added last.  */
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
                          fold_convert (TREE_TYPE (comp_op1), comp_op2));
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
                          wide_int_to_tree (TREE_TYPE (comp_op1), offset));
    }

  /* Convert the result to the type of the use.  */
  comp = fold_convert (type, comp);
  comp = force_gimple_operand (comp, &seq, false, NULL);
  gimple_seq_add_seq (&stmt_list, seq);
  if (gimple_code (use->stmt) != GIMPLE_PHI
      /* We can't allow re-allocating the stmt as it might be pointed
         to still.  */
      && (get_gimple_rhs_num_ops (TREE_CODE (comp))
          >= gimple_num_ops (gsi_stmt (bsi))))
    {
      /* The new right-hand side would need at least as many operands as
         the existing statement has, so gimplify COMP once more with the
         third argument set.  */
      comp = force_gimple_operand (comp, &seq, true, NULL);
      gimple_seq_add_seq (&stmt_list, seq);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  /* Emit the helper statements, then replace the definition itself.  */
  gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      /* Replace the PHI by an ordinary assignment to its result.  */
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      /* Replace the right-hand side and re-read the statement from the
         iterator into USE->stmt.  */
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
7474 :
7475 : /* Performs a peephole optimization to reorder the iv update statement with
7476 : a mem ref to enable instruction combining in later phases. The mem ref uses
7477 : the iv value before the update, so the reordering transformation requires
7478 : adjustment of the offset. CAND is the selected IV_CAND.
7479 :
7480 : Example:
7481 :
7482 : t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7483 : iv2 = iv1 + 1;
7484 :
7485 : if (t < val) (1)
7486 : goto L;
7487 : goto Head;
7488 :
7489 :
7490 : directly propagating t over to (1) will introduce overlapping live range
7491 : thus increase register pressure. This peephole transform it into:
7492 :
7493 :
7494 : iv2 = iv1 + 1;
7495 : t = MEM_REF (base, iv2, 8, 8);
7496 : if (t < val)
7497 : goto L;
7498 : goto Head;
7499 : */
7500 :
7501 : static void
7502 855493 : adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7503 : {
7504 855493 : tree var_after;
7505 855493 : gimple *iv_update, *stmt;
7506 855493 : basic_block bb;
7507 855493 : gimple_stmt_iterator gsi, gsi_iv;
7508 :
7509 855493 : if (cand->pos != IP_NORMAL)
7510 853329 : return;
7511 :
7512 662922 : var_after = cand->var_after;
7513 662922 : iv_update = SSA_NAME_DEF_STMT (var_after);
7514 :
7515 662922 : bb = gimple_bb (iv_update);
7516 662922 : gsi = gsi_last_nondebug_bb (bb);
7517 662922 : stmt = gsi_stmt (gsi);
7518 :
7519 : /* Only handle conditional statement for now. */
7520 662922 : if (gimple_code (stmt) != GIMPLE_COND)
7521 : return;
7522 :
7523 662922 : gsi_prev_nondebug (&gsi);
7524 662922 : stmt = gsi_stmt (gsi);
7525 662922 : if (stmt != iv_update)
7526 : return;
7527 :
7528 537451 : gsi_prev_nondebug (&gsi);
7529 537451 : if (gsi_end_p (gsi))
7530 : return;
7531 :
7532 534299 : stmt = gsi_stmt (gsi);
7533 534299 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
7534 : return;
7535 :
7536 534138 : if (stmt != use->stmt)
7537 : return;
7538 :
7539 4964 : if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7540 : return;
7541 :
7542 2164 : if (dump_file && (dump_flags & TDF_DETAILS))
7543 : {
7544 0 : fprintf (dump_file, "Reordering \n");
7545 0 : print_gimple_stmt (dump_file, iv_update, 0);
7546 0 : print_gimple_stmt (dump_file, use->stmt, 0);
7547 0 : fprintf (dump_file, "\n");
7548 : }
7549 :
7550 2164 : gsi = gsi_for_stmt (use->stmt);
7551 2164 : gsi_iv = gsi_for_stmt (iv_update);
7552 2164 : gsi_move_before (&gsi_iv, &gsi);
7553 :
7554 2164 : cand->pos = IP_BEFORE_USE;
7555 2164 : cand->incremented_at = use->stmt;
7556 : }
7557 :
7558 : /* Return the alias pointer type that should be used for a MEM_REF
7559 : associated with USE, which has type USE_PTR_ADDRESS. */
7560 :
7561 : static tree
7562 799 : get_alias_ptr_type_for_ptr_address (iv_use *use)
7563 : {
7564 799 : gcall *call = as_a <gcall *> (use->stmt);
7565 799 : switch (gimple_call_internal_fn (call))
7566 : {
7567 799 : case IFN_MASK_LOAD:
7568 799 : case IFN_MASK_STORE:
7569 799 : case IFN_MASK_LOAD_LANES:
7570 799 : case IFN_MASK_STORE_LANES:
7571 799 : case IFN_MASK_LEN_LOAD_LANES:
7572 799 : case IFN_MASK_LEN_STORE_LANES:
7573 799 : case IFN_LEN_LOAD:
7574 799 : case IFN_LEN_STORE:
7575 799 : case IFN_MASK_LEN_LOAD:
7576 799 : case IFN_MASK_LEN_STORE:
7577 : /* The second argument contains the correct alias type. */
7578 799 : gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7579 799 : return TREE_TYPE (gimple_call_arg (call, 1));
7580 :
7581 0 : default:
7582 0 : gcc_unreachable ();
7583 : }
7584 : }
7585 :
7586 :
7587 : /* Rewrites USE (address that is an iv) using candidate CAND. */
7588 :
7589 : static void
7590 855493 : rewrite_use_address (struct ivopts_data *data,
7591 : struct iv_use *use, struct iv_cand *cand)
7592 : {
7593 855493 : aff_tree aff;
7594 855493 : bool ok;
7595 :
7596 855493 : adjust_iv_update_pos (cand, use);
7597 855493 : ok = get_computation_aff (data, use->stmt, use, cand, &aff);
7598 855493 : gcc_assert (ok);
7599 855493 : unshare_aff_combination (&aff);
7600 :
7601 : /* To avoid undefined overflow problems, all IV candidates use unsigned
7602 : integer types. The drawback is that this makes it impossible for
7603 : create_mem_ref to distinguish an IV that is based on a memory object
7604 : from one that represents simply an offset.
7605 :
7606 : To work around this problem, we pass a hint to create_mem_ref that
7607 : indicates which variable (if any) in aff is an IV based on a memory
7608 : object. Note that we only consider the candidate. If this is not
7609 : based on an object, the base of the reference is in some subexpression
7610 : of the use -- but these will use pointer types, so they are recognized
7611 : by the create_mem_ref heuristics anyway. */
7612 855493 : tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7613 855493 : tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7614 855493 : gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7615 855493 : tree type = use->mem_type;
7616 855493 : tree alias_ptr_type;
7617 855493 : if (use->type == USE_PTR_ADDRESS)
7618 799 : alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7619 : else
7620 : {
7621 854694 : gcc_assert (type == TREE_TYPE (*use->op_p));
7622 854694 : unsigned int align = get_object_alignment (*use->op_p);
7623 854694 : if (align != TYPE_ALIGN (type))
7624 34208 : type = build_aligned_type (type, align);
7625 854694 : alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7626 : }
7627 1710986 : tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7628 855493 : iv, base_hint, data->speed);
7629 :
7630 855493 : if (use->type == USE_PTR_ADDRESS)
7631 : {
7632 799 : ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7633 799 : ref = fold_convert (get_use_type (use), ref);
7634 799 : ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7635 : true, GSI_SAME_STMT);
7636 : }
7637 : else
7638 : {
7639 : /* When we end up confused enough and have no suitable base but
7640 : stuffed everything to index2 use a LEA for the address and
7641 : create a plain MEM_REF to avoid basing a memory reference
7642 : on address zero which create_mem_ref_raw does as fallback. */
7643 854694 : if (TREE_CODE (ref) == TARGET_MEM_REF
7644 854694 : && TMR_INDEX2 (ref) != NULL_TREE
7645 863615 : && integer_zerop (TREE_OPERAND (ref, 0)))
7646 : {
7647 26 : ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7648 26 : ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7649 : true, GSI_SAME_STMT);
7650 26 : ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7651 : }
7652 854694 : copy_ref_info (ref, *use->op_p);
7653 : }
7654 :
7655 855493 : *use->op_p = ref;
7656 855493 : }
7657 :
7658 : /* Rewrites USE (the condition such that one of the arguments is an iv) using
7659 : candidate CAND. */
7660 :
7661 : static void
7662 596866 : rewrite_use_compare (struct ivopts_data *data,
7663 : struct iv_use *use, struct iv_cand *cand)
7664 : {
7665 596866 : tree comp, op, bound;
7666 596866 : gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7667 596866 : enum tree_code compare;
7668 596866 : struct iv_group *group = data->vgroups[use->group_id];
7669 596866 : class cost_pair *cp = get_group_iv_cost (data, group, cand);
7670 :
7671 596866 : bound = cp->value;
7672 596866 : if (bound)
7673 : {
7674 392649 : tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7675 392649 : tree var_type = TREE_TYPE (var);
7676 392649 : gimple_seq stmts;
7677 :
7678 392649 : if (dump_file && (dump_flags & TDF_DETAILS))
7679 : {
7680 58 : fprintf (dump_file, "Replacing exit test: ");
7681 58 : print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7682 : }
7683 392649 : compare = cp->comp;
7684 392649 : bound = unshare_expr (fold_convert (var_type, bound));
7685 392649 : op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7686 392649 : if (stmts)
7687 182509 : gsi_insert_seq_on_edge_immediate (
7688 182509 : loop_preheader_edge (data->current_loop),
7689 : stmts);
7690 :
7691 392649 : gcond *cond_stmt = as_a <gcond *> (use->stmt);
7692 392649 : gimple_cond_set_lhs (cond_stmt, var);
7693 392649 : gimple_cond_set_code (cond_stmt, compare);
7694 392649 : gimple_cond_set_rhs (cond_stmt, op);
7695 392649 : return;
7696 : }
7697 :
7698 : /* The induction variable elimination failed; just express the original
7699 : giv. */
7700 204217 : comp = get_computation_at (data, use->stmt, use, cand);
7701 204217 : gcc_assert (comp != NULL_TREE);
7702 204217 : gcc_assert (use->op_p != NULL);
7703 204217 : *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7704 204217 : SSA_NAME_VAR (*use->op_p),
7705 : true, GSI_SAME_STMT);
7706 : }
7707 :
7708 : /* Rewrite the groups using the selected induction variables. */
7709 :
7710 : static void
7711 499974 : rewrite_groups (struct ivopts_data *data)
7712 : {
7713 499974 : unsigned i, j;
7714 :
7715 2297155 : for (i = 0; i < data->vgroups.length (); i++)
7716 : {
7717 1797181 : struct iv_group *group = data->vgroups[i];
7718 1797181 : struct iv_cand *cand = group->selected;
7719 :
7720 1797181 : gcc_assert (cand);
7721 :
7722 1797181 : if (group->type == USE_NONLINEAR_EXPR)
7723 : {
7724 1237202 : for (j = 0; j < group->vuses.length (); j++)
7725 : {
7726 618601 : rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7727 618601 : update_stmt (group->vuses[j]->stmt);
7728 : }
7729 : }
7730 1178580 : else if (address_p (group->type))
7731 : {
7732 1437207 : for (j = 0; j < group->vuses.length (); j++)
7733 : {
7734 855493 : rewrite_use_address (data, group->vuses[j], cand);
7735 855493 : update_stmt (group->vuses[j]->stmt);
7736 : }
7737 : }
7738 : else
7739 : {
7740 596866 : gcc_assert (group->type == USE_COMPARE);
7741 :
7742 2394047 : for (j = 0; j < group->vuses.length (); j++)
7743 : {
7744 596866 : rewrite_use_compare (data, group->vuses[j], cand);
7745 596866 : update_stmt (group->vuses[j]->stmt);
7746 : }
7747 : }
7748 : }
7749 499974 : }
7750 :
/* Removes the ivs that are not used after rewriting.

   The SSA version of every iv in DATA->relevant that has a nonzero step,
   is not an invariant, has no nonlinear use and is not a preserved biv is
   added to TOREMOVE; this function itself does not delete any statement.
   When debug bind statements may exist, debug binds that refer to such an
   iv are rewritten to an equivalent expression based on one of the
   selected candidates, either directly or through a debug temporary.  */

static void
remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
{
  unsigned j;
  bitmap_iterator bi;

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->nonlin_use
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          /* The remainder only repairs debug binds that use DEF.  The
             `continue' statements below that are not inside an inner loop
             proceed to the next bit of DATA->relevant.  */
          if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple *stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    break;
                }

              /* No debug bind refers to DEF; nothing to repair.  */
              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;
              tree comp = NULL_TREE;

              /* Build a zeroed use that carries only the iv, so that the
                 value of DEF can be expressed through a candidate.  */
              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              /* Look for the most suitable selected candidate; at most the
                 first 64 groups are examined.  */
              for (i = 0; i < data->vgroups.length () && i < 64; i++)
                {
                  cand = data->vgroups[i]->selected;
                  if (cand == best_cand)
                    continue;
                  /* Preference score: 4 for an equal step, plus 2 for a
                     base of the same machine mode, plus 1 for a constant
                     integer base.  */
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                    ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                    == TYPE_MODE (TREE_TYPE (info->iv->base))
                    ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                    ? 1 : 0;
                  /* Only a candidate that scores strictly better than the
                     current best (or the first one at all) is tried, and
                     it is accepted only if a debug computation exists.  */
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      tree this_comp
                        = get_debug_computation_at (data,
                                                    SSA_NAME_DEF_STMT (def),
                                                    &dummy_use, cand);
                      if (this_comp)
                        {
                          best_cand = cand;
                          best_pref = cand_pref;
                          comp = this_comp;
                        }
                    }
                }

              /* No candidate can express DEF; leave the debug binds
                 untouched.  */
              if (!best_cand)
                continue;

              comp = unshare_expr (comp);
              if (count > 1)
                {
                  /* DEF is used more than once or inside a larger
                     expression: bind the computation to a debug temporary
                     placed at DEF's definition and substitute the
                     temporary instead.  */
                  tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
                  /* FIXME: Is setting the mode really necessary? */
                  if (SSA_NAME_VAR (def))
                    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
                  else
                    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
                  gdebug *def_temp
                    = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  /* For a PHI definition the bind goes after the labels of
                     its block, otherwise right before the defining
                     statement.  */
                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              /* Replace DEF by COMP in every debug bind that uses it.  */
              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }
}
7883 :
7884 : /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7885 : for hash_map::traverse. */
7886 :
7887 : bool
7888 482672 : free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7889 : {
7890 482672 : if (value)
7891 : {
7892 443256 : value->~tree_niter_desc ();
7893 443256 : free (value);
7894 : }
7895 482672 : return true;
7896 : }
7897 :
7898 : /* Frees data allocated by the optimization of a single loop. */
7899 :
7900 : static void
7901 867129 : free_loop_data (struct ivopts_data *data)
7902 : {
7903 867129 : unsigned i, j;
7904 867129 : bitmap_iterator bi;
7905 867129 : tree obj;
7906 :
7907 867129 : if (data->niters)
7908 : {
7909 953231 : data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7910 941118 : delete data->niters;
7911 470559 : data->niters = NULL;
7912 : }
7913 :
7914 5933820 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7915 : {
7916 5066691 : struct version_info *info;
7917 :
7918 5066691 : info = ver_info (data, i);
7919 5066691 : info->iv = NULL;
7920 5066691 : info->has_nonlin_use = false;
7921 5066691 : info->preserve_biv = false;
7922 5066691 : info->inv_id = 0;
7923 : }
7924 867129 : bitmap_clear (data->relevant);
7925 867129 : bitmap_clear (data->important_candidates);
7926 :
7927 2666902 : for (i = 0; i < data->vgroups.length (); i++)
7928 : {
7929 1799773 : struct iv_group *group = data->vgroups[i];
7930 :
7931 3873364 : for (j = 0; j < group->vuses.length (); j++)
7932 2073591 : free (group->vuses[j]);
7933 1799773 : group->vuses.release ();
7934 :
7935 1799773 : BITMAP_FREE (group->related_cands);
7936 21181967 : for (j = 0; j < group->n_map_members; j++)
7937 : {
7938 19382194 : if (group->cost_map[j].inv_vars)
7939 4129799 : BITMAP_FREE (group->cost_map[j].inv_vars);
7940 19382194 : if (group->cost_map[j].inv_exprs)
7941 2237819 : BITMAP_FREE (group->cost_map[j].inv_exprs);
7942 : }
7943 :
7944 1799773 : free (group->cost_map);
7945 1799773 : free (group);
7946 : }
7947 867129 : data->vgroups.truncate (0);
7948 :
7949 5756652 : for (i = 0; i < data->vcands.length (); i++)
7950 : {
7951 4889523 : struct iv_cand *cand = data->vcands[i];
7952 :
7953 4889523 : if (cand->inv_vars)
7954 80077 : BITMAP_FREE (cand->inv_vars);
7955 4889523 : if (cand->inv_exprs)
7956 120981 : BITMAP_FREE (cand->inv_exprs);
7957 4889523 : free (cand);
7958 : }
7959 867129 : data->vcands.truncate (0);
7960 :
7961 867129 : if (data->version_info_size < num_ssa_names)
7962 : {
7963 139 : data->version_info_size = 2 * num_ssa_names;
7964 139 : free (data->version_info);
7965 139 : data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7966 : }
7967 :
7968 867129 : data->max_inv_var_id = 0;
7969 867129 : data->max_inv_expr_id = 0;
7970 :
7971 867129 : FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7972 0 : SET_DECL_RTL (obj, NULL_RTX);
7973 :
7974 867129 : decl_rtl_to_reset.truncate (0);
7975 :
7976 867129 : data->inv_expr_tab->empty ();
7977 :
7978 867129 : data->iv_common_cand_tab->empty ();
7979 867129 : data->iv_common_cands.truncate (0);
7980 867129 : }
7981 :
7982 : /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7983 : loop tree. */
7984 :
7985 : static void
7986 240894 : tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7987 : {
7988 240894 : free_loop_data (data);
7989 240894 : free (data->version_info);
7990 240894 : BITMAP_FREE (data->relevant);
7991 240894 : BITMAP_FREE (data->important_candidates);
7992 :
7993 240894 : decl_rtl_to_reset.release ();
7994 240894 : data->vgroups.release ();
7995 240894 : data->vcands.release ();
7996 240894 : delete data->inv_expr_tab;
7997 240894 : data->inv_expr_tab = NULL;
7998 240894 : free_affine_expand_cache (&data->name_expansion_cache);
7999 240894 : if (data->base_object_map)
8000 162892 : delete data->base_object_map;
8001 240894 : delete data->iv_common_cand_tab;
8002 240894 : data->iv_common_cand_tab = NULL;
8003 240894 : data->iv_common_cands.release ();
8004 240894 : obstack_free (&data->iv_obstack, NULL);
8005 240894 : }
8006 :
8007 : /* Returns true if the loop body BODY includes any function calls. */
8008 :
8009 : static bool
8010 626235 : loop_body_includes_call (basic_block *body, unsigned num_nodes)
8011 : {
8012 626235 : gimple_stmt_iterator gsi;
8013 626235 : unsigned i;
8014 :
8015 2834096 : for (i = 0; i < num_nodes; i++)
8016 23867277 : for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
8017 : {
8018 19244881 : gimple *stmt = gsi_stmt (gsi);
8019 19244881 : if (is_gimple_call (stmt)
8020 277487 : && !gimple_call_internal_p (stmt)
8021 19457716 : && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
8022 : return true;
8023 : }
8024 : return false;
8025 : }
8026 :
8027 : /* Determine cost scaling factor for basic blocks in loop. */
8028 : #define COST_SCALING_FACTOR_BOUND (20)
8029 :
8030 : static void
8031 500623 : determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8032 : {
8033 500623 : int lfreq = data->current_loop->header->count.to_frequency (cfun);
8034 500623 : if (!data->speed || lfreq <= 0)
8035 : return;
8036 :
8037 : int max_freq = lfreq;
8038 2854429 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8039 : {
8040 2440885 : body[i]->aux = (void *)(intptr_t) 1;
8041 2440885 : if (max_freq < body[i]->count.to_frequency (cfun))
8042 102718 : max_freq = body[i]->count.to_frequency (cfun);
8043 : }
8044 413544 : if (max_freq > lfreq)
8045 : {
8046 65536 : int divisor, factor;
8047 : /* Check if scaling factor itself needs to be scaled by the bound. This
8048 : is to avoid overflow when scaling cost according to profile info. */
8049 65536 : if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8050 : {
8051 : divisor = max_freq;
8052 : factor = COST_SCALING_FACTOR_BOUND;
8053 : }
8054 : else
8055 : {
8056 49676 : divisor = lfreq;
8057 49676 : factor = 1;
8058 : }
8059 993730 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8060 : {
8061 928194 : int bfreq = body[i]->count.to_frequency (cfun);
8062 928194 : if (bfreq <= lfreq)
8063 510863 : continue;
8064 :
8065 417331 : body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8066 : }
8067 : }
8068 : }
8069 :
8070 : /* Find doloop comparison use and set its doloop_p on if found. */
8071 :
8072 : static bool
8073 0 : find_doloop_use (struct ivopts_data *data)
8074 : {
8075 0 : struct loop *loop = data->current_loop;
8076 :
8077 0 : for (unsigned i = 0; i < data->vgroups.length (); i++)
8078 : {
8079 0 : struct iv_group *group = data->vgroups[i];
8080 0 : if (group->type == USE_COMPARE)
8081 : {
8082 0 : gcc_assert (group->vuses.length () == 1);
8083 0 : struct iv_use *use = group->vuses[0];
8084 0 : gimple *stmt = use->stmt;
8085 0 : if (gimple_code (stmt) == GIMPLE_COND)
8086 : {
8087 0 : basic_block bb = gimple_bb (stmt);
8088 0 : edge true_edge, false_edge;
8089 0 : extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8090 : /* This comparison is used for loop latch. Require latch is empty
8091 : for now. */
8092 0 : if ((loop->latch == true_edge->dest
8093 0 : || loop->latch == false_edge->dest)
8094 0 : && empty_block_p (loop->latch))
8095 : {
8096 0 : group->doloop_p = true;
8097 0 : if (dump_file && (dump_flags & TDF_DETAILS))
8098 : {
8099 0 : fprintf (dump_file, "Doloop cmp iv use: ");
8100 0 : print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8101 : }
8102 0 : return true;
8103 : }
8104 : }
8105 : }
8106 : }
8107 :
8108 : return false;
8109 : }
8110 :
8111 : /* For the targets which support doloop, to predict whether later RTL doloop
8112 : transformation will perform on this loop, further detect the doloop use and
8113 : mark the flag doloop_use_p if predicted. */
8114 :
8115 : void
8116 500623 : analyze_and_mark_doloop_use (struct ivopts_data *data)
8117 : {
8118 500623 : data->doloop_use_p = false;
8119 :
8120 500623 : if (!flag_branch_on_count_reg)
8121 : return;
8122 :
8123 500623 : if (data->current_loop->unroll == USHRT_MAX)
8124 : return;
8125 :
8126 500623 : if (!generic_predict_doloop_p (data))
8127 : return;
8128 :
8129 0 : if (find_doloop_use (data))
8130 : {
8131 0 : data->doloop_use_p = true;
8132 0 : if (dump_file && (dump_flags & TDF_DETAILS))
8133 : {
8134 0 : struct loop *loop = data->current_loop;
8135 0 : fprintf (dump_file,
8136 : "Predict loop %d can perform"
8137 : " doloop optimization later.\n",
8138 : loop->num);
8139 0 : flow_loop_dump (loop, dump_file, NULL, 1);
8140 : }
8141 : }
8142 : }
8143 :
8144 : /* Optimizes the LOOP. Returns true if anything changed. */
8145 :
8146 : static bool
8147 626235 : tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8148 : bitmap toremove)
8149 : {
8150 626235 : bool changed = false;
8151 626235 : class iv_ca *iv_ca;
8152 626235 : edge exit = single_dom_exit (loop);
8153 626235 : basic_block *body;
8154 :
8155 626235 : gcc_assert (!data->niters);
8156 626235 : data->current_loop = loop;
8157 626235 : data->loop_loc = find_loop_location (loop).get_location_t ();
8158 626235 : data->speed = optimize_loop_for_speed_p (loop);
8159 :
8160 626235 : if (dump_file && (dump_flags & TDF_DETAILS))
8161 : {
8162 67 : fprintf (dump_file, "Processing loop %d", loop->num);
8163 67 : if (data->loop_loc != UNKNOWN_LOCATION)
8164 65 : fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8165 130 : LOCATION_LINE (data->loop_loc));
8166 67 : fprintf (dump_file, "\n");
8167 :
8168 67 : if (exit)
8169 : {
8170 57 : fprintf (dump_file, " single exit %d -> %d, exit condition ",
8171 57 : exit->src->index, exit->dest->index);
8172 114 : print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
8173 : 0, TDF_SLIM);
8174 57 : fprintf (dump_file, "\n");
8175 : }
8176 :
8177 67 : fprintf (dump_file, "\n");
8178 : }
8179 :
8180 626235 : body = get_loop_body (loop);
8181 626235 : data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8182 626235 : renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8183 :
8184 626235 : data->loop_single_exit_p
8185 626235 : = exit != NULL && loop_only_exit_p (loop, body, exit);
8186 :
8187 : /* For each ssa name determines whether it behaves as an induction variable
8188 : in some loop. */
8189 626235 : if (!find_induction_variables (data, body))
8190 125611 : goto finish;
8191 :
8192 : /* Finds interesting uses (item 1). */
8193 500624 : find_interesting_uses (data, body);
8194 500624 : if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8195 1 : goto finish;
8196 :
8197 : /* Determine cost scaling factor for basic blocks in loop. */
8198 500623 : determine_scaling_factor (data, body);
8199 :
8200 : /* Analyze doloop possibility and mark the doloop use if predicted. */
8201 500623 : analyze_and_mark_doloop_use (data);
8202 :
8203 : /* Finds candidates for the induction variables (item 2). */
8204 500623 : find_iv_candidates (data);
8205 :
8206 : /* Calculates the costs (item 3, part 1). */
8207 500623 : determine_iv_costs (data);
8208 500623 : determine_group_iv_costs (data);
8209 500623 : determine_set_costs (data);
8210 :
8211 : /* Find the optimal set of induction variables (item 3, part 2). */
8212 500623 : iv_ca = find_optimal_iv_set (data);
8213 : /* Cleanup basic block aux field. */
8214 3301461 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8215 2800838 : body[i]->aux = NULL;
8216 500623 : if (!iv_ca)
8217 649 : goto finish;
8218 499974 : changed = true;
8219 :
8220 : /* Create the new induction variables (item 4, part 1). */
8221 499974 : create_new_ivs (data, iv_ca);
8222 499974 : iv_ca_free (&iv_ca);
8223 :
8224 : /* Rewrite the uses (item 4, part 2). */
8225 499974 : rewrite_groups (data);
8226 :
8227 : /* Remove the ivs that are unused after rewriting. */
8228 499974 : remove_unused_ivs (data, toremove);
8229 :
8230 626235 : finish:
8231 626235 : free (body);
8232 626235 : free_loop_data (data);
8233 :
8234 626235 : return changed;
8235 : }
8236 :
8237 : /* Main entry point. Optimizes induction variables in loops. */
8238 :
8239 : void
8240 240894 : tree_ssa_iv_optimize (void)
8241 : {
8242 240894 : struct ivopts_data data;
8243 240894 : auto_bitmap toremove;
8244 :
8245 240894 : tree_ssa_iv_optimize_init (&data);
8246 240894 : mark_ssa_maybe_undefs ();
8247 :
8248 : /* Optimize the loops starting with the innermost ones. */
8249 1348917 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8250 : {
8251 626235 : if (!dbg_cnt (ivopts_loop))
8252 0 : continue;
8253 :
8254 626235 : if (dump_file && (dump_flags & TDF_DETAILS))
8255 67 : flow_loop_dump (loop, dump_file, NULL, 1);
8256 :
8257 626235 : tree_ssa_iv_optimize_loop (&data, loop, toremove);
8258 240894 : }
8259 :
8260 : /* Remove eliminated IV defs. */
8261 240894 : release_defs_bitset (toremove);
8262 :
8263 : /* We have changed the structure of induction variables; it might happen
8264 : that definitions in the scev database refer to some of them that were
8265 : eliminated. */
8266 240894 : scev_reset_htab ();
8267 : /* Likewise niter and control-IV information. */
8268 240894 : free_numbers_of_iterations_estimates (cfun);
8269 :
8270 240894 : tree_ssa_iv_optimize_finalize (&data);
8271 240894 : }
8272 :
8273 : #include "gt-tree-ssa-loop-ivopts.h"
|