Line data Source code
1 : /* Induction variable optimizations.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* This pass tries to find the optimal set of induction variables for the loop.
21 : It optimizes just the basic linear induction variables (although adding
22 : support for other types should not be too hard). It includes the
23 : optimizations commonly known as strength reduction, induction variable
24 : coalescing and induction variable elimination. It does it in the
25 : following steps:
26 :
27 : 1) The interesting uses of induction variables are found. This includes
28 :
29 : -- uses of induction variables in non-linear expressions
30 : -- addresses of arrays
31 : -- comparisons of induction variables
32 :
33 : Note the interesting uses are categorized and handled in groups.
34 : Generally, address type uses are grouped together if their iv bases
35 : are different in constant offset.
36 :
37 : 2) Candidates for the induction variables are found. This includes
38 :
39 : -- old induction variables
40 : -- the variables defined by expressions derived from the "interesting
41 : groups/uses" above
42 :
43 : 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 : cost function assigns a cost to sets of induction variables and consists
45 : of three parts:
46 :
47 : -- The group/use costs. Each of the interesting groups/uses chooses
48 : the best induction variable in the set and adds its cost to the sum.
49 : The cost reflects the time spent on modifying the induction variables
50 : value to be usable for the given purpose (adding base and offset for
51 : arrays, etc.).
52 : -- The variable costs. Each of the variables has a cost assigned that
53 : reflects the costs associated with incrementing the value of the
54 : variable. The original variables are somewhat preferred.
55 : -- The set cost. Depending on the size of the set, extra cost may be
56 : added to reflect register pressure.
57 :
58 : All the costs are defined in a machine-specific way, using the target
59 : hooks and machine descriptions to determine them.
60 :
61 : 4) The trees are transformed to use the new variables, the dead code is
62 : removed.
63 :
64 : All of this is done loop by loop. Doing it globally is theoretically
65 : possible, it might give a better performance and it might enable us
66 : to decide costs more precisely, but getting all the interactions right
67 : would be complicated.
68 :
69 : For the targets supporting low-overhead loops, IVOPTs has to take care of
70 : the loops which will probably be transformed in RTL doloop optimization,
71 : to try to make selected IV candidate set optimal. The process of doloop
72 : support includes:
73 :
74 : 1) Analyze whether the current loop will be transformed to doloop, find and
75 : mark its compare type IV use as doloop use (iv_group field doloop_p), and
76 : set flag doloop_use_p of ivopts_data to notify subsequent processings on
77 : doloop. See analyze_and_mark_doloop_use and its callees for the details.
78 : The target hook predict_doloop_p can be used for target specific checks.
79 :
80 : 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 : set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
82 : like biv. For cost determination between doloop IV cand and IV use, the
83 : target hooks doloop_cost_for_generic and doloop_cost_for_address are
84 : provided to add on extra costs for generic type and address type IV use.
85 : Zero cost is assigned to the pair between doloop IV cand and doloop IV
86 : use, and bound zero is set for IV elimination.
87 :
88 : 3) With the cost setting in step 2), the current cost model based IV
89 : selection algorithm will process as usual, pick up doloop dedicated IV if
90 : profitable. */
91 :
92 : #include "config.h"
93 : #include "system.h"
94 : #include "coretypes.h"
95 : #include "backend.h"
96 : #include "rtl.h"
97 : #include "tree.h"
98 : #include "gimple.h"
99 : #include "cfghooks.h"
100 : #include "tree-pass.h"
101 : #include "memmodel.h"
102 : #include "tm_p.h"
103 : #include "ssa.h"
104 : #include "expmed.h"
105 : #include "insn-config.h"
106 : #include "emit-rtl.h"
107 : #include "recog.h"
108 : #include "cgraph.h"
109 : #include "gimple-pretty-print.h"
110 : #include "alias.h"
111 : #include "fold-const.h"
112 : #include "stor-layout.h"
113 : #include "tree-eh.h"
114 : #include "gimplify.h"
115 : #include "gimple-iterator.h"
116 : #include "gimplify-me.h"
117 : #include "tree-cfg.h"
118 : #include "tree-ssa-loop-ivopts.h"
119 : #include "tree-ssa-loop-manip.h"
120 : #include "tree-ssa-loop-niter.h"
121 : #include "tree-ssa-loop.h"
122 : #include "explow.h"
123 : #include "expr.h"
124 : #include "tree-dfa.h"
125 : #include "tree-ssa.h"
126 : #include "cfgloop.h"
127 : #include "tree-scalar-evolution.h"
128 : #include "tree-affine.h"
129 : #include "tree-ssa-propagate.h"
130 : #include "tree-ssa-address.h"
131 : #include "builtins.h"
132 : #include "tree-vectorizer.h"
133 : #include "dbgcnt.h"
134 : #include "cfganal.h"
135 : #include "gimple-fold.h"
136 :
137 : /* For lang_hooks.types.type_for_mode. */
138 : #include "langhooks.h"
139 :
140 : /* FIXME: Expressions are expanded to RTL in this pass to determine the
141 : cost of different addressing modes. This should be moved to a TBD
142 : interface between the GIMPLE and RTL worlds. */
143 :
/* Value of a cost that is treated as infinite; comp_cost::infinite_cost_p
   compares the cost field against it.  */
#define INFTY 1000000000
146 :
147 : /* Returns the expected number of loop iterations for LOOP.
148 : The average trip count is computed from profile data if it
149 : exists. */
150 :
151 : static inline unsigned HOST_WIDE_INT
152 8701038 : avg_loop_niter (class loop *loop)
153 : {
154 8701038 : HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
155 8701038 : if (niter == -1)
156 : {
157 4907193 : niter = likely_max_stmt_executions_int (loop);
158 :
159 4907193 : if (niter == -1 || niter > param_avg_loop_niter)
160 4126726 : return param_avg_loop_niter;
161 : }
162 :
163 4574312 : return niter;
164 : }
165 :
166 : struct iv_use;
167 :
168 : /* Representation of the induction variable. */
169 : struct iv
170 : {
171 : tree base; /* Initial value of the iv. */
172 : tree base_object; /* A memory object to that the induction variable points. */
173 : tree step; /* Step of the iv (constant only). */
174 : tree ssa_name; /* The ssa name with the value. */
175 : struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */
176 : bool biv_p; /* Is it a biv? */
177 : bool no_overflow; /* True if the iv doesn't overflow. */
178 : bool have_address_use;/* For biv, indicate if it's used in any address
179 : type use. */
180 : };
181 :
182 : /* Per-ssa version information (induction variable descriptions, etc.). */
183 : struct version_info
184 : {
185 : tree name; /* The ssa name. */
186 : struct iv *iv; /* Induction variable description. */
187 : bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
188 : an expression that is not an induction variable. */
189 : bool preserve_biv; /* For the original biv, whether to preserve it. */
190 : unsigned inv_id; /* Id of an invariant. */
191 : };
192 :
/* Kinds of induction variable uses.  */
enum use_type
{
  /* Use in a nonlinear expression.  */
  USE_NONLINEAR_EXPR,
  /* Use is an address for an explicit memory reference.  */
  USE_REF_ADDRESS,
  /* Use is a pointer argument to a function in cases where the expansion
     of the function will turn the argument into a normal address.  */
  USE_PTR_ADDRESS,
  /* Use is a compare.  */
  USE_COMPARE
};
204 :
205 : /* Cost of a computation. */
206 : class comp_cost
207 : {
208 : public:
209 130696512 : comp_cost (): cost (0), complexity (0), scratch (0)
210 : {}
211 :
212 25147179 : comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
213 15175970 : : cost (cost), complexity (complexity), scratch (scratch)
214 14385276 : {}
215 :
216 : /* Returns true if COST is infinite. */
217 : bool infinite_cost_p ();
218 :
219 : /* Adds costs COST1 and COST2. */
220 : friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221 :
222 : /* Adds COST to the comp_cost. */
223 : comp_cost operator+= (comp_cost cost);
224 :
225 : /* Adds constant C to this comp_cost. */
226 : comp_cost operator+= (HOST_WIDE_INT c);
227 :
228 : /* Subtracts constant C to this comp_cost. */
229 : comp_cost operator-= (HOST_WIDE_INT c);
230 :
231 : /* Divide the comp_cost by constant C. */
232 : comp_cost operator/= (HOST_WIDE_INT c);
233 :
234 : /* Multiply the comp_cost by constant C. */
235 : comp_cost operator*= (HOST_WIDE_INT c);
236 :
237 : /* Subtracts costs COST1 and COST2. */
238 : friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239 :
240 : /* Subtracts COST from this comp_cost. */
241 : comp_cost operator-= (comp_cost cost);
242 :
243 : /* Returns true if COST1 is smaller than COST2. */
244 : friend bool operator< (comp_cost cost1, comp_cost cost2);
245 :
246 : /* Returns true if COST1 and COST2 are equal. */
247 : friend bool operator== (comp_cost cost1, comp_cost cost2);
248 :
249 : /* Returns true if COST1 is smaller or equal than COST2. */
250 : friend bool operator<= (comp_cost cost1, comp_cost cost2);
251 :
252 : int64_t cost; /* The runtime cost. */
253 : unsigned complexity; /* The estimate of the complexity of the code for
254 : the computation (in no concrete units --
255 : complexity field should be larger for more
256 : complex expressions and addressing modes). */
257 : int64_t scratch; /* Scratch used during cost computation. */
258 : };
259 :
260 : static const comp_cost no_cost;
261 : static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 :
263 : bool
264 1825241915 : comp_cost::infinite_cost_p ()
265 : {
266 1825241915 : return cost == INFTY;
267 : }
268 :
269 : comp_cost
270 243878601 : operator+ (comp_cost cost1, comp_cost cost2)
271 : {
272 243878601 : if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
273 1881508 : return infinite_cost;
274 :
275 241997093 : gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
276 241997093 : cost1.cost += cost2.cost;
277 241997093 : cost1.complexity += cost2.complexity;
278 :
279 241997093 : return cost1;
280 : }
281 :
282 : comp_cost
283 208616457 : operator- (comp_cost cost1, comp_cost cost2)
284 : {
285 208616457 : if (cost1.infinite_cost_p ())
286 0 : return infinite_cost;
287 :
288 208616457 : gcc_assert (!cost2.infinite_cost_p ());
289 208616457 : gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 :
291 208616457 : cost1.cost -= cost2.cost;
292 208616457 : cost1.complexity -= cost2.complexity;
293 :
294 208616457 : return cost1;
295 : }
296 :
297 : comp_cost
298 243878601 : comp_cost::operator+= (comp_cost cost)
299 : {
300 243878601 : *this = *this + cost;
301 243878601 : return *this;
302 : }
303 :
304 : comp_cost
305 861920936 : comp_cost::operator+= (HOST_WIDE_INT c)
306 : {
307 861920936 : if (c >= INFTY)
308 0 : this->cost = INFTY;
309 :
310 861920936 : if (infinite_cost_p ())
311 0 : return *this;
312 :
313 861920936 : gcc_assert (this->cost + c < infinite_cost.cost);
314 861920936 : this->cost += c;
315 :
316 861920936 : return *this;
317 : }
318 :
319 : comp_cost
320 541775 : comp_cost::operator-= (HOST_WIDE_INT c)
321 : {
322 541775 : if (infinite_cost_p ())
323 0 : return *this;
324 :
325 541775 : gcc_assert (this->cost - c < infinite_cost.cost);
326 541775 : this->cost -= c;
327 :
328 541775 : return *this;
329 : }
330 :
331 : comp_cost
332 0 : comp_cost::operator/= (HOST_WIDE_INT c)
333 : {
334 0 : gcc_assert (c != 0);
335 0 : if (infinite_cost_p ())
336 0 : return *this;
337 :
338 0 : this->cost /= c;
339 :
340 0 : return *this;
341 : }
342 :
343 : comp_cost
344 0 : comp_cost::operator*= (HOST_WIDE_INT c)
345 : {
346 0 : if (infinite_cost_p ())
347 0 : return *this;
348 :
349 0 : gcc_assert (this->cost * c < infinite_cost.cost);
350 0 : this->cost *= c;
351 :
352 0 : return *this;
353 : }
354 :
355 : comp_cost
356 208616457 : comp_cost::operator-= (comp_cost cost)
357 : {
358 208616457 : *this = *this - cost;
359 208616457 : return *this;
360 : }
361 :
362 : bool
363 182334020 : operator< (comp_cost cost1, comp_cost cost2)
364 : {
365 182334020 : if (cost1.cost == cost2.cost)
366 80394850 : return cost1.complexity < cost2.complexity;
367 :
368 101939170 : return cost1.cost < cost2.cost;
369 : }
370 :
371 : bool
372 3934452 : operator== (comp_cost cost1, comp_cost cost2)
373 : {
374 3934452 : return cost1.cost == cost2.cost
375 3934452 : && cost1.complexity == cost2.complexity;
376 : }
377 :
378 : bool
379 6435184 : operator<= (comp_cost cost1, comp_cost cost2)
380 : {
381 6435184 : return cost1 < cost2 || cost1 == cost2;
382 : }
383 :
384 : struct iv_inv_expr_ent;
385 :
386 : /* The candidate - cost pair. */
387 : class cost_pair
388 : {
389 : public:
390 : struct iv_cand *cand; /* The candidate. */
391 : comp_cost cost; /* The cost. */
392 : enum tree_code comp; /* For iv elimination, the comparison. */
393 : bitmap inv_vars; /* The list of invariant ssa_vars that have to be
394 : preserved when representing iv_use with iv_cand. */
395 : bitmap inv_exprs; /* The list of newly created invariant expressions
396 : when representing iv_use with iv_cand. */
397 : tree value; /* For final value elimination, the expression for
398 : the final value of the iv. For iv elimination,
399 : the new bound to compare with. */
400 : };
401 :
402 : /* Use. */
403 : struct iv_use
404 : {
405 : unsigned id; /* The id of the use. */
406 : unsigned group_id; /* The group id the use belongs to. */
407 : enum use_type type; /* Type of the use. */
408 : tree mem_type; /* The memory type to use when testing whether an
409 : address is legitimate, and what the address's
410 : cost is. */
411 : struct iv *iv; /* The induction variable it is based on. */
412 : gimple *stmt; /* Statement in that it occurs. */
413 : tree *op_p; /* The place where it occurs. */
414 :
415 : tree addr_base; /* Base address with const offset stripped. */
416 : poly_uint64 addr_offset;
417 : /* Const offset stripped from base address. */
418 : };
419 :
420 : /* Group of uses. */
421 : struct iv_group
422 : {
423 : /* The id of the group. */
424 : unsigned id;
425 : /* Uses of the group are of the same type. */
426 : enum use_type type;
427 : /* The set of "related" IV candidates, plus the important ones. */
428 : bitmap related_cands;
429 : /* Number of IV candidates in the cost_map. */
430 : unsigned n_map_members;
431 : /* The costs wrto the iv candidates. */
432 : class cost_pair *cost_map;
433 : /* The selected candidate for the group. */
434 : struct iv_cand *selected;
435 : /* To indicate this is a doloop use group. */
436 : bool doloop_p;
437 : /* Uses in the group. */
438 : vec<struct iv_use *> vuses;
439 : };
440 :
/* The position where the iv is computed.  */
enum iv_position
{
  /* At the end, just before the exit condition.  */
  IP_NORMAL,
  /* At the end of the latch block.  */
  IP_END,
  /* Immediately before a specific use.  */
  IP_BEFORE_USE,
  /* Immediately after a specific use.  */
  IP_AFTER_USE,
  /* The original biv.  */
  IP_ORIGINAL
};
450 :
451 : /* The induction variable candidate. */
452 : struct iv_cand
453 : {
454 : unsigned id; /* The number of the candidate. */
455 : bool important; /* Whether this is an "important" candidate, i.e. such
456 : that it should be considered by all uses. */
457 : bool involves_undefs; /* Whether the IV involves undefined values. */
458 : ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
459 : gimple *incremented_at;/* For original biv, the statement where it is
460 : incremented. */
461 : tree var_before; /* The variable used for it before increment. */
462 : tree var_after; /* The variable used for it after increment. */
463 : struct iv *iv; /* The value of the candidate. NULL for
464 : "pseudocandidate" used to indicate the possibility
465 : to replace the final value of an iv by direct
466 : computation of the value. */
467 : unsigned cost; /* Cost of the candidate. */
468 : unsigned cost_step; /* Cost of the candidate's increment operation. */
469 : struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
470 : where it is incremented. */
471 : bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
472 : iv_cand. */
473 : bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
474 : handle it as a new invariant expression which will
475 : be hoisted out of loop. */
476 : struct iv *orig_iv; /* The original iv if this cand is added from biv with
477 : smaller type. */
478 : bool doloop_p; /* Whether this is a doloop candidate. */
479 : };
480 :
481 : /* Hashtable entry for common candidate derived from iv uses. */
482 2608190 : class iv_common_cand
483 : {
484 : public:
485 : tree base;
486 : tree step;
487 : /* IV uses from which this common candidate is derived. */
488 : auto_vec<struct iv_use *> uses;
489 : hashval_t hash;
490 : };
491 :
492 : /* Hashtable helpers. */
493 :
494 : struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495 : {
496 : static inline hashval_t hash (const iv_common_cand *);
497 : static inline bool equal (const iv_common_cand *, const iv_common_cand *);
498 : };
499 :
500 : /* Hash function for possible common candidates. */
501 :
502 : inline hashval_t
503 9895935 : iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 : {
505 9895935 : return ccand->hash;
506 : }
507 :
508 : /* Hash table equality function for common candidates. */
509 :
510 : inline bool
511 11170512 : iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
512 : const iv_common_cand *ccand2)
513 : {
514 11170512 : return (ccand1->hash == ccand2->hash
515 1628013 : && operand_equal_p (ccand1->base, ccand2->base, 0)
516 1607042 : && operand_equal_p (ccand1->step, ccand2->step, 0)
517 12770662 : && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
518 1600150 : == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
519 : }
520 :
521 : /* Loop invariant expression hashtable entry. */
522 :
523 : struct iv_inv_expr_ent
524 : {
525 : /* Tree expression of the entry. */
526 : tree expr;
527 : /* Unique indentifier. */
528 : int id;
529 : /* Hash value. */
530 : hashval_t hash;
531 : };
532 :
533 : /* Sort iv_inv_expr_ent pair A and B by id field. */
534 :
535 : static int
536 5737 : sort_iv_inv_expr_ent (const void *a, const void *b)
537 : {
538 5737 : const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
539 5737 : const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 :
541 5737 : unsigned id1 = (*e1)->id;
542 5737 : unsigned id2 = (*e2)->id;
543 :
544 5737 : if (id1 < id2)
545 : return -1;
546 2670 : else if (id1 > id2)
547 : return 1;
548 : else
549 0 : return 0;
550 : }
551 :
552 : /* Hashtable helpers. */
553 :
554 : struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555 : {
556 : static inline hashval_t hash (const iv_inv_expr_ent *);
557 : static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
558 : };
559 :
560 : /* Return true if uses of type TYPE represent some form of address. */
561 :
562 : inline bool
563 8978579 : address_p (use_type type)
564 : {
565 8978579 : return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
566 : }
567 :
568 : /* Hash function for loop invariant expressions. */
569 :
570 : inline hashval_t
571 6713217 : iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 : {
573 6713217 : return expr->hash;
574 : }
575 :
576 : /* Hash table equality function for expressions. */
577 :
578 : inline bool
579 8071107 : iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
580 : const iv_inv_expr_ent *expr2)
581 : {
582 8071107 : return expr1->hash == expr2->hash
583 8071107 : && operand_equal_p (expr1->expr, expr2->expr, 0);
584 : }
585 :
586 : struct ivopts_data
587 : {
588 : /* The currently optimized loop. */
589 : class loop *current_loop;
590 : location_t loop_loc;
591 :
592 : /* Numbers of iterations for all exits of the current loop. */
593 : hash_map<edge, tree_niter_desc *> *niters;
594 :
595 : /* Number of registers used in it. */
596 : unsigned regs_used;
597 :
598 : /* The size of version_info array allocated. */
599 : unsigned version_info_size;
600 :
601 : /* The array of information for the ssa names. */
602 : struct version_info *version_info;
603 :
604 : /* The hashtable of loop invariant expressions created
605 : by ivopt. */
606 : hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607 :
608 : /* The bitmap of indices in version_info whose value was changed. */
609 : bitmap relevant;
610 :
611 : /* The uses of induction variables. */
612 : vec<iv_group *> vgroups;
613 :
614 : /* The candidates. */
615 : vec<iv_cand *> vcands;
616 :
617 : /* A bitmap of important candidates. */
618 : bitmap important_candidates;
619 :
620 : /* Cache used by tree_to_aff_combination_expand. */
621 : hash_map<tree, name_expansion *> *name_expansion_cache;
622 :
623 : /* The hashtable of common candidates derived from iv uses. */
624 : hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625 :
626 : /* The common candidates. */
627 : vec<iv_common_cand *> iv_common_cands;
628 :
629 : /* Hash map recording base object information of tree exp. */
630 : hash_map<tree, tree> *base_object_map;
631 :
632 : /* The maximum invariant variable id. */
633 : unsigned max_inv_var_id;
634 :
635 : /* The maximum invariant expression id. */
636 : unsigned max_inv_expr_id;
637 :
638 : /* Number of no_overflow BIVs which are not used in memory address. */
639 : unsigned bivs_not_used_in_addr;
640 :
641 : /* Obstack for iv structure. */
642 : struct obstack iv_obstack;
643 :
644 : /* Whether to consider just related and important candidates when replacing a
645 : use. */
646 : bool consider_all_candidates;
647 :
648 : /* Are we optimizing for speed? */
649 : bool speed;
650 :
651 : /* Whether the loop body includes any function calls. */
652 : bool body_includes_call;
653 :
654 : /* Whether the loop body can only be exited via single exit. */
655 : bool loop_single_exit_p;
656 :
657 : /* Whether the loop has doloop comparison use. */
658 : bool doloop_use_p;
659 : };
660 :
661 : /* An assignment of iv candidates to uses. */
662 :
663 : class iv_ca
664 : {
665 : public:
666 : /* The number of uses covered by the assignment. */
667 : unsigned upto;
668 :
669 : /* Number of uses that cannot be expressed by the candidates in the set. */
670 : unsigned bad_groups;
671 :
672 : /* Candidate assigned to a use, together with the related costs. */
673 : class cost_pair **cand_for_group;
674 :
675 : /* Number of times each candidate is used. */
676 : unsigned *n_cand_uses;
677 :
678 : /* The candidates used. */
679 : bitmap cands;
680 :
681 : /* The number of candidates in the set. */
682 : unsigned n_cands;
683 :
684 : /* The number of invariants needed, including both invariant variants and
685 : invariant expressions. */
686 : unsigned n_invs;
687 :
688 : /* Total cost of expressing uses. */
689 : comp_cost cand_use_cost;
690 :
691 : /* Total cost of candidates. */
692 : int64_t cand_cost;
693 :
694 : /* Number of times each invariant variable is used. */
695 : unsigned *n_inv_var_uses;
696 :
697 : /* Number of times each invariant expression is used. */
698 : unsigned *n_inv_expr_uses;
699 :
700 : /* Total cost of the assignment. */
701 : comp_cost cost;
702 : };
703 :
/* Difference of two iv candidate assignments, kept as a linked list of
   per-group changes.  */

struct iv_ca_delta
{
  struct iv_group *group;	/* Changed group.  */
  class cost_pair *old_cp;	/* An old assignment (for rollback
				   purposes).  */
  class cost_pair *new_cp;	/* A new assignment.  */
  struct iv_ca_delta *next;	/* Next change in the list.  */
};
720 :
721 : /* Bound on number of candidates below that all candidates are considered. */
722 :
723 : #define CONSIDER_ALL_CANDIDATES_BOUND \
724 : ((unsigned) param_iv_consider_all_candidates_bound)
725 :
726 : /* If there are more iv occurrences, we just give up (it is quite unlikely that
727 : optimizing such a loop would help, and it would take ages). */
728 :
729 : #define MAX_CONSIDERED_GROUPS \
730 : ((unsigned) param_iv_max_considered_uses)
731 :
732 : /* If there are at most this number of ivs in the set, try removing unnecessary
733 : ivs from the set always. */
734 :
735 : #define ALWAYS_PRUNE_CAND_SET_BOUND \
736 : ((unsigned) param_iv_always_prune_cand_set_bound)
737 :
738 : /* The list of trees for that the decl_rtl field must be reset is stored
739 : here. */
740 :
741 : static vec<tree> decl_rtl_to_reset;
742 :
743 : static comp_cost force_expr_to_var_cost (tree, bool);
744 :
745 : /* The single loop exit if it dominates the latch, NULL otherwise. */
746 :
747 : edge
748 696527 : single_dom_exit (class loop *loop)
749 : {
750 696527 : edge exit = single_exit (loop);
751 :
752 696527 : if (!exit)
753 : return NULL;
754 :
755 463408 : if (!just_once_each_iteration_p (loop, exit->src))
756 : return NULL;
757 :
758 : return exit;
759 : }
760 :
761 : /* Dumps information about the induction variable IV to FILE. Don't dump
762 : variable's name if DUMP_NAME is FALSE. The information is dumped with
763 : preceding spaces indicated by INDENT_LEVEL. */
764 :
765 : void
766 1597 : dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767 : {
768 1597 : const char *p;
769 1597 : const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770 :
771 1597 : if (indent_level > 4)
772 : indent_level = 4;
773 1597 : p = spaces + 8 - (indent_level << 1);
774 :
775 1597 : fprintf (file, "%sIV struct:\n", p);
776 1597 : if (iv->ssa_name && dump_name)
777 : {
778 550 : fprintf (file, "%s SSA_NAME:\t", p);
779 550 : print_generic_expr (file, iv->ssa_name, TDF_SLIM);
780 550 : fprintf (file, "\n");
781 : }
782 :
783 1597 : fprintf (file, "%s Type:\t", p);
784 1597 : print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
785 1597 : fprintf (file, "\n");
786 :
787 1597 : fprintf (file, "%s Base:\t", p);
788 1597 : print_generic_expr (file, iv->base, TDF_SLIM);
789 1597 : fprintf (file, "\n");
790 :
791 1597 : fprintf (file, "%s Step:\t", p);
792 1597 : print_generic_expr (file, iv->step, TDF_SLIM);
793 1597 : fprintf (file, "\n");
794 :
795 1597 : if (iv->base_object)
796 : {
797 497 : fprintf (file, "%s Object:\t", p);
798 497 : print_generic_expr (file, iv->base_object, TDF_SLIM);
799 497 : fprintf (file, "\n");
800 : }
801 :
802 2887 : fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803 :
804 1597 : fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
805 1597 : p, iv->no_overflow ? "No-overflow" : "Overflow");
806 1597 : }
807 :
808 : /* Dumps information about the USE to FILE. */
809 :
810 : void
811 250 : dump_use (FILE *file, struct iv_use *use)
812 : {
813 250 : fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
814 250 : fprintf (file, " At stmt:\t");
815 250 : print_gimple_stmt (file, use->stmt, 0);
816 250 : fprintf (file, " At pos:\t");
817 250 : if (use->op_p)
818 160 : print_generic_expr (file, *use->op_p, TDF_SLIM);
819 250 : fprintf (file, "\n");
820 250 : dump_iv (file, use->iv, false, 2);
821 250 : }
822 :
823 : /* Dumps information about the uses to FILE. */
824 :
825 : void
826 67 : dump_groups (FILE *file, struct ivopts_data *data)
827 : {
828 67 : unsigned i, j;
829 67 : struct iv_group *group;
830 :
831 287 : for (i = 0; i < data->vgroups.length (); i++)
832 : {
833 220 : group = data->vgroups[i];
834 220 : fprintf (file, "Group %d:\n", group->id);
835 220 : if (group->type == USE_NONLINEAR_EXPR)
836 90 : fprintf (file, " Type:\tGENERIC\n");
837 130 : else if (group->type == USE_REF_ADDRESS)
838 56 : fprintf (file, " Type:\tREFERENCE ADDRESS\n");
839 74 : else if (group->type == USE_PTR_ADDRESS)
840 0 : fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
841 : else
842 : {
843 74 : gcc_assert (group->type == USE_COMPARE);
844 74 : fprintf (file, " Type:\tCOMPARE\n");
845 : }
846 470 : for (j = 0; j < group->vuses.length (); j++)
847 250 : dump_use (file, group->vuses[j]);
848 : }
849 67 : }
850 :
851 : /* Dumps information about induction variable candidate CAND to FILE. */
852 :
853 : void
854 797 : dump_cand (FILE *file, struct iv_cand *cand)
855 : {
856 797 : struct iv *iv = cand->iv;
857 :
858 797 : fprintf (file, "Candidate %d:\n", cand->id);
859 797 : if (cand->inv_vars)
860 : {
861 26 : fprintf (file, " Depend on inv.vars: ");
862 26 : dump_bitmap (file, cand->inv_vars);
863 : }
864 797 : if (cand->inv_exprs)
865 : {
866 0 : fprintf (file, " Depend on inv.exprs: ");
867 0 : dump_bitmap (file, cand->inv_exprs);
868 : }
869 :
870 797 : if (cand->var_before)
871 : {
872 687 : fprintf (file, " Var befor: ");
873 687 : print_generic_expr (file, cand->var_before, TDF_SLIM);
874 687 : fprintf (file, "\n");
875 : }
876 797 : if (cand->var_after)
877 : {
878 687 : fprintf (file, " Var after: ");
879 687 : print_generic_expr (file, cand->var_after, TDF_SLIM);
880 687 : fprintf (file, "\n");
881 : }
882 :
883 797 : switch (cand->pos)
884 : {
885 653 : case IP_NORMAL:
886 653 : fprintf (file, " Incr POS: before exit test\n");
887 653 : break;
888 :
889 0 : case IP_BEFORE_USE:
890 0 : fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
891 0 : break;
892 :
893 0 : case IP_AFTER_USE:
894 0 : fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
895 0 : break;
896 :
897 0 : case IP_END:
898 0 : fprintf (file, " Incr POS: at end\n");
899 0 : break;
900 :
901 144 : case IP_ORIGINAL:
902 144 : fprintf (file, " Incr POS: orig biv\n");
903 144 : break;
904 : }
905 :
906 797 : dump_iv (file, iv, false, 1);
907 797 : }
908 :
909 : /* Returns the info for ssa version VER. */
910 :
911 : static inline struct version_info *
912 116876987 : ver_info (struct ivopts_data *data, unsigned ver)
913 : {
914 116876987 : return data->version_info + ver;
915 : }
916 :
917 : /* Returns the info for ssa name NAME. */
918 :
919 : static inline struct version_info *
920 94700312 : name_info (struct ivopts_data *data, tree name)
921 : {
922 94700312 : return ver_info (data, SSA_NAME_VERSION (name));
923 : }
924 :
925 : /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
926 : emitted in LOOP. */
927 :
928 : static bool
929 33539675 : stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 : {
931 33539675 : basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 :
933 33539675 : gcc_assert (bb);
934 :
935 33539675 : if (sbb == loop->latch)
936 : return true;
937 :
938 33433428 : if (sbb != bb)
939 : return false;
940 :
941 19375576 : return stmt == last_nondebug_stmt (bb);
942 : }
943 :
944 : /* Returns true if STMT if after the place where the original induction
945 : variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
946 : if the positions are identical. */
947 :
948 : static bool
949 7895812 : stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 : {
951 7895812 : basic_block cand_bb = gimple_bb (cand->incremented_at);
952 7895812 : basic_block stmt_bb = gimple_bb (stmt);
953 :
954 7895812 : if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
955 : return false;
956 :
957 5408655 : if (stmt_bb != cand_bb)
958 : return true;
959 :
960 5149688 : if (true_if_equal
961 5149688 : && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
962 : return true;
963 5143124 : return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
964 : }
965 :
966 : /* Returns true if STMT if after the place where the induction variable
967 : CAND is incremented in LOOP. */
968 :
969 : static bool
970 42604909 : stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 : {
972 42604909 : switch (cand->pos)
973 : {
974 : case IP_END:
975 : return false;
976 :
977 33539675 : case IP_NORMAL:
978 33539675 : return stmt_after_ip_normal_pos (loop, stmt);
979 :
980 7885717 : case IP_ORIGINAL:
981 7885717 : case IP_AFTER_USE:
982 7885717 : return stmt_after_inc_pos (cand, stmt, false);
983 :
984 10095 : case IP_BEFORE_USE:
985 10095 : return stmt_after_inc_pos (cand, stmt, true);
986 :
987 0 : default:
988 0 : gcc_unreachable ();
989 : }
990 : }
991 :
992 : /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 :
994 : static tree
995 14553152 : contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 : {
997 14553152 : if (TREE_CODE (*tp) == SSA_NAME
998 14553152 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
999 : return *tp;
1000 :
1001 14553135 : if (!EXPR_P (*tp))
1002 9994625 : *walk_subtrees = 0;
1003 :
1004 : return NULL_TREE;
1005 : }
1006 :
1007 : /* Returns true if EXPR contains a ssa name that occurs in an
1008 : abnormal phi node. */
1009 :
1010 : bool
1011 7870114 : contains_abnormal_ssa_name_p (tree expr)
1012 : {
1013 7870114 : return walk_tree_without_duplicates
1014 7870114 : (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1015 : }
1016 :
1017 : /* Returns the structure describing number of iterations determined from
1018 : EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 :
1020 : static class tree_niter_desc *
1021 4362126 : niter_for_exit (struct ivopts_data *data, edge exit)
1022 : {
1023 4362126 : class tree_niter_desc *desc;
1024 4362126 : tree_niter_desc **slot;
1025 :
1026 4362126 : if (!data->niters)
1027 : {
1028 471009 : data->niters = new hash_map<edge, tree_niter_desc *>;
1029 471009 : slot = NULL;
1030 : }
1031 : else
1032 3891117 : slot = data->niters->get (exit);
1033 :
1034 4362126 : if (!slot)
1035 : {
1036 : /* Try to determine number of iterations. We cannot safely work with ssa
1037 : names that appear in phi nodes on abnormal edges, so that we do not
1038 : create overlapping life ranges for them (PR 27283). */
1039 483171 : desc = XNEW (class tree_niter_desc);
1040 483171 : ::new (static_cast<void*> (desc)) tree_niter_desc ();
1041 483171 : if (!number_of_iterations_exit (data->current_loop,
1042 : exit, desc, true)
1043 483171 : || contains_abnormal_ssa_name_p (desc->niter))
1044 : {
1045 40261 : desc->~tree_niter_desc ();
1046 40261 : XDELETE (desc);
1047 40261 : desc = NULL;
1048 : }
1049 483171 : data->niters->put (exit, desc);
1050 : }
1051 : else
1052 3878955 : desc = *slot;
1053 :
1054 4362126 : return desc;
1055 : }
1056 :
1057 : /* Returns the structure describing number of iterations determined from
1058 : single dominating exit of DATA->current_loop, or NULL if something
1059 : goes wrong. */
1060 :
1061 : static class tree_niter_desc *
1062 67 : niter_for_single_dom_exit (struct ivopts_data *data)
1063 : {
1064 67 : edge exit = single_dom_exit (data->current_loop);
1065 :
1066 67 : if (!exit)
1067 : return NULL;
1068 :
1069 57 : return niter_for_exit (data, exit);
1070 : }
1071 :
1072 : /* Initializes data structures used by the iv optimization pass, stored
1073 : in DATA. */
1074 :
1075 : static void
1076 240496 : tree_ssa_iv_optimize_init (struct ivopts_data *data)
1077 : {
1078 240496 : data->version_info_size = 2 * num_ssa_names;
1079 240496 : data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1080 240496 : data->relevant = BITMAP_ALLOC (NULL);
1081 240496 : data->important_candidates = BITMAP_ALLOC (NULL);
1082 240496 : data->max_inv_var_id = 0;
1083 240496 : data->max_inv_expr_id = 0;
1084 240496 : data->niters = NULL;
1085 240496 : data->vgroups.create (20);
1086 240496 : data->vcands.create (20);
1087 240496 : data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1088 240496 : data->name_expansion_cache = NULL;
1089 240496 : data->base_object_map = NULL;
1090 240496 : data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1091 240496 : data->iv_common_cands.create (20);
1092 240496 : decl_rtl_to_reset.create (20);
1093 240496 : gcc_obstack_init (&data->iv_obstack);
1094 240496 : }
1095 :
1096 : /* walk_tree callback for determine_base_object. */
1097 :
1098 : static tree
1099 19219024 : determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1100 : {
1101 19219024 : tree_code code = TREE_CODE (*tp);
1102 19219024 : tree obj = NULL_TREE;
1103 19219024 : if (code == ADDR_EXPR)
1104 : {
1105 1019028 : tree base = get_base_address (TREE_OPERAND (*tp, 0));
1106 1019028 : if (!base)
1107 0 : obj = *tp;
1108 1019028 : else if (TREE_CODE (base) != MEM_REF)
1109 1019000 : obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1110 : }
1111 18199996 : else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1112 1908545 : obj = fold_convert (ptr_type_node, *tp);
1113 :
1114 2927545 : if (!obj)
1115 : {
1116 16291479 : if (!EXPR_P (*tp))
1117 7118020 : *walk_subtrees = 0;
1118 :
1119 16291479 : return NULL_TREE;
1120 : }
1121 : /* Record special node for multiple base objects and stop. */
1122 2927545 : if (*static_cast<tree *> (wdata))
1123 : {
1124 4254 : *static_cast<tree *> (wdata) = integer_zero_node;
1125 4254 : return integer_zero_node;
1126 : }
1127 : /* Record the base object and continue looking. */
1128 2923291 : *static_cast<tree *> (wdata) = obj;
1129 2923291 : return NULL_TREE;
1130 : }
1131 :
1132 : /* Returns a memory object to that EXPR points with caching. Return NULL if we
1133 : are able to determine that it does not point to any such object; specially
1134 : return integer_zero_node if EXPR contains multiple base objects. */
1135 :
1136 : static tree
1137 10361271 : determine_base_object (struct ivopts_data *data, tree expr)
1138 : {
1139 10361271 : tree *slot, obj = NULL_TREE;
1140 10361271 : if (data->base_object_map)
1141 : {
1142 10198809 : if ((slot = data->base_object_map->get(expr)) != NULL)
1143 4735156 : return *slot;
1144 : }
1145 : else
1146 162462 : data->base_object_map = new hash_map<tree, tree>;
1147 :
1148 5626115 : (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1149 5626115 : data->base_object_map->put (expr, obj);
1150 5626115 : return obj;
1151 : }
1152 :
1153 : /* Allocates an induction variable with given initial value BASE and step STEP
1154 : for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1155 :
1156 : static struct iv *
1157 10361271 : alloc_iv (struct ivopts_data *data, tree base, tree step,
1158 : bool no_overflow = false)
1159 : {
1160 10361271 : tree expr = base;
1161 10361271 : struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1162 : sizeof (struct iv));
1163 10361271 : gcc_assert (step != NULL_TREE);
1164 :
1165 : /* Canonicalize the address expression in base if it were an unsigned
1166 : computation. That leads to more equalities being detected and results in:
1167 :
1168 : 1) More accurate cost can be computed for address expressions;
1169 : 2) Duplicate candidates won't be created for bases in different
1170 : forms, like &a[0] and &a.
1171 : 3) Duplicate candidates won't be created for IV expressions that differ
1172 : only in their sign. */
1173 10361271 : aff_tree comb;
1174 10361271 : STRIP_NOPS (expr);
1175 10361271 : expr = fold_convert (unsigned_type_for (TREE_TYPE (expr)), expr);
1176 10361271 : tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1177 10361271 : base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1178 :
1179 10361271 : iv->base = base;
1180 10361271 : iv->base_object = determine_base_object (data, base);
1181 10361271 : iv->step = step;
1182 10361271 : iv->biv_p = false;
1183 10361271 : iv->nonlin_use = NULL;
1184 10361271 : iv->ssa_name = NULL_TREE;
1185 10361271 : if (!no_overflow
1186 10361271 : && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1187 : base, step))
1188 : no_overflow = true;
1189 10361271 : iv->no_overflow = no_overflow;
1190 10361271 : iv->have_address_use = false;
1191 :
1192 20722542 : return iv;
1193 10361271 : }
1194 :
1195 : /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1196 : doesn't overflow. */
1197 :
1198 : static void
1199 4880694 : set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1200 : bool no_overflow)
1201 : {
1202 4880694 : struct version_info *info = name_info (data, iv);
1203 :
1204 4880694 : gcc_assert (!info->iv);
1205 :
1206 4880694 : bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1207 4880694 : info->iv = alloc_iv (data, base, step, no_overflow);
1208 4880694 : info->iv->ssa_name = iv;
1209 4880694 : }
1210 :
1211 : /* Finds induction variable declaration for VAR. */
1212 :
1213 : static struct iv *
1214 44414440 : get_iv (struct ivopts_data *data, tree var)
1215 : {
1216 44414440 : basic_block bb;
1217 44414440 : tree type = TREE_TYPE (var);
1218 :
1219 44414440 : if (!POINTER_TYPE_P (type)
1220 35231195 : && !INTEGRAL_TYPE_P (type))
1221 : return NULL;
1222 :
1223 38716163 : if (!name_info (data, var)->iv)
1224 : {
1225 17998580 : bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1226 :
1227 17998580 : if (!bb
1228 17998580 : || !flow_bb_inside_loop_p (data->current_loop, bb))
1229 : {
1230 797482 : if (POINTER_TYPE_P (type))
1231 316214 : type = sizetype;
1232 797482 : set_iv (data, var, var, build_int_cst (type, 0), true);
1233 : }
1234 : }
1235 :
1236 38716163 : return name_info (data, var)->iv;
1237 : }
1238 :
1239 : /* Return the first non-invariant ssa var found in EXPR. */
1240 :
1241 : static tree
1242 4068808 : extract_single_var_from_expr (tree expr)
1243 : {
1244 4068808 : int i, n;
1245 4068808 : tree tmp;
1246 4068808 : enum tree_code code;
1247 :
1248 4068808 : if (!expr || is_gimple_min_invariant (expr))
1249 3389991 : return NULL;
1250 :
1251 678817 : code = TREE_CODE (expr);
1252 678817 : if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1253 : {
1254 373764 : n = TREE_OPERAND_LENGTH (expr);
1255 747599 : for (i = 0; i < n; i++)
1256 : {
1257 373835 : tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1258 :
1259 373835 : if (tmp)
1260 : return tmp;
1261 : }
1262 : }
1263 305053 : return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1264 : }
1265 :
1266 : /* Finds basic ivs. */
1267 :
1268 : static bool
1269 627372 : find_bivs (struct ivopts_data *data)
1270 : {
1271 627372 : gphi *phi;
1272 627372 : affine_iv iv;
1273 627372 : tree step, type, base, stop;
1274 627372 : bool found = false;
1275 627372 : class loop *loop = data->current_loop;
1276 627372 : gphi_iterator psi;
1277 :
1278 2340542 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1279 : {
1280 1713170 : phi = psi.phi ();
1281 :
1282 1713170 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1283 237 : continue;
1284 :
1285 1712933 : if (virtual_operand_p (PHI_RESULT (phi)))
1286 410800 : continue;
1287 :
1288 1302133 : if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1289 431316 : continue;
1290 :
1291 870817 : if (integer_zerop (iv.step))
1292 0 : continue;
1293 :
1294 870817 : step = iv.step;
1295 870817 : base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1296 : /* Stop expanding iv base at the first ssa var referred by iv step.
1297 : Ideally we should stop at any ssa var, because that's expensive
1298 : and unusual to happen, we just do it on the first one.
1299 :
1300 : See PR64705 for the rationale. */
1301 870817 : stop = extract_single_var_from_expr (step);
1302 870817 : base = expand_simple_operations (base, stop);
1303 870817 : if (contains_abnormal_ssa_name_p (base)
1304 870817 : || contains_abnormal_ssa_name_p (step))
1305 10 : continue;
1306 :
1307 870807 : type = TREE_TYPE (PHI_RESULT (phi));
1308 870807 : base = fold_convert (type, base);
1309 870807 : if (step)
1310 : {
1311 870807 : if (POINTER_TYPE_P (type))
1312 163673 : step = convert_to_ptrofftype (step);
1313 : else
1314 707134 : step = fold_convert (type, step);
1315 : }
1316 :
1317 870807 : set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1318 870807 : found = true;
1319 : }
1320 :
1321 627372 : return found;
1322 : }
1323 :
1324 : /* Marks basic ivs. */
1325 :
1326 : static void
1327 500816 : mark_bivs (struct ivopts_data *data)
1328 : {
1329 500816 : gphi *phi;
1330 500816 : gimple *def;
1331 500816 : tree var;
1332 500816 : struct iv *iv, *incr_iv;
1333 500816 : class loop *loop = data->current_loop;
1334 500816 : basic_block incr_bb;
1335 500816 : gphi_iterator psi;
1336 :
1337 500816 : data->bivs_not_used_in_addr = 0;
1338 1948306 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1339 : {
1340 1447490 : phi = psi.phi ();
1341 :
1342 1447490 : iv = get_iv (data, PHI_RESULT (phi));
1343 1447490 : if (!iv)
1344 576683 : continue;
1345 :
1346 870807 : var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1347 870807 : def = SSA_NAME_DEF_STMT (var);
1348 : /* Don't mark iv peeled from other one as biv. */
1349 872387 : if (def
1350 870807 : && gimple_code (def) == GIMPLE_PHI
1351 873520 : && gimple_bb (def) == loop->header)
1352 1580 : continue;
1353 :
1354 869227 : incr_iv = get_iv (data, var);
1355 869227 : if (!incr_iv)
1356 1144 : continue;
1357 :
1358 : /* If the increment is in the subloop, ignore it. */
1359 868083 : incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1360 868083 : if (incr_bb->loop_father != data->current_loop
1361 868083 : || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1362 0 : continue;
1363 :
1364 868083 : iv->biv_p = true;
1365 868083 : incr_iv->biv_p = true;
1366 868083 : if (iv->no_overflow)
1367 579897 : data->bivs_not_used_in_addr++;
1368 868083 : if (incr_iv->no_overflow)
1369 571494 : data->bivs_not_used_in_addr++;
1370 : }
1371 500816 : }
1372 :
1373 : /* Checks whether STMT defines a linear induction variable and stores its
1374 : parameters to IV. */
1375 :
1376 : static bool
1377 12550334 : find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1378 : {
1379 12550334 : tree lhs, stop;
1380 12550334 : class loop *loop = data->current_loop;
1381 :
1382 12550334 : iv->base = NULL_TREE;
1383 12550334 : iv->step = NULL_TREE;
1384 :
1385 12550334 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
1386 : return false;
1387 :
1388 10515834 : lhs = gimple_assign_lhs (stmt);
1389 10515834 : if (TREE_CODE (lhs) != SSA_NAME)
1390 : return false;
1391 :
1392 18807514 : if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1393 : return false;
1394 :
1395 : /* Stop expanding iv base at the first ssa var referred by iv step.
1396 : Ideally we should stop at any ssa var, because that's expensive
1397 : and unusual to happen, we just do it on the first one.
1398 :
1399 : See PR64705 for the rationale. */
1400 2824156 : stop = extract_single_var_from_expr (iv->step);
1401 2824156 : iv->base = expand_simple_operations (iv->base, stop);
1402 2824156 : if (contains_abnormal_ssa_name_p (iv->base)
1403 2824156 : || contains_abnormal_ssa_name_p (iv->step))
1404 6 : return false;
1405 :
1406 : /* If STMT could throw, then do not consider STMT as defining a GIV.
1407 : While this will suppress optimizations, we cannot safely delete this
1408 : GIV and associated statements, even if it appears it is not used. */
1409 2824150 : if (stmt_could_throw_p (cfun, stmt))
1410 : return false;
1411 :
1412 : return true;
1413 : }
1414 :
1415 : /* Finds general ivs in statement STMT. */
1416 :
1417 : static void
1418 12550334 : find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1419 : {
1420 12550334 : affine_iv iv;
1421 :
1422 12550334 : if (!find_givs_in_stmt_scev (data, stmt, &iv))
1423 9726192 : return;
1424 :
1425 2824142 : set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1426 : }
1427 :
1428 : /* Finds general ivs in basic block BB. */
1429 :
1430 : static void
1431 2814328 : find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1432 : {
1433 2814328 : gimple_stmt_iterator bsi;
1434 :
1435 27525498 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1436 21896842 : if (!is_gimple_debug (gsi_stmt (bsi)))
1437 12550334 : find_givs_in_stmt (data, gsi_stmt (bsi));
1438 2814328 : }
1439 :
1440 : /* Finds general ivs. */
1441 :
1442 : static void
1443 500816 : find_givs (struct ivopts_data *data, basic_block *body)
1444 : {
1445 500816 : class loop *loop = data->current_loop;
1446 500816 : unsigned i;
1447 :
1448 3315144 : for (i = 0; i < loop->num_nodes; i++)
1449 2814328 : find_givs_in_bb (data, body[i]);
1450 500816 : }
1451 :
1452 : /* For each ssa name defined in LOOP determines whether it is an induction
1453 : variable and if so, its initial value and step. */
1454 :
1455 : static bool
1456 627372 : find_induction_variables (struct ivopts_data *data, basic_block *body)
1457 : {
1458 627372 : unsigned i;
1459 627372 : bitmap_iterator bi;
1460 :
1461 627372 : if (!find_bivs (data))
1462 : return false;
1463 :
1464 500816 : find_givs (data, body);
1465 500816 : mark_bivs (data);
1466 :
1467 500816 : if (dump_file && (dump_flags & TDF_DETAILS))
1468 : {
1469 67 : class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1470 :
1471 67 : if (niter)
1472 : {
1473 51 : fprintf (dump_file, " number of iterations ");
1474 51 : print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1475 51 : if (!integer_zerop (niter->may_be_zero))
1476 : {
1477 1 : fprintf (dump_file, "; zero if ");
1478 1 : print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1479 : }
1480 51 : fprintf (dump_file, "\n");
1481 67 : };
1482 :
1483 67 : fprintf (dump_file, "\n<Induction Vars>:\n");
1484 819 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1485 : {
1486 752 : struct version_info *info = ver_info (data, i);
1487 752 : if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1488 550 : dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1489 : }
1490 : }
1491 :
1492 : return true;
1493 : }
1494 :
1495 : /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1496 : For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1497 : is the const offset stripped from IV base and MEM_TYPE is the type
1498 : of the memory being addressed. For uses of other types, ADDR_BASE
1499 : and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1500 :
1501 : static struct iv_use *
1502 2087063 : record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1503 : gimple *stmt, enum use_type type, tree mem_type,
1504 : tree addr_base, poly_uint64 addr_offset)
1505 : {
1506 2087063 : struct iv_use *use = XCNEW (struct iv_use);
1507 :
1508 2087063 : use->id = group->vuses.length ();
1509 2087063 : use->group_id = group->id;
1510 2087063 : use->type = type;
1511 2087063 : use->mem_type = mem_type;
1512 2087063 : use->iv = iv;
1513 2087063 : use->stmt = stmt;
1514 2087063 : use->op_p = use_p;
1515 2087063 : use->addr_base = addr_base;
1516 2087063 : use->addr_offset = addr_offset;
1517 :
1518 2087063 : group->vuses.safe_push (use);
1519 2087063 : return use;
1520 : }
1521 :
1522 : /* Checks whether OP is a loop-level invariant and if so, records it.
1523 : NONLINEAR_USE is true if the invariant is used in a way we do not
1524 : handle specially. */
1525 :
1526 : static void
1527 22577255 : record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1528 : {
1529 22577255 : basic_block bb;
1530 22577255 : struct version_info *info;
1531 :
1532 22577255 : if (TREE_CODE (op) != SSA_NAME
1533 22577255 : || virtual_operand_p (op))
1534 : return;
1535 :
1536 21398240 : bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1537 21398240 : if (bb
1538 21398240 : && flow_bb_inside_loop_p (data->current_loop, bb))
1539 : return;
1540 :
1541 3817653 : info = name_info (data, op);
1542 3817653 : info->name = op;
1543 3817653 : info->has_nonlin_use |= nonlinear_use;
1544 3817653 : if (!info->inv_id)
1545 1329376 : info->inv_id = ++data->max_inv_var_id;
1546 3817653 : bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1547 : }
1548 :
1549 : /* Record a group of TYPE. */
1550 :
1551 : static struct iv_group *
1552 1804996 : record_group (struct ivopts_data *data, enum use_type type)
1553 : {
1554 1804996 : struct iv_group *group = XCNEW (struct iv_group);
1555 :
1556 1804996 : group->id = data->vgroups.length ();
1557 1804996 : group->type = type;
1558 1804996 : group->related_cands = BITMAP_ALLOC (NULL);
1559 1804996 : group->vuses.create (1);
1560 1804996 : group->doloop_p = false;
1561 :
1562 1804996 : data->vgroups.safe_push (group);
1563 1804996 : return group;
1564 : }
1565 :
1566 : /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1567 : New group will be created if there is no existing group for the use.
1568 : MEM_TYPE is the type of memory being addressed, or NULL if this
1569 : isn't an address reference. */
1570 :
1571 : static struct iv_use *
1572 2087063 : record_group_use (struct ivopts_data *data, tree *use_p,
1573 : struct iv *iv, gimple *stmt, enum use_type type,
1574 : tree mem_type)
1575 : {
1576 2087063 : tree addr_base = NULL;
1577 2087063 : struct iv_group *group = NULL;
1578 2087063 : poly_uint64 addr_offset = 0;
1579 :
1580 : /* Record non address type use in a new group. */
1581 2087063 : if (address_p (type))
1582 : {
1583 863391 : unsigned int i;
1584 :
1585 863391 : gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1586 863391 : tree addr_toffset;
1587 863391 : split_constant_offset (iv->base, &addr_base, &addr_toffset);
1588 863391 : addr_offset = int_cst_value (addr_toffset);
1589 1623102 : for (i = 0; i < data->vgroups.length (); i++)
1590 : {
1591 1094983 : struct iv_use *use;
1592 :
1593 1094983 : group = data->vgroups[i];
1594 1094983 : use = group->vuses[0];
1595 1094983 : if (!address_p (use->type))
1596 336247 : continue;
1597 :
1598 : /* Check if it has the same stripped base and step. */
1599 758736 : if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1600 403958 : && operand_equal_p (iv->step, use->iv->step, OEP_ASSUME_WRAPV)
1601 1159593 : && operand_equal_p (addr_base, use->addr_base, OEP_ASSUME_WRAPV))
1602 : break;
1603 : }
1604 1726782 : if (i == data->vgroups.length ())
1605 528119 : group = NULL;
1606 : }
1607 :
1608 863391 : if (!group)
1609 1751791 : group = record_group (data, type);
1610 :
1611 2087063 : return record_use (group, use_p, iv, stmt, type, mem_type,
1612 2087063 : addr_base, addr_offset);
1613 : }
1614 :
1615 : /* Checks whether the use OP is interesting and if so, records it. */
1616 :
1617 : static struct iv_use *
1618 7247485 : find_interesting_uses_op (struct ivopts_data *data, tree op)
1619 : {
1620 7247485 : struct iv *iv;
1621 7247485 : gimple *stmt;
1622 7247485 : struct iv_use *use;
1623 :
1624 7247485 : if (TREE_CODE (op) != SSA_NAME)
1625 : return NULL;
1626 :
1627 5826366 : iv = get_iv (data, op);
1628 5826366 : if (!iv)
1629 : return NULL;
1630 :
1631 2511849 : if (iv->nonlin_use)
1632 : {
1633 197023 : gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1634 : return iv->nonlin_use;
1635 : }
1636 :
1637 2314826 : if (integer_zerop (iv->step))
1638 : {
1639 1690663 : record_invariant (data, op, true);
1640 1690663 : return NULL;
1641 : }
1642 :
1643 624163 : stmt = SSA_NAME_DEF_STMT (op);
1644 624163 : gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1645 :
1646 624163 : use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1647 624163 : iv->nonlin_use = use;
1648 624163 : return use;
1649 : }
1650 :
/* Indicate how a compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  /* Initial classification, kept when no other kind applies.  */
  COMP_IV_NA,
  /* The compare may be rewritten by expressing the value of the
     iv_use.  */
  COMP_IV_EXPR,
  /* Both sides of the comparison are iv_uses; the compare may be
     rewritten by expressing the value of each of them.  */
  COMP_IV_EXPR_2,
  /* The compare may be rewritten by expressing the value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1664 :
1665 : /* Given a condition in statement STMT, checks whether it is a compare
1666 : of an induction variable and an invariant. If this is the case,
1667 : CONTROL_VAR is set to location of the iv, BOUND to the location of
1668 : the invariant, IV_VAR and IV_BOUND are set to the corresponding
1669 : induction variable descriptions, and true is returned. If this is not
1670 : the case, CONTROL_VAR and BOUND are set to the arguments of the
1671 : condition and false is returned. */
1672 :
1673 : static enum comp_iv_rewrite
1674 7483136 : extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1675 : tree **control_var, tree **bound,
1676 : struct iv **iv_var, struct iv **iv_bound)
1677 : {
1678 : /* The objects returned when COND has constant operands. */
1679 7483136 : static struct iv const_iv;
1680 7483136 : static tree zero;
1681 7483136 : tree *op0 = &zero, *op1 = &zero;
1682 7483136 : struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1683 7483136 : enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1684 :
1685 7483136 : if (gimple_code (stmt) == GIMPLE_COND)
1686 : {
1687 7216309 : gcond *cond_stmt = as_a <gcond *> (stmt);
1688 7216309 : op0 = gimple_cond_lhs_ptr (cond_stmt);
1689 7216309 : op1 = gimple_cond_rhs_ptr (cond_stmt);
1690 : }
1691 : else
1692 : {
1693 266827 : op0 = gimple_assign_rhs1_ptr (stmt);
1694 266827 : op1 = gimple_assign_rhs2_ptr (stmt);
1695 : }
1696 :
1697 7483136 : zero = integer_zero_node;
1698 7483136 : const_iv.step = integer_zero_node;
1699 :
1700 7483136 : if (TREE_CODE (*op0) == SSA_NAME)
1701 7482979 : iv0 = get_iv (data, *op0);
1702 7483136 : if (TREE_CODE (*op1) == SSA_NAME)
1703 3391365 : iv1 = get_iv (data, *op1);
1704 :
1705 : /* If both sides of comparison are IVs. We can express ivs on both end. */
1706 7483136 : if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1707 : {
1708 91954 : rewrite_type = COMP_IV_EXPR_2;
1709 91954 : goto end;
1710 : }
1711 :
1712 : /* If none side of comparison is IV. */
1713 5779141 : if ((!iv0 || integer_zerop (iv0->step))
1714 8732264 : && (!iv1 || integer_zerop (iv1->step)))
1715 950749 : goto end;
1716 :
1717 : /* Control variable may be on the other side. */
1718 6440433 : if (!iv0 || integer_zerop (iv0->step))
1719 : {
1720 : std::swap (op0, op1);
1721 : std::swap (iv0, iv1);
1722 : }
1723 : /* If one side is IV and the other side isn't loop invariant. */
1724 6440433 : if (!iv1)
1725 : rewrite_type = COMP_IV_EXPR;
1726 : /* If one side is IV and the other side is loop invariant. */
1727 5426427 : else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1728 : rewrite_type = COMP_IV_ELIM;
1729 :
1730 7483136 : end:
1731 7483136 : if (control_var)
1732 7483136 : *control_var = op0;
1733 7483136 : if (iv_var)
1734 1548146 : *iv_var = iv0;
1735 7483136 : if (bound)
1736 7483136 : *bound = op1;
1737 7483136 : if (iv_bound)
1738 7483136 : *iv_bound = iv1;
1739 :
1740 7483136 : return rewrite_type;
1741 : }
1742 :
1743 : /* Checks whether the condition in STMT is interesting and if so,
1744 : records it. */
1745 :
1746 : static void
1747 1548146 : find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1748 : {
1749 1548146 : tree *var_p, *bound_p;
1750 1548146 : struct iv *var_iv, *bound_iv;
1751 1548146 : enum comp_iv_rewrite ret;
1752 :
1753 1548146 : ret = extract_cond_operands (data, stmt,
1754 : &var_p, &bound_p, &var_iv, &bound_iv);
1755 1548146 : if (ret == COMP_IV_NA)
1756 : {
1757 950749 : find_interesting_uses_op (data, *var_p);
1758 950749 : find_interesting_uses_op (data, *bound_p);
1759 950749 : return;
1760 : }
1761 :
1762 597397 : record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1763 : /* Record compare type iv_use for iv on the other side of comparison. */
1764 597397 : if (ret == COMP_IV_EXPR_2)
1765 2112 : record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1766 : }
1767 :
1768 : /* Returns the outermost loop EXPR is obviously invariant in
1769 : relative to the loop LOOP, i.e. if all its operands are defined
1770 : outside of the returned loop. Returns NULL if EXPR is not
1771 : even obviously invariant in LOOP. */
1772 :
1773 : class loop *
1774 247259 : outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1775 : {
1776 247259 : basic_block def_bb;
1777 247259 : unsigned i, len;
1778 :
1779 247259 : if (is_gimple_min_invariant (expr))
1780 37147 : return current_loops->tree_root;
1781 :
1782 210112 : if (TREE_CODE (expr) == SSA_NAME)
1783 : {
1784 129219 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1785 129219 : if (def_bb)
1786 : {
1787 80429 : if (flow_bb_inside_loop_p (loop, def_bb))
1788 : return NULL;
1789 160846 : return superloop_at_depth (loop,
1790 105452 : loop_depth (def_bb->loop_father) + 1);
1791 : }
1792 :
1793 48790 : return current_loops->tree_root;
1794 : }
1795 :
1796 80893 : if (!EXPR_P (expr))
1797 : return NULL;
1798 :
1799 80893 : unsigned maxdepth = 0;
1800 80893 : len = TREE_OPERAND_LENGTH (expr);
1801 210272 : for (i = 0; i < len; i++)
1802 : {
1803 129397 : class loop *ivloop;
1804 129397 : if (!TREE_OPERAND (expr, i))
1805 0 : continue;
1806 :
1807 129397 : ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1808 129397 : if (!ivloop)
1809 : return NULL;
1810 227081 : maxdepth = MAX (maxdepth, loop_depth (ivloop));
1811 : }
1812 :
1813 80875 : return superloop_at_depth (loop, maxdepth);
1814 : }
1815 :
1816 : /* Returns true if expression EXPR is obviously invariant in LOOP,
1817 : i.e. if all its operands are defined outside of the LOOP. LOOP
1818 : should not be the function body. */
1819 :
1820 : bool
1821 11953284 : expr_invariant_in_loop_p (class loop *loop, tree expr)
1822 : {
1823 11953284 : basic_block def_bb;
1824 11953284 : unsigned i, len;
1825 :
1826 11953284 : gcc_assert (loop_depth (loop) > 0);
1827 :
1828 11953284 : if (is_gimple_min_invariant (expr))
1829 : return true;
1830 :
1831 8397500 : if (TREE_CODE (expr) == SSA_NAME)
1832 : {
1833 7990474 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1834 7990474 : if (def_bb
1835 7990474 : && flow_bb_inside_loop_p (loop, def_bb))
1836 : return false;
1837 :
1838 4125548 : return true;
1839 : }
1840 :
1841 407026 : if (!EXPR_P (expr))
1842 : return false;
1843 :
1844 407023 : len = TREE_OPERAND_LENGTH (expr);
1845 875440 : for (i = 0; i < len; i++)
1846 517393 : if (TREE_OPERAND (expr, i)
1847 517393 : && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1848 : return false;
1849 :
1850 : return true;
1851 : }
1852 :
1853 : /* Given expression EXPR which computes inductive values with respect
1854 : to loop recorded in DATA, this function returns biv from which EXPR
1855 : is derived by tracing definition chains of ssa variables in EXPR. */
1856 :
1857 : static struct iv*
1858 870340 : find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1859 : {
1860 1406143 : struct iv *iv;
1861 1406143 : unsigned i, n;
1862 1406143 : tree e2, e1;
1863 1406143 : enum tree_code code;
1864 1406143 : gimple *stmt;
1865 :
1866 1406143 : if (expr == NULL_TREE)
1867 : return NULL;
1868 :
1869 1405830 : if (is_gimple_min_invariant (expr))
1870 : return NULL;
1871 :
1872 1122188 : code = TREE_CODE (expr);
1873 1122188 : if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1874 : {
1875 21730 : n = TREE_OPERAND_LENGTH (expr);
1876 23838 : for (i = 0; i < n; i++)
1877 : {
1878 23315 : iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1879 23315 : if (iv)
1880 : return iv;
1881 : }
1882 : }
1883 :
1884 : /* Stop if it's not ssa name. */
1885 1100981 : if (code != SSA_NAME)
1886 : return NULL;
1887 :
1888 1099840 : iv = get_iv (data, expr);
1889 1099840 : if (!iv || integer_zerop (iv->step))
1890 46662 : return NULL;
1891 1053178 : else if (iv->biv_p)
1892 : return iv;
1893 :
1894 782658 : stmt = SSA_NAME_DEF_STMT (expr);
1895 782658 : if (gphi *phi = dyn_cast <gphi *> (stmt))
1896 : {
1897 1874 : ssa_op_iter iter;
1898 1874 : use_operand_p use_p;
1899 1874 : basic_block phi_bb = gimple_bb (phi);
1900 :
1901 : /* Skip loop header PHI that doesn't define biv. */
1902 1874 : if (phi_bb->loop_father == data->current_loop)
1903 : return NULL;
1904 :
1905 0 : if (virtual_operand_p (gimple_phi_result (phi)))
1906 : return NULL;
1907 :
1908 0 : FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1909 : {
1910 0 : tree use = USE_FROM_PTR (use_p);
1911 0 : iv = find_deriving_biv_for_expr (data, use);
1912 0 : if (iv)
1913 : return iv;
1914 : }
1915 : return NULL;
1916 : }
1917 780784 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
1918 : return NULL;
1919 :
1920 780784 : e1 = gimple_assign_rhs1 (stmt);
1921 780784 : code = gimple_assign_rhs_code (stmt);
1922 780784 : if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1923 : return find_deriving_biv_for_expr (data, e1);
1924 :
1925 770673 : switch (code)
1926 : {
1927 573572 : case MULT_EXPR:
1928 573572 : case PLUS_EXPR:
1929 573572 : case MINUS_EXPR:
1930 573572 : case POINTER_PLUS_EXPR:
1931 : /* Increments, decrements and multiplications by a constant
1932 : are simple. */
1933 573572 : e2 = gimple_assign_rhs2 (stmt);
1934 573572 : iv = find_deriving_biv_for_expr (data, e2);
1935 573572 : if (iv)
1936 : return iv;
1937 525692 : gcc_fallthrough ();
1938 :
1939 525692 : CASE_CONVERT:
1940 : /* Casts are simple. */
1941 525692 : return find_deriving_biv_for_expr (data, e1);
1942 :
1943 : default:
1944 : break;
1945 : }
1946 :
1947 : return NULL;
1948 : }
1949 :
1950 : /* Record BIV, its predecessor and successor that they are used in
1951 : address type uses. */
1952 :
1953 : static void
1954 601938 : record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1955 : {
1956 601938 : unsigned i;
1957 601938 : tree type, base_1, base_2;
1958 601938 : bitmap_iterator bi;
1959 :
1960 599005 : if (!biv || !biv->biv_p || integer_zerop (biv->step)
1961 1200943 : || biv->have_address_use || !biv->no_overflow)
1962 336659 : return;
1963 :
1964 533479 : type = TREE_TYPE (biv->base);
1965 533479 : if (!INTEGRAL_TYPE_P (type))
1966 : return;
1967 :
1968 265279 : biv->have_address_use = true;
1969 265279 : data->bivs_not_used_in_addr--;
1970 265279 : base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1971 2442507 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1972 : {
1973 2177228 : struct iv *iv = ver_info (data, i)->iv;
1974 :
1975 1964081 : if (!iv || !iv->biv_p || integer_zerop (iv->step)
1976 3081281 : || iv->have_address_use || !iv->no_overflow)
1977 1884228 : continue;
1978 :
1979 293000 : if (type != TREE_TYPE (iv->base)
1980 293000 : || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1981 31263 : continue;
1982 :
1983 261737 : if (!operand_equal_p (biv->step, iv->step, 0))
1984 5794 : continue;
1985 :
1986 255943 : base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1987 255943 : if (operand_equal_p (base_1, iv->base, 0)
1988 255943 : || operand_equal_p (base_2, biv->base, 0))
1989 : {
1990 229344 : iv->have_address_use = true;
1991 229344 : data->bivs_not_used_in_addr--;
1992 : }
1993 : }
1994 : }
1995 :
1996 : /* Cumulates the steps of indices into DATA and replaces their values with the
1997 : initial ones. Returns false when the value of the index cannot be determined.
1998 : Callback for for_each_index. */
1999 :
2000 : struct ifs_ivopts_data
2001 : {
2002 : struct ivopts_data *ivopts_data;
2003 : gimple *stmt;
2004 : tree step;
2005 : };
2006 :
2007 : static bool
2008 2231809 : idx_find_step (tree base, tree *idx, void *data)
2009 : {
2010 2231809 : struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2011 2231809 : struct iv *iv;
2012 2231809 : bool use_overflow_semantics = false;
2013 2231809 : tree step, iv_base, iv_step, lbound, off;
2014 2231809 : class loop *loop = dta->ivopts_data->current_loop;
2015 :
2016 : /* If base is a component ref, require that the offset of the reference
2017 : be invariant. */
2018 2231809 : if (TREE_CODE (base) == COMPONENT_REF)
2019 : {
2020 78 : off = component_ref_field_offset (base);
2021 78 : return expr_invariant_in_loop_p (loop, off);
2022 : }
2023 :
2024 : /* If base is array, first check whether we will be able to move the
2025 : reference out of the loop (in order to take its address in strength
2026 : reduction). In order for this to work we need both lower bound
2027 : and step to be loop invariants. */
2028 2231731 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2029 : {
2030 : /* Moreover, for a range, the size needs to be invariant as well. */
2031 520367 : if (TREE_CODE (base) == ARRAY_RANGE_REF
2032 520367 : && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2033 : return false;
2034 :
2035 520367 : step = array_ref_element_size (base);
2036 520367 : lbound = array_ref_low_bound (base);
2037 :
2038 520367 : if (!expr_invariant_in_loop_p (loop, step)
2039 520367 : || !expr_invariant_in_loop_p (loop, lbound))
2040 3150 : return false;
2041 : }
2042 :
2043 2228581 : if (TREE_CODE (*idx) != SSA_NAME)
2044 : return true;
2045 :
2046 1807959 : iv = get_iv (dta->ivopts_data, *idx);
2047 1807959 : if (!iv)
2048 : return false;
2049 :
2050 : /* XXX We produce for a base of *D42 with iv->base being &x[0]
2051 : *&x[0], which is not folded and does not trigger the
2052 : ARRAY_REF path below. */
2053 1172054 : *idx = iv->base;
2054 :
2055 1172054 : if (integer_zerop (iv->step))
2056 : return true;
2057 :
2058 879884 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2059 : {
2060 307555 : step = array_ref_element_size (base);
2061 :
2062 : /* We only handle addresses whose step is an integer constant. */
2063 307555 : if (TREE_CODE (step) != INTEGER_CST)
2064 : return false;
2065 : }
2066 : else
2067 : /* The step for pointer arithmetics already is 1 byte. */
2068 572329 : step = size_one_node;
2069 :
2070 879867 : iv_base = iv->base;
2071 879867 : iv_step = iv->step;
2072 879867 : if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2073 : use_overflow_semantics = true;
2074 :
2075 879867 : if (!convert_affine_scev (dta->ivopts_data->current_loop,
2076 : sizetype, &iv_base, &iv_step, dta->stmt,
2077 : use_overflow_semantics))
2078 : {
2079 : /* The index might wrap. */
2080 : return false;
2081 : }
2082 :
2083 876550 : step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2084 876550 : dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2085 :
2086 876550 : if (dta->ivopts_data->bivs_not_used_in_addr)
2087 : {
2088 601938 : if (!iv->biv_p)
2089 273453 : iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2090 :
2091 601938 : record_biv_for_address_use (dta->ivopts_data, iv);
2092 : }
2093 : return true;
2094 : }
2095 :
2096 : /* Records use in index IDX. Callback for for_each_index. Ivopts data
2097 : object is passed to it in DATA. */
2098 :
2099 : static bool
2100 1820366 : idx_record_use (tree base, tree *idx,
2101 : void *vdata)
2102 : {
2103 1820366 : struct ivopts_data *data = (struct ivopts_data *) vdata;
2104 1820366 : find_interesting_uses_op (data, *idx);
2105 1820366 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2106 : {
2107 230297 : if (TREE_OPERAND (base, 2))
2108 5502 : find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2109 230297 : if (TREE_OPERAND (base, 3))
2110 16796 : find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2111 : }
2112 1820366 : return true;
2113 : }
2114 :
2115 : /* If we can prove that TOP = cst * BOT for some constant cst,
2116 : store cst to MUL and return true. Otherwise return false.
2117 : The returned value is always sign-extended, regardless of the
2118 : signedness of TOP and BOT. */
2119 :
2120 : static bool
2121 17298667 : constant_multiple_of (tree top, tree bot, widest_int *mul,
2122 : struct ivopts_data *data)
2123 : {
2124 34597334 : aff_tree aff_top, aff_bot;
2125 17298667 : tree_to_aff_combination_expand (top, TREE_TYPE (top), &aff_top,
2126 : &data->name_expansion_cache);
2127 17298667 : tree_to_aff_combination_expand (bot, TREE_TYPE (bot), &aff_bot,
2128 : &data->name_expansion_cache);
2129 :
2130 17298667 : poly_widest_int poly_mul;
2131 17298667 : if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul)
2132 17298667 : && poly_mul.is_constant (mul))
2133 14356154 : return true;
2134 :
2135 : return false;
2136 17298667 : }
2137 :
2138 : /* Return true if memory reference REF with step STEP may be unaligned. */
2139 :
2140 : static bool
2141 0 : may_be_unaligned_p (tree ref, tree step)
2142 : {
2143 : /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2144 : thus they are not misaligned. */
2145 0 : if (TREE_CODE (ref) == TARGET_MEM_REF)
2146 : return false;
2147 :
2148 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2149 0 : if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2150 0 : align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2151 :
2152 0 : unsigned HOST_WIDE_INT bitpos;
2153 0 : unsigned int ref_align;
2154 0 : get_object_alignment_1 (ref, &ref_align, &bitpos);
2155 0 : if (ref_align < align
2156 0 : || (bitpos % align) != 0
2157 0 : || (bitpos % BITS_PER_UNIT) != 0)
2158 : return true;
2159 :
2160 0 : unsigned int trailing_zeros = tree_ctz (step);
2161 0 : if (trailing_zeros < HOST_BITS_PER_INT
2162 0 : && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2163 : return true;
2164 :
2165 : return false;
2166 : }
2167 :
2168 : /* Return true if EXPR may be non-addressable. */
2169 :
2170 : bool
2171 13032046 : may_be_nonaddressable_p (tree expr)
2172 : {
2173 13913122 : switch (TREE_CODE (expr))
2174 : {
2175 9272176 : case VAR_DECL:
2176 : /* Check if it's a register variable. */
2177 9272176 : return DECL_HARD_REGISTER (expr);
2178 :
2179 : case TARGET_MEM_REF:
2180 : /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2181 : target, thus they are always addressable. */
2182 : return false;
2183 :
2184 1939893 : case MEM_REF:
2185 : /* Likewise for MEM_REFs, modulo the storage order. */
2186 1939893 : return REF_REVERSE_STORAGE_ORDER (expr);
2187 :
2188 80 : case BIT_FIELD_REF:
2189 80 : if (REF_REVERSE_STORAGE_ORDER (expr))
2190 : return true;
2191 80 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2192 :
2193 1243800 : case COMPONENT_REF:
2194 1243800 : if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2195 : return true;
2196 1243800 : return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2197 1243800 : || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2198 :
2199 858881 : case ARRAY_REF:
2200 858881 : case ARRAY_RANGE_REF:
2201 858881 : if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2202 : return true;
2203 858881 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2204 :
2205 22125 : case VIEW_CONVERT_EXPR:
2206 : /* This kind of view-conversions may wrap non-addressable objects
2207 : and make them look addressable. After some processing the
2208 : non-addressability may be uncovered again, causing ADDR_EXPRs
2209 : of inappropriate objects to be built. */
2210 22125 : if (is_gimple_reg (TREE_OPERAND (expr, 0))
2211 22125 : || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2212 : return true;
2213 22115 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2214 :
2215 : CASE_CONVERT:
2216 : return true;
2217 :
2218 : default:
2219 : break;
2220 : }
2221 :
2222 : return false;
2223 : }
2224 :
2225 : /* Finds addresses in *OP_P inside STMT. */
2226 :
2227 : static void
2228 2711890 : find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2229 : tree *op_p)
2230 : {
2231 2711890 : tree base = *op_p, step = size_zero_node;
2232 2711890 : struct iv *civ;
2233 2711890 : struct ifs_ivopts_data ifs_ivopts_data;
2234 :
2235 : /* Do not play with volatile memory references. A bit too conservative,
2236 : perhaps, but safe. */
2237 5423780 : if (gimple_has_volatile_ops (stmt))
2238 7529 : goto fail;
2239 :
2240 : /* Ignore bitfields for now. Not really something terribly complicated
2241 : to handle. TODO. */
2242 2704361 : if (TREE_CODE (base) == BIT_FIELD_REF)
2243 95023 : goto fail;
2244 :
2245 2609338 : base = unshare_expr (base);
2246 :
2247 2609338 : if (TREE_CODE (base) == TARGET_MEM_REF)
2248 : {
2249 315807 : tree type = build_pointer_type (TREE_TYPE (base));
2250 315807 : tree astep;
2251 :
2252 315807 : if (TMR_BASE (base)
2253 315807 : && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2254 : {
2255 293625 : civ = get_iv (data, TMR_BASE (base));
2256 293625 : if (!civ)
2257 258354 : goto fail;
2258 :
2259 35271 : TMR_BASE (base) = civ->base;
2260 35271 : step = civ->step;
2261 : }
2262 57453 : if (TMR_INDEX2 (base)
2263 57453 : && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2264 : {
2265 14056 : civ = get_iv (data, TMR_INDEX2 (base));
2266 14056 : if (!civ)
2267 4981 : goto fail;
2268 :
2269 9075 : TMR_INDEX2 (base) = civ->base;
2270 9075 : step = civ->step;
2271 : }
2272 52472 : if (TMR_INDEX (base)
2273 52472 : && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2274 : {
2275 52472 : civ = get_iv (data, TMR_INDEX (base));
2276 52472 : if (!civ)
2277 52472 : goto fail;
2278 :
2279 0 : TMR_INDEX (base) = civ->base;
2280 0 : astep = civ->step;
2281 :
2282 0 : if (astep)
2283 : {
2284 0 : if (TMR_STEP (base))
2285 0 : astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2286 :
2287 0 : step = fold_build2 (PLUS_EXPR, type, step, astep);
2288 : }
2289 : }
2290 :
2291 0 : if (integer_zerop (step))
2292 0 : goto fail;
2293 0 : base = tree_mem_ref_addr (type, base);
2294 : }
2295 : else
2296 : {
2297 2293531 : ifs_ivopts_data.ivopts_data = data;
2298 2293531 : ifs_ivopts_data.stmt = stmt;
2299 2293531 : ifs_ivopts_data.step = size_zero_node;
2300 2293531 : if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2301 2293531 : || integer_zerop (ifs_ivopts_data.step))
2302 1418949 : goto fail;
2303 874582 : step = ifs_ivopts_data.step;
2304 :
2305 : /* Check that the base expression is addressable. This needs
2306 : to be done after substituting bases of IVs into it. */
2307 874582 : if (may_be_nonaddressable_p (base))
2308 770 : goto fail;
2309 :
2310 : /* Moreover, on strict alignment platforms, check that it is
2311 : sufficiently aligned. */
2312 873812 : if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2313 : goto fail;
2314 :
2315 873812 : base = build_fold_addr_expr (base);
2316 :
2317 : /* Substituting bases of IVs into the base expression might
2318 : have caused folding opportunities. */
2319 873812 : if (TREE_CODE (base) == ADDR_EXPR)
2320 : {
2321 469230 : tree *ref = &TREE_OPERAND (base, 0);
2322 1606016 : while (handled_component_p (*ref))
2323 667556 : ref = &TREE_OPERAND (*ref, 0);
2324 469230 : if (TREE_CODE (*ref) == MEM_REF)
2325 : {
2326 310127 : tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2327 : TREE_OPERAND (*ref, 0),
2328 : TREE_OPERAND (*ref, 1));
2329 310127 : if (tem)
2330 0 : *ref = tem;
2331 : }
2332 : }
2333 : }
2334 :
2335 873812 : civ = alloc_iv (data, base, step);
2336 : /* Fail if base object of this memory reference is unknown. */
2337 873812 : if (civ->base_object == NULL_TREE)
2338 11216 : goto fail;
2339 :
2340 862596 : record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2341 862596 : return;
2342 :
2343 1849294 : fail:
2344 1849294 : for_each_index (op_p, idx_record_use, data);
2345 : }
2346 :
2347 : /* Finds and records invariants used in STMT. */
2348 :
2349 : static void
2350 15404171 : find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2351 : {
2352 15404171 : ssa_op_iter iter;
2353 15404171 : use_operand_p use_p;
2354 15404171 : tree op;
2355 :
2356 51306671 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2357 : {
2358 20498329 : op = USE_FROM_PTR (use_p);
2359 20498329 : record_invariant (data, op, false);
2360 : }
2361 15404171 : }
2362 :
2363 : /* CALL calls an internal function. If operand *OP_P will become an
2364 : address when the call is expanded, return the type of the memory
2365 : being addressed, otherwise return null. */
2366 :
2367 : static tree
2368 1768 : get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2369 : {
2370 1768 : switch (gimple_call_internal_fn (call))
2371 : {
2372 371 : case IFN_MASK_LOAD:
2373 371 : case IFN_MASK_LOAD_LANES:
2374 371 : case IFN_MASK_LEN_LOAD_LANES:
2375 371 : case IFN_LEN_LOAD:
2376 371 : case IFN_MASK_LEN_LOAD:
2377 371 : if (op_p == gimple_call_arg_ptr (call, 0))
2378 371 : return TREE_TYPE (gimple_call_lhs (call));
2379 : return NULL_TREE;
2380 :
2381 424 : case IFN_MASK_STORE:
2382 424 : case IFN_MASK_STORE_LANES:
2383 424 : case IFN_MASK_LEN_STORE_LANES:
2384 424 : case IFN_LEN_STORE:
2385 424 : case IFN_MASK_LEN_STORE:
2386 424 : {
2387 424 : if (op_p == gimple_call_arg_ptr (call, 0))
2388 : {
2389 424 : internal_fn ifn = gimple_call_internal_fn (call);
2390 424 : int index = internal_fn_stored_value_index (ifn);
2391 424 : return TREE_TYPE (gimple_call_arg (call, index));
2392 : }
2393 : return NULL_TREE;
2394 : }
2395 :
2396 : default:
2397 : return NULL_TREE;
2398 : }
2399 : }
2400 :
2401 : /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2402 : Return true if the operand will become an address when STMT
2403 : is expanded and record the associated address use if so. */
2404 :
2405 : static bool
2406 1702501 : find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2407 : struct iv *iv)
2408 : {
2409 : /* Fail if base object of this memory reference is unknown. */
2410 1702501 : if (iv->base_object == NULL_TREE)
2411 : return false;
2412 :
2413 644622 : tree mem_type = NULL_TREE;
2414 644622 : if (gcall *call = dyn_cast <gcall *> (stmt))
2415 123235 : if (gimple_call_internal_p (call))
2416 1768 : mem_type = get_mem_type_for_internal_fn (call, op_p);
2417 1768 : if (mem_type)
2418 : {
2419 795 : iv = alloc_iv (data, iv->base, iv->step);
2420 795 : record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2421 795 : return true;
2422 : }
2423 : return false;
2424 : }
2425 :
2426 : /* Finds interesting uses of induction variables in the statement STMT. */
2427 :
2428 : static void
2429 15404171 : find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2430 : {
2431 15404171 : struct iv *iv;
2432 15404171 : tree op, *lhs, *rhs;
2433 15404171 : ssa_op_iter iter;
2434 15404171 : use_operand_p use_p;
2435 15404171 : enum tree_code code;
2436 :
2437 15404171 : find_invariants_stmt (data, stmt);
2438 :
2439 15404171 : if (gimple_code (stmt) == GIMPLE_COND)
2440 : {
2441 1463757 : find_interesting_uses_cond (data, stmt);
2442 9033132 : return;
2443 : }
2444 :
2445 13940414 : if (is_gimple_assign (stmt))
2446 : {
2447 10515834 : lhs = gimple_assign_lhs_ptr (stmt);
2448 10515834 : rhs = gimple_assign_rhs1_ptr (stmt);
2449 :
2450 10515834 : if (TREE_CODE (*lhs) == SSA_NAME)
2451 : {
2452 : /* If the statement defines an induction variable, the uses are not
2453 : interesting by themselves. */
2454 :
2455 9403757 : iv = get_iv (data, *lhs);
2456 :
2457 9403757 : if (iv && !integer_zerop (iv->step))
2458 : return;
2459 : }
2460 :
2461 8176274 : code = gimple_assign_rhs_code (stmt);
2462 8176274 : if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2463 8176274 : && (REFERENCE_CLASS_P (*rhs)
2464 1259333 : || is_gimple_val (*rhs)))
2465 : {
2466 2810862 : if (REFERENCE_CLASS_P (*rhs))
2467 1749301 : find_interesting_uses_address (data, stmt, rhs);
2468 : else
2469 1061561 : find_interesting_uses_op (data, *rhs);
2470 :
2471 2810862 : if (REFERENCE_CLASS_P (*lhs))
2472 962589 : find_interesting_uses_address (data, stmt, lhs);
2473 2810862 : return;
2474 : }
2475 5365412 : else if (TREE_CODE_CLASS (code) == tcc_comparison)
2476 : {
2477 84389 : find_interesting_uses_cond (data, stmt);
2478 84389 : return;
2479 : }
2480 :
2481 : /* TODO -- we should also handle address uses of type
2482 :
2483 : memory = call (whatever);
2484 :
2485 : and
2486 :
2487 : call (memory). */
2488 : }
2489 :
2490 8705603 : if (gimple_code (stmt) == GIMPLE_PHI
2491 8705603 : && gimple_bb (stmt) == data->current_loop->header)
2492 : {
2493 1447490 : iv = get_iv (data, PHI_RESULT (stmt));
2494 :
2495 1447490 : if (iv && !integer_zerop (iv->step))
2496 : return;
2497 : }
2498 :
2499 26324792 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2500 : {
2501 10655200 : op = USE_FROM_PTR (use_p);
2502 :
2503 10655200 : if (TREE_CODE (op) != SSA_NAME)
2504 518278 : continue;
2505 :
2506 10136922 : iv = get_iv (data, op);
2507 10136922 : if (!iv)
2508 8434421 : continue;
2509 :
2510 1702501 : if (!find_address_like_use (data, stmt, use_p->use, iv))
2511 1701706 : find_interesting_uses_op (data, op);
2512 : }
2513 : }
2514 :
2515 : /* Finds interesting uses of induction variables outside of loops
2516 : on loop exit edge EXIT. */
2517 :
2518 : static void
2519 890508 : find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2520 : {
2521 890508 : gphi *phi;
2522 890508 : gphi_iterator psi;
2523 890508 : tree def;
2524 :
2525 1987717 : for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2526 : {
2527 1097209 : phi = psi.phi ();
2528 1097209 : def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2529 2103832 : if (!virtual_operand_p (def))
2530 540445 : find_interesting_uses_op (data, def);
2531 : }
2532 890508 : }
2533 :
2534 : /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2535 : mode for memory reference represented by USE. */
2536 :
2537 : static GTY (()) vec<rtx, va_gc> *addr_list;
2538 :
2539 : static bool
2540 224561 : addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2541 : {
2542 224561 : rtx reg, addr;
2543 224561 : unsigned list_index;
2544 224561 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2545 224561 : machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2546 :
2547 224561 : list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2548 224561 : if (list_index >= vec_safe_length (addr_list))
2549 10180 : vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2550 :
2551 224561 : addr = (*addr_list)[list_index];
2552 224561 : if (!addr)
2553 : {
2554 13294 : addr_mode = targetm.addr_space.address_mode (as);
2555 13294 : reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2556 13294 : addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2557 13294 : (*addr_list)[list_index] = addr;
2558 : }
2559 : else
2560 211267 : addr_mode = GET_MODE (addr);
2561 :
2562 224561 : XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2563 224561 : return (memory_address_addr_space_p (mem_mode, addr, as));
2564 : }
2565 :
2566 : /* Comparison function to sort group in ascending order of addr_offset. */
2567 :
2568 : static int
2569 3293880 : group_compare_offset (const void *a, const void *b)
2570 : {
2571 3293880 : const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2572 3293880 : const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2573 :
2574 3293880 : return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2575 : }
2576 :
2577 : /* Check if small groups should be split. Return true if no group
2578 : contains more than two uses with distinct addr_offsets. Return
2579 : false otherwise. We want to split such groups because:
2580 :
2581 : 1) Small groups don't have much benefit and may interfer with
2582 : general candidate selection.
2583 : 2) Size for problem with only small groups is usually small and
2584 : general algorithm can handle it well.
2585 :
2586 : TODO -- Above claim may not hold when we want to merge memory
2587 : accesses with conseuctive addresses. */
2588 :
2589 : static bool
2590 500816 : split_small_address_groups_p (struct ivopts_data *data)
2591 : {
2592 500816 : unsigned int i, j, distinct = 1;
2593 500816 : struct iv_use *pre;
2594 500816 : struct iv_group *group;
2595 :
2596 2093026 : for (i = 0; i < data->vgroups.length (); i++)
2597 : {
2598 1592210 : group = data->vgroups[i];
2599 1592210 : if (group->vuses.length () == 1)
2600 1453661 : continue;
2601 :
2602 138549 : gcc_assert (address_p (group->type));
2603 138549 : if (group->vuses.length () == 2)
2604 : {
2605 78874 : if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2606 78874 : group->vuses[1]->addr_offset) > 0)
2607 19396 : std::swap (group->vuses[0], group->vuses[1]);
2608 : }
2609 : else
2610 59675 : group->vuses.qsort (group_compare_offset);
2611 :
2612 138549 : if (distinct > 2)
2613 14160 : continue;
2614 :
2615 124389 : distinct = 1;
2616 1783595 : for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2617 : {
2618 191385 : if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2619 : {
2620 134094 : pre = group->vuses[j];
2621 134094 : distinct++;
2622 : }
2623 :
2624 191385 : if (distinct > 2)
2625 : break;
2626 : }
2627 : }
2628 :
2629 500816 : return (distinct <= 2);
2630 : }
2631 :
2632 : /* For each group of address type uses, this function further groups
2633 : these uses according to the maximum offset supported by target's
2634 : [base + offset] addressing mode. */
2635 :
2636 : static void
2637 500816 : split_address_groups (struct ivopts_data *data)
2638 : {
2639 500816 : unsigned int i, j;
2640 : /* Always split group. */
2641 500816 : bool split_p = split_small_address_groups_p (data);
2642 :
2643 2146231 : for (i = 0; i < data->vgroups.length (); i++)
2644 : {
2645 1645415 : struct iv_group *new_group = NULL;
2646 1645415 : struct iv_group *group = data->vgroups[i];
2647 1645415 : struct iv_use *use = group->vuses[0];
2648 :
2649 1645415 : use->id = 0;
2650 1645415 : use->group_id = group->id;
2651 1645415 : if (group->vuses.length () == 1)
2652 1501298 : continue;
2653 :
2654 144117 : gcc_assert (address_p (use->type));
2655 :
2656 1986738 : for (j = 1; j < group->vuses.length ();)
2657 : {
2658 341323 : struct iv_use *next = group->vuses[j];
2659 341323 : poly_int64 offset = next->addr_offset - use->addr_offset;
2660 :
2661 : /* Split group if aksed to, or the offset against the first
2662 : use can't fit in offset part of addressing mode. IV uses
2663 : having the same offset are still kept in one group. */
2664 400579 : if (maybe_ne (offset, 0)
2665 341323 : && (split_p || !addr_offset_valid_p (use, offset)))
2666 : {
2667 59256 : if (!new_group)
2668 53205 : new_group = record_group (data, group->type);
2669 59256 : group->vuses.ordered_remove (j);
2670 59256 : new_group->vuses.safe_push (next);
2671 59256 : continue;
2672 : }
2673 :
2674 282067 : next->id = j;
2675 282067 : next->group_id = group->id;
2676 282067 : j++;
2677 : }
2678 : }
2679 500816 : }
2680 :
2681 : /* Finds uses of the induction variables that are interesting. */
2682 :
2683 : static void
2684 500816 : find_interesting_uses (struct ivopts_data *data, basic_block *body)
2685 : {
2686 500816 : basic_block bb;
2687 500816 : gimple_stmt_iterator bsi;
2688 500816 : unsigned i;
2689 500816 : edge e;
2690 :
2691 3315144 : for (i = 0; i < data->current_loop->num_nodes; i++)
2692 : {
2693 2814328 : edge_iterator ei;
2694 2814328 : bb = body[i];
2695 :
2696 7180813 : FOR_EACH_EDGE (e, ei, bb->succs)
2697 4366485 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2698 4366485 : && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2699 890508 : find_interesting_uses_outside (data, e);
2700 :
2701 5668165 : for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2702 2853837 : find_interesting_uses_stmt (data, gsi_stmt (bsi));
2703 27525498 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2704 21896842 : if (!is_gimple_debug (gsi_stmt (bsi)))
2705 12550334 : find_interesting_uses_stmt (data, gsi_stmt (bsi));
2706 : }
2707 :
2708 500816 : split_address_groups (data);
2709 :
2710 500816 : if (dump_file && (dump_flags & TDF_DETAILS))
2711 : {
2712 67 : fprintf (dump_file, "\n<IV Groups>:\n");
2713 67 : dump_groups (dump_file, data);
2714 67 : fprintf (dump_file, "\n");
2715 : }
2716 500816 : }
2717 :
2718 : /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2719 : is true, assume we are inside an address. If TOP_COMPREF is true, assume
2720 : we are at the top-level of the processed address. */
2721 :
2722 : static tree
2723 3391068 : strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2724 : poly_int64 *offset)
2725 : {
2726 3391068 : tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2727 3391068 : enum tree_code code;
2728 3391068 : tree type, orig_type = TREE_TYPE (expr);
2729 3391068 : poly_int64 off0, off1;
2730 3391068 : HOST_WIDE_INT st;
2731 3391068 : tree orig_expr = expr;
2732 :
2733 3391068 : STRIP_NOPS (expr);
2734 :
2735 3391068 : type = TREE_TYPE (expr);
2736 3391068 : code = TREE_CODE (expr);
2737 3391068 : *offset = 0;
2738 :
2739 3391068 : switch (code)
2740 : {
2741 622740 : case POINTER_PLUS_EXPR:
2742 622740 : case PLUS_EXPR:
2743 622740 : case MINUS_EXPR:
2744 622740 : op0 = TREE_OPERAND (expr, 0);
2745 622740 : op1 = TREE_OPERAND (expr, 1);
2746 :
2747 622740 : op0 = strip_offset_1 (op0, false, false, &off0);
2748 622740 : op1 = strip_offset_1 (op1, false, false, &off1);
2749 :
2750 622740 : *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2751 622740 : if (op0 == TREE_OPERAND (expr, 0)
2752 622740 : && op1 == TREE_OPERAND (expr, 1))
2753 : return orig_expr;
2754 :
2755 384623 : if (integer_zerop (op1))
2756 : expr = op0;
2757 3181 : else if (integer_zerop (op0))
2758 : {
2759 602 : if (code == MINUS_EXPR)
2760 : {
2761 602 : if (TYPE_OVERFLOW_UNDEFINED (type))
2762 : {
2763 0 : type = unsigned_type_for (type);
2764 0 : op1 = fold_convert (type, op1);
2765 : }
2766 602 : expr = fold_build1 (NEGATE_EXPR, type, op1);
2767 : }
2768 : else
2769 : expr = op1;
2770 : }
2771 : else
2772 : {
2773 2579 : if (TYPE_OVERFLOW_UNDEFINED (type))
2774 : {
2775 0 : type = unsigned_type_for (type);
2776 0 : if (code == POINTER_PLUS_EXPR)
2777 0 : code = PLUS_EXPR;
2778 0 : op0 = fold_convert (type, op0);
2779 0 : op1 = fold_convert (type, op1);
2780 : }
2781 2579 : expr = fold_build2 (code, type, op0, op1);
2782 : }
2783 :
2784 384623 : return fold_convert (orig_type, expr);
2785 :
2786 222418 : case MULT_EXPR:
2787 222418 : op1 = TREE_OPERAND (expr, 1);
2788 222418 : if (!cst_and_fits_in_hwi (op1))
2789 : return orig_expr;
2790 :
2791 182712 : op0 = TREE_OPERAND (expr, 0);
2792 182712 : op0 = strip_offset_1 (op0, false, false, &off0);
2793 182712 : if (op0 == TREE_OPERAND (expr, 0))
2794 : return orig_expr;
2795 :
2796 7246 : *offset = off0 * int_cst_value (op1);
2797 7246 : if (integer_zerop (op0))
2798 : expr = op0;
2799 : else
2800 : {
2801 7246 : if (TYPE_OVERFLOW_UNDEFINED (type))
2802 : {
2803 0 : type = unsigned_type_for (type);
2804 0 : op0 = fold_convert (type, op0);
2805 0 : op1 = fold_convert (type, op1);
2806 : }
2807 7246 : expr = fold_build2 (MULT_EXPR, type, op0, op1);
2808 : }
2809 :
2810 7246 : return fold_convert (orig_type, expr);
2811 :
2812 11 : case ARRAY_REF:
2813 11 : case ARRAY_RANGE_REF:
2814 11 : if (!inside_addr)
2815 : return orig_expr;
2816 :
2817 11 : step = array_ref_element_size (expr);
2818 11 : if (!cst_and_fits_in_hwi (step))
2819 : break;
2820 :
2821 11 : st = int_cst_value (step);
2822 11 : op1 = TREE_OPERAND (expr, 1);
2823 11 : op1 = strip_offset_1 (op1, false, false, &off1);
2824 11 : *offset = off1 * st;
2825 :
2826 11 : if (top_compref
2827 11 : && integer_zerop (op1))
2828 : {
2829 : /* Strip the component reference completely. */
2830 9 : op0 = TREE_OPERAND (expr, 0);
2831 9 : op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2832 9 : *offset += off0;
2833 9 : return op0;
2834 : }
2835 : break;
2836 :
2837 1 : case COMPONENT_REF:
2838 1 : {
2839 1 : tree field;
2840 :
2841 1 : if (!inside_addr)
2842 : return orig_expr;
2843 :
2844 1 : tmp = component_ref_field_offset (expr);
2845 1 : field = TREE_OPERAND (expr, 1);
2846 1 : if (top_compref
2847 1 : && cst_and_fits_in_hwi (tmp)
2848 2 : && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2849 : {
2850 1 : HOST_WIDE_INT boffset, abs_off;
2851 :
2852 : /* Strip the component reference completely. */
2853 1 : op0 = TREE_OPERAND (expr, 0);
2854 1 : op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2855 1 : boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2856 1 : abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2857 1 : if (boffset < 0)
2858 0 : abs_off = -abs_off;
2859 :
2860 1 : *offset = off0 + int_cst_value (tmp) + abs_off;
2861 1 : return op0;
2862 : }
2863 : }
2864 : break;
2865 :
2866 317609 : case ADDR_EXPR:
2867 317609 : op0 = TREE_OPERAND (expr, 0);
2868 317609 : op0 = strip_offset_1 (op0, true, true, &off0);
2869 317609 : *offset += off0;
2870 :
2871 317609 : if (op0 == TREE_OPERAND (expr, 0))
2872 : return orig_expr;
2873 :
2874 10 : expr = build_fold_addr_expr (op0);
2875 10 : return fold_convert (orig_type, expr);
2876 :
2877 : case MEM_REF:
2878 : /* ??? Offset operand? */
2879 : inside_addr = false;
2880 : break;
2881 :
2882 2228287 : default:
2883 2228287 : if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2884 867071 : return build_int_cst (orig_type, 0);
2885 : return orig_expr;
2886 : }
2887 :
2888 : /* Default handling of expressions for that we want to recurse into
2889 : the first operand. */
2890 4 : op0 = TREE_OPERAND (expr, 0);
2891 4 : op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2892 4 : *offset += off0;
2893 :
2894 4 : if (op0 == TREE_OPERAND (expr, 0)
2895 4 : && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2896 : return orig_expr;
2897 :
2898 1 : expr = copy_node (expr);
2899 1 : TREE_OPERAND (expr, 0) = op0;
2900 1 : if (op1)
2901 1 : TREE_OPERAND (expr, 1) = op1;
2902 :
2903 : /* Inside address, we might strip the top level component references,
2904 : thus changing type of the expression. Handling of ADDR_EXPR
2905 : will fix that. */
2906 1 : expr = fold_convert (orig_type, expr);
2907 :
2908 1 : return expr;
2909 : }
2910 :
2911 : /* Strips constant offsets from EXPR and stores them to OFFSET. */
2912 :
2913 : static tree
2914 1645242 : strip_offset (tree expr, poly_uint64 *offset)
2915 : {
2916 1645242 : poly_int64 off;
2917 1645242 : tree core = strip_offset_1 (expr, false, false, &off);
2918 1645242 : *offset = off;
2919 1645242 : return core;
2920 : }
2921 :
2922 : /* Returns variant of TYPE that can be used as base for different uses.
2923 : We return unsigned type with the same precision, which avoids problems
2924 : with overflows. */
2925 :
2926 : static tree
2927 8038407 : generic_type_for (tree type)
2928 : {
2929 8038407 : if (POINTER_TYPE_P (type))
2930 1427762 : return unsigned_type_for (type);
2931 :
2932 6610645 : if (TYPE_UNSIGNED (type))
2933 : return type;
2934 :
2935 3097620 : return unsigned_type_for (type);
2936 : }
2937 :
2938 : /* Private data for walk_tree. */
2939 :
2940 : struct walk_tree_data
2941 : {
2942 : bitmap *inv_vars;
2943 : struct ivopts_data *idata;
2944 : };
2945 :
2946 : /* Callback function for walk_tree, it records invariants and symbol
2947 : reference in *EXPR_P. DATA is the structure storing result info. */
2948 :
2949 : static tree
2950 34337994 : find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2951 : {
2952 34337994 : tree op = *expr_p;
2953 34337994 : struct version_info *info;
2954 34337994 : struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2955 :
2956 34337994 : if (TREE_CODE (op) != SSA_NAME)
2957 : return NULL_TREE;
2958 :
2959 8044208 : info = name_info (wdata->idata, op);
2960 : /* Because we expand simple operations when finding IVs, loop invariant
2961 : variable that isn't referred by the original loop could be used now.
2962 : Record such invariant variables here. */
2963 8044208 : if (!info->iv)
2964 : {
2965 388263 : struct ivopts_data *idata = wdata->idata;
2966 388263 : basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2967 :
2968 388263 : if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2969 : {
2970 388263 : tree steptype = TREE_TYPE (op);
2971 388263 : if (POINTER_TYPE_P (steptype))
2972 196333 : steptype = sizetype;
2973 388263 : set_iv (idata, op, op, build_int_cst (steptype, 0), true);
2974 388263 : record_invariant (idata, op, false);
2975 : }
2976 : }
2977 8044208 : if (!info->inv_id || info->has_nonlin_use)
2978 : return NULL_TREE;
2979 :
2980 6679142 : if (!*wdata->inv_vars)
2981 5176561 : *wdata->inv_vars = BITMAP_ALLOC (NULL);
2982 6679142 : bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2983 :
2984 6679142 : return NULL_TREE;
2985 : }
2986 :
2987 : /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
2988 : store it. */
2989 :
2990 : static inline void
2991 27797094 : find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2992 : {
2993 27797094 : struct walk_tree_data wdata;
2994 :
2995 27797094 : if (!inv_vars)
2996 11835489 : return;
2997 :
2998 15961605 : wdata.idata = data;
2999 15961605 : wdata.inv_vars = inv_vars;
3000 15961605 : walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3001 : }
3002 :
3003 : /* Get entry from invariant expr hash table for INV_EXPR. New entry
3004 : will be recorded if it doesn't exist yet. Given below two exprs:
3005 : inv_expr + cst1, inv_expr + cst2
3006 : It's hard to make decision whether constant part should be stripped
3007 : or not. We choose to not strip based on below facts:
3008 : 1) We need to count ADD cost for constant part if it's stripped,
3009 : which isn't always trivial where this functions is called.
3010 : 2) Stripping constant away may be conflict with following loop
3011 : invariant hoisting pass.
3012 : 3) Not stripping constant away results in more invariant exprs,
3013 : which usually leads to decision preferring lower reg pressure. */
3014 :
3015 : static iv_inv_expr_ent *
3016 2644014 : get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3017 : {
3018 2644014 : STRIP_NOPS (inv_expr);
3019 :
3020 2644014 : if (poly_int_tree_p (inv_expr)
3021 2644014 : || TREE_CODE (inv_expr) == SSA_NAME)
3022 : return NULL;
3023 :
3024 : /* Don't strip constant part away as we used to. */
3025 :
3026 : /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3027 2554915 : struct iv_inv_expr_ent ent;
3028 2554915 : ent.expr = inv_expr;
3029 2554915 : ent.hash = iterative_hash_expr (inv_expr, 0);
3030 2554915 : struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3031 :
3032 2554915 : if (!*slot)
3033 : {
3034 1147134 : *slot = XNEW (struct iv_inv_expr_ent);
3035 1147134 : (*slot)->expr = inv_expr;
3036 1147134 : (*slot)->hash = ent.hash;
3037 1147134 : (*slot)->id = ++data->max_inv_expr_id;
3038 : }
3039 :
3040 2554915 : return *slot;
3041 : }
3042 :
3043 :
3044 : /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3045 : unsuitable as ivopts candidates for potentially involving undefined
3046 : behavior. */
3047 :
3048 : static tree
3049 15343424 : find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3050 : {
3051 15343424 : basic_block bb = (basic_block) bb_;
3052 15343424 : if (TREE_CODE (*tp) == SSA_NAME
3053 2247819 : && ssa_name_maybe_undef_p (*tp)
3054 15352057 : && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3055 3094 : return *tp;
3056 15340330 : if (!EXPR_P (*tp))
3057 10387002 : *walk_subtrees = 0;
3058 : return NULL;
3059 : }
3060 :
3061 : /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3062 : position to POS. If USE is not NULL, the candidate is set as related to
3063 : it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3064 : replacement of the final value of the iv by a direct computation. */
3065 :
3066 : static struct iv_cand *
3067 9013051 : add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3068 : enum iv_position pos, struct iv_use *use,
3069 : gimple *incremented_at, struct iv *orig_iv = NULL,
3070 : bool doloop = false)
3071 : {
3072 9013051 : unsigned i;
3073 9013051 : struct iv_cand *cand = NULL;
3074 9013051 : tree type, orig_type;
3075 :
3076 9013051 : gcc_assert (base && step);
3077 :
3078 : /* -fkeep-gc-roots-live means that we have to keep a real pointer
3079 : live, but the ivopts code may replace a real pointer with one
3080 : pointing before or after the memory block that is then adjusted
3081 : into the memory block during the loop. FIXME: It would likely be
3082 : better to actually force the pointer live and still use ivopts;
3083 : for example, it would be enough to write the pointer into memory
3084 : and keep it there until after the loop. */
3085 9013051 : if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3086 : return NULL;
3087 :
3088 : /* If BASE contains undefined SSA names make sure we only record
3089 : the original IV. */
3090 8906906 : bool involves_undefs = false;
3091 8906906 : if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3092 : {
3093 3094 : if (pos != IP_ORIGINAL)
3094 : return NULL;
3095 : important = false;
3096 : involves_undefs = true;
3097 : }
3098 :
3099 : /* For non-original variables, make sure their values are computed in a type
3100 : that does not invoke undefined behavior on overflows (since in general,
3101 : we cannot prove that these induction variables are non-wrapping). */
3102 8903812 : if (pos != IP_ORIGINAL)
3103 : {
3104 8038407 : orig_type = TREE_TYPE (base);
3105 8038407 : type = generic_type_for (orig_type);
3106 8038407 : if (type != orig_type)
3107 : {
3108 4525382 : base = fold_convert (type, base);
3109 4525382 : step = fold_convert (type, step);
3110 : }
3111 : }
3112 :
3113 44625469 : for (i = 0; i < data->vcands.length (); i++)
3114 : {
3115 40019499 : cand = data->vcands[i];
3116 :
3117 40019499 : if (cand->pos != pos)
3118 9807246 : continue;
3119 :
3120 30212253 : if (cand->incremented_at != incremented_at
3121 29724212 : || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3122 0 : && cand->ainc_use != use))
3123 488041 : continue;
3124 :
3125 29724212 : if (operand_equal_p (base, cand->iv->base, 0)
3126 9426677 : && operand_equal_p (step, cand->iv->step, 0)
3127 35398659 : && (TYPE_PRECISION (TREE_TYPE (base))
3128 5674447 : == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3129 : break;
3130 : }
3131 :
3132 17808366 : if (i == data->vcands.length ())
3133 : {
3134 4605970 : cand = XCNEW (struct iv_cand);
3135 4605970 : cand->id = i;
3136 4605970 : cand->iv = alloc_iv (data, base, step);
3137 4605970 : cand->pos = pos;
3138 4605970 : if (pos != IP_ORIGINAL)
3139 : {
3140 3740349 : if (doloop)
3141 0 : cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3142 : else
3143 3740349 : cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3144 3740349 : cand->var_after = cand->var_before;
3145 : }
3146 4605970 : cand->important = important;
3147 4605970 : cand->involves_undefs = involves_undefs;
3148 4605970 : cand->incremented_at = incremented_at;
3149 4605970 : cand->doloop_p = doloop;
3150 4605970 : data->vcands.safe_push (cand);
3151 :
3152 4605970 : if (!poly_int_tree_p (step))
3153 : {
3154 182467 : find_inv_vars (data, &step, &cand->inv_vars);
3155 :
3156 182467 : iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3157 : /* Share bitmap between inv_vars and inv_exprs for cand. */
3158 182467 : if (inv_expr != NULL)
3159 : {
3160 100505 : cand->inv_exprs = cand->inv_vars;
3161 100505 : cand->inv_vars = NULL;
3162 100505 : if (cand->inv_exprs)
3163 83432 : bitmap_clear (cand->inv_exprs);
3164 : else
3165 17073 : cand->inv_exprs = BITMAP_ALLOC (NULL);
3166 :
3167 100505 : bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3168 : }
3169 : }
3170 :
3171 4605970 : if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3172 0 : cand->ainc_use = use;
3173 : else
3174 4605970 : cand->ainc_use = NULL;
3175 :
3176 4605970 : cand->orig_iv = orig_iv;
3177 4605970 : if (dump_file && (dump_flags & TDF_DETAILS))
3178 686 : dump_cand (dump_file, cand);
3179 : }
3180 :
3181 8904183 : cand->important |= important;
3182 8904183 : cand->doloop_p |= doloop;
3183 :
3184 : /* Relate candidate to the group for which it is added. */
3185 8904183 : if (use)
3186 2497064 : bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3187 :
3188 : return cand;
3189 : }
3190 :
3191 : /* Returns true if incrementing the induction variable at the end of the LOOP
3192 : is allowed.
3193 :
3194 : The purpose is to avoid splitting latch edge with a biv increment, thus
3195 : creating a jump, possibly confusing other optimization passes and leaving
3196 : less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3197 : available (so we do not have a better alternative), or if the latch edge
3198 : is already nonempty. */
3199 :
3200 : static bool
3201 7921859 : allow_ip_end_pos_p (class loop *loop)
3202 : {
3203 : /* Do not allow IP_END when creating the IV would need to split the
3204 : latch edge as that makes all IP_NORMAL invalid. */
3205 7921859 : auto pos = gsi_last_bb (ip_end_pos (loop));
3206 7921859 : if (!gsi_end_p (pos) && stmt_ends_bb_p (*pos))
3207 : return false;
3208 :
3209 7921859 : if (!ip_normal_pos (loop))
3210 : return true;
3211 :
3212 7825006 : if (!empty_block_p (ip_end_pos (loop)))
3213 : return true;
3214 :
3215 : return false;
3216 : }
3217 :
3218 : /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3219 : Important field is set to IMPORTANT. */
3220 :
3221 : static void
3222 581321 : add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3223 : bool important, struct iv_use *use)
3224 : {
3225 581321 : basic_block use_bb = gimple_bb (use->stmt);
3226 581321 : machine_mode mem_mode;
3227 581321 : unsigned HOST_WIDE_INT cstepi;
3228 :
3229 : /* If we insert the increment in any position other than the standard
3230 : ones, we must ensure that it is incremented once per iteration.
3231 : It must not be in an inner nested loop, or one side of an if
3232 : statement. */
3233 581321 : if (use_bb->loop_father != data->current_loop
3234 579891 : || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3235 552639 : || stmt_can_throw_internal (cfun, use->stmt)
3236 1130139 : || !cst_and_fits_in_hwi (step))
3237 62398 : return;
3238 :
3239 518923 : cstepi = int_cst_value (step);
3240 :
3241 518923 : mem_mode = TYPE_MODE (use->mem_type);
3242 : if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3243 : || USE_STORE_PRE_INCREMENT (mem_mode))
3244 : && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3245 : || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3246 : || USE_STORE_PRE_DECREMENT (mem_mode))
3247 : && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3248 : {
3249 : enum tree_code code = MINUS_EXPR;
3250 : tree new_base;
3251 : tree new_step = step;
3252 :
3253 : if (POINTER_TYPE_P (TREE_TYPE (base)))
3254 : {
3255 : new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3256 : code = POINTER_PLUS_EXPR;
3257 : }
3258 : else
3259 : new_step = fold_convert (TREE_TYPE (base), new_step);
3260 : new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3261 : add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3262 : use->stmt);
3263 : }
3264 : if (((USE_LOAD_POST_INCREMENT (mem_mode)
3265 : || USE_STORE_POST_INCREMENT (mem_mode))
3266 : && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3267 : || ((USE_LOAD_POST_DECREMENT (mem_mode)
3268 : || USE_STORE_POST_DECREMENT (mem_mode))
3269 : && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3270 : {
3271 : add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3272 : use->stmt);
3273 : }
3274 : }
3275 :
3276 : /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3277 : position to POS. If USE is not NULL, the candidate is set as related to
3278 : it. The candidate computation is scheduled before exit condition and at
3279 : the end of loop. */
3280 :
3281 : static void
3282 6966522 : add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3283 : struct iv_use *use, struct iv *orig_iv = NULL,
3284 : bool doloop = false)
3285 : {
3286 6966522 : if (ip_normal_pos (data->current_loop))
3287 6884429 : add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3288 : doloop);
3289 : /* Exclude doloop candidate here since it requires decrement then comparison
3290 : and jump, the IP_END position doesn't match. */
3291 6966522 : if (!doloop && ip_end_pos (data->current_loop)
3292 13933044 : && allow_ip_end_pos_p (data->current_loop))
3293 274723 : add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3294 6966522 : }
3295 :
3296 : /* Adds standard iv candidates. */
3297 :
3298 : static void
3299 500815 : add_standard_iv_candidates (struct ivopts_data *data)
3300 : {
3301 500815 : add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3302 :
3303 : /* The same for a double-integer type if it is still fast enough. */
3304 500815 : if (TYPE_PRECISION
3305 500815 : (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3306 500815 : && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3307 452729 : add_candidate (data, build_int_cst (long_integer_type_node, 0),
3308 : build_int_cst (long_integer_type_node, 1), true, NULL);
3309 :
3310 : /* The same for a double-integer type if it is still fast enough. */
3311 500815 : if (TYPE_PRECISION
3312 500815 : (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3313 548889 : && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3314 12 : add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3315 : build_int_cst (long_long_integer_type_node, 1), true, NULL);
3316 500815 : }
3317 :
3318 :
3319 : /* Adds candidates bases on the old induction variable IV. */
3320 :
3321 : static void
3322 1736005 : add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3323 : {
3324 1736005 : gimple *phi;
3325 1736005 : tree def;
3326 1736005 : struct iv_cand *cand;
3327 :
3328 : /* Check if this biv is used in address type use. */
3329 1151230 : if (iv->no_overflow && iv->have_address_use
3330 494623 : && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3331 2230628 : && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3332 : {
3333 280531 : tree base = fold_convert (sizetype, iv->base);
3334 280531 : tree step = fold_convert (sizetype, iv->step);
3335 :
3336 : /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3337 280531 : add_candidate (data, base, step, true, NULL, iv);
3338 : /* Add iv cand of the original type only if it has nonlinear use. */
3339 280531 : if (iv->nonlin_use)
3340 28469 : add_candidate (data, iv->base, iv->step, true, NULL);
3341 : }
3342 : else
3343 1455474 : add_candidate (data, iv->base, iv->step, true, NULL);
3344 :
3345 : /* The same, but with initial value zero. */
3346 1736005 : if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3347 324132 : add_candidate (data, size_int (0), iv->step, true, NULL);
3348 : else
3349 1411873 : add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3350 : iv->step, true, NULL);
3351 :
3352 1736005 : phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3353 1736005 : if (gimple_code (phi) == GIMPLE_PHI)
3354 : {
3355 : /* Additionally record the possibility of leaving the original iv
3356 : untouched. */
3357 868080 : def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3358 : /* Don't add candidate if it's from another PHI node because
3359 : it's an affine iv appearing in the form of PEELED_CHREC. */
3360 868080 : phi = SSA_NAME_DEF_STMT (def);
3361 868080 : if (gimple_code (phi) != GIMPLE_PHI)
3362 : {
3363 1736160 : cand = add_candidate_1 (data,
3364 : iv->base, iv->step, true, IP_ORIGINAL, NULL,
3365 868080 : SSA_NAME_DEF_STMT (def));
3366 868080 : if (cand)
3367 : {
3368 865776 : cand->var_before = iv->ssa_name;
3369 865776 : cand->var_after = def;
3370 : }
3371 : }
3372 : else
3373 0 : gcc_assert (gimple_bb (phi) == data->current_loop->header);
3374 : }
3375 1736005 : }
3376 :
3377 : /* Adds candidates based on the old induction variables. */
3378 :
3379 : static void
3380 500815 : add_iv_candidate_for_bivs (struct ivopts_data *data)
3381 : {
3382 500815 : unsigned i;
3383 500815 : struct iv *iv;
3384 500815 : bitmap_iterator bi;
3385 :
3386 5440985 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3387 : {
3388 4940170 : iv = ver_info (data, i)->iv;
3389 4940170 : if (iv && iv->biv_p && !integer_zerop (iv->step))
3390 1736005 : add_iv_candidate_for_biv (data, iv);
3391 : }
3392 500815 : }
3393 :
3394 : /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3395 :
3396 : static void
3397 4157560 : record_common_cand (struct ivopts_data *data, tree base,
3398 : tree step, struct iv_use *use)
3399 : {
3400 4157560 : class iv_common_cand ent;
3401 4157560 : class iv_common_cand **slot;
3402 :
3403 4157560 : ent.base = base;
3404 4157560 : ent.step = step;
3405 4157560 : ent.hash = iterative_hash_expr (base, 0);
3406 4157560 : ent.hash = iterative_hash_expr (step, ent.hash);
3407 :
3408 4157560 : slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3409 4157560 : if (*slot == NULL)
3410 : {
3411 2608190 : *slot = new iv_common_cand ();
3412 2608190 : (*slot)->base = base;
3413 2608190 : (*slot)->step = step;
3414 2608190 : (*slot)->uses.create (8);
3415 2608190 : (*slot)->hash = ent.hash;
3416 2608190 : data->iv_common_cands.safe_push ((*slot));
3417 : }
3418 :
3419 4157560 : gcc_assert (use != NULL);
3420 4157560 : (*slot)->uses.safe_push (use);
3421 4157560 : return;
3422 4157560 : }
3423 :
3424 : /* Comparison function used to sort common candidates. */
3425 :
3426 : static int
3427 19099144 : common_cand_cmp (const void *p1, const void *p2)
3428 : {
3429 19099144 : unsigned n1, n2;
3430 19099144 : const class iv_common_cand *const *const ccand1
3431 : = (const class iv_common_cand *const *)p1;
3432 19099144 : const class iv_common_cand *const *const ccand2
3433 : = (const class iv_common_cand *const *)p2;
3434 :
3435 19099144 : n1 = (*ccand1)->uses.length ();
3436 19099144 : n2 = (*ccand2)->uses.length ();
3437 19099144 : return n2 - n1;
3438 : }
3439 :
3440 : /* Adds IV candidates based on common candidated recorded. */
3441 :
3442 : static void
3443 500815 : add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3444 : {
3445 500815 : unsigned i, j;
3446 500815 : struct iv_cand *cand_1, *cand_2;
3447 :
3448 500815 : data->iv_common_cands.qsort (common_cand_cmp);
3449 1456152 : for (i = 0; i < data->iv_common_cands.length (); i++)
3450 : {
3451 1440363 : class iv_common_cand *ptr = data->iv_common_cands[i];
3452 :
3453 : /* Only add IV candidate if it's derived from multiple uses. */
3454 1440363 : if (ptr->uses.length () <= 1)
3455 : break;
3456 :
3457 955337 : cand_1 = NULL;
3458 955337 : cand_2 = NULL;
3459 955337 : if (ip_normal_pos (data->current_loop))
3460 940577 : cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3461 : false, IP_NORMAL, NULL, NULL);
3462 :
3463 955337 : if (ip_end_pos (data->current_loop)
3464 955337 : && allow_ip_end_pos_p (data->current_loop))
3465 45242 : cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3466 : false, IP_END, NULL, NULL);
3467 :
3468 : /* Bind deriving uses and the new candidates. */
3469 3460044 : for (j = 0; j < ptr->uses.length (); j++)
3470 : {
3471 2504707 : struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3472 2504707 : if (cand_1)
3473 2430295 : bitmap_set_bit (group->related_cands, cand_1->id);
3474 2504707 : if (cand_2)
3475 135878 : bitmap_set_bit (group->related_cands, cand_2->id);
3476 : }
3477 : }
3478 :
3479 : /* Release data since it is useless from this point. */
3480 500815 : data->iv_common_cand_tab->empty ();
3481 500815 : data->iv_common_cands.truncate (0);
3482 500815 : }
3483 :
3484 : /* Adds candidates based on the value of USE's iv. */
3485 :
3486 : static void
3487 1645411 : add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3488 : {
3489 1645411 : poly_uint64 offset;
3490 1645411 : tree base;
3491 1645411 : struct iv *iv = use->iv;
3492 1645411 : tree basetype = TREE_TYPE (iv->base);
3493 :
3494 : /* Don't add candidate for iv_use with non integer, pointer or non-mode
3495 : precision types, instead, add candidate for the corresponding scev in
3496 : unsigned type with the same precision. See PR93674 for more info. */
3497 776136 : if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3498 2421389 : || !type_has_mode_precision_p (basetype))
3499 : {
3500 169 : basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3501 169 : TYPE_UNSIGNED (basetype));
3502 169 : add_candidate (data, fold_convert (basetype, iv->base),
3503 : fold_convert (basetype, iv->step), false, NULL);
3504 169 : return;
3505 : }
3506 :
3507 1645242 : add_candidate (data, iv->base, iv->step, false, use);
3508 :
3509 : /* Record common candidate for use in case it can be shared by others. */
3510 1645242 : record_common_cand (data, iv->base, iv->step, use);
3511 :
3512 : /* Record common candidate with initial value zero. */
3513 1645242 : basetype = TREE_TYPE (iv->base);
3514 1645242 : if (POINTER_TYPE_P (basetype))
3515 775978 : basetype = sizetype;
3516 1645242 : record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3517 :
3518 : /* Compare the cost of an address with an unscaled index with the cost of
3519 : an address with a scaled index and add candidate if useful. */
3520 1645242 : poly_int64 step;
3521 1645242 : if (use != NULL
3522 1645242 : && poly_int_tree_p (iv->step, &step)
3523 1409435 : && address_p (use->type))
3524 : {
3525 531608 : poly_int64 new_step;
3526 531608 : unsigned int fact = preferred_mem_scale_factor
3527 531608 : (use->iv->base,
3528 531608 : TYPE_MODE (use->mem_type),
3529 531608 : optimize_loop_for_speed_p (data->current_loop));
3530 :
3531 531608 : if (fact != 1
3532 531608 : && multiple_p (step, fact, &new_step))
3533 0 : add_candidate (data, size_int (0),
3534 0 : wide_int_to_tree (sizetype, new_step),
3535 : true, NULL);
3536 : }
3537 :
3538 : /* Record common candidate with constant offset stripped in base.
3539 : Like the use itself, we also add candidate directly for it. */
3540 1645242 : base = strip_offset (iv->base, &offset);
3541 1645242 : if (maybe_ne (offset, 0U) || base != iv->base)
3542 : {
3543 867076 : record_common_cand (data, base, iv->step, use);
3544 867076 : add_candidate (data, base, iv->step, false, use);
3545 : }
3546 :
3547 : /* Record common candidate with base_object removed in base. */
3548 1645242 : base = iv->base;
3549 1645242 : STRIP_NOPS (base);
3550 1645242 : if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3551 : {
3552 0 : tree step = iv->step;
3553 :
3554 0 : STRIP_NOPS (step);
3555 0 : base = TREE_OPERAND (base, 1);
3556 0 : step = fold_convert (sizetype, step);
3557 0 : record_common_cand (data, base, step, use);
3558 : /* Also record common candidate with offset stripped. */
3559 0 : tree alt_base, alt_offset;
3560 0 : split_constant_offset (base, &alt_base, &alt_offset);
3561 0 : if (!integer_zerop (alt_offset))
3562 0 : record_common_cand (data, alt_base, step, use);
3563 : }
3564 :
3565 : /* At last, add auto-incremental candidates. Make such variables
3566 : important since other iv uses with same base object may be based
3567 : on it. */
3568 1645242 : if (use != NULL && address_p (use->type))
3569 581321 : add_autoinc_candidates (data, iv->base, iv->step, true, use);
3570 : }
3571 :
3572 : /* Adds candidates based on the uses. */
3573 :
3574 : static void
3575 500815 : add_iv_candidate_for_groups (struct ivopts_data *data)
3576 : {
3577 500815 : unsigned i;
3578 :
3579 : /* Only add candidate for the first use in group. */
3580 2146226 : for (i = 0; i < data->vgroups.length (); i++)
3581 : {
3582 1645411 : struct iv_group *group = data->vgroups[i];
3583 :
3584 1645411 : gcc_assert (group->vuses[0] != NULL);
3585 1645411 : add_iv_candidate_for_use (data, group->vuses[0]);
3586 : }
3587 500815 : add_iv_candidate_derived_from_uses (data);
3588 500815 : }
3589 :
3590 : /* Record important candidates and add them to related_cands bitmaps. */
3591 :
3592 : static void
3593 500815 : record_important_candidates (struct ivopts_data *data)
3594 : {
3595 500815 : unsigned i;
3596 500815 : struct iv_group *group;
3597 :
3598 5106785 : for (i = 0; i < data->vcands.length (); i++)
3599 : {
3600 4605970 : struct iv_cand *cand = data->vcands[i];
3601 :
3602 4605970 : if (cand->important)
3603 3684937 : bitmap_set_bit (data->important_candidates, i);
3604 : }
3605 :
3606 500815 : data->consider_all_candidates = (data->vcands.length ()
3607 500815 : <= CONSIDER_ALL_CANDIDATES_BOUND);
3608 :
3609 : /* Add important candidates to groups' related_cands bitmaps. */
3610 2146226 : for (i = 0; i < data->vgroups.length (); i++)
3611 : {
3612 1645411 : group = data->vgroups[i];
3613 1645411 : bitmap_ior_into (group->related_cands, data->important_candidates);
3614 : }
3615 500815 : }
3616 :
3617 : /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3618 : If consider_all_candidates is true, we use a two-dimensional array, otherwise
3619 : we allocate a simple list to every use. */
3620 :
3621 : static void
3622 500815 : alloc_use_cost_map (struct ivopts_data *data)
3623 : {
3624 500815 : unsigned i, size, s;
3625 :
3626 2146226 : for (i = 0; i < data->vgroups.length (); i++)
3627 : {
3628 1645411 : struct iv_group *group = data->vgroups[i];
3629 :
3630 1645411 : if (data->consider_all_candidates)
3631 1635710 : size = data->vcands.length ();
3632 : else
3633 : {
3634 9701 : s = bitmap_count_bits (group->related_cands);
3635 :
3636 : /* Round up to the power of two, so that moduling by it is fast. */
3637 19402 : size = s ? (1 << ceil_log2 (s)) : 1;
3638 : }
3639 :
3640 1645411 : group->n_map_members = size;
3641 1645411 : group->cost_map = XCNEWVEC (class cost_pair, size);
3642 : }
3643 500815 : }
3644 :
3645 : /* Sets cost of (GROUP, CAND) pair to COST and records that it depends
3646 : on invariants INV_VARS and that the value used in expressing it is
3647 : VALUE, and in case of iv elimination the comparison operator is COMP.
     : INV_EXPRS is stored in the pair alongside INV_VARS.
     :
     : An infinite COST is not recorded: INV_VARS and INV_EXPRS are released
     : with BITMAP_FREE and the map is left untouched. When
     : DATA->consider_all_candidates is set, GROUP->cost_map is indexed
     : directly by CAND->id; otherwise it is probed linearly (wrapping
     : around) from CAND->id masked by n_map_members - 1 for the first
     : slot whose cand field is still null. */
3648 :
3649 : static void
3650 17755136 : set_group_iv_cost (struct ivopts_data *data,
3651 : struct iv_group *group, struct iv_cand *cand,
3652 : comp_cost cost, bitmap inv_vars, tree value,
3653 : enum tree_code comp, bitmap inv_exprs)
3654 : {
3655 17755136 : unsigned i, s;
3656 :
3657 17755136 : if (cost.infinite_cost_p ())
3658 : {
3659 6133721 : BITMAP_FREE (inv_vars);
3660 6133721 : BITMAP_FREE (inv_exprs);
3661 6133721 : return;
3662 : }
3663 :
3664 11621415 : if (data->consider_all_candidates)
3665 : {
3666 11477897 : group->cost_map[cand->id].cand = cand;
3667 11477897 : group->cost_map[cand->id].cost = cost;
3668 11477897 : group->cost_map[cand->id].inv_vars = inv_vars;
3669 11477897 : group->cost_map[cand->id].inv_exprs = inv_exprs;
3670 11477897 : group->cost_map[cand->id].value = value;
3671 11477897 : group->cost_map[cand->id].comp = comp;
3672 11477897 : return;
3673 : }
3674 :
3675 : /* n_map_members is a power of two, so this computes modulo.
     : Probe from slot S to the end of the table, then from the start of
     : the table up to S, taking the first slot that has no candidate. */
3676 143518 : s = cand->id & (group->n_map_members - 1);
3677 152845 : for (i = s; i < group->n_map_members; i++)
3678 152806 : if (!group->cost_map[i].cand)
3679 143479 : goto found;
3680 53 : for (i = 0; i < s; i++)
3681 53 : if (!group->cost_map[i].cand)
3682 39 : goto found;
3683 :
3684 0 : gcc_unreachable (); /* Every slot of the table is already occupied. */
3685 :
3686 143518 : found:
3687 143518 : group->cost_map[i].cand = cand;
3688 143518 : group->cost_map[i].cost = cost;
3689 143518 : group->cost_map[i].inv_vars = inv_vars;
3690 143518 : group->cost_map[i].inv_exprs = inv_exprs;
3691 143518 : group->cost_map[i].value = value;
3692 143518 : group->cost_map[i].comp = comp;
3693 : }
3694 :
3695 : /* Gets cost of (GROUP, CAND) pair. Returns a pointer into
     : GROUP->cost_map, or NULL when CAND is null or no entry for CAND is
     : found. With DATA->consider_all_candidates the map is indexed
     : directly by CAND->id and a slot whose cand field is null counts as
     : missing; otherwise the map is probed linearly from the masked
     : CAND->id, wrapping around, and the search gives up at the first
     : empty slot. */
3696 :
3697 : static class cost_pair *
3698 203398182 : get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3699 : struct iv_cand *cand)
3700 : {
3701 203398182 : unsigned i, s;
3702 203398182 : class cost_pair *ret;
3703 :
3704 203398182 : if (!cand)
3705 : return NULL;
3706 :
3707 197591421 : if (data->consider_all_candidates)
3708 : {
3709 183829459 : ret = group->cost_map + cand->id;
3710 183829459 : if (!ret->cand)
3711 : return NULL;
3712 :
3713 108657361 : return ret;
3714 : }
3715 :
3716 : /* n_map_members is a power of two, so this computes modulo.
     : The probe order (S to the end, then the start up to S) is the same
     : one used when entries are inserted by set_group_iv_cost. */
3717 13761962 : s = cand->id & (group->n_map_members - 1);
3718 18681715 : for (i = s; i < group->n_map_members; i++)
3719 18619792 : if (group->cost_map[i].cand == cand)
3720 : return group->cost_map + i;
3721 10364098 : else if (group->cost_map[i].cand == NULL)
3722 : return NULL;
3723 191482 : for (i = 0; i < s; i++)
3724 171249 : if (group->cost_map[i].cand == cand)
3725 : return group->cost_map + i;
3726 169165 : else if (group->cost_map[i].cand == NULL)
3727 : return NULL;
3728 :
3729 : return NULL;
3730 : }
3731 :
3732 : /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.
     : For a static or external OBJ the result is a MEM whose address is a
     : SYMBOL_REF named by OBJ's assembler name. Otherwise it is a MEM
     : addressed by a fresh raw REG numbered *REGNO, and *REGNO is
     : incremented; REGNO is dereferenced only on that path. The MEM is
     : returned to the caller; this function does not itself install it as
     : OBJ's DECL_RTL. */
3733 : static rtx
3734 41713 : produce_memory_decl_rtl (tree obj, int *regno)
3735 : {
3736 41713 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3737 41713 : machine_mode address_mode = targetm.addr_space.address_mode (as);
3738 41713 : rtx x;
3739 :
3740 41713 : gcc_assert (obj); /* NOTE(review): OBJ was already passed to TREE_TYPE above, so this check runs after the first use. */
3741 41713 : if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3742 : {
3743 41713 : const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3744 41713 : x = gen_rtx_SYMBOL_REF (address_mode, name);
3745 41713 : SET_SYMBOL_REF_DECL (x, obj);
3746 41713 : x = gen_rtx_MEM (DECL_MODE (obj), x);
3747 41713 : set_mem_addr_space (x, as);
3748 41713 : targetm.encode_section_info (obj, x, true);
3749 : }
3750 : else
3751 : {
3752 0 : x = gen_raw_REG (address_mode, (*regno)++);
3753 0 : x = gen_rtx_MEM (DECL_MODE (obj), x);
3754 0 : set_mem_addr_space (x, as);
3755 : }
3756 :
3757 41713 : return x;
3758 : }
3759 :
3760 : /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3761 : walk_tree. DATA points to an int holding the next fake register number
     : to hand out; it is advanced each time a register is created.
     :
     : *WS is cleared for SSA names and for VAR/PARM/RESULT declarations
     : (presumably this is walk_tree's walk-subtrees flag -- confirm against
     : the walk_tree contract). Every declaration that receives RTL here is
     : pushed on decl_rtl_to_reset. Always returns NULL_TREE. */
3762 :
3763 : static tree
3764 583982 : prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3765 : {
3766 583982 : tree obj = NULL_TREE;
3767 583982 : rtx x = NULL_RTX;
3768 583982 : int *regno = (int *) data;
3769 :
3770 583982 : switch (TREE_CODE (*expr_p))
3771 : {
3772 166852 : case ADDR_EXPR:
3773 166852 : for (expr_p = &TREE_OPERAND (*expr_p, 0); /* Strip handled components to reach the base object. */
3774 166852 : handled_component_p (*expr_p);
3775 0 : expr_p = &TREE_OPERAND (*expr_p, 0))
3776 0 : continue;
3777 166852 : obj = *expr_p;
3778 166852 : if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3779 0 : x = produce_memory_decl_rtl (obj, regno);
3780 : break;
3781 :
3782 0 : case SSA_NAME:
3783 0 : *ws = 0;
3784 0 : obj = SSA_NAME_VAR (*expr_p);
3785 : /* Defer handling of anonymous SSA_NAMEs to the expander. */
3786 0 : if (!obj)
3787 : return NULL_TREE;
3788 0 : if (!DECL_RTL_SET_P (obj))
3789 0 : x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3790 : break;
3791 :
3792 166852 : case VAR_DECL:
3793 166852 : case PARM_DECL:
3794 166852 : case RESULT_DECL:
3795 166852 : *ws = 0;
3796 166852 : obj = *expr_p;
3797 :
3798 166852 : if (DECL_RTL_SET_P (obj))
3799 : break;
3800 :
3801 0 : if (DECL_MODE (obj) == BLKmode) /* BLKmode objects get memory RTL, everything else a fake register. */
3802 0 : x = produce_memory_decl_rtl (obj, regno);
3803 : else
3804 0 : x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3805 :
3806 : break;
3807 :
3808 : default:
3809 : break;
3810 : }
3811 :
3812 0 : if (x) /* Only set when new RTL was created above. */
3813 : {
3814 0 : decl_rtl_to_reset.safe_push (obj);
3815 0 : SET_DECL_RTL (obj, x);
3816 : }
3817 :
3818 : return NULL_TREE;
3819 : }
3820 :
3821 : /* Predict whether the given loop will be transformed in the RTL
3822 : doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3823 : This is only for target independent checks, see targetm.predict_doloop_p
3824 : for the target dependent ones.
3825 :
3826 : Note that according to some initial investigation, some checks like costly
3827 : niter check and invalid stmt scanning don't have much gains among general
3828 : cases, so keep this as simple as possible first.
3829 :
3830 : Some RTL-specific checks seem impossible to perform on gimple; if any new
3831 : checks or easy checks _are_ missing here, please add them.
     :
     : Returns false, with a message in the detailed dump, when the target
     : hook rejects DATA's current loop, when the loop has no single
     : dominating exit with a niter description, or when the estimated (or
     : else likely maximum) iteration count is known and below 3.
     : Returns true otherwise. */
3832 :
3833 : static bool
3834 500815 : generic_predict_doloop_p (struct ivopts_data *data)
3835 : {
3836 500815 : class loop *loop = data->current_loop;
3837 :
3838 : /* Call target hook for target dependent checks. */
3839 500815 : if (!targetm.predict_doloop_p (loop))
3840 : {
3841 500815 : if (dump_file && (dump_flags & TDF_DETAILS))
3842 67 : fprintf (dump_file, "Predict doloop failure due to"
3843 : " target specific checks.\n");
3844 500815 : return false;
3845 : }
3846 :
3847 : /* Similar to doloop_optimize, check iteration description to know it's
3848 : suitable or not. Keep it as simple as possible, feel free to extend it
3849 : if you find any multiple exits cases matter. */
3850 0 : edge exit = single_dom_exit (loop);
3851 0 : class tree_niter_desc *niter_desc;
3852 0 : if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3853 : {
3854 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3855 0 : fprintf (dump_file, "Predict doloop failure due to"
3856 : " unexpected niters.\n");
3857 0 : return false;
3858 : }
3859 :
3860 : /* Similar to doloop_optimize, check whether iteration count too small
3861 : and not profitable. An estimate of -1 is treated as unknown: the
     : likely maximum is tried next, and a negative result skips the
     : check. */
3862 0 : HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3863 0 : if (est_niter == -1)
3864 0 : est_niter = get_likely_max_loop_iterations_int (loop);
3865 0 : if (est_niter >= 0 && est_niter < 3)
3866 : {
3867 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3868 0 : fprintf (dump_file,
3869 : "Predict doloop failure due to"
3870 : " too few iterations (%u).\n",
3871 : (unsigned int) est_niter); /* Lossless: est_niter is in [0, 3) here. */
3872 0 : return false;
3873 : }
3874 :
3875 : return true;
3876 : }
3877 :
3878 : /* Determines cost of the computation of EXPR. SPEED selects whether
     : the expansion and the insn costs are evaluated for speed or for size.
     :
     : EXPR is expanded to RTL inside a temporary insn sequence, after
     : prepare_decl_rtl has given the declarations it refers to fake RTL
     : numbered from just past the virtual registers. The result is the
     : cost of that sequence, plus an address cost when the expansion
     : result is a MEM, or plus the cost of the result as a SET source
     : when it is neither a MEM nor a REG. The function's node frequency
     : is forced to NODE_FREQUENCY_NORMAL during the expansion and restored
     : afterwards. */
3879 :
3880 : static unsigned
3881 250278 : computation_cost (tree expr, bool speed)
3882 : {
3883 250278 : rtx_insn *seq;
3884 250278 : rtx rslt;
3885 250278 : tree type = TREE_TYPE (expr);
3886 250278 : unsigned cost;
3887 : /* Avoid using hard regs in ways which may be unsupported. */
3888 250278 : int regno = LAST_VIRTUAL_REGISTER + 1;
3889 250278 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
3890 250278 : enum node_frequency real_frequency = node->frequency;
3891 :
3892 250278 : node->frequency = NODE_FREQUENCY_NORMAL;
3893 250278 : crtl->maybe_hot_insn_p = speed;
3894 250278 : walk_tree (&expr, prepare_decl_rtl, &regno, NULL); /* The callback reads its data argument as an int *. */
3895 250278 : start_sequence ();
3896 250278 : rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3897 250278 : seq = end_sequence ();
3898 250278 : default_rtl_profile ();
3899 250278 : node->frequency = real_frequency;
3900 :
3901 250278 : cost = seq_cost (seq, speed);
3902 250278 : if (MEM_P (rslt))
3903 0 : cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3904 0 : TYPE_ADDR_SPACE (type), speed);
3905 250278 : else if (!REG_P (rslt))
3906 500556 : cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3907 :
3908 250278 : return cost;
3909 : }
3910 :
3911 : /* Returns variable containing the value of candidate CAND at statement
     : STMT in LOOP: CAND->var_after when stmt_after_increment reports that
     : STMT comes after the increment of CAND, CAND->var_before otherwise. */
3912 :
3913 : static tree
3914 18556844 : var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3915 : {
3916 18556844 : if (stmt_after_increment (loop, cand, stmt))
3917 4735468 : return cand->var_after;
3918 : else
3919 13821376 : return cand->var_before;
3920 : }
3921 :
3922 : /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3923 : same precision that is at least as wide as the precision of TYPE, stores
3924 : BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3925 : type of A and B. In the "otherwise" case *A and *B are left unchanged
     : and the returned type is taken from *A. */
3926 :
3927 : static tree
3928 14353994 : determine_common_wider_type (tree *a, tree *b)
3929 : {
3930 14353994 : tree wider_type = NULL;
3931 14353994 : tree suba, subb;
3932 14353994 : tree atype = TREE_TYPE (*a);
3933 :
3934 14353994 : if (CONVERT_EXPR_P (*a))
3935 : {
3936 8104758 : suba = TREE_OPERAND (*a, 0);
3937 8104758 : wider_type = TREE_TYPE (suba);
3938 8104758 : if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype)) /* The inner type is narrower, so it is of no use. */
3939 : return atype;
3940 : }
3941 : else
3942 : return atype;
3943 :
3944 8086572 : if (CONVERT_EXPR_P (*b))
3945 : {
3946 1598709 : subb = TREE_OPERAND (*b, 0);
3947 1598709 : if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb))) /* Only precisions are compared, not the types themselves. */
3948 : return atype;
3949 : }
3950 : else
3951 : return atype;
3952 :
3953 1516679 : *a = suba;
3954 1516679 : *b = subb;
3955 1516679 : return wider_type;
3956 : }
3957 :
3958 : /* Determines the expression by which USE is expressed from induction variable
3959 : CAND at statement AT in DATA's current loop. The expression is stored in
3960 : two parts in a decomposed form. The invariant part is stored in AFF_INV;
3961 : while variant part in AFF_VAR. If PRAT is non-null, store in it the
3962 : ratio RAT of the decomposition USE = UBASE - RAT * CBASE + RAT * VAR
3963 : used below. Returns false if USE cannot be expressed using CAND: either
     : the type of USE's base is wider than that of CAND's base, or no
     : constant ratio between the two steps is found. */
3964 :
3965 : static bool
3966 17296175 : get_computation_aff_1 (struct ivopts_data *data, gimple *at, struct iv_use *use,
3967 : struct iv_cand *cand, class aff_tree *aff_inv,
3968 : class aff_tree *aff_var, widest_int *prat = NULL)
3969 : {
3970 17296175 : tree ubase = use->iv->base, ustep = use->iv->step;
3971 17296175 : tree cbase = cand->iv->base, cstep = cand->iv->step;
3972 17296175 : tree common_type, uutype, var, cstep_common;
3973 17296175 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3974 17296175 : aff_tree aff_cbase;
3975 17296175 : widest_int rat;
3976 :
3977 : /* We must have a precision to express the values of use. */
3978 17296175 : if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3979 : return false;
3980 :
3981 17295414 : var = var_at_stmt (data->current_loop, cand, at);
3982 17295414 : uutype = unsigned_type_for (utype);
3983 :
3984 : /* If the conversion is not noop, perform it. */
3985 17295414 : if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3986 : {
3987 265723 : if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3988 1639051 : && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3989 : {
3990 36293 : tree inner_base, inner_step, inner_type;
3991 36293 : inner_base = TREE_OPERAND (cbase, 0);
3992 36293 : if (CONVERT_EXPR_P (cstep))
3993 4503 : inner_step = TREE_OPERAND (cstep, 0);
3994 : else
3995 : inner_step = cstep;
3996 :
3997 36293 : inner_type = TREE_TYPE (inner_base);
3998 : /* If candidate is added from a biv whose type is smaller than
3999 : ctype, we know both candidate and the biv won't overflow.
4000 : In this case, it's safe to skip the conversion in candidate.
4001 : As an example, (unsigned short)((unsigned long)A) equals to
4002 : (unsigned short)A, if A has a type no larger than short. */
4003 36293 : if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4004 : {
4005 35333 : cbase = inner_base;
4006 35333 : cstep = inner_step;
4007 : }
4008 : }
4009 1602758 : cbase = fold_convert (uutype, cbase);
4010 1602758 : cstep = fold_convert (uutype, cstep);
4011 1602758 : var = fold_convert (uutype, var);
4012 : }
4013 :
4014 : /* Ratio is 1 when computing the value of biv cand by itself.
4015 : We can't rely on constant_multiple_of in this case because the
4016 : use is created after the original biv is selected. The call
4017 : could fail because of inconsistent fold behavior. See PR68021
4018 : for more information. */
4019 17295414 : if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4020 : {
4021 5331 : gcc_assert (is_gimple_assign (use->stmt));
4022 5331 : gcc_assert (use->iv->ssa_name == cand->var_after);
4023 5331 : gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4024 5331 : rat = 1;
4025 : }
4026 17290083 : else if (!constant_multiple_of (ustep, cstep, &rat, data))
4027 : return false;
4028 :
4029 14353994 : if (prat)
4030 12869997 : *prat = rat;
4031 :
4032 : /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4033 : type, we achieve better folding by computing their difference in this
4034 : wider type, and cast the result to UUTYPE. We do not need to worry about
4035 : overflows, as all the arithmetics will in the end be performed in UUTYPE
4036 : anyway. */
4037 14353994 : common_type = determine_common_wider_type (&ubase, &cbase);
4038 :
4039 : /* use = ubase - ratio * cbase + ratio * var. */
4040 14353994 : tree_to_aff_combination (ubase, common_type, aff_inv);
4041 14353994 : tree_to_aff_combination (cbase, common_type, &aff_cbase);
4042 14353994 : tree_to_aff_combination (var, uutype, aff_var);
4043 :
4044 : /* We need to shift the value if we are after the increment. The step
     : is added to the candidate base before that base is scaled by
     : -RAT below. */
4045 14353994 : if (stmt_after_increment (data->current_loop, cand, at))
4046 : {
4047 3221674 : aff_tree cstep_aff;
4048 :
4049 3221674 : if (common_type != uutype)
4050 839650 : cstep_common = fold_convert (common_type, cstep);
4051 : else
4052 : cstep_common = cstep;
4053 :
4054 3221674 : tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4055 3221674 : aff_combination_add (&aff_cbase, &cstep_aff);
4056 3221674 : }
4057 :
4058 14353994 : aff_combination_scale (&aff_cbase, -rat);
4059 14353994 : aff_combination_add (aff_inv, &aff_cbase);
4060 14353994 : if (common_type != uutype)
4061 9755851 : aff_combination_convert (aff_inv, uutype);
4062 :
4063 14353994 : aff_combination_scale (aff_var, rat);
4064 14353994 : return true;
4065 17296175 : }
4066 :
4067 : /* Determines the expression by which USE is expressed from induction variable
4068 : CAND at statement AT in DATA's current loop. The expression is stored in a
4069 : decomposed form into AFF. Returns false if USE cannot be expressed using
4070 : CAND. This is get_computation_aff_1 with the variant part added back
     : into the invariant part, so AFF holds the whole expression. */
4071 :
4072 : static bool
4073 1247022 : get_computation_aff (struct ivopts_data *data, gimple *at, struct iv_use *use,
4074 : struct iv_cand *cand, class aff_tree *aff)
4075 : {
4076 1247022 : aff_tree aff_var;
4077 :
4078 1247022 : if (!get_computation_aff_1 (data, at, use, cand, aff, &aff_var))
4079 : return false;
4080 :
4081 1139452 : aff_combination_add (aff, &aff_var);
4082 1139452 : return true;
4083 1247022 : }
4084 :
4085 : /* Return the type of USE: the type of the base of USE's iv, except for
     : USE_REF_ADDRESS uses, where a pointer to the type of the referenced
     : operand is built instead. */
4086 :
4087 : static tree
4088 1007187 : get_use_type (struct iv_use *use)
4089 : {
4090 1007187 : tree base_type = TREE_TYPE (use->iv->base);
4091 1007187 : tree type;
4092 :
4093 1007187 : if (use->type == USE_REF_ADDRESS)
4094 : {
4095 : /* The base_type may be a void pointer. Create a pointer type based on
4096 : the mem_ref instead. The assertion checks that the pointed-to
     : types of the new type and of base_type share an address space. */
4097 0 : type = build_pointer_type (TREE_TYPE (*use->op_p));
4098 0 : gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4099 : == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4100 : }
4101 : else
4102 : type = base_type;
4103 :
4104 1007187 : return type;
4105 : }
4106 :
4107 : /* Determines the expression by which USE is expressed from induction variable
4108 : CAND at statement AT in DATA's current loop. The computation is
4109 : unshared. The result is converted to the type given by get_use_type;
     : NULL_TREE is returned when get_computation_aff fails. */
4110 :
4111 : static tree
4112 383853 : get_computation_at (struct ivopts_data *data, gimple *at,
4113 : struct iv_use *use, struct iv_cand *cand)
4114 : {
4115 383853 : aff_tree aff;
4116 383853 : tree type = get_use_type (use);
4117 :
4118 383853 : if (!get_computation_aff (data, at, use, cand, &aff))
4119 : return NULL_TREE;
4120 276283 : unshare_aff_combination (&aff);
4121 276283 : return fold_convert (type, aff_combination_to_tree (&aff));
4122 383853 : }
4123 :
4124 : /* Like get_computation_at, but try harder, even if the computation
4125 : is more expensive. Intended for debug stmts. When get_computation_at
     : fails, this tries the inverse direction, where CAND's step is a
     : constant multiple of USE's step, and builds
     : UBASE + (VAR - CBASE) / RATIO. Returns NULL_TREE when that is not
     : possible or not known to be safe. */
4126 :
4127 : static tree
4128 176774 : get_debug_computation_at (struct ivopts_data *data, gimple *at,
4129 : struct iv_use *use, struct iv_cand *cand)
4130 : {
4131 176774 : if (tree ret = get_computation_at (data, at, use, cand))
4132 : return ret;
4133 :
4134 107570 : tree ubase = use->iv->base, ustep = use->iv->step;
4135 107570 : tree cbase = cand->iv->base, cstep = cand->iv->step;
4136 107570 : tree var;
4137 107570 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4138 107570 : widest_int rat;
4139 :
4140 : /* We must have a precision to express the values of use. Here the
     : candidate type has to be strictly wider than the use type. */
4141 107570 : if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4142 : return NULL_TREE;
4143 :
4144 : /* Try to handle the case that get_computation_at doesn't,
4145 : try to express
4146 : use = ubase + (var - cbase) / ratio. */
4147 8584 : if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4148 : &rat, data))
4149 : return NULL_TREE;
4150 :
4151 7491 : bool neg_p = false;
4152 7491 : if (wi::neg_p (rat)) /* A negative ratio is given up on for unsigned CTYPE; otherwise its magnitude is used and the sign is applied at the end. */
4153 : {
4154 825 : if (TYPE_UNSIGNED (ctype))
4155 : return NULL_TREE;
4156 0 : neg_p = true;
4157 0 : rat = wi::neg (rat);
4158 : }
4159 :
4160 : /* If both IVs can wrap around and CAND doesn't have a power of two step,
4161 : it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4162 : the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4163 : uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4164 : ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4165 6666 : if (!use->iv->no_overflow
4166 62 : && !cand->iv->no_overflow
4167 6715 : && !integer_pow2p (cstep))
4168 : return NULL_TREE;
4169 :
4170 6652 : int bits = wi::exact_log2 (rat);
4171 6652 : if (bits == -1) /* RAT is not an exact power of two: use floor_log2 + 1 instead. */
4172 663 : bits = wi::floor_log2 (rat) + 1;
4173 6652 : if (!cand->iv->no_overflow
4174 6652 : && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4175 : return NULL_TREE;
4176 :
4177 6652 : var = var_at_stmt (data->current_loop, cand, at);
4178 :
4179 6652 : if (POINTER_TYPE_P (ctype)) /* Do the arithmetic in the unsigned integer type matching the pointer type. */
4180 : {
4181 120 : ctype = unsigned_type_for (ctype);
4182 120 : cbase = fold_convert (ctype, cbase);
4183 120 : cstep = fold_convert (ctype, cstep);
4184 120 : var = fold_convert (ctype, var);
4185 : }
4186 :
4187 6652 : if (stmt_after_increment (data->current_loop, cand, at)) /* Undo the increment that has already been applied at AT. */
4188 70 : var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4189 : unshare_expr (cstep));
4190 :
4191 6652 : var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4192 6652 : var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4193 : wide_int_to_tree (TREE_TYPE (var), rat));
4194 6652 : if (POINTER_TYPE_P (utype))
4195 : {
4196 0 : var = fold_convert (sizetype, var);
4197 0 : if (neg_p)
4198 0 : var = fold_build1 (NEGATE_EXPR, sizetype, var);
4199 0 : var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4200 : }
4201 : else
4202 : {
4203 6652 : var = fold_convert (utype, var);
4204 13304 : var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4205 : ubase, var);
4206 : }
4207 : return var;
4208 107570 : }
4209 :
4210 : /* Adjust the cost COST for being in loop setup rather than loop body.
4211 : If we're optimizing for space, the loop setup overhead is constant;
4212 : if we're optimizing for speed, amortize it over the per-iteration cost.
4213 : If ROUND_UP_P is true, the result is rounded up rather than towards zero
4214 : when optimizing for speed. A COST of INFTY is returned unchanged. */
4215 : static int64_t
4216 10337213 : adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4217 : bool round_up_p = false)
4218 : {
4219 10337213 : if (cost == INFTY)
4220 : return cost;
4221 10337213 : else if (optimize_loop_for_speed_p (data->current_loop))
4222 : {
4223 8700971 : uint64_t niters = avg_loop_niter (data->current_loop);
4224 8700971 : if (niters > (uint64_t) cost) /* The quotient would be below one: 0, or 1 when rounding up a nonzero cost. */
4225 13343632 : return (round_up_p && cost != 0) ? 1 : 0;
4226 1848235 : return (cost + (round_up_p ? niters - 1 : 0)) / niters; /* Ceiling division when ROUND_UP_P, truncating otherwise. */
4227 : }
4228 : else
4229 : return cost;
4230 : }
4231 :
4232 : /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4233 : EXPR operand holding the shift. COST0 and COST1 are the costs for
4234 : calculating the operands of EXPR. Returns true if successful, and returns
4235 : the cost in COST. Fails, leaving COST untouched, unless the constant
     : multiplier of MULT is a power of two whose exponent is below
     : MIN (BITS_PER_WORD, bitsize of MODE). */
4236 :
4237 : static bool
4238 1443349 : get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4239 : comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4240 : {
4241 1443349 : comp_cost res;
4242 1443349 : tree op1 = TREE_OPERAND (expr, 1);
4243 1443349 : tree cst = TREE_OPERAND (mult, 1);
4244 1443349 : tree multop = TREE_OPERAND (mult, 0);
4245 1443349 : int m = exact_log2 (int_cst_value (cst));
4246 4329537 : int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4247 1443349 : int as_cost, sa_cost;
4248 1443349 : bool mult_in_op1;
4249 :
4250 1443349 : if (!(m >= 0 && m < maxm))
4251 : return false;
4252 :
4253 958340 : STRIP_NOPS (op1);
4254 958340 : mult_in_op1 = operand_equal_p (op1, mult, 0);
4255 :
4256 958340 : as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); /* Cost of a separate shift followed by an add. */
4257 :
4258 : /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4259 : use that in preference to a shift insn followed by an add insn. */
4260 958340 : sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4261 958340 : ? shiftadd_cost (speed, mode, m)
4262 : : (mult_in_op1
4263 145378 : ? shiftsub1_cost (speed, mode, m)
4264 26273 : : shiftsub0_cost (speed, mode, m)));
4265 :
4266 958340 : res = comp_cost (MIN (as_cost, sa_cost), 0);
4267 1734890 : res += (mult_in_op1 ? cost0 : cost1); /* Add the cost of the operand that is not the multiplication. */
4268 :
4269 958340 : STRIP_NOPS (multop);
4270 958340 : if (!is_gimple_val (multop))
4271 487680 : res += force_expr_to_var_cost (multop, speed);
4272 :
4273 958340 : *cost = res;
4274 958340 : return true;
4275 : }
4276 :
4277 : /* Estimates cost of forcing expression EXPR into a variable. SPEED is
     : used both as the flag passed to the cost helpers and as the index
     : into the two-entry cost tables below.
     :
     : The first call fills the static integer/symbol/address cost tables
     : by costing a sample constant, the address of a static test variable
     : and that address plus a constant. After that, SSA variables are
     : free, invariants are looked up in the tables, and the handled unary
     : and binary codes cost the operation itself plus the recursively
     : computed cost of their non-trivial operands. Any other code gets
     : target_spill_cost. */
4278 :
4279 : static comp_cost
4280 28859730 : force_expr_to_var_cost (tree expr, bool speed)
4281 : {
4282 28859730 : static bool costs_initialized = false;
4283 28859730 : static unsigned integer_cost [2];
4284 28859730 : static unsigned symbol_cost [2];
4285 28859730 : static unsigned address_cost [2]; /* Within this function the name refers to this table, not to the address_cost function. */
4286 28859730 : tree op0, op1;
4287 28859730 : comp_cost cost0, cost1, cost;
4288 28859730 : machine_mode mode;
4289 28859730 : scalar_int_mode int_mode;
4290 :
4291 28859730 : if (!costs_initialized)
4292 : {
4293 41713 : tree type = build_pointer_type (integer_type_node);
4294 41713 : tree var, addr;
4295 41713 : rtx x;
4296 41713 : int i;
4297 :
4298 41713 : var = create_tmp_var_raw (integer_type_node, "test_var");
4299 41713 : TREE_STATIC (var) = 1;
4300 41713 : x = produce_memory_decl_rtl (var, NULL); /* A null REGNO is fine here: VAR is TREE_STATIC, so the symbol path is taken. */
4301 41713 : SET_DECL_RTL (var, x);
4302 :
4303 41713 : addr = build1 (ADDR_EXPR, type, var);
4304 :
4305 :
4306 166852 : for (i = 0; i < 2; i++)
4307 : {
4308 83426 : integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4309 : 2000), i);
4310 :
4311 83426 : symbol_cost[i] = computation_cost (addr, i) + 1;
4312 :
4313 83426 : address_cost[i]
4314 83426 : = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4315 83426 : if (dump_file && (dump_flags & TDF_DETAILS))
4316 : {
4317 105 : fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4318 70 : fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4319 70 : fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4320 70 : fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4321 70 : fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4322 70 : fprintf (dump_file, "\n");
4323 : }
4324 : }
4325 :
4326 41713 : costs_initialized = true;
4327 : }
4328 :
4329 28859730 : STRIP_NOPS (expr);
4330 :
4331 28859730 : if (SSA_VAR_P (expr))
4332 5461585 : return no_cost;
4333 :
4334 23398145 : if (is_gimple_min_invariant (expr))
4335 : {
4336 13932499 : if (poly_int_tree_p (expr))
4337 11846680 : return comp_cost (integer_cost [speed], 0);
4338 :
4339 2085819 : if (TREE_CODE (expr) == ADDR_EXPR)
4340 : {
4341 2085819 : tree obj = TREE_OPERAND (expr, 0);
4342 :
4343 2085819 : if (VAR_P (obj)
4344 : || TREE_CODE (obj) == PARM_DECL
4345 : || TREE_CODE (obj) == RESULT_DECL)
4346 2022251 : return comp_cost (symbol_cost [speed], 0);
4347 : }
4348 :
4349 63568 : return comp_cost (address_cost [speed], 0);
4350 : }
4351 :
4352 9465646 : switch (TREE_CODE (expr)) /* First switch: pick out the operands OP0 and OP1 to cost. */
4353 : {
4354 8099132 : case POINTER_PLUS_EXPR:
4355 8099132 : case PLUS_EXPR:
4356 8099132 : case MINUS_EXPR:
4357 8099132 : case MULT_EXPR:
4358 8099132 : case EXACT_DIV_EXPR:
4359 8099132 : case TRUNC_DIV_EXPR:
4360 8099132 : case BIT_AND_EXPR:
4361 8099132 : case BIT_IOR_EXPR:
4362 8099132 : case LSHIFT_EXPR:
4363 8099132 : case RSHIFT_EXPR:
4364 8099132 : op0 = TREE_OPERAND (expr, 0);
4365 8099132 : op1 = TREE_OPERAND (expr, 1);
4366 8099132 : STRIP_NOPS (op0);
4367 8099132 : STRIP_NOPS (op1);
4368 8099132 : break;
4369 :
4370 1366474 : CASE_CONVERT:
4371 1366474 : case NEGATE_EXPR:
4372 1366474 : case BIT_NOT_EXPR:
4373 1366474 : op0 = TREE_OPERAND (expr, 0);
4374 1366474 : STRIP_NOPS (op0);
4375 1366474 : op1 = NULL_TREE;
4376 1366474 : break;
4377 : /* See add_iv_candidate_for_doloop, for doloop may_be_zero case, we
4378 : introduce COND_EXPR for IV base, need to support better cost estimation
4379 : for this COND_EXPR and tcc_comparison. Here OP0 and OP1 are the
     : two value arms (operands 1 and 2); the condition (operand 0) is
     : costed in the second switch below. */
4380 0 : case COND_EXPR:
4381 0 : op0 = TREE_OPERAND (expr, 1);
4382 0 : STRIP_NOPS (op0);
4383 0 : op1 = TREE_OPERAND (expr, 2);
4384 0 : STRIP_NOPS (op1);
4385 0 : break;
4386 0 : case LT_EXPR:
4387 0 : case LE_EXPR:
4388 0 : case GT_EXPR:
4389 0 : case GE_EXPR:
4390 0 : case EQ_EXPR:
4391 0 : case NE_EXPR:
4392 0 : case UNORDERED_EXPR:
4393 0 : case ORDERED_EXPR:
4394 0 : case UNLT_EXPR:
4395 0 : case UNLE_EXPR:
4396 0 : case UNGT_EXPR:
4397 0 : case UNGE_EXPR:
4398 0 : case UNEQ_EXPR:
4399 0 : case LTGT_EXPR:
4400 0 : case MAX_EXPR:
4401 0 : case MIN_EXPR:
4402 0 : op0 = TREE_OPERAND (expr, 0);
4403 0 : STRIP_NOPS (op0);
4404 0 : op1 = TREE_OPERAND (expr, 1);
4405 0 : STRIP_NOPS (op1);
4406 0 : break;
4407 :
4408 40 : default:
4409 : /* Just an arbitrary value, FIXME. */
4410 40 : return comp_cost (target_spill_cost[speed], 0);
4411 : }
4412 :
4413 9465606 : if (op0 == NULL_TREE
4414 9465606 : || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4415 4478812 : cost0 = no_cost;
4416 : else
4417 4986794 : cost0 = force_expr_to_var_cost (op0, speed);
4418 :
4419 9465606 : if (op1 == NULL_TREE
4420 8099132 : || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4421 8696039 : cost1 = no_cost;
4422 : else
4423 769567 : cost1 = force_expr_to_var_cost (op1, speed);
4424 :
4425 9465606 : mode = TYPE_MODE (TREE_TYPE (expr));
4426 9465606 : switch (TREE_CODE (expr)) /* Second switch: cost of the operation itself. */
4427 : {
4428 5683822 : case POINTER_PLUS_EXPR:
4429 5683822 : case PLUS_EXPR:
4430 5683822 : case MINUS_EXPR:
4431 5683822 : case NEGATE_EXPR:
4432 5683822 : cost = comp_cost (add_cost (speed, mode), 0);
4433 5683822 : if (TREE_CODE (expr) != NEGATE_EXPR)
4434 : {
4435 5543219 : tree mult = NULL_TREE;
4436 5543219 : comp_cost sa_cost;
4437 5543219 : if (TREE_CODE (op1) == MULT_EXPR)
4438 : mult = op1;
4439 5143039 : else if (TREE_CODE (op0) == MULT_EXPR)
4440 : mult = op0;
4441 :
4442 : if (mult != NULL_TREE
4443 4584879 : && is_a <scalar_int_mode> (mode, &int_mode)
4444 1692513 : && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4445 1443349 : && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4446 : speed, &sa_cost))
4447 958340 : return sa_cost;
4448 : }
4449 : break;
4450 :
4451 1213085 : CASE_CONVERT:
4452 1213085 : {
4453 1213085 : tree inner_mode, outer_mode;
4454 1213085 : outer_mode = TREE_TYPE (expr);
4455 1213085 : inner_mode = TREE_TYPE (op0);
4456 1213085 : cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4457 1213085 : TYPE_MODE (inner_mode), speed), 0);
4458 : }
4459 1213085 : break;
4460 :
4461 2458774 : case MULT_EXPR:
4462 2458774 : if (cst_and_fits_in_hwi (op0))
4463 0 : cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4464 0 : mode, speed), 0);
4465 2458774 : else if (cst_and_fits_in_hwi (op1))
4466 2006037 : cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4467 2006037 : mode, speed), 0);
4468 : else
4469 452737 : return comp_cost (target_spill_cost [speed], 0);
4470 : break;
4471 :
4472 52189 : case EXACT_DIV_EXPR:
4473 52189 : case TRUNC_DIV_EXPR:
4474 : /* Division by power of two is usually cheap, so we allow it. Forbid
4475 : anything else. */
4476 52189 : if (integer_pow2p (TREE_OPERAND (expr, 1)))
4477 52189 : cost = comp_cost (add_cost (speed, mode), 0);
4478 : else
4479 0 : cost = comp_cost (target_spill_cost[speed], 0);
4480 : break;
4481 :
4482 57736 : case BIT_AND_EXPR:
4483 57736 : case BIT_IOR_EXPR:
4484 57736 : case BIT_NOT_EXPR:
4485 57736 : case LSHIFT_EXPR:
4486 57736 : case RSHIFT_EXPR:
4487 57736 : cost = comp_cost (add_cost (speed, mode), 0);
4488 57736 : break;
4489 0 : case COND_EXPR:
4490 0 : op0 = TREE_OPERAND (expr, 0);
4491 0 : STRIP_NOPS (op0);
4492 0 : if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4493 0 : || CONSTANT_CLASS_P (op0))
4494 0 : cost = no_cost;
4495 : else
4496 0 : cost = force_expr_to_var_cost (op0, speed);
4497 : break;
4498 0 : case LT_EXPR:
4499 0 : case LE_EXPR:
4500 0 : case GT_EXPR:
4501 0 : case GE_EXPR:
4502 0 : case EQ_EXPR:
4503 0 : case NE_EXPR:
4504 0 : case UNORDERED_EXPR:
4505 0 : case ORDERED_EXPR:
4506 0 : case UNLT_EXPR:
4507 0 : case UNLE_EXPR:
4508 0 : case UNGT_EXPR:
4509 0 : case UNGE_EXPR:
4510 0 : case UNEQ_EXPR:
4511 0 : case LTGT_EXPR:
4512 0 : case MAX_EXPR:
4513 0 : case MIN_EXPR:
4514 : /* Simply use add cost for now, FIXME if there is some more accurate cost
4515 : evaluation way. */
4516 0 : cost = comp_cost (add_cost (speed, mode), 0);
4517 0 : break;
4518 :
4519 0 : default:
4520 0 : gcc_unreachable ();
4521 : }
4522 :
4523 8054529 : cost += cost0;
4524 8054529 : cost += cost1;
4525 8054529 : return cost;
4526 : }
4527 :
4528 : /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4529 : invariants the computation depends on. A null EXPR costs nothing and
     : leaves INV_VARS alone; otherwise find_inv_vars is run on EXPR with
     : INV_VARS before the cost is computed for DATA->speed. */
4530 :
4531 : static comp_cost
4532 24653976 : force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4533 : {
4534 24653976 : if (!expr)
4535 2038287 : return no_cost;
4536 :
4537 22615689 : find_inv_vars (data, &expr, inv_vars);
4538 22615689 : return force_expr_to_var_cost (expr, data->speed);
4539 : }
4540 :
4541 : /* Returns cost of auto-modifying address expression in shape base + offset.
4542 : AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4543 : address expression. The address expression has ADDR_MODE in addr space
4544 : AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4545 : speed or size. */
4546 :
4547 : enum ainc_type /* Kinds of auto-modifying address; the values also index ainc_cost_data::costs. */
4548 : {
4549 : AINC_PRE_INC, /* Pre increment. */
4550 : AINC_PRE_DEC, /* Pre decrement. */
4551 : AINC_POST_INC, /* Post increment. */
4552 : AINC_POST_DEC, /* Post decrement. */
4553 : AINC_NONE /* Also the number of auto increment types. */
4554 : };
4555 :
4556 : struct ainc_cost_data /* Cost table for auto-modifying addresses. */
4557 : {
4558 : int64_t costs[AINC_NONE]; /* One cost per ainc_type value other than AINC_NONE. */
4559 : };
4560 :
4561 : static comp_cost
4562 1828528 : get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4563 : machine_mode addr_mode, machine_mode mem_mode,
4564 : addr_space_t as, bool speed)
4565 : {
4566 1828528 : if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4567 : && !USE_STORE_PRE_DECREMENT (mem_mode)
4568 : && !USE_LOAD_POST_DECREMENT (mem_mode)
4569 : && !USE_STORE_POST_DECREMENT (mem_mode)
4570 : && !USE_LOAD_PRE_INCREMENT (mem_mode)
4571 : && !USE_STORE_PRE_INCREMENT (mem_mode)
4572 : && !USE_LOAD_POST_INCREMENT (mem_mode)
4573 : && !USE_STORE_POST_INCREMENT (mem_mode))
4574 1828528 : return infinite_cost;
4575 :
4576 : static vec<ainc_cost_data *> ainc_cost_data_list;
4577 : unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4578 : if (idx >= ainc_cost_data_list.length ())
4579 : {
4580 : unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4581 :
4582 : gcc_assert (nsize > idx);
4583 : ainc_cost_data_list.safe_grow_cleared (nsize, true);
4584 : }
4585 :
4586 : ainc_cost_data *data = ainc_cost_data_list[idx];
4587 : if (data == NULL)
4588 : {
4589 : rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4590 :
4591 : data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4592 : data->costs[AINC_PRE_DEC] = INFTY;
4593 : data->costs[AINC_POST_DEC] = INFTY;
4594 : data->costs[AINC_PRE_INC] = INFTY;
4595 : data->costs[AINC_POST_INC] = INFTY;
4596 : if (USE_LOAD_PRE_DECREMENT (mem_mode)
4597 : || USE_STORE_PRE_DECREMENT (mem_mode))
4598 : {
4599 : rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4600 :
4601 : if (memory_address_addr_space_p (mem_mode, addr, as))
4602 : data->costs[AINC_PRE_DEC]
4603 : = address_cost (addr, mem_mode, as, speed);
4604 : }
4605 : if (USE_LOAD_POST_DECREMENT (mem_mode)
4606 : || USE_STORE_POST_DECREMENT (mem_mode))
4607 : {
4608 : rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4609 :
4610 : if (memory_address_addr_space_p (mem_mode, addr, as))
4611 : data->costs[AINC_POST_DEC]
4612 : = address_cost (addr, mem_mode, as, speed);
4613 : }
4614 : if (USE_LOAD_PRE_INCREMENT (mem_mode)
4615 : || USE_STORE_PRE_INCREMENT (mem_mode))
4616 : {
4617 : rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4618 :
4619 : if (memory_address_addr_space_p (mem_mode, addr, as))
4620 : data->costs[AINC_PRE_INC]
4621 : = address_cost (addr, mem_mode, as, speed);
4622 : }
4623 : if (USE_LOAD_POST_INCREMENT (mem_mode)
4624 : || USE_STORE_POST_INCREMENT (mem_mode))
4625 : {
4626 : rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4627 :
4628 : if (memory_address_addr_space_p (mem_mode, addr, as))
4629 : data->costs[AINC_POST_INC]
4630 : = address_cost (addr, mem_mode, as, speed);
4631 : }
4632 : ainc_cost_data_list[idx] = data;
4633 : }
4634 :
4635 : poly_int64 msize = GET_MODE_SIZE (mem_mode);
4636 : if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4637 : return comp_cost (data->costs[AINC_POST_INC], 0);
4638 : if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4639 : return comp_cost (data->costs[AINC_POST_DEC], 0);
4640 : if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4641 : return comp_cost (data->costs[AINC_PRE_INC], 0);
4642 : if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4643 : return comp_cost (data->costs[AINC_PRE_DEC], 0);
4644 :
4645 : return infinite_cost;
4646 : }
4647 :
4648 : /* Return cost of computing USE's address expression by using CAND.
4649 : AFF_INV and AFF_VAR represent invariant and variant parts of the
4650 : address expression, respectively. If AFF_INV is simple, store
4651 : the loop invariant variables which are depended by it in INV_VARS;
4652 : if AFF_INV is complicated, handle it as a new invariant expression
4653 : and record it in INV_EXPR. RATIO indicates multiple times between
4654 : steps of USE and CAND. If CAN_AUTOINC is nonNULL, store boolean
4655 : value to it indicating if this is an auto-increment address. */
4656 :
4657 : static comp_cost
4658 5594773 : get_address_cost (struct ivopts_data *data, struct iv_use *use,
4659 : struct iv_cand *cand, aff_tree *aff_inv,
4660 : aff_tree *aff_var, HOST_WIDE_INT ratio,
4661 : bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4662 : bool *can_autoinc, bool speed)
4663 : {
4664 5594773 : rtx addr;
4665 5594773 : bool simple_inv = true;
4666 5594773 : tree comp_inv = NULL_TREE, type = aff_var->type;
4667 5594773 : comp_cost var_cost = no_cost, cost = no_cost;
4668 5594773 : struct mem_address parts = {NULL_TREE, integer_one_node,
4669 5594773 : NULL_TREE, NULL_TREE, NULL_TREE};
4670 5594773 : machine_mode addr_mode = TYPE_MODE (type);
4671 5594773 : machine_mode mem_mode = TYPE_MODE (use->mem_type);
4672 5594773 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4673 : /* Only true if ratio != 1. */
4674 5594773 : bool ok_with_ratio_p = false;
4675 5594773 : bool ok_without_ratio_p = false;
4676 5594773 : code_helper code = ERROR_MARK;
4677 :
4678 5594773 : if (use->type == USE_PTR_ADDRESS)
4679 : {
4680 4496 : gcall *call = as_a<gcall *> (use->stmt);
4681 4496 : gcc_assert (gimple_call_internal_p (call));
4682 4496 : code = gimple_call_internal_fn (call);
4683 : }
4684 :
4685 5594773 : if (!aff_combination_const_p (aff_inv))
4686 : {
4687 3697086 : parts.index = integer_one_node;
4688 : /* Addressing mode "base + index". */
4689 3697086 : ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4690 3697086 : if (ratio != 1)
4691 : {
4692 2801306 : parts.step = wide_int_to_tree (type, ratio);
4693 : /* Addressing mode "base + index << scale". */
4694 2801306 : ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4695 2801306 : if (!ok_with_ratio_p)
4696 1715885 : parts.step = NULL_TREE;
4697 : }
4698 2611665 : if (ok_with_ratio_p || ok_without_ratio_p)
4699 : {
4700 3697086 : if (maybe_ne (aff_inv->offset, 0))
4701 : {
4702 2424037 : parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4703 : /* Addressing mode "base + index [<< scale] + offset". */
4704 2424037 : if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4705 493 : parts.offset = NULL_TREE;
4706 : else
4707 2423544 : aff_inv->offset = 0;
4708 : }
4709 :
4710 3697086 : move_fixed_address_to_symbol (&parts, aff_inv);
4711 : /* Base is fixed address and is moved to symbol part. */
4712 3697086 : if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4713 446049 : parts.base = NULL_TREE;
4714 :
4715 : /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4716 3697086 : if (parts.symbol != NULL_TREE
4717 3697086 : && !valid_mem_ref_p (mem_mode, as, &parts, code))
4718 : {
4719 6616 : aff_combination_add_elt (aff_inv, parts.symbol, 1);
4720 6616 : parts.symbol = NULL_TREE;
4721 : /* Reset SIMPLE_INV since symbol address needs to be computed
4722 : outside of address expression in this case. */
4723 6616 : simple_inv = false;
4724 : /* Symbol part is moved back to base part, it can't be NULL. */
4725 6616 : parts.base = integer_one_node;
4726 : }
4727 : }
4728 : else
4729 0 : parts.index = NULL_TREE;
4730 : }
4731 : else
4732 : {
4733 1897687 : poly_int64 ainc_step;
4734 1897687 : if (can_autoinc
4735 1897687 : && ratio == 1
4736 3795366 : && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4737 : {
4738 1828528 : poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4739 :
4740 1828528 : if (stmt_after_increment (data->current_loop, cand, use->stmt))
4741 : ainc_offset += ainc_step;
4742 1828528 : cost = get_address_cost_ainc (ainc_step, ainc_offset,
4743 : addr_mode, mem_mode, as, speed);
4744 1828528 : if (!cost.infinite_cost_p ())
4745 : {
4746 0 : *can_autoinc = true;
4747 0 : return cost;
4748 : }
4749 1828528 : cost = no_cost;
4750 : }
4751 1897687 : if (!aff_combination_zero_p (aff_inv))
4752 : {
4753 1079398 : parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4754 : /* Addressing mode "base + offset". */
4755 1079398 : if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4756 44 : parts.offset = NULL_TREE;
4757 : else
4758 1079354 : aff_inv->offset = 0;
4759 : }
4760 : }
4761 :
4762 1904303 : if (simple_inv)
4763 5588157 : simple_inv = (aff_inv == NULL
4764 8844971 : || aff_combination_const_p (aff_inv)
4765 8838355 : || aff_combination_singleton_var_p (aff_inv));
4766 5594773 : if (!aff_combination_zero_p (aff_inv))
4767 3256900 : comp_inv = aff_combination_to_tree (aff_inv);
4768 3256900 : if (comp_inv != NULL_TREE)
4769 3256900 : cost = force_var_cost (data, comp_inv, inv_vars);
4770 5594773 : if (ratio != 1 && parts.step == NULL_TREE)
4771 1715893 : var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4772 5594773 : if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4773 44 : var_cost += add_cost (speed, addr_mode);
4774 :
4775 5594773 : if (comp_inv && inv_expr && !simple_inv)
4776 : {
4777 749703 : *inv_expr = get_loop_invariant_expr (data, comp_inv);
4778 : /* Clear depends on. */
4779 749703 : if (*inv_expr != NULL && inv_vars && *inv_vars)
4780 431019 : bitmap_clear (*inv_vars);
4781 :
4782 : /* Cost of small invariant expression adjusted against loop niters
4783 : is usually zero, which makes it difficult to be differentiated
4784 : from candidate based on loop invariant variables. Secondly, the
4785 : generated invariant expression may not be hoisted out of loop by
4786 : following pass. We penalize the cost by rounding up in order to
4787 : neutralize such effects. */
4788 749703 : cost.cost = adjust_setup_cost (data, cost.cost, true);
4789 749703 : cost.scratch = cost.cost;
4790 : }
4791 :
4792 5594773 : cost += var_cost;
4793 5594773 : addr = addr_for_mem_ref (&parts, as, false);
4794 5594773 : gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4795 5594773 : cost += address_cost (addr, mem_mode, as, speed);
4796 :
4797 5594773 : if (parts.symbol != NULL_TREE)
4798 500094 : cost.complexity += 1;
4799 : /* Don't increase the complexity of adding a scaled index if it's
4800 : the only kind of index that the target allows. */
4801 5594773 : if (parts.step != NULL_TREE && ok_without_ratio_p)
4802 1085421 : cost.complexity += 1;
4803 5594773 : if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4804 3256856 : cost.complexity += 1;
4805 5594773 : if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4806 3502898 : cost.complexity += 1;
4807 :
4808 : return cost;
4809 : }
4810 :
4811 : /* Scale (multiply) the computed COST (except scratch part that should be
4812 : hoisted out a loop) by header->frequency / AT->frequency, which makes
4813 : expected cost more accurate. */
4814 :
4815 : static comp_cost
4816 12869997 : get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4817 : {
4818 12869997 : if (data->speed
4819 12869997 : && data->current_loop->header->count.to_frequency (cfun) > 0)
4820 : {
4821 11290028 : basic_block bb = gimple_bb (at);
4822 11290028 : gcc_assert (cost.scratch <= cost.cost);
4823 11290028 : int scale_factor = (int)(intptr_t) bb->aux;
4824 11290028 : if (scale_factor == 1)
4825 10728183 : return cost;
4826 :
4827 561845 : int64_t scaled_cost
4828 561845 : = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4829 :
4830 561845 : if (dump_file && (dump_flags & TDF_DETAILS))
4831 93 : fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4832 : "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4833 : 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4834 :
4835 : cost.cost = scaled_cost;
4836 : }
4837 :
4838 2141814 : return cost;
4839 : }
4840 :
4841 : /* Determines the cost of the computation by that USE is expressed
4842 : from induction variable CAND. If ADDRESS_P is true, we just need
4843 : to create an address from it, otherwise we want to get it into
4844 : register. A set of invariants we depend on is stored in INV_VARS.
4845 : If CAN_AUTOINC is nonnull, use it to record whether autoinc
4846 : addressing is likely. If INV_EXPR is nonnull, record invariant
4847 : expr entry in it. */
4848 :
4849 : static comp_cost
4850 20214101 : get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4851 : struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4852 : bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4853 : {
4854 20214101 : gimple *at = use->stmt;
4855 20214101 : tree ubase = use->iv->base, cbase = cand->iv->base;
4856 20214101 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4857 20214101 : tree comp_inv = NULL_TREE;
4858 20214101 : HOST_WIDE_INT ratio, aratio;
4859 20214101 : comp_cost cost;
4860 20214101 : widest_int rat;
4861 40428202 : aff_tree aff_inv, aff_var;
4862 20214101 : bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4863 :
4864 20214101 : if (inv_vars)
4865 17698201 : *inv_vars = NULL;
4866 20214101 : if (can_autoinc)
4867 8801481 : *can_autoinc = false;
4868 20214101 : if (inv_expr)
4869 19797952 : *inv_expr = NULL;
4870 :
4871 : /* Check if we have enough precision to express the values of use. */
4872 20214101 : if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4873 3054803 : return infinite_cost;
4874 :
4875 17159298 : if (address_p
4876 17159298 : || (use->iv->base_object
4877 2123208 : && cand->iv->base_object
4878 1039653 : && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4879 1028222 : && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4880 : {
4881 : /* Do not try to express address of an object with computation based
4882 : on address of a different object. This may cause problems in rtl
4883 : level alias analysis (that does not expect this to be happening,
4884 : as this is illegal in C), and would be unlikely to be useful
4885 : anyway. */
4886 7992307 : if (use->iv->base_object
4887 7992307 : && cand->iv->base_object
4888 12274713 : && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4889 1454690 : return infinite_cost;
4890 : }
4891 :
4892 15704608 : if (!get_computation_aff_1 (data, at, use, cand, &aff_inv, &aff_var, &rat)
4893 15704608 : || !wi::fits_shwi_p (rat))
4894 2834611 : return infinite_cost;
4895 :
4896 12869997 : ratio = rat.to_shwi ();
4897 12869997 : if (address_p)
4898 : {
4899 5594773 : cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4900 : inv_vars, inv_expr, can_autoinc, speed);
4901 5594773 : cost = get_scaled_computation_cost_at (data, at, cost);
4902 : /* For doloop IV cand, add on the extra cost. */
4903 5594773 : cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4904 5594773 : return cost;
4905 : }
4906 :
4907 7275224 : bool simple_inv = (aff_combination_const_p (&aff_inv)
4908 2007541 : || aff_combination_singleton_var_p (&aff_inv));
4909 7275224 : tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4910 7275224 : aff_combination_convert (&aff_inv, signed_type);
4911 7275224 : if (!aff_combination_zero_p (&aff_inv))
4912 5236937 : comp_inv = aff_combination_to_tree (&aff_inv);
4913 :
4914 7275224 : cost = force_var_cost (data, comp_inv, inv_vars);
4915 7275224 : if (comp_inv && inv_expr && !simple_inv)
4916 : {
4917 1400648 : *inv_expr = get_loop_invariant_expr (data, comp_inv);
4918 : /* Clear depends on. */
4919 1400648 : if (*inv_expr != NULL && inv_vars && *inv_vars)
4920 870847 : bitmap_clear (*inv_vars);
4921 :
4922 1400648 : cost.cost = adjust_setup_cost (data, cost.cost);
4923 : /* Record setup cost in scratch field. */
4924 1400648 : cost.scratch = cost.cost;
4925 : }
4926 : /* Cost of constant integer can be covered when adding invariant part to
4927 : variant part. */
4928 5874576 : else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4929 3229371 : cost = no_cost;
4930 :
4931 : /* Need type narrowing to represent use with cand. */
4932 7275224 : if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4933 : {
4934 790694 : machine_mode outer_mode = TYPE_MODE (utype);
4935 790694 : machine_mode inner_mode = TYPE_MODE (ctype);
4936 790694 : cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4937 : }
4938 :
4939 : /* Turn a + i * (-c) into a - i * c. */
4940 7275224 : if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4941 1842021 : aratio = -ratio;
4942 : else
4943 : aratio = ratio;
4944 :
4945 7275224 : if (ratio != 1)
4946 2742446 : cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4947 :
4948 : /* TODO: We may also need to check if we can compute a + i * 4 in one
4949 : instruction. */
4950 : /* Need to add up the invariant and variant parts. */
4951 7275224 : if (comp_inv && !integer_zerop (comp_inv))
4952 10466926 : cost += add_cost (speed, TYPE_MODE (utype));
4953 :
4954 7275224 : cost = get_scaled_computation_cost_at (data, at, cost);
4955 :
4956 : /* For doloop IV cand, add on the extra cost. */
4957 7275224 : if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4958 0 : cost += targetm.doloop_cost_for_generic;
4959 :
4960 7275224 : return cost;
4961 20214101 : }
4962 :
4963 : /* Determines cost of computing the use in GROUP with CAND in a generic
4964 : expression. */
4965 :
4966 : static bool
4967 5534783 : determine_group_iv_cost_generic (struct ivopts_data *data,
4968 : struct iv_group *group, struct iv_cand *cand)
4969 : {
4970 5534783 : comp_cost cost;
4971 5534783 : iv_inv_expr_ent *inv_expr = NULL;
4972 5534783 : bitmap inv_vars = NULL, inv_exprs = NULL;
4973 5534783 : struct iv_use *use = group->vuses[0];
4974 :
4975 : /* The simple case first -- if we need to express value of the preserved
4976 : original biv, the cost is 0. This also prevents us from counting the
4977 : cost of increment twice -- once at this use and once in the cost of
4978 : the candidate. */
4979 5534783 : if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4980 56935 : cost = no_cost;
4981 : /* If the IV candidate involves undefined SSA values and is not the
4982 : same IV as on the USE avoid using that candidate here. */
4983 5477848 : else if (cand->involves_undefs
4984 5477848 : && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
4985 218 : return false;
4986 : else
4987 5477630 : cost = get_computation_cost (data, use, cand, false,
4988 : &inv_vars, NULL, &inv_expr);
4989 :
4990 5534565 : if (inv_expr)
4991 : {
4992 984144 : inv_exprs = BITMAP_ALLOC (NULL);
4993 984144 : bitmap_set_bit (inv_exprs, inv_expr->id);
4994 : }
4995 5534565 : set_group_iv_cost (data, group, cand, cost, inv_vars,
4996 : NULL_TREE, ERROR_MARK, inv_exprs);
4997 5534565 : return !cost.infinite_cost_p ();
4998 : }
4999 :
5000 : /* Determines cost of computing uses in GROUP with CAND in addresses. */
5001 :
5002 : static bool
5003 6285581 : determine_group_iv_cost_address (struct ivopts_data *data,
5004 : struct iv_group *group, struct iv_cand *cand)
5005 : {
5006 6285581 : unsigned i;
5007 6285581 : bitmap inv_vars = NULL, inv_exprs = NULL;
5008 6285581 : bool can_autoinc;
5009 6285581 : iv_inv_expr_ent *inv_expr = NULL;
5010 6285581 : struct iv_use *use = group->vuses[0];
5011 6285581 : comp_cost sum_cost = no_cost, cost;
5012 :
5013 6285581 : cost = get_computation_cost (data, use, cand, true,
5014 : &inv_vars, &can_autoinc, &inv_expr);
5015 :
5016 6285581 : if (inv_expr)
5017 : {
5018 461266 : inv_exprs = BITMAP_ALLOC (NULL);
5019 461266 : bitmap_set_bit (inv_exprs, inv_expr->id);
5020 : }
5021 6285581 : sum_cost = cost;
5022 6285581 : if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5023 : {
5024 0 : if (can_autoinc)
5025 0 : sum_cost -= cand->cost_step;
5026 : /* If we generated the candidate solely for exploiting autoincrement
5027 : opportunities, and it turns out it can't be used, set the cost to
5028 : infinity to make sure we ignore it. */
5029 0 : else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5030 0 : sum_cost = infinite_cost;
5031 : }
5032 :
5033 : /* Compute and add costs for rest uses of this group. */
5034 8385332 : for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5035 : {
5036 2099751 : struct iv_use *next = group->vuses[i];
5037 :
5038 : /* TODO: We could skip computing cost for sub iv_use when it has the
5039 : same cost as the first iv_use, but the cost really depends on the
5040 : offset and where the iv_use is. */
5041 2099751 : cost = get_computation_cost (data, next, cand, true,
5042 : NULL, &can_autoinc, &inv_expr);
5043 2099751 : if (inv_expr)
5044 : {
5045 288208 : if (!inv_exprs)
5046 90 : inv_exprs = BITMAP_ALLOC (NULL);
5047 :
5048 : /* Uses in a group can share setup code,
5049 : so only add setup cost once. */
5050 288208 : if (bitmap_bit_p (inv_exprs, inv_expr->id))
5051 287821 : cost -= cost.scratch;
5052 : else
5053 387 : bitmap_set_bit (inv_exprs, inv_expr->id);
5054 : }
5055 2099751 : sum_cost += cost;
5056 : }
5057 6285581 : set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5058 : NULL_TREE, ERROR_MARK, inv_exprs);
5059 :
5060 6285581 : return !sum_cost.infinite_cost_p ();
5061 : }
5062 :
5063 : /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5064 : and stores it to VAL. */
5065 :
5066 : static void
5067 3795775 : cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5068 : class tree_niter_desc *desc, aff_tree *val)
5069 : {
5070 11387325 : aff_tree step, delta, nit;
5071 3795775 : struct iv *iv = cand->iv;
5072 3795775 : tree type = TREE_TYPE (iv->base);
5073 3795775 : tree niter = desc->niter;
5074 3795775 : bool after_adjust = stmt_after_increment (loop, cand, at);
5075 3795775 : tree steptype;
5076 :
5077 3795775 : if (POINTER_TYPE_P (type))
5078 106497 : steptype = sizetype;
5079 : else
5080 3689278 : steptype = unsigned_type_for (type);
5081 :
5082 : /* If AFTER_ADJUST is required, the code below generates the equivalent
5083 : of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5084 : BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5085 : SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5086 : doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5087 : class for common idioms that we know are safe. */
5088 3795775 : if (after_adjust
5089 3602245 : && desc->control.no_overflow
5090 3594575 : && integer_onep (desc->control.step)
5091 955965 : && (desc->cmp == LT_EXPR
5092 35518 : || desc->cmp == NE_EXPR)
5093 4751740 : && TREE_CODE (desc->bound) == SSA_NAME)
5094 : {
5095 505477 : if (integer_onep (desc->control.base))
5096 : {
5097 375400 : niter = desc->bound;
5098 375400 : after_adjust = false;
5099 : }
5100 130077 : else if (TREE_CODE (niter) == MINUS_EXPR
5101 130077 : && integer_onep (TREE_OPERAND (niter, 1)))
5102 : {
5103 67974 : niter = TREE_OPERAND (niter, 0);
5104 67974 : after_adjust = false;
5105 : }
5106 : }
5107 :
5108 3795775 : tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5109 3795775 : aff_combination_convert (&step, steptype);
5110 3795775 : tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5111 3795775 : aff_combination_convert (&nit, steptype);
5112 3795775 : aff_combination_mult (&nit, &step, &delta);
5113 3795775 : if (after_adjust)
5114 3158871 : aff_combination_add (&delta, &step);
5115 :
5116 3795775 : tree_to_aff_combination (iv->base, type, val);
5117 3795775 : if (!POINTER_TYPE_P (type))
5118 3689278 : aff_combination_convert (val, steptype);
5119 3795775 : aff_combination_add (val, &delta);
5120 3795775 : }
5121 :
5122 : /* Returns period of induction variable iv. */
5123 :
5124 : static tree
5125 4063116 : iv_period (struct iv *iv)
5126 : {
5127 4063116 : tree step = iv->step, period, type;
5128 4063116 : tree pow2div;
5129 :
5130 4063116 : gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5131 :
5132 4063116 : type = unsigned_type_for (TREE_TYPE (step));
5133 : /* Period of the iv is lcm (step, type_range)/step -1,
5134 : i.e., N*type_range/step - 1. Since type range is power
5135 : of two, N == (step >> num_of_ending_zeros_binary (step),
5136 : so the final result is
5137 :
5138 : (type_range >> num_of_ending_zeros_binary (step)) - 1
5139 :
5140 : */
5141 4063116 : pow2div = num_ending_zeros (step);
5142 :
5143 12189348 : period = build_low_bits_mask (type,
5144 4063116 : (TYPE_PRECISION (type)
5145 4063116 : - tree_to_uhwi (pow2div)));
5146 :
5147 4063116 : return period;
5148 : }
5149 :
5150 : /* Returns the comparison operator used when eliminating the iv USE. */
5151 :
5152 : static enum tree_code
5153 3795775 : iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5154 : {
5155 3795775 : class loop *loop = data->current_loop;
5156 3795775 : basic_block ex_bb;
5157 3795775 : edge exit;
5158 :
5159 3795775 : ex_bb = gimple_bb (use->stmt);
5160 3795775 : exit = EDGE_SUCC (ex_bb, 0);
5161 3795775 : if (flow_bb_inside_loop_p (loop, exit->dest))
5162 2848157 : exit = EDGE_SUCC (ex_bb, 1);
5163 :
5164 3795775 : return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5165 : }
5166 :
5167 : /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5168 : we only detect the situation that BASE = SOMETHING + OFFSET, where the
5169 : calculation is performed in non-wrapping type.
5170 :
5171 : TODO: More generally, we could test for the situation that
5172 : BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5173 : This would require knowing the sign of OFFSET. */
5174 :
5175 : static bool
5176 477 : difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5177 : {
5178 477 : enum tree_code code;
5179 477 : tree e1, e2;
5180 1431 : aff_tree aff_e1, aff_e2, aff_offset;
5181 :
5182 477 : if (!nowrap_type_p (TREE_TYPE (base)))
5183 : return false;
5184 :
5185 477 : base = expand_simple_operations (base);
5186 :
5187 477 : if (TREE_CODE (base) == SSA_NAME)
5188 : {
5189 476 : gimple *stmt = SSA_NAME_DEF_STMT (base);
5190 :
5191 476 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
5192 : return false;
5193 :
5194 18 : code = gimple_assign_rhs_code (stmt);
5195 18 : if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5196 : return false;
5197 :
5198 5 : e1 = gimple_assign_rhs1 (stmt);
5199 5 : e2 = gimple_assign_rhs2 (stmt);
5200 : }
5201 : else
5202 : {
5203 1 : code = TREE_CODE (base);
5204 1 : if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5205 : return false;
5206 0 : e1 = TREE_OPERAND (base, 0);
5207 0 : e2 = TREE_OPERAND (base, 1);
5208 : }
5209 :
5210 : /* Use affine expansion as deeper inspection to prove the equality. */
5211 5 : tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5212 : &aff_e2, &data->name_expansion_cache);
5213 5 : tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5214 : &aff_offset, &data->name_expansion_cache);
5215 5 : aff_combination_scale (&aff_offset, -1);
5216 5 : switch (code)
5217 : {
5218 3 : case PLUS_EXPR:
5219 3 : aff_combination_add (&aff_e2, &aff_offset);
5220 3 : if (aff_combination_zero_p (&aff_e2))
5221 : return true;
5222 :
5223 1 : tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5224 : &aff_e1, &data->name_expansion_cache);
5225 1 : aff_combination_add (&aff_e1, &aff_offset);
5226 1 : return aff_combination_zero_p (&aff_e1);
5227 :
5228 2 : case POINTER_PLUS_EXPR:
5229 2 : aff_combination_add (&aff_e2, &aff_offset);
5230 2 : return aff_combination_zero_p (&aff_e2);
5231 :
5232 : default:
5233 : return false;
5234 : }
5235 477 : }
5236 :
5237 : /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5238 : comparison with CAND. NITER describes the number of iterations of
5239 : the loops. If successful, the comparison in COMP_P is altered accordingly.
5240 :
5241 : We aim to handle the following situation:
5242 :
5243 : sometype *base, *p;
5244 : int a, b, i;
5245 :
5246 : i = a;
5247 : p = p_0 = base + a;
5248 :
5249 : do
5250 : {
5251 : bla (*p);
5252 : p++;
5253 : i++;
5254 : }
5255 : while (i < b);
5256 :
5257 : Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5258 : We aim to optimize this to
5259 :
5260 : p = p_0 = base + a;
5261 : do
5262 : {
5263 : bla (*p);
5264 : p++;
5265 : }
5266 : while (p < p_0 - a + b);
5267 :
5268 : This preserves the correctness, since the pointer arithmetics does not
5269 : overflow. More precisely:
5270 :
5271 : 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5272 : overflow in computing it or the values of p.
5273 : 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5274 : overflow. To prove this, we use the fact that p_0 = base + a. */
5275 :
5276 : static bool
5277 206012 : iv_elimination_compare_lt (struct ivopts_data *data,
5278 : struct iv_cand *cand, enum tree_code *comp_p,
5279 : class tree_niter_desc *niter)
5280 : {
5281 206012 : tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5282 618036 : class aff_tree nit, tmpa, tmpb;
5283 206012 : enum tree_code comp;
5284 206012 : HOST_WIDE_INT step;
5285 :
5286 : /* We need to know that the candidate induction variable does not overflow.
5287 : While more complex analysis may be used to prove this, for now just
5288 : check that the variable appears in the original program and that it
5289 : is computed in a type that guarantees no overflows. */
5290 206012 : cand_type = TREE_TYPE (cand->iv->base);
5291 206012 : if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5292 184055 : return false;
5293 :
5294 : /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5295 : the calculation of the BOUND could overflow, making the comparison
5296 : invalid. */
5297 21957 : if (!data->loop_single_exit_p)
5298 : return false;
5299 :
5300 : /* We need to be able to decide whether candidate is increasing or decreasing
5301 : in order to choose the right comparison operator. */
5302 15317 : if (!cst_and_fits_in_hwi (cand->iv->step))
5303 : return false;
5304 15317 : step = int_cst_value (cand->iv->step);
5305 :
5306 : /* Check that the number of iterations matches the expected pattern:
5307 : a + 1 > b ? 0 : b - a - 1. */
5308 15317 : mbz = niter->may_be_zero;
5309 15317 : if (TREE_CODE (mbz) == GT_EXPR)
5310 : {
5311 : /* Handle a + 1 > b. */
5312 1715 : tree op0 = TREE_OPERAND (mbz, 0);
5313 1715 : if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5314 : {
5315 794 : a = TREE_OPERAND (op0, 0);
5316 794 : b = TREE_OPERAND (mbz, 1);
5317 : }
5318 : else
5319 921 : return false;
5320 : }
5321 13602 : else if (TREE_CODE (mbz) == LT_EXPR)
5322 : {
5323 4645 : tree op1 = TREE_OPERAND (mbz, 1);
5324 :
5325 : /* Handle b < a + 1. */
5326 4645 : if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5327 : {
5328 82 : a = TREE_OPERAND (op1, 0);
5329 82 : b = TREE_OPERAND (mbz, 0);
5330 : }
5331 : else
5332 4563 : return false;
5333 : }
5334 : else
5335 : return false;
5336 :
5337 : /* Expected number of iterations is B - A - 1. Check that it matches
5338 : the actual number, i.e., that B - A - NITER = 1. */
5339 876 : tree_to_aff_combination (niter->niter, nit_type, &nit);
5340 876 : tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5341 876 : tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5342 876 : aff_combination_scale (&nit, -1);
5343 876 : aff_combination_scale (&tmpa, -1);
5344 876 : aff_combination_add (&tmpb, &tmpa);
5345 876 : aff_combination_add (&tmpb, &nit);
5346 876 : if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5347 399 : return false;
5348 :
5349 : /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5350 : overflow. */
5351 477 : offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5352 : cand->iv->step,
5353 : fold_convert (TREE_TYPE (cand->iv->step), a));
5354 477 : if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5355 : return false;
5356 :
5357 : /* Determine the new comparison operator. */
5358 4 : comp = step < 0 ? GT_EXPR : LT_EXPR;
5359 4 : if (*comp_p == NE_EXPR)
5360 4 : *comp_p = comp;
5361 0 : else if (*comp_p == EQ_EXPR)
5362 0 : *comp_p = invert_tree_comparison (comp, false);
5363 : else
5364 0 : gcc_unreachable ();
5365 :
5366 : return true;
5367 206012 : }
5368 :
5369 : /* Check whether it is possible to express the condition in USE by comparison
5370 : of candidate CAND. If so, store the value compared with to BOUND, and the
5371 : comparison operator to COMP. */
5372 :
5373 : static bool
5374 4909096 : may_eliminate_iv (struct ivopts_data *data,
5375 : struct iv_use *use, struct iv_cand *cand, tree *bound,
5376 : enum tree_code *comp)
5377 : {
5378 4909096 : basic_block ex_bb;
5379 4909096 : edge exit;
5380 4909096 : tree period;
5381 4909096 : class loop *loop = data->current_loop;
5382 4909096 : aff_tree bnd;
5383 4909096 : class tree_niter_desc *desc = NULL;
5384 :
5385 4909096 : if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5386 : return false;
5387 :
5388 : /* For now works only for exits that dominate the loop latch.
5389 : TODO: extend to other conditions inside loop body. */
5390 4714484 : ex_bb = gimple_bb (use->stmt);
5391 4714484 : if (use->stmt != last_nondebug_stmt (ex_bb)
5392 4607956 : || gimple_code (use->stmt) != GIMPLE_COND
5393 9320275 : || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5394 235915 : return false;
5395 :
5396 4478569 : exit = EDGE_SUCC (ex_bb, 0);
5397 4478569 : if (flow_bb_inside_loop_p (loop, exit->dest))
5398 3375714 : exit = EDGE_SUCC (ex_bb, 1);
5399 4478569 : if (flow_bb_inside_loop_p (loop, exit->dest))
5400 : return false;
5401 :
5402 4362069 : desc = niter_for_exit (data, exit);
5403 4362069 : if (!desc)
5404 : return false;
5405 :
5406 : /* Determine whether we can use the variable to test the exit condition.
5407 : This is the case iff the period of the induction variable is greater
5408 : than the number of iterations for which the exit condition is true. */
5409 4063116 : period = iv_period (cand->iv);
5410 :
5411 : /* If the number of iterations is constant, compare against it directly. */
5412 4063116 : if (TREE_CODE (desc->niter) == INTEGER_CST)
5413 : {
5414 : /* See cand_value_at. */
5415 2657423 : if (stmt_after_increment (loop, cand, use->stmt))
5416 : {
5417 2601781 : if (!tree_int_cst_lt (desc->niter, period))
5418 : return false;
5419 : }
5420 : else
5421 : {
5422 55642 : if (tree_int_cst_lt (period, desc->niter))
5423 : return false;
5424 : }
5425 : }
5426 :
5427 : /* If not, and if this is the only possible exit of the loop, see whether
5428 : we can get a conservative estimate on the number of iterations of the
5429 : entire loop and compare against that instead. */
5430 : else
5431 : {
5432 1405693 : widest_int period_value, max_niter;
5433 :
5434 1405693 : max_niter = desc->max;
5435 1405693 : if (stmt_after_increment (loop, cand, use->stmt))
5436 1198216 : max_niter += 1;
5437 1405693 : period_value = wi::to_widest (period);
5438 1405693 : if (wi::gtu_p (max_niter, period_value))
5439 : {
5440 : /* See if we can take advantage of inferred loop bound
5441 : information. */
5442 472382 : if (data->loop_single_exit_p)
5443 : {
5444 281988 : if (!max_loop_iterations (loop, &max_niter))
5445 : return false;
5446 : /* The loop bound is already adjusted by adding 1. */
5447 281988 : if (wi::gtu_p (max_niter, period_value))
5448 : return false;
5449 : }
5450 : else
5451 : return false;
5452 : }
5453 1405693 : }
5454 :
5455 : /* For doloop IV cand, the bound would be zero. It's safe whether
5456 : may_be_zero set or not. */
5457 3795775 : if (cand->doloop_p)
5458 : {
5459 0 : *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5460 0 : *comp = iv_elimination_compare (data, use);
5461 0 : return true;
5462 : }
5463 :
5464 3795775 : cand_value_at (loop, cand, use->stmt, desc, &bnd);
5465 :
5466 3795775 : *bound = fold_convert (TREE_TYPE (cand->iv->base),
5467 : aff_combination_to_tree (&bnd));
5468 3795775 : *comp = iv_elimination_compare (data, use);
5469 :
5470 : /* It is unlikely that computing the number of iterations using division
5471 : would be more profitable than keeping the original induction variable. */
5472 3795775 : bool cond_overflow_p;
5473 3795775 : if (expression_expensive_p (*bound, &cond_overflow_p))
5474 : return false;
5475 :
5476 : /* Sometimes, it is possible to handle the situation that the number of
5477 : iterations may be zero unless additional assumptions by using <
5478 : instead of != in the exit condition.
5479 :
5480 : TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5481 : base the exit condition on it. However, that is often too
5482 : expensive. */
5483 3786900 : if (!integer_zerop (desc->may_be_zero))
5484 206012 : return iv_elimination_compare_lt (data, cand, comp, desc);
5485 :
5486 : return true;
5487 4909096 : }
5488 :
5489 : /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5490 : be copied, if it is used in the loop body and DATA->body_includes_call. */
5491 :
5492 : static int
5493 8356751 : parm_decl_cost (struct ivopts_data *data, tree bound)
5494 : {
5495 8356751 : tree sbound = bound;
5496 8356751 : STRIP_NOPS (sbound);
5497 :
5498 8356751 : if (TREE_CODE (sbound) == SSA_NAME
5499 2921373 : && SSA_NAME_IS_DEFAULT_DEF (sbound)
5500 152624 : && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5501 8507329 : && data->body_includes_call)
5502 38169 : return COSTS_N_INSNS (1);
5503 :
5504 : return 0;
5505 : }
5506 :
5507 : /* Determines cost of computing the use in GROUP with CAND in a condition. */
5508 :
5509 : static bool
5510 5934990 : determine_group_iv_cost_cond (struct ivopts_data *data,
5511 : struct iv_group *group, struct iv_cand *cand)
5512 : {
5513 5934990 : tree bound = NULL_TREE;
5514 5934990 : struct iv *cmp_iv;
5515 5934990 : bitmap inv_exprs = NULL;
5516 5934990 : bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5517 5934990 : comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5518 5934990 : enum comp_iv_rewrite rewrite_type;
5519 5934990 : iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5520 5934990 : tree *control_var, *bound_cst;
5521 5934990 : enum tree_code comp = ERROR_MARK;
5522 5934990 : struct iv_use *use = group->vuses[0];
5523 :
5524 : /* Extract condition operands. */
5525 5934990 : rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5526 : &bound_cst, NULL, &cmp_iv);
5527 5934990 : gcc_assert (rewrite_type != COMP_IV_NA);
5528 :
5529 : /* Try iv elimination. */
5530 5934990 : if (rewrite_type == COMP_IV_ELIM
5531 5934990 : && may_eliminate_iv (data, use, cand, &bound, &comp))
5532 : {
5533 3580892 : elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5534 3580892 : if (elim_cost.cost == 0)
5535 2443063 : elim_cost.cost = parm_decl_cost (data, bound);
5536 1137829 : else if (TREE_CODE (bound) == INTEGER_CST)
5537 0 : elim_cost.cost = 0;
5538 : /* If we replace a loop condition 'i < n' with 'p < base + n',
5539 : inv_vars_elim will have 'base' and 'n' set, which implies that both
5540 : 'base' and 'n' will be live during the loop. More likely,
5541 : 'base + n' will be loop invariant, resulting in only one live value
5542 : during the loop. So in that case we clear inv_vars_elim and set
5543 : inv_expr_elim instead. */
5544 3580892 : if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5545 : {
5546 311196 : inv_expr_elim = get_loop_invariant_expr (data, bound);
5547 311196 : bitmap_clear (inv_vars_elim);
5548 : }
5549 : /* The bound is a loop invariant, so it will be only computed
5550 : once. */
5551 3580892 : elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5552 : }
5553 :
5554 : /* When the condition is a comparison of the candidate IV against
5555 : zero, prefer this IV.
5556 :
5557 : TODO: The constant that we're subtracting from the cost should
5558 : be target-dependent. This information should be added to the
5559 : target costs for each backend. */
5560 5934990 : if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5561 3580892 : && integer_zerop (*bound_cst)
5562 8521664 : && (operand_equal_p (*control_var, cand->var_after, 0)
5563 2338353 : || operand_equal_p (*control_var, cand->var_before, 0)))
5564 253954 : elim_cost -= 1;
5565 :
5566 5934990 : express_cost = get_computation_cost (data, use, cand, false,
5567 : &inv_vars_express, NULL,
5568 : &inv_expr_express);
5569 5934990 : if (cmp_iv != NULL)
5570 4998938 : find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5571 :
5572 : /* Count the cost of the original bound as well. */
5573 5934990 : bound_cost = force_var_cost (data, *bound_cst, NULL);
5574 5934990 : if (bound_cost.cost == 0)
5575 5913688 : bound_cost.cost = parm_decl_cost (data, *bound_cst);
5576 21302 : else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5577 0 : bound_cost.cost = 0;
5578 5934990 : express_cost += bound_cost;
5579 :
5580 : /* Choose the better approach, preferring the eliminated IV. */
5581 5934990 : if (elim_cost <= express_cost)
5582 : {
5583 4501990 : cost = elim_cost;
5584 4501990 : inv_vars = inv_vars_elim;
5585 4501990 : inv_vars_elim = NULL;
5586 4501990 : inv_expr = inv_expr_elim;
5587 : /* For doloop candidate/use pair, adjust to zero cost. */
5588 4501990 : if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5589 0 : cost = no_cost;
5590 : }
5591 : else
5592 : {
5593 1433000 : cost = express_cost;
5594 1433000 : inv_vars = inv_vars_express;
5595 1433000 : inv_vars_express = NULL;
5596 1433000 : bound = NULL_TREE;
5597 1433000 : comp = ERROR_MARK;
5598 1433000 : inv_expr = inv_expr_express;
5599 : }
5600 :
5601 5934990 : if (inv_expr)
5602 : {
5603 603720 : inv_exprs = BITMAP_ALLOC (NULL);
5604 603720 : bitmap_set_bit (inv_exprs, inv_expr->id);
5605 : }
5606 5934990 : set_group_iv_cost (data, group, cand, cost,
5607 : inv_vars, bound, comp, inv_exprs);
5608 :
5609 5934990 : if (inv_vars_elim)
5610 24324 : BITMAP_FREE (inv_vars_elim);
5611 5934990 : if (inv_vars_express)
5612 1252424 : BITMAP_FREE (inv_vars_express);
5613 :
5614 5934990 : return !cost.infinite_cost_p ();
5615 : }
5616 :
5617 : /* Determines cost of computing uses in GROUP with CAND. Returns false
5618 : if USE cannot be represented with CAND. */
5619 :
5620 : static bool
5621 17755354 : determine_group_iv_cost (struct ivopts_data *data,
5622 : struct iv_group *group, struct iv_cand *cand)
5623 : {
5624 17755354 : switch (group->type)
5625 : {
5626 5534783 : case USE_NONLINEAR_EXPR:
5627 5534783 : return determine_group_iv_cost_generic (data, group, cand);
5628 :
5629 6285581 : case USE_REF_ADDRESS:
5630 6285581 : case USE_PTR_ADDRESS:
5631 6285581 : return determine_group_iv_cost_address (data, group, cand);
5632 :
5633 5934990 : case USE_COMPARE:
5634 5934990 : return determine_group_iv_cost_cond (data, group, cand);
5635 :
5636 0 : default:
5637 0 : gcc_unreachable ();
5638 : }
5639 : }
5640 :
5641 : /* Return true if get_computation_cost indicates that autoincrement is
5642 : a possibility for the pair of USE and CAND, false otherwise. */
5643 :
5644 : static bool
5645 1279361 : autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5646 : struct iv_cand *cand)
5647 : {
5648 1279361 : if (!address_p (use->type))
5649 : return false;
5650 :
5651 416149 : bool can_autoinc = false;
5652 416149 : get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5653 416149 : return can_autoinc;
5654 : }
5655 :
5656 : /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5657 : use that allows autoincrement, and set their AINC_USE if possible. */
5658 :
5659 : static void
5660 500815 : set_autoinc_for_original_candidates (struct ivopts_data *data)
5661 : {
5662 500815 : unsigned i, j;
5663 :
5664 5106785 : for (i = 0; i < data->vcands.length (); i++)
5665 : {
5666 4605970 : struct iv_cand *cand = data->vcands[i];
5667 4605970 : struct iv_use *closest_before = NULL;
5668 4605970 : struct iv_use *closest_after = NULL;
5669 4605970 : if (cand->pos != IP_ORIGINAL)
5670 3740349 : continue;
5671 :
5672 3820712 : for (j = 0; j < data->vgroups.length (); j++)
5673 : {
5674 2955091 : struct iv_group *group = data->vgroups[j];
5675 2955091 : struct iv_use *use = group->vuses[0];
5676 2955091 : unsigned uid = gimple_uid (use->stmt);
5677 :
5678 2955091 : if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5679 1180106 : continue;
5680 :
5681 1774985 : if (uid < gimple_uid (cand->incremented_at)
5682 1774985 : && (closest_before == NULL
5683 372291 : || uid > gimple_uid (closest_before->stmt)))
5684 : closest_before = use;
5685 :
5686 1774985 : if (uid > gimple_uid (cand->incremented_at)
5687 1774985 : && (closest_after == NULL
5688 66398 : || uid < gimple_uid (closest_after->stmt)))
5689 : closest_after = use;
5690 : }
5691 :
5692 865621 : if (closest_before != NULL
5693 865621 : && autoinc_possible_for_pair (data, closest_before, cand))
5694 0 : cand->ainc_use = closest_before;
5695 865621 : else if (closest_after != NULL
5696 865621 : && autoinc_possible_for_pair (data, closest_after, cand))
5697 0 : cand->ainc_use = closest_after;
5698 : }
5699 500815 : }
5700 :
5701 : /* Relate compare use with all candidates. */
5702 :
5703 : static void
5704 299 : relate_compare_use_with_all_cands (struct ivopts_data *data)
5705 : {
5706 299 : unsigned i, count = data->vcands.length ();
5707 10000 : for (i = 0; i < data->vgroups.length (); i++)
5708 : {
5709 9701 : struct iv_group *group = data->vgroups[i];
5710 :
5711 9701 : if (group->type == USE_COMPARE)
5712 2185 : bitmap_set_range (group->related_cands, 0, count);
5713 : }
5714 299 : }
5715 :
5716 : /* If PREFERRED_MODE is suitable and profitable, use the preferred
5717 : PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1. */
5718 :
5719 : static tree
5720 0 : compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5721 : const widest_int &iterations_max)
5722 : {
5723 0 : tree ntype = TREE_TYPE (niter);
5724 0 : tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5725 0 : if (!pref_type)
5726 0 : return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5727 : build_int_cst (ntype, 1));
5728 :
5729 0 : gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5730 :
5731 0 : int prec = TYPE_PRECISION (ntype);
5732 0 : int pref_prec = TYPE_PRECISION (pref_type);
5733 :
5734 0 : tree base;
5735 :
5736 : /* Check if the PREFERRED_MODED is able to present niter. */
5737 0 : if (pref_prec > prec
5738 0 : || wi::ltu_p (iterations_max,
5739 0 : widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5740 : UNSIGNED)))
5741 : {
5742 : /* No wrap, it is safe to use preferred type after niter + 1. */
5743 0 : if (wi::ltu_p (iterations_max,
5744 0 : widest_int::from (wi::max_value (prec, UNSIGNED),
5745 : UNSIGNED)))
5746 : {
5747 : /* This could help to optimize "-1 +1" pair when niter looks
5748 : like "n-1": n is in original mode. "base = (n - 1) + 1"
5749 : in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n. */
5750 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5751 : build_int_cst (ntype, 1));
5752 0 : base = fold_convert (pref_type, base);
5753 : }
5754 :
5755 : /* To avoid wrap, convert niter to preferred type before plus 1. */
5756 : else
5757 : {
5758 0 : niter = fold_convert (pref_type, niter);
5759 0 : base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5760 : build_int_cst (pref_type, 1));
5761 : }
5762 : }
5763 : else
5764 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5765 : build_int_cst (ntype, 1));
5766 : return base;
5767 : }
5768 :
5769 : /* Add one doloop dedicated IV candidate:
5770 : - Base is (may_be_zero ? 1 : (niter + 1)).
5771 : - Step is -1. */
5772 :
5773 : static void
5774 0 : add_iv_candidate_for_doloop (struct ivopts_data *data)
5775 : {
5776 0 : tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5777 0 : gcc_assert (niter_desc && niter_desc->assumptions);
5778 :
5779 0 : tree niter = niter_desc->niter;
5780 0 : tree ntype = TREE_TYPE (niter);
5781 0 : gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5782 :
5783 0 : tree may_be_zero = niter_desc->may_be_zero;
5784 0 : if (may_be_zero && integer_zerop (may_be_zero))
5785 : may_be_zero = NULL_TREE;
5786 0 : if (may_be_zero)
5787 : {
5788 0 : if (COMPARISON_CLASS_P (may_be_zero))
5789 : {
5790 0 : niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5791 : build_int_cst (ntype, 0),
5792 : rewrite_to_non_trapping_overflow (niter));
5793 : }
5794 : /* Don't try to obtain the iteration count expression when may_be_zero is
5795 : integer_nonzerop (actually iteration count is one) or else. */
5796 : else
5797 : return;
5798 : }
5799 :
5800 0 : machine_mode mode = TYPE_MODE (ntype);
5801 0 : machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5802 :
5803 0 : tree base;
5804 0 : if (mode != pref_mode)
5805 : {
5806 0 : base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5807 0 : ntype = TREE_TYPE (base);
5808 : }
5809 : else
5810 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5811 : build_int_cst (ntype, 1));
5812 :
5813 :
5814 0 : add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5815 : }
5816 :
5817 : /* Finds the candidates for the induction variables. */
5818 :
5819 : static void
5820 500815 : find_iv_candidates (struct ivopts_data *data)
5821 : {
5822 : /* Add commonly used ivs. */
5823 500815 : add_standard_iv_candidates (data);
5824 :
5825 : /* Add doloop dedicated ivs. */
5826 500815 : if (data->doloop_use_p)
5827 0 : add_iv_candidate_for_doloop (data);
5828 :
5829 : /* Add old induction variables. */
5830 500815 : add_iv_candidate_for_bivs (data);
5831 :
5832 : /* Add induction variables derived from uses. */
5833 500815 : add_iv_candidate_for_groups (data);
5834 :
5835 500815 : set_autoinc_for_original_candidates (data);
5836 :
5837 : /* Record the important candidates. */
5838 500815 : record_important_candidates (data);
5839 :
5840 : /* Relate compare iv_use with all candidates. */
5841 500815 : if (!data->consider_all_candidates)
5842 299 : relate_compare_use_with_all_cands (data);
5843 :
5844 500815 : if (dump_file && (dump_flags & TDF_DETAILS))
5845 : {
5846 67 : unsigned i;
5847 :
5848 67 : fprintf (dump_file, "\n<Important Candidates>:\t");
5849 820 : for (i = 0; i < data->vcands.length (); i++)
5850 686 : if (data->vcands[i]->important)
5851 492 : fprintf (dump_file, " %d,", data->vcands[i]->id);
5852 67 : fprintf (dump_file, "\n");
5853 :
5854 67 : fprintf (dump_file, "\n<Group, Cand> Related:\n");
5855 287 : for (i = 0; i < data->vgroups.length (); i++)
5856 : {
5857 220 : struct iv_group *group = data->vgroups[i];
5858 :
5859 220 : if (group->related_cands)
5860 : {
5861 220 : fprintf (dump_file, " Group %d:\t", group->id);
5862 220 : dump_bitmap (dump_file, group->related_cands);
5863 : }
5864 : }
5865 67 : fprintf (dump_file, "\n");
5866 : }
5867 500815 : }
5868 :
5869 : /* Determines costs of computing use of iv with an iv candidate. */
5870 :
5871 : static void
5872 500815 : determine_group_iv_costs (struct ivopts_data *data)
5873 : {
5874 500815 : unsigned i, j;
5875 500815 : struct iv_cand *cand;
5876 500815 : struct iv_group *group;
5877 500815 : bitmap to_clear = BITMAP_ALLOC (NULL);
5878 :
5879 500815 : alloc_use_cost_map (data);
5880 :
5881 2146226 : for (i = 0; i < data->vgroups.length (); i++)
5882 : {
5883 1645411 : group = data->vgroups[i];
5884 :
5885 1645411 : if (data->consider_all_candidates)
5886 : {
5887 19046329 : for (j = 0; j < data->vcands.length (); j++)
5888 : {
5889 17400918 : cand = data->vcands[j];
5890 17400918 : determine_group_iv_cost (data, group, cand);
5891 : }
5892 : }
5893 : else
5894 : {
5895 9701 : bitmap_iterator bi;
5896 :
5897 364137 : EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5898 : {
5899 354436 : cand = data->vcands[j];
5900 354436 : if (!determine_group_iv_cost (data, group, cand))
5901 210918 : bitmap_set_bit (to_clear, j);
5902 : }
5903 :
5904 : /* Remove the candidates for that the cost is infinite from
5905 : the list of related candidates. */
5906 9701 : bitmap_and_compl_into (group->related_cands, to_clear);
5907 9701 : bitmap_clear (to_clear);
5908 : }
5909 : }
5910 :
5911 500815 : BITMAP_FREE (to_clear);
5912 :
5913 500815 : if (dump_file && (dump_flags & TDF_DETAILS))
5914 : {
5915 67 : bitmap_iterator bi;
5916 :
5917 : /* Dump invariant variables. */
5918 67 : fprintf (dump_file, "\n<Invariant Vars>:\n");
5919 1041 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5920 : {
5921 974 : struct version_info *info = ver_info (data, i);
5922 974 : if (info->inv_id)
5923 : {
5924 222 : fprintf (dump_file, "Inv %d:\t", info->inv_id);
5925 222 : print_generic_expr (dump_file, info->name, TDF_SLIM);
5926 222 : fprintf (dump_file, "%s\n",
5927 222 : info->has_nonlin_use ? "" : "\t(eliminable)");
5928 : }
5929 : }
5930 :
5931 : /* Dump invariant expressions. */
5932 67 : fprintf (dump_file, "\n<Invariant Expressions>:\n");
5933 67 : auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5934 :
5935 439 : for (hash_table<iv_inv_expr_hasher>::iterator it
5936 506 : = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5937 372 : ++it)
5938 372 : list.safe_push (*it);
5939 :
5940 67 : list.qsort (sort_iv_inv_expr_ent);
5941 :
5942 439 : for (i = 0; i < list.length (); ++i)
5943 : {
5944 372 : fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5945 372 : print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5946 372 : fprintf (dump_file, "\n");
5947 : }
5948 :
5949 67 : fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5950 :
5951 287 : for (i = 0; i < data->vgroups.length (); i++)
5952 : {
5953 220 : group = data->vgroups[i];
5954 :
5955 220 : fprintf (dump_file, "Group %d:\n", i);
5956 220 : fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5957 2982 : for (j = 0; j < group->n_map_members; j++)
5958 : {
5959 3856 : if (!group->cost_map[j].cand
5960 2762 : || group->cost_map[j].cost.infinite_cost_p ())
5961 1094 : continue;
5962 :
5963 1668 : fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5964 1668 : group->cost_map[j].cand->id,
5965 : group->cost_map[j].cost.cost,
5966 1668 : group->cost_map[j].cost.complexity);
5967 1668 : if (!group->cost_map[j].inv_exprs
5968 1668 : || bitmap_empty_p (group->cost_map[j].inv_exprs))
5969 1168 : fprintf (dump_file, "NIL;\t");
5970 : else
5971 500 : bitmap_print (dump_file,
5972 : group->cost_map[j].inv_exprs, "", ";\t");
5973 1668 : if (!group->cost_map[j].inv_vars
5974 1668 : || bitmap_empty_p (group->cost_map[j].inv_vars))
5975 1347 : fprintf (dump_file, "NIL;\n");
5976 : else
5977 321 : bitmap_print (dump_file,
5978 : group->cost_map[j].inv_vars, "", "\n");
5979 : }
5980 :
5981 220 : fprintf (dump_file, "\n");
5982 : }
5983 67 : fprintf (dump_file, "\n");
5984 67 : }
5985 500815 : }
5986 :
5987 : /* Determines cost of the candidate CAND. */
5988 :
5989 : static void
5990 4605970 : determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5991 : {
5992 4605970 : comp_cost cost_base;
5993 4605970 : int64_t cost, cost_step;
5994 4605970 : tree base;
5995 :
5996 4605970 : gcc_assert (cand->iv != NULL);
5997 :
5998 : /* There are two costs associated with the candidate -- its increment
5999 : and its initialization. The second is almost negligible for any loop
6000 : that rolls enough, so we take it just very little into account. */
6001 :
6002 4605970 : base = cand->iv->base;
6003 4605970 : cost_base = force_var_cost (data, base, NULL);
6004 : /* It will be exceptional that the iv register happens to be initialized with
6005 : the proper value at no cost. In general, there will at least be a regcopy
6006 : or a const set. */
6007 4605970 : if (cost_base.cost == 0)
6008 3654026 : cost_base.cost = COSTS_N_INSNS (1);
6009 : /* Doloop decrement should be considered as zero cost. */
6010 4605970 : if (cand->doloop_p)
6011 : cost_step = 0;
6012 : else
6013 4605970 : cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6014 4605970 : cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6015 :
6016 : /* Prefer the original ivs unless we may gain something by replacing it.
6017 : The reason is to make debugging simpler; so this is not relevant for
6018 : artificial ivs created by other optimization passes. */
6019 4605970 : if ((cand->pos != IP_ORIGINAL
6020 865621 : || !SSA_NAME_VAR (cand->var_before)
6021 433478 : || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6022 : /* Prefer doloop as well. */
6023 5128503 : && !cand->doloop_p)
6024 4262882 : cost++;
6025 :
6026 : /* Prefer not to insert statements into latch unless there are some
6027 : already (so that we do not create unnecessary jumps). */
6028 4605970 : if (cand->pos == IP_END
6029 4605970 : && empty_block_p (ip_end_pos (data->current_loop)))
6030 1931 : cost++;
6031 :
6032 4605970 : cand->cost = cost;
6033 4605970 : cand->cost_step = cost_step;
6034 4605970 : }
6035 :
6036 : /* Determines costs of computation of the candidates. */
6037 :
6038 : static void
6039 500815 : determine_iv_costs (struct ivopts_data *data)
6040 : {
6041 500815 : unsigned i;
6042 :
6043 500815 : if (dump_file && (dump_flags & TDF_DETAILS))
6044 : {
6045 67 : fprintf (dump_file, "<Candidate Costs>:\n");
6046 67 : fprintf (dump_file, " cand\tcost\n");
6047 : }
6048 :
6049 5106785 : for (i = 0; i < data->vcands.length (); i++)
6050 : {
6051 4605970 : struct iv_cand *cand = data->vcands[i];
6052 :
6053 4605970 : determine_iv_cost (data, cand);
6054 :
6055 4605970 : if (dump_file && (dump_flags & TDF_DETAILS))
6056 686 : fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6057 : }
6058 :
6059 500815 : if (dump_file && (dump_flags & TDF_DETAILS))
6060 67 : fprintf (dump_file, "\n");
6061 500815 : }
6062 :
6063 : /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6064 : induction variables. Note N_INVS includes both invariant variables and
6065 : invariant expressions. */
6066 :
6067 : static unsigned
6068 420522097 : ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6069 : unsigned n_cands)
6070 : {
6071 420522097 : unsigned cost;
6072 420522097 : unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6073 420522097 : unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6074 420522097 : bool speed = data->speed;
6075 :
6076 : /* If there is a call in the loop body, the call-clobbered registers
6077 : are not available for loop invariants. */
6078 420522097 : if (data->body_includes_call)
6079 94187836 : available_regs = available_regs - target_clobbered_regs;
6080 :
6081 : /* If we have enough registers. */
6082 420522097 : if (regs_needed + target_res_regs < available_regs)
6083 : cost = n_new;
6084 : /* If close to running out of registers, try to preserve them. */
6085 185141596 : else if (regs_needed <= available_regs)
6086 52353614 : cost = target_reg_cost [speed] * regs_needed;
6087 : /* If we run out of available registers but the number of candidates
6088 : does not, we penalize extra registers using target_spill_cost. */
6089 132787982 : else if (n_cands <= available_regs)
6090 118689350 : cost = target_reg_cost [speed] * available_regs
6091 118689350 : + target_spill_cost [speed] * (regs_needed - available_regs);
6092 : /* If the number of candidates runs out available registers, we penalize
6093 : extra candidate registers using target_spill_cost * 2. Because it is
6094 : more expensive to spill induction variable than invariant. */
6095 : else
6096 14098632 : cost = target_reg_cost [speed] * available_regs
6097 14098632 : + target_spill_cost [speed] * (n_cands - available_regs) * 2
6098 14098632 : + target_spill_cost [speed] * (regs_needed - n_cands);
6099 :
6100 : /* Finally, add the number of candidates, so that we prefer eliminating
6101 : induction variables if possible. */
6102 420522097 : return cost + n_cands;
6103 : }
6104 :
6105 : /* For each size of the induction variable set determine the penalty. */
6106 :
6107 : static void
6108 500815 : determine_set_costs (struct ivopts_data *data)
6109 : {
6110 500815 : unsigned j, n;
6111 500815 : gphi *phi;
6112 500815 : gphi_iterator psi;
6113 500815 : tree op;
6114 500815 : class loop *loop = data->current_loop;
6115 500815 : bitmap_iterator bi;
6116 :
6117 500815 : if (dump_file && (dump_flags & TDF_DETAILS))
6118 : {
6119 67 : fprintf (dump_file, "<Global Costs>:\n");
6120 67 : fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6121 67 : fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6122 67 : fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6123 67 : fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6124 : }
6125 :
6126 500815 : n = 0;
6127 1948301 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6128 : {
6129 1447486 : phi = psi.phi ();
6130 1447486 : op = PHI_RESULT (phi);
6131 :
6132 2894972 : if (virtual_operand_p (op))
6133 306775 : continue;
6134 :
6135 1140711 : if (get_iv (data, op))
6136 870804 : continue;
6137 :
6138 498910 : if (!POINTER_TYPE_P (TREE_TYPE (op))
6139 498725 : && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6140 101832 : continue;
6141 :
6142 168075 : n++;
6143 : }
6144 :
6145 5525130 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6146 : {
6147 5024315 : struct version_info *info = ver_info (data, j);
6148 :
6149 5024315 : if (info->inv_id && info->has_nonlin_use)
6150 507159 : n++;
6151 : }
6152 :
6153 500815 : data->regs_used = n;
6154 500815 : if (dump_file && (dump_flags & TDF_DETAILS))
6155 67 : fprintf (dump_file, " regs_used %d\n", n);
6156 :
6157 500815 : if (dump_file && (dump_flags & TDF_DETAILS))
6158 : {
6159 67 : fprintf (dump_file, " cost for size:\n");
6160 67 : fprintf (dump_file, " ivs\tcost\n");
6161 2144 : for (j = 0; j <= 2 * target_avail_regs; j++)
6162 2077 : fprintf (dump_file, " %d\t%d\n", j,
6163 : ivopts_estimate_reg_pressure (data, 0, j));
6164 67 : fprintf (dump_file, "\n");
6165 : }
6166 500815 : }
6167 :
6168 : /* Returns true if A is a cheaper cost pair than B. */
6169 :
6170 : static bool
6171 82954493 : cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6172 : {
6173 82954493 : if (!a)
6174 : return false;
6175 :
6176 77658107 : if (!b)
6177 : return true;
6178 :
6179 74457194 : if (a->cost < b->cost)
6180 : return true;
6181 :
6182 55076234 : if (b->cost < a->cost)
6183 : return false;
6184 :
6185 : /* In case the costs are the same, prefer the cheaper candidate. */
6186 31587213 : if (a->cand->cost < b->cand->cost)
6187 : return true;
6188 :
6189 : return false;
6190 : }
6191 :
6192 : /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6193 : for more expensive, equal and cheaper respectively. */
6194 :
6195 : static int
6196 29252858 : compare_cost_pair (class cost_pair *a, class cost_pair *b)
6197 : {
6198 29252858 : if (cheaper_cost_pair (a, b))
6199 : return -1;
6200 23068463 : if (cheaper_cost_pair (b, a))
6201 14919790 : return 1;
6202 :
6203 : return 0;
6204 : }
6205 :
6206 : /* Returns candidate by that USE is expressed in IVS. */
6207 :
6208 : static class cost_pair *
6209 280482994 : iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6210 : {
6211 280482994 : return ivs->cand_for_group[group->id];
6212 : }
6213 :
6214 : /* Computes the cost field of IVS structure. */
6215 :
6216 : static void
6217 420519772 : iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6218 : {
6219 420519772 : comp_cost cost = ivs->cand_use_cost;
6220 :
6221 420519772 : cost += ivs->cand_cost;
6222 420519772 : cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6223 420519772 : ivs->cost = cost;
6224 420519772 : }
6225 :
6226 : /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6227 : and IVS. */
6228 :
6229 : static void
6230 575819070 : iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6231 : {
6232 575819070 : bitmap_iterator bi;
6233 575819070 : unsigned iid;
6234 :
6235 575819070 : if (!invs)
6236 457227605 : return;
6237 :
6238 118591465 : gcc_assert (n_inv_uses != NULL);
6239 204572095 : EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6240 : {
6241 85980630 : n_inv_uses[iid]--;
6242 85980630 : if (n_inv_uses[iid] == 0)
6243 63536836 : ivs->n_invs--;
6244 : }
6245 : }
6246 :
6247 : /* Set USE not to be expressed by any candidate in IVS. */
6248 :
6249 : static void
6250 208616457 : iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6251 : struct iv_group *group)
6252 : {
6253 208616457 : unsigned gid = group->id, cid;
6254 208616457 : class cost_pair *cp;
6255 :
6256 208616457 : cp = ivs->cand_for_group[gid];
6257 208616457 : if (!cp)
6258 : return;
6259 208616457 : cid = cp->cand->id;
6260 :
6261 208616457 : ivs->bad_groups++;
6262 208616457 : ivs->cand_for_group[gid] = NULL;
6263 208616457 : ivs->n_cand_uses[cid]--;
6264 :
6265 208616457 : if (ivs->n_cand_uses[cid] == 0)
6266 : {
6267 79293078 : bitmap_clear_bit (ivs->cands, cid);
6268 79293078 : if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6269 79293078 : ivs->n_cands--;
6270 79293078 : ivs->cand_cost -= cp->cand->cost;
6271 79293078 : iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6272 79293078 : iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6273 : }
6274 :
6275 208616457 : ivs->cand_use_cost -= cp->cost;
6276 208616457 : iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6277 208616457 : iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6278 208616457 : iv_ca_recount_cost (data, ivs);
6279 : }
6280 :
6281 : /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6282 : IVS. */
6283 :
6284 : static void
6285 585176898 : iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6286 : {
6287 585176898 : bitmap_iterator bi;
6288 585176898 : unsigned iid;
6289 :
6290 585176898 : if (!invs)
6291 465451060 : return;
6292 :
6293 119725838 : gcc_assert (n_inv_uses != NULL);
6294 206673682 : EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6295 : {
6296 86947844 : n_inv_uses[iid]++;
6297 86947844 : if (n_inv_uses[iid] == 1)
6298 64426340 : ivs->n_invs++;
6299 : }
6300 : }
6301 :
6302 : /* Set cost pair for GROUP in set IVS to CP. */
6303 :
6304 : static void
6305 224919905 : iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6306 : struct iv_group *group, class cost_pair *cp)
6307 : {
6308 224919905 : unsigned gid = group->id, cid;
6309 :
6310 224919905 : if (ivs->cand_for_group[gid] == cp)
6311 : return;
6312 :
6313 211903315 : if (ivs->cand_for_group[gid])
6314 196372891 : iv_ca_set_no_cp (data, ivs, group);
6315 :
6316 211903315 : if (cp)
6317 : {
6318 211903315 : cid = cp->cand->id;
6319 :
6320 211903315 : ivs->bad_groups--;
6321 211903315 : ivs->cand_for_group[gid] = cp;
6322 211903315 : ivs->n_cand_uses[cid]++;
6323 211903315 : if (ivs->n_cand_uses[cid] == 1)
6324 : {
6325 80685134 : bitmap_set_bit (ivs->cands, cid);
6326 80685134 : if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6327 80685134 : ivs->n_cands++;
6328 80685134 : ivs->cand_cost += cp->cand->cost;
6329 80685134 : iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6330 80685134 : iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6331 : }
6332 :
6333 211903315 : ivs->cand_use_cost += cp->cost;
6334 211903315 : iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6335 211903315 : iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6336 211903315 : iv_ca_recount_cost (data, ivs);
6337 : }
6338 : }
6339 :
6340 : /* Extend set IVS by expressing USE by some of the candidates in it
6341 : if possible. Consider all important candidates if candidates in
6342 : set IVS don't give any result. */
6343 :
6344 : static void
6345 3288100 : iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6346 : struct iv_group *group)
6347 : {
6348 3288100 : class cost_pair *best_cp = NULL, *cp;
6349 3288100 : bitmap_iterator bi;
6350 3288100 : unsigned i;
6351 3288100 : struct iv_cand *cand;
6352 :
6353 3288100 : gcc_assert (ivs->upto >= group->id);
6354 3288100 : ivs->upto++;
6355 3288100 : ivs->bad_groups++;
6356 :
6357 6201671 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6358 : {
6359 2913571 : cand = data->vcands[i];
6360 2913571 : cp = get_group_iv_cost (data, group, cand);
6361 2913571 : if (cheaper_cost_pair (cp, best_cp))
6362 2023337 : best_cp = cp;
6363 : }
6364 :
6365 3288100 : if (best_cp == NULL)
6366 : {
6367 11753633 : EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6368 : {
6369 10415790 : cand = data->vcands[i];
6370 10415790 : cp = get_group_iv_cost (data, group, cand);
6371 10415790 : if (cheaper_cost_pair (cp, best_cp))
6372 2388483 : best_cp = cp;
6373 : }
6374 : }
6375 :
6376 3288100 : iv_ca_set_cp (data, ivs, group, best_cp);
6377 3288100 : }
6378 :
6379 : /* Get cost for assignment IVS. */
6380 :
6381 : static comp_cost
6382 82624549 : iv_ca_cost (class iv_ca *ivs)
6383 : {
6384 : /* This was a conditional expression but it triggered a bug in
6385 : Sun C 5.5. */
6386 0 : if (ivs->bad_groups)
6387 87187 : return infinite_cost;
6388 : else
6389 82537362 : return ivs->cost;
6390 : }
6391 :
6392 : /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6393 : than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6394 : respectively. */
6395 :
6396 : static int
6397 38912812 : iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6398 : struct iv_group *group, class cost_pair *old_cp,
6399 : class cost_pair *new_cp)
6400 : {
6401 38912812 : gcc_assert (old_cp && new_cp && old_cp != new_cp);
6402 38912812 : unsigned old_n_invs = ivs->n_invs;
6403 38912812 : iv_ca_set_cp (data, ivs, group, new_cp);
6404 38912812 : unsigned new_n_invs = ivs->n_invs;
6405 38912812 : iv_ca_set_cp (data, ivs, group, old_cp);
6406 :
6407 38912812 : return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6408 : }
6409 :
6410 : /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6411 : it before NEXT. */
6412 :
6413 : static struct iv_ca_delta *
6414 48679705 : iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6415 : class cost_pair *new_cp, struct iv_ca_delta *next)
6416 : {
6417 0 : struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6418 :
6419 48679705 : change->group = group;
6420 48679705 : change->old_cp = old_cp;
6421 48679705 : change->new_cp = new_cp;
6422 48679705 : change->next = next;
6423 :
6424 48679705 : return change;
6425 : }
6426 :
6427 : /* Joins two lists of changes L1 and L2. Destructive -- old lists
6428 : are rewritten. */
6429 :
6430 : static struct iv_ca_delta *
6431 8151854 : iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6432 : {
6433 8151854 : struct iv_ca_delta *last;
6434 :
6435 0 : if (!l2)
6436 : return l1;
6437 :
6438 0 : if (!l1)
6439 : return l2;
6440 :
6441 3506672 : for (last = l1; last->next; last = last->next)
6442 1136296 : continue;
6443 2370376 : last->next = l2;
6444 :
6445 2370376 : return l1;
6446 1136296 : }
6447 :
6448 : /* Reverse the list of changes DELTA, forming the inverse to it. */
6449 :
6450 : static struct iv_ca_delta *
6451 0 : iv_ca_delta_reverse (struct iv_ca_delta *delta)
6452 : {
6453 0 : struct iv_ca_delta *act, *next, *prev = NULL;
6454 :
6455 161686738 : for (act = delta; act; act = next)
6456 : {
6457 91591926 : next = act->next;
6458 91591926 : act->next = prev;
6459 91591926 : prev = act;
6460 :
6461 91591926 : std::swap (act->old_cp, act->new_cp);
6462 : }
6463 :
6464 0 : return prev;
6465 : }
6466 :
6467 : /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6468 : reverted instead. */
6469 :
6470 : static void
6471 73869012 : iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6472 : struct iv_ca_delta *delta, bool forward)
6473 : {
6474 73869012 : class cost_pair *from, *to;
6475 73869012 : struct iv_ca_delta *act;
6476 :
6477 73869012 : if (!forward)
6478 73869012 : delta = iv_ca_delta_reverse (delta);
6479 :
6480 170348348 : for (act = delta; act; act = act->next)
6481 : {
6482 96479336 : from = act->old_cp;
6483 96479336 : to = act->new_cp;
6484 96479336 : gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6485 96479336 : iv_ca_set_cp (data, ivs, act->group, to);
6486 : }
6487 :
6488 73869012 : if (!forward)
6489 73869012 : iv_ca_delta_reverse (delta);
6490 73869012 : }
6491 :
6492 : /* Returns true if CAND is used in IVS. */
6493 :
6494 : static bool
6495 29065174 : iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6496 : {
6497 29065174 : return ivs->n_cand_uses[cand->id] > 0;
6498 : }
6499 :
6500 : /* Returns number of induction variable candidates in the set IVS. */
6501 :
6502 : static unsigned
6503 12659230 : iv_ca_n_cands (class iv_ca *ivs)
6504 : {
6505 12659230 : return ivs->n_cands;
6506 : }
6507 :
6508 : /* Free the list of changes DELTA. */
6509 :
6510 : static void
6511 43580998 : iv_ca_delta_free (struct iv_ca_delta **delta)
6512 : {
6513 43580998 : struct iv_ca_delta *act, *next;
6514 :
6515 92260703 : for (act = *delta; act; act = next)
6516 : {
6517 48679705 : next = act->next;
6518 48679705 : free (act);
6519 : }
6520 :
6521 43580998 : *delta = NULL;
6522 43580998 : }
6523 :
6524 : /* Allocates new iv candidates assignment. */
6525 :
6526 : static class iv_ca *
6527 1001630 : iv_ca_new (struct ivopts_data *data)
6528 : {
6529 1001630 : class iv_ca *nw = XNEW (class iv_ca);
6530 :
6531 1001630 : nw->upto = 0;
6532 1001630 : nw->bad_groups = 0;
6533 2003260 : nw->cand_for_group = XCNEWVEC (class cost_pair *,
6534 : data->vgroups.length ());
6535 2003260 : nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6536 1001630 : nw->cands = BITMAP_ALLOC (NULL);
6537 1001630 : nw->n_cands = 0;
6538 1001630 : nw->n_invs = 0;
6539 1001630 : nw->cand_use_cost = no_cost;
6540 1001630 : nw->cand_cost = 0;
6541 1001630 : nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6542 1001630 : nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6543 1001630 : nw->cost = no_cost;
6544 :
6545 1001630 : return nw;
6546 : }
6547 :
6548 : /* Free memory occupied by the set IVS. */
6549 :
6550 : static void
6551 1001630 : iv_ca_free (class iv_ca **ivs)
6552 : {
6553 1001630 : free ((*ivs)->cand_for_group);
6554 1001630 : free ((*ivs)->n_cand_uses);
6555 1001630 : BITMAP_FREE ((*ivs)->cands);
6556 1001630 : free ((*ivs)->n_inv_var_uses);
6557 1001630 : free ((*ivs)->n_inv_expr_uses);
6558 1001630 : free (*ivs);
6559 1001630 : *ivs = NULL;
6560 1001630 : }
6561 :
6562 : /* Dumps IVS to FILE. */
6563 :
6564 : static void
6565 248 : iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6566 : {
6567 248 : unsigned i;
6568 248 : comp_cost cost = iv_ca_cost (ivs);
6569 :
6570 248 : fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6571 : cost.complexity);
6572 248 : fprintf (file, " reg_cost: %d\n",
6573 : ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6574 248 : fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6575 : "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6576 : ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6577 248 : bitmap_print (file, ivs->cands, " candidates: ","\n");
6578 :
6579 1285 : for (i = 0; i < ivs->upto; i++)
6580 : {
6581 1037 : struct iv_group *group = data->vgroups[i];
6582 1037 : class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6583 1037 : if (cp)
6584 1037 : fprintf (file, " group:%d --> iv_cand:%d, cost=("
6585 1037 : "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6586 : cp->cost.cost, cp->cost.complexity);
6587 : else
6588 0 : fprintf (file, " group:%d --> ??\n", group->id);
6589 : }
6590 :
6591 248 : const char *pref = "";
6592 248 : fprintf (file, " invariant variables: ");
6593 1438 : for (i = 1; i <= data->max_inv_var_id; i++)
6594 942 : if (ivs->n_inv_var_uses[i])
6595 : {
6596 133 : fprintf (file, "%s%d", pref, i);
6597 133 : pref = ", ";
6598 : }
6599 :
6600 248 : pref = "";
6601 248 : fprintf (file, "\n invariant expressions: ");
6602 2486 : for (i = 1; i <= data->max_inv_expr_id; i++)
6603 1990 : if (ivs->n_inv_expr_uses[i])
6604 : {
6605 303 : fprintf (file, "%s%d", pref, i);
6606 303 : pref = ", ";
6607 : }
6608 :
6609 248 : fprintf (file, "\n\n");
6610 248 : }
6611 :
6612 : /* Try changing candidate in IVS to CAND for each use. Return cost of the
6613 : new set, and store differences in DELTA. Number of induction variables
6614 : in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6615 : the function will try to find a solution with mimimal iv candidates. */
6616 :
6617 : static comp_cost
6618 21701883 : iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6619 : struct iv_cand *cand, struct iv_ca_delta **delta,
6620 : unsigned *n_ivs, bool min_ncand)
6621 : {
6622 21701883 : unsigned i;
6623 21701883 : comp_cost cost;
6624 21701883 : struct iv_group *group;
6625 21701883 : class cost_pair *old_cp, *new_cp;
6626 :
6627 21701883 : *delta = NULL;
6628 121078581 : for (i = 0; i < ivs->upto; i++)
6629 : {
6630 99376698 : group = data->vgroups[i];
6631 99376698 : old_cp = iv_ca_cand_for_group (ivs, group);
6632 :
6633 99376698 : if (old_cp
6634 99376698 : && old_cp->cand == cand)
6635 9042653 : continue;
6636 :
6637 90334045 : new_cp = get_group_iv_cost (data, group, cand);
6638 90334045 : if (!new_cp)
6639 35027377 : continue;
6640 :
6641 55306668 : if (!min_ncand)
6642 : {
6643 38912812 : int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6644 : /* Skip if new_cp depends on more invariants. */
6645 38912812 : if (cmp_invs > 0)
6646 9659954 : continue;
6647 :
6648 29252858 : int cmp_cost = compare_cost_pair (new_cp, old_cp);
6649 : /* Skip if new_cp is not cheaper. */
6650 29252858 : if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6651 22673995 : continue;
6652 : }
6653 :
6654 22972719 : *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6655 : }
6656 :
6657 21701883 : iv_ca_delta_commit (data, ivs, *delta, true);
6658 21701883 : cost = iv_ca_cost (ivs);
6659 21701883 : if (n_ivs)
6660 12659230 : *n_ivs = iv_ca_n_cands (ivs);
6661 21701883 : iv_ca_delta_commit (data, ivs, *delta, false);
6662 :
6663 21701883 : return cost;
6664 : }
6665 :
6666 : /* Try narrowing set IVS by removing CAND. Return the cost of
6667 : the new set and store the differences in DELTA. START is
6668 : the candidate with which we start narrowing. */
6669 :
6670 : static comp_cost
6671 15114410 : iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6672 : struct iv_cand *cand, struct iv_cand *start,
6673 : struct iv_ca_delta **delta)
6674 : {
6675 15114410 : unsigned i, ci;
6676 15114410 : struct iv_group *group;
6677 15114410 : class cost_pair *old_cp, *new_cp, *cp;
6678 15114410 : bitmap_iterator bi;
6679 15114410 : struct iv_cand *cnd;
6680 15114410 : comp_cost cost, best_cost, acost;
6681 :
6682 15114410 : *delta = NULL;
6683 80591351 : for (i = 0; i < data->vgroups.length (); i++)
6684 : {
6685 75397682 : group = data->vgroups[i];
6686 :
6687 75397682 : old_cp = iv_ca_cand_for_group (ivs, group);
6688 75397682 : if (old_cp->cand != cand)
6689 53501122 : continue;
6690 :
6691 21896560 : best_cost = iv_ca_cost (ivs);
6692 : /* Start narrowing with START. */
6693 21896560 : new_cp = get_group_iv_cost (data, group, start);
6694 :
6695 21896560 : if (data->consider_all_candidates)
6696 : {
6697 92856538 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6698 : {
6699 72182533 : if (ci == cand->id || (start && ci == start->id))
6700 35585379 : continue;
6701 :
6702 36597154 : cnd = data->vcands[ci];
6703 :
6704 36597154 : cp = get_group_iv_cost (data, group, cnd);
6705 36597154 : if (!cp)
6706 21900124 : continue;
6707 :
6708 14697030 : iv_ca_set_cp (data, ivs, group, cp);
6709 14697030 : acost = iv_ca_cost (ivs);
6710 :
6711 14697030 : if (acost < best_cost)
6712 : {
6713 1949044 : best_cost = acost;
6714 1949044 : new_cp = cp;
6715 : }
6716 : }
6717 : }
6718 : else
6719 : {
6720 4982112 : EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6721 : {
6722 3759557 : if (ci == cand->id || (start && ci == start->id))
6723 2068955 : continue;
6724 :
6725 1690602 : cnd = data->vcands[ci];
6726 :
6727 1690602 : cp = get_group_iv_cost (data, group, cnd);
6728 1690602 : if (!cp)
6729 0 : continue;
6730 :
6731 1690602 : iv_ca_set_cp (data, ivs, group, cp);
6732 1690602 : acost = iv_ca_cost (ivs);
6733 :
6734 1690602 : if (acost < best_cost)
6735 : {
6736 43334 : best_cost = acost;
6737 43334 : new_cp = cp;
6738 : }
6739 : }
6740 : }
6741 : /* Restore to old cp for use. */
6742 21896560 : iv_ca_set_cp (data, ivs, group, old_cp);
6743 :
6744 21896560 : if (!new_cp)
6745 : {
6746 9920741 : iv_ca_delta_free (delta);
6747 9920741 : return infinite_cost;
6748 : }
6749 :
6750 11975819 : *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6751 : }
6752 :
6753 5193669 : iv_ca_delta_commit (data, ivs, *delta, true);
6754 5193669 : cost = iv_ca_cost (ivs);
6755 5193669 : iv_ca_delta_commit (data, ivs, *delta, false);
6756 :
6757 5193669 : return cost;
6758 : }
6759 :
6760 : /* Try optimizing the set of candidates IVS by removing candidates different
6761 : from to EXCEPT_CAND from it. Return cost of the new set, and store
6762 : differences in DELTA. */
6763 :
6764 : static comp_cost
6765 9182705 : iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6766 : struct iv_cand *except_cand, struct iv_ca_delta **delta)
6767 : {
6768 9182705 : bitmap_iterator bi;
6769 9182705 : struct iv_ca_delta *act_delta, *best_delta;
6770 9182705 : unsigned i;
6771 9182705 : comp_cost best_cost, acost;
6772 9182705 : struct iv_cand *cand;
6773 :
6774 9182705 : best_delta = NULL;
6775 9182705 : best_cost = iv_ca_cost (ivs);
6776 :
6777 30742847 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6778 : {
6779 21560142 : cand = data->vcands[i];
6780 :
6781 21560142 : if (cand == except_cand)
6782 6445732 : continue;
6783 :
6784 15114410 : acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6785 :
6786 15114410 : if (acost < best_cost)
6787 : {
6788 2550134 : best_cost = acost;
6789 2550134 : iv_ca_delta_free (&best_delta);
6790 2550134 : best_delta = act_delta;
6791 : }
6792 : else
6793 12564276 : iv_ca_delta_free (&act_delta);
6794 : }
6795 :
6796 9182705 : if (!best_delta)
6797 : {
6798 6811148 : *delta = NULL;
6799 6811148 : return best_cost;
6800 : }
6801 :
6802 : /* Recurse to possibly remove other unnecessary ivs. */
6803 2371557 : iv_ca_delta_commit (data, ivs, best_delta, true);
6804 2371557 : best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6805 2371557 : iv_ca_delta_commit (data, ivs, best_delta, false);
6806 2371557 : *delta = iv_ca_delta_join (best_delta, *delta);
6807 2371557 : return best_cost;
6808 : }
6809 :
6810 : /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6811 : cheaper local cost for GROUP than BEST_CP. Return pointer to
6812 : the corresponding cost_pair, otherwise just return BEST_CP. */
6813 :
6814 : static class cost_pair*
6815 29212573 : cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6816 : unsigned int cand_idx, struct iv_cand *old_cand,
6817 : class cost_pair *best_cp)
6818 : {
6819 29212573 : struct iv_cand *cand;
6820 29212573 : class cost_pair *cp;
6821 :
6822 29212573 : gcc_assert (old_cand != NULL && best_cp != NULL);
6823 29212573 : if (cand_idx == old_cand->id)
6824 : return best_cp;
6825 :
6826 26393953 : cand = data->vcands[cand_idx];
6827 26393953 : cp = get_group_iv_cost (data, group, cand);
6828 26393953 : if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6829 : return cp;
6830 :
6831 : return best_cp;
6832 : }
6833 :
6834 : /* Try breaking local optimal fixed-point for IVS by replacing candidates
6835 : which are used by more than one iv uses. For each of those candidates,
6836 : this function tries to represent iv uses under that candidate using
6837 : other ones with lower local cost, then tries to prune the new set.
6838 : If the new set has lower cost, It returns the new cost after recording
6839 : candidate replacement in list DELTA. */
6840 :
6841 : static comp_cost
6842 1000388 : iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6843 : struct iv_ca_delta **delta)
6844 : {
6845 1000388 : bitmap_iterator bi, bj;
6846 1000388 : unsigned int i, j, k;
6847 1000388 : struct iv_cand *cand;
6848 1000388 : comp_cost orig_cost, acost;
6849 1000388 : struct iv_ca_delta *act_delta, *tmp_delta;
6850 1000388 : class cost_pair *old_cp, *best_cp = NULL;
6851 :
6852 1000388 : *delta = NULL;
6853 1000388 : orig_cost = iv_ca_cost (ivs);
6854 :
6855 2328497 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6856 : {
6857 1357391 : if (ivs->n_cand_uses[i] == 1
6858 1020413 : || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6859 343129 : continue;
6860 :
6861 1014262 : cand = data->vcands[i];
6862 :
6863 1014262 : act_delta = NULL;
6864 : /* Represent uses under current candidate using other ones with
6865 : lower local cost. */
6866 5204531 : for (j = 0; j < ivs->upto; j++)
6867 : {
6868 4190269 : struct iv_group *group = data->vgroups[j];
6869 4190269 : old_cp = iv_ca_cand_for_group (ivs, group);
6870 :
6871 4190269 : if (old_cp->cand != cand)
6872 1371649 : continue;
6873 :
6874 2818620 : best_cp = old_cp;
6875 2818620 : if (data->consider_all_candidates)
6876 31931701 : for (k = 0; k < data->vcands.length (); k++)
6877 29120414 : best_cp = cheaper_cost_with_cand (data, group, k,
6878 : old_cp->cand, best_cp);
6879 : else
6880 99492 : EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6881 92159 : best_cp = cheaper_cost_with_cand (data, group, k,
6882 : old_cp->cand, best_cp);
6883 :
6884 2818620 : if (best_cp == old_cp)
6885 1331019 : continue;
6886 :
6887 1487601 : act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6888 : }
6889 : /* No need for further prune. */
6890 1014262 : if (!act_delta)
6891 231396 : continue;
6892 :
6893 : /* Prune the new candidate set. */
6894 782866 : iv_ca_delta_commit (data, ivs, act_delta, true);
6895 782866 : acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6896 782866 : iv_ca_delta_commit (data, ivs, act_delta, false);
6897 782866 : act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6898 :
6899 782866 : if (acost < orig_cost)
6900 : {
6901 29282 : *delta = act_delta;
6902 29282 : return acost;
6903 : }
6904 : else
6905 753584 : iv_ca_delta_free (&act_delta);
6906 : }
6907 :
6908 971106 : return orig_cost;
6909 : }
6910 :
6911 : /* Tries to extend the sets IVS in the best possible way in order to
6912 : express the GROUP. If ORIGINALP is true, prefer candidates from
6913 : the original set of IVs, otherwise favor important candidates not
6914 : based on any memory object. */
6915 :
6916 : static bool
6917 3288100 : try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6918 : struct iv_group *group, bool originalp)
6919 : {
6920 3288100 : comp_cost best_cost, act_cost;
6921 3288100 : unsigned i;
6922 3288100 : bitmap_iterator bi;
6923 3288100 : struct iv_cand *cand;
6924 3288100 : struct iv_ca_delta *best_delta = NULL, *act_delta;
6925 3288100 : class cost_pair *cp;
6926 :
6927 3288100 : iv_ca_add_group (data, ivs, group);
6928 3288100 : best_cost = iv_ca_cost (ivs);
6929 3288100 : cp = iv_ca_cand_for_group (ivs, group);
6930 3288100 : if (cp)
6931 : {
6932 3200913 : best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6933 3200913 : iv_ca_set_no_cp (data, ivs, group);
6934 : }
6935 :
6936 : /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6937 : first try important candidates not based on any memory object. Only if
6938 : this fails, try the specific ones. Rationale -- in loops with many
6939 : variables the best choice often is to use just one generic biv. If we
6940 : added here many ivs specific to the uses, the optimization algorithm later
6941 : would be likely to get stuck in a local minimum, thus causing us to create
6942 : too many ivs. The approach from few ivs to more seems more likely to be
6943 : successful -- starting from few ivs, replacing an expensive use by a
6944 : specific iv should always be a win. */
6945 30655596 : EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6946 : {
6947 27367496 : cand = data->vcands[i];
6948 :
6949 27367496 : if (originalp && cand->pos !=IP_ORIGINAL)
6950 10787015 : continue;
6951 :
6952 13683748 : if (!originalp && cand->iv->base_object != NULL_TREE)
6953 2505082 : continue;
6954 :
6955 14075399 : if (iv_ca_cand_used_p (ivs, cand))
6956 1517580 : continue;
6957 :
6958 12557819 : cp = get_group_iv_cost (data, group, cand);
6959 12557819 : if (!cp)
6960 3622251 : continue;
6961 :
6962 8935568 : iv_ca_set_cp (data, ivs, group, cp);
6963 8935568 : act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6964 : true);
6965 8935568 : iv_ca_set_no_cp (data, ivs, group);
6966 8935568 : act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6967 :
6968 8935568 : if (act_cost < best_cost)
6969 : {
6970 396212 : best_cost = act_cost;
6971 :
6972 396212 : iv_ca_delta_free (&best_delta);
6973 396212 : best_delta = act_delta;
6974 : }
6975 : else
6976 8539356 : iv_ca_delta_free (&act_delta);
6977 : }
6978 :
6979 3288100 : if (best_cost.infinite_cost_p ())
6980 : {
6981 693204 : for (i = 0; i < group->n_map_members; i++)
6982 : {
6983 629525 : cp = group->cost_map + i;
6984 629525 : cand = cp->cand;
6985 629525 : if (!cand)
6986 522440 : continue;
6987 :
6988 : /* Already tried this. */
6989 107085 : if (cand->important)
6990 : {
6991 0 : if (originalp && cand->pos == IP_ORIGINAL)
6992 0 : continue;
6993 0 : if (!originalp && cand->iv->base_object == NULL_TREE)
6994 0 : continue;
6995 : }
6996 :
6997 107085 : if (iv_ca_cand_used_p (ivs, cand))
6998 0 : continue;
6999 :
7000 107085 : act_delta = NULL;
7001 107085 : iv_ca_set_cp (data, ivs, group, cp);
7002 107085 : act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7003 107085 : iv_ca_set_no_cp (data, ivs, group);
7004 107085 : act_delta = iv_ca_delta_add (group,
7005 : iv_ca_cand_for_group (ivs, group),
7006 : cp, act_delta);
7007 :
7008 107085 : if (act_cost < best_cost)
7009 : {
7010 65187 : best_cost = act_cost;
7011 :
7012 65187 : if (best_delta)
7013 2750 : iv_ca_delta_free (&best_delta);
7014 65187 : best_delta = act_delta;
7015 : }
7016 : else
7017 41898 : iv_ca_delta_free (&act_delta);
7018 : }
7019 : }
7020 :
7021 3288100 : iv_ca_delta_commit (data, ivs, best_delta, true);
7022 3288100 : iv_ca_delta_free (&best_delta);
7023 :
7024 3288100 : return !best_cost.infinite_cost_p ();
7025 : }
7026 :
7027 : /* Finds an initial assignment of candidates to uses. */
7028 :
7029 : static class iv_ca *
7030 1001630 : get_initial_solution (struct ivopts_data *data, bool originalp)
7031 : {
7032 1001630 : unsigned i;
7033 1001630 : class iv_ca *ivs = iv_ca_new (data);
7034 :
7035 4288488 : for (i = 0; i < data->vgroups.length (); i++)
7036 3288100 : if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7037 : {
7038 1242 : iv_ca_free (&ivs);
7039 1242 : return NULL;
7040 : }
7041 :
7042 : return ivs;
7043 : }
7044 :
7045 : /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
7046 : points to a bool variable, this function tries to break local
7047 : optimal fixed-point by replacing candidates in IVS if it's true. */
7048 :
7049 : static bool
7050 1486488 : try_improve_iv_set (struct ivopts_data *data,
7051 : class iv_ca *ivs, bool *try_replace_p)
7052 : {
7053 1486488 : unsigned i, n_ivs;
7054 1486488 : comp_cost acost, best_cost = iv_ca_cost (ivs);
7055 1486488 : struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7056 1486488 : struct iv_cand *cand;
7057 :
7058 : /* Try extending the set of induction variables by one. */
7059 16369178 : for (i = 0; i < data->vcands.length (); i++)
7060 : {
7061 14882690 : cand = data->vcands[i];
7062 :
7063 14882690 : if (iv_ca_cand_used_p (ivs, cand))
7064 2223460 : continue;
7065 :
7066 12659230 : acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7067 12659230 : if (!act_delta)
7068 7621383 : continue;
7069 :
7070 : /* If we successfully added the candidate and the set is small enough,
7071 : try optimizing it by removing other candidates. */
7072 5037847 : if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7073 : {
7074 4997431 : iv_ca_delta_commit (data, ivs, act_delta, true);
7075 4997431 : acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7076 4997431 : iv_ca_delta_commit (data, ivs, act_delta, false);
7077 4997431 : act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7078 : }
7079 :
7080 5037847 : if (acost < best_cost)
7081 : {
7082 581305 : best_cost = acost;
7083 581305 : iv_ca_delta_free (&best_delta);
7084 581305 : best_delta = act_delta;
7085 : }
7086 : else
7087 4456542 : iv_ca_delta_free (&act_delta);
7088 : }
7089 :
7090 1486488 : if (!best_delta)
7091 : {
7092 : /* Try removing the candidates from the set instead. */
7093 1030851 : best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7094 :
7095 1030851 : if (!best_delta && *try_replace_p)
7096 : {
7097 1000388 : *try_replace_p = false;
7098 : /* So far candidate selecting algorithm tends to choose fewer IVs
7099 : so that it can handle cases in which loops have many variables
7100 : but the best choice is often to use only one general biv. One
7101 : weakness is it can't handle opposite cases, in which different
7102 : candidates should be chosen with respect to each use. To solve
7103 : the problem, we replace candidates in a manner described by the
7104 : comments of iv_ca_replace, thus give general algorithm a chance
7105 : to break local optimal fixed-point in these cases. */
7106 1000388 : best_cost = iv_ca_replace (data, ivs, &best_delta);
7107 : }
7108 :
7109 1030851 : if (!best_delta)
7110 : return false;
7111 : }
7112 :
7113 486100 : iv_ca_delta_commit (data, ivs, best_delta, true);
7114 486100 : iv_ca_delta_free (&best_delta);
7115 972200 : return best_cost == iv_ca_cost (ivs);
7116 : }
7117 :
7118 : /* Attempts to find the optimal set of induction variables. We do simple
7119 : greedy heuristic -- we try to replace at most one candidate in the selected
7120 : solution and remove the unused ivs while this improves the cost. */
7121 :
7122 : static class iv_ca *
7123 1001630 : find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7124 : {
7125 1001630 : class iv_ca *set;
7126 1001630 : bool try_replace_p = true;
7127 :
7128 : /* Get the initial solution. */
7129 1001630 : set = get_initial_solution (data, originalp);
7130 1001630 : if (!set)
7131 : {
7132 1242 : if (dump_file && (dump_flags & TDF_DETAILS))
7133 0 : fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7134 1242 : return NULL;
7135 : }
7136 :
7137 1000388 : if (dump_file && (dump_flags & TDF_DETAILS))
7138 : {
7139 134 : fprintf (dump_file, "Initial set of candidates:\n");
7140 134 : iv_ca_dump (data, dump_file, set);
7141 : }
7142 :
7143 1486488 : while (try_improve_iv_set (data, set, &try_replace_p))
7144 : {
7145 486100 : if (dump_file && (dump_flags & TDF_DETAILS))
7146 : {
7147 114 : fprintf (dump_file, "Improved to:\n");
7148 114 : iv_ca_dump (data, dump_file, set);
7149 : }
7150 : }
7151 :
7152 : /* If the set has infinite_cost, it can't be optimal. */
7153 2000776 : if (iv_ca_cost (set).infinite_cost_p ())
7154 : {
7155 0 : if (dump_file && (dump_flags & TDF_DETAILS))
7156 0 : fprintf (dump_file,
7157 : "Overflow to infinite cost in try_improve_iv_set.\n");
7158 0 : iv_ca_free (&set);
7159 : }
7160 1000388 : return set;
7161 : }
7162 :
7163 : static class iv_ca *
7164 500815 : find_optimal_iv_set (struct ivopts_data *data)
7165 : {
7166 500815 : unsigned i;
7167 500815 : comp_cost cost, origcost;
7168 500815 : class iv_ca *set, *origset;
7169 :
7170 : /* Determine the cost based on a strategy that starts with original IVs,
7171 : and try again using a strategy that prefers candidates not based
7172 : on any IVs. */
7173 500815 : origset = find_optimal_iv_set_1 (data, true);
7174 500815 : set = find_optimal_iv_set_1 (data, false);
7175 :
7176 500815 : if (!origset && !set)
7177 : return NULL;
7178 :
7179 500194 : origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7180 500194 : cost = set ? iv_ca_cost (set) : infinite_cost;
7181 :
7182 500194 : if (dump_file && (dump_flags & TDF_DETAILS))
7183 : {
7184 67 : fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7185 : origcost.cost, origcost.complexity);
7186 67 : fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7187 : cost.cost, cost.complexity);
7188 : }
7189 :
7190 : /* Choose the one with the best cost. */
7191 500194 : if (origcost <= cost)
7192 : {
7193 465129 : if (set)
7194 465129 : iv_ca_free (&set);
7195 465129 : set = origset;
7196 : }
7197 35065 : else if (origset)
7198 35065 : iv_ca_free (&origset);
7199 :
7200 2142981 : for (i = 0; i < data->vgroups.length (); i++)
7201 : {
7202 1642787 : struct iv_group *group = data->vgroups[i];
7203 1642787 : group->selected = iv_ca_cand_for_group (set, group)->cand;
7204 : }
7205 :
7206 500194 : return set;
7207 : }
7208 :
7209 : /* Creates a new induction variable corresponding to CAND. */
7210 :
7211 : static void
7212 675623 : create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7213 : {
7214 675623 : gimple_stmt_iterator incr_pos;
7215 675623 : tree base;
7216 675623 : struct iv_use *use;
7217 675623 : struct iv_group *group;
7218 675623 : bool after = false;
7219 :
7220 675623 : gcc_assert (cand->iv != NULL);
7221 :
7222 675623 : switch (cand->pos)
7223 : {
7224 465785 : case IP_NORMAL:
7225 465785 : incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7226 465785 : break;
7227 :
7228 10227 : case IP_END:
7229 10227 : incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7230 10227 : after = true;
7231 10227 : gcc_assert (gsi_end_p (incr_pos) || !stmt_ends_bb_p (*incr_pos));
7232 : break;
7233 :
7234 0 : case IP_AFTER_USE:
7235 0 : after = true;
7236 : /* fall through */
7237 0 : case IP_BEFORE_USE:
7238 0 : incr_pos = gsi_for_stmt (cand->incremented_at);
7239 0 : break;
7240 :
7241 199611 : case IP_ORIGINAL:
7242 : /* Mark that the iv is preserved. */
7243 199611 : name_info (data, cand->var_before)->preserve_biv = true;
7244 199611 : name_info (data, cand->var_after)->preserve_biv = true;
7245 :
7246 : /* Rewrite the increment so that it uses var_before directly. */
7247 199611 : use = find_interesting_uses_op (data, cand->var_after);
7248 199611 : group = data->vgroups[use->group_id];
7249 199611 : group->selected = cand;
7250 199611 : return;
7251 : }
7252 :
7253 476012 : gimple_add_tmp_var (cand->var_before);
7254 :
7255 476012 : base = unshare_expr (cand->iv->base);
7256 :
7257 : /* The step computation could invoke UB when the loop does not iterate.
7258 : Avoid inserting it on the preheader in its native form but rewrite
7259 : it to a well-defined form. This also helps masking SCEV issues
7260 : which freely re-associates the IV computations when building up
7261 : CHRECs without much regard for signed overflow invoking UB. */
7262 476012 : gimple_seq stmts = NULL;
7263 476012 : tree step = force_gimple_operand (unshare_expr (cand->iv->step), &stmts,
7264 : true, NULL_TREE);
7265 476012 : if (stmts)
7266 : {
7267 143206 : for (auto gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
7268 95282 : if (gimple_needing_rewrite_undefined (gsi_stmt (gsi)))
7269 10753 : rewrite_to_defined_unconditional (&gsi);
7270 47924 : gsi_insert_seq_on_edge_immediate
7271 47924 : (loop_preheader_edge (data->current_loop), stmts);
7272 : }
7273 :
7274 476012 : create_iv (base, PLUS_EXPR, step,
7275 : cand->var_before, data->current_loop,
7276 : &incr_pos, after, &cand->var_before, &cand->var_after);
7277 : }
7278 :
7279 : /* Creates new induction variables described in SET. */
7280 :
7281 : static void
7282 500194 : create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7283 : {
7284 500194 : unsigned i;
7285 500194 : struct iv_cand *cand;
7286 500194 : bitmap_iterator bi;
7287 :
7288 1175817 : EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7289 : {
7290 675623 : cand = data->vcands[i];
7291 675623 : create_new_iv (data, cand);
7292 : }
7293 :
7294 500194 : if (dump_file && (dump_flags & TDF_DETAILS))
7295 : {
7296 67 : fprintf (dump_file, "Selected IV set for loop %d",
7297 67 : data->current_loop->num);
7298 67 : if (data->loop_loc != UNKNOWN_LOCATION)
7299 65 : fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7300 130 : LOCATION_LINE (data->loop_loc));
7301 67 : fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " avg niters",
7302 : avg_loop_niter (data->current_loop));
7303 67 : fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7304 178 : EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7305 : {
7306 111 : cand = data->vcands[i];
7307 111 : dump_cand (dump_file, cand);
7308 : }
7309 67 : fprintf (dump_file, "\n");
7310 : }
7311 500194 : }
7312 :
7313 : /* Rewrites USE (definition of iv used in a nonlinear expression)
7314 : using candidate CAND. */
7315 :
7316 : static void
7317 622539 : rewrite_use_nonlinear_expr (struct ivopts_data *data,
7318 : struct iv_use *use, struct iv_cand *cand)
7319 : {
7320 622539 : gassign *ass;
7321 622539 : gimple_stmt_iterator bsi;
7322 622539 : tree comp, type = get_use_type (use), tgt;
7323 :
7324 : /* An important special case -- if we are asked to express value of
7325 : the original iv by itself, just exit; there is no need to
7326 : introduce a new computation (that might also need casting the
7327 : variable to unsigned and back). */
7328 622539 : if (cand->pos == IP_ORIGINAL
7329 330959 : && cand->incremented_at == use->stmt)
7330 : {
7331 199611 : tree op = NULL_TREE;
7332 199611 : enum tree_code stmt_code;
7333 :
7334 199611 : gcc_assert (is_gimple_assign (use->stmt));
7335 199611 : gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7336 :
7337 : /* Check whether we may leave the computation unchanged.
7338 : This is the case only if it does not rely on other
7339 : computations in the loop -- otherwise, the computation
7340 : we rely upon may be removed in remove_unused_ivs,
7341 : thus leading to ICE. */
7342 199611 : stmt_code = gimple_assign_rhs_code (use->stmt);
7343 199611 : if (stmt_code == PLUS_EXPR
7344 199611 : || stmt_code == MINUS_EXPR
7345 199611 : || stmt_code == POINTER_PLUS_EXPR)
7346 : {
7347 195632 : if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7348 193780 : op = gimple_assign_rhs2 (use->stmt);
7349 1852 : else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7350 : op = gimple_assign_rhs1 (use->stmt);
7351 : }
7352 :
7353 194280 : if (op != NULL_TREE)
7354 : {
7355 194280 : if (expr_invariant_in_loop_p (data->current_loop, op))
7356 277994 : return;
7357 181 : if (TREE_CODE (op) == SSA_NAME)
7358 : {
7359 181 : struct iv *iv = get_iv (data, op);
7360 181 : if (iv != NULL && integer_zerop (iv->step))
7361 : return;
7362 : }
7363 : }
7364 : }
7365 :
7366 428259 : switch (gimple_code (use->stmt))
7367 : {
7368 126209 : case GIMPLE_PHI:
7369 126209 : tgt = PHI_RESULT (use->stmt);
7370 :
7371 : /* If we should keep the biv, do not replace it. */
7372 126209 : if (name_info (data, tgt)->preserve_biv)
7373 : return;
7374 :
7375 42495 : bsi = gsi_after_labels (gimple_bb (use->stmt));
7376 42495 : break;
7377 :
7378 302050 : case GIMPLE_ASSIGN:
7379 302050 : tgt = gimple_assign_lhs (use->stmt);
7380 302050 : bsi = gsi_for_stmt (use->stmt);
7381 302050 : break;
7382 :
7383 0 : default:
7384 0 : gcc_unreachable ();
7385 : }
7386 :
7387 1033635 : aff_tree aff_inv, aff_var;
7388 344545 : if (!get_computation_aff_1 (data, use->stmt, use, cand, &aff_inv, &aff_var))
7389 0 : gcc_unreachable ();
7390 :
7391 344545 : unshare_aff_combination (&aff_inv);
7392 344545 : unshare_aff_combination (&aff_var);
7393 : /* Prefer CSE opportunity than loop invariant by adding offset at last
7394 : so that iv_uses have different offsets can be CSEed. */
7395 689090 : poly_widest_int offset = aff_inv.offset;
7396 344545 : aff_inv.offset = 0;
7397 :
7398 344545 : gimple_seq stmt_list = NULL, seq = NULL;
7399 344545 : tree comp_op1 = aff_combination_to_tree (&aff_inv);
7400 344545 : tree comp_op2 = aff_combination_to_tree (&aff_var);
7401 344545 : gcc_assert (comp_op1 && comp_op2);
7402 :
7403 344545 : comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7404 344545 : gimple_seq_add_seq (&stmt_list, seq);
7405 344545 : comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7406 344545 : gimple_seq_add_seq (&stmt_list, seq);
7407 :
7408 344545 : if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7409 : std::swap (comp_op1, comp_op2);
7410 :
7411 344545 : if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7412 : {
7413 0 : comp = fold_build_pointer_plus (comp_op1,
7414 : fold_convert (sizetype, comp_op2));
7415 0 : comp = fold_build_pointer_plus (comp,
7416 : wide_int_to_tree (sizetype, offset));
7417 : }
7418 : else
7419 : {
7420 344545 : comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7421 : fold_convert (TREE_TYPE (comp_op1), comp_op2));
7422 344545 : comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7423 : wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7424 : }
7425 :
7426 344545 : comp = fold_convert (type, comp);
7427 344545 : comp = force_gimple_operand (comp, &seq, false, NULL);
7428 344545 : gimple_seq_add_seq (&stmt_list, seq);
7429 344545 : if (gimple_code (use->stmt) != GIMPLE_PHI
7430 : /* We can't allow re-allocating the stmt as it might be pointed
7431 : to still. */
7432 344545 : && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7433 302050 : >= gimple_num_ops (gsi_stmt (bsi))))
7434 : {
7435 8481 : comp = force_gimple_operand (comp, &seq, true, NULL);
7436 8481 : gimple_seq_add_seq (&stmt_list, seq);
7437 8481 : if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7438 : {
7439 0 : duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7440 : /* As this isn't a plain copy we have to reset alignment
7441 : information. */
7442 0 : if (SSA_NAME_PTR_INFO (comp))
7443 0 : mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7444 : }
7445 : }
7446 :
7447 344545 : gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7448 344545 : if (gimple_code (use->stmt) == GIMPLE_PHI)
7449 : {
7450 42495 : ass = gimple_build_assign (tgt, comp);
7451 42495 : gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7452 :
7453 42495 : bsi = gsi_for_stmt (use->stmt);
7454 42495 : remove_phi_node (&bsi, false);
7455 : }
7456 : else
7457 : {
7458 302050 : gimple_assign_set_rhs_from_tree (&bsi, comp);
7459 302050 : use->stmt = gsi_stmt (bsi);
7460 : }
7461 : }
7462 :
7463 : /* Performs a peephole optimization to reorder the iv update statement with
7464 : a mem ref to enable instruction combining in later phases. The mem ref uses
7465 : the iv value before the update, so the reordering transformation requires
7466 : adjustment of the offset. CAND is the selected IV_CAND.
7467 :
7468 : Example:
7469 :
7470 : t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7471 : iv2 = iv1 + 1;
7472 :
7473 : if (t < val) (1)
7474 : goto L;
7475 : goto Head;
7476 :
7477 :
7478 : directly propagating t over to (1) will introduce overlapping live range
7479 : thus increase register pressure. This peephole transform it into:
7480 :
7481 :
7482 : iv2 = iv1 + 1;
7483 : t = MEM_REF (base, iv2, 8, 8);
7484 : if (t < val)
7485 : goto L;
7486 : goto Head;
7487 : */
7488 :
7489 : static void
7490 863169 : adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7491 : {
7492 863169 : tree var_after;
7493 863169 : gimple *iv_update, *stmt;
7494 863169 : basic_block bb;
7495 863169 : gimple_stmt_iterator gsi, gsi_iv;
7496 :
7497 863169 : if (cand->pos != IP_NORMAL)
7498 860981 : return;
7499 :
7500 661160 : var_after = cand->var_after;
7501 661160 : iv_update = SSA_NAME_DEF_STMT (var_after);
7502 :
7503 661160 : bb = gimple_bb (iv_update);
7504 661160 : gsi = gsi_last_nondebug_bb (bb);
7505 661160 : stmt = gsi_stmt (gsi);
7506 :
7507 : /* Only handle conditional statement for now. */
7508 661160 : if (gimple_code (stmt) != GIMPLE_COND)
7509 : return;
7510 :
7511 661160 : gsi_prev_nondebug (&gsi);
7512 661160 : stmt = gsi_stmt (gsi);
7513 661160 : if (stmt != iv_update)
7514 : return;
7515 :
7516 532901 : gsi_prev_nondebug (&gsi);
7517 532901 : if (gsi_end_p (gsi))
7518 : return;
7519 :
7520 529732 : stmt = gsi_stmt (gsi);
7521 529732 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
7522 : return;
7523 :
7524 529571 : if (stmt != use->stmt)
7525 : return;
7526 :
7527 4971 : if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7528 : return;
7529 :
7530 2188 : if (dump_file && (dump_flags & TDF_DETAILS))
7531 : {
7532 0 : fprintf (dump_file, "Reordering \n");
7533 0 : print_gimple_stmt (dump_file, iv_update, 0);
7534 0 : print_gimple_stmt (dump_file, use->stmt, 0);
7535 0 : fprintf (dump_file, "\n");
7536 : }
7537 :
7538 2188 : gsi = gsi_for_stmt (use->stmt);
7539 2188 : gsi_iv = gsi_for_stmt (iv_update);
7540 2188 : gsi_move_before (&gsi_iv, &gsi);
7541 :
7542 2188 : cand->pos = IP_BEFORE_USE;
7543 2188 : cand->incremented_at = use->stmt;
7544 : }
7545 :
7546 : /* Return the alias pointer type that should be used for a MEM_REF
7547 : associated with USE, which has type USE_PTR_ADDRESS. */
7548 :
7549 : static tree
7550 795 : get_alias_ptr_type_for_ptr_address (iv_use *use)
7551 : {
7552 795 : gcall *call = as_a <gcall *> (use->stmt);
7553 795 : switch (gimple_call_internal_fn (call))
7554 : {
7555 795 : case IFN_MASK_LOAD:
7556 795 : case IFN_MASK_STORE:
7557 795 : case IFN_MASK_LOAD_LANES:
7558 795 : case IFN_MASK_STORE_LANES:
7559 795 : case IFN_MASK_LEN_LOAD_LANES:
7560 795 : case IFN_MASK_LEN_STORE_LANES:
7561 795 : case IFN_LEN_LOAD:
7562 795 : case IFN_LEN_STORE:
7563 795 : case IFN_MASK_LEN_LOAD:
7564 795 : case IFN_MASK_LEN_STORE:
7565 : /* The second argument contains the correct alias type. */
7566 795 : gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7567 795 : return TREE_TYPE (gimple_call_arg (call, 1));
7568 :
7569 0 : default:
7570 0 : gcc_unreachable ();
7571 : }
7572 : }
7573 :
7574 :
7575 : /* Rewrites USE (address that is an iv) using candidate CAND. */
7576 :
7577 : static void
7578 863169 : rewrite_use_address (struct ivopts_data *data,
7579 : struct iv_use *use, struct iv_cand *cand)
7580 : {
7581 863169 : aff_tree aff;
7582 863169 : bool ok;
7583 :
7584 863169 : adjust_iv_update_pos (cand, use);
7585 863169 : ok = get_computation_aff (data, use->stmt, use, cand, &aff);
7586 863169 : gcc_assert (ok);
7587 863169 : unshare_aff_combination (&aff);
7588 :
7589 : /* To avoid undefined overflow problems, all IV candidates use unsigned
7590 : integer types. The drawback is that this makes it impossible for
7591 : create_mem_ref to distinguish an IV that is based on a memory object
7592 : from one that represents simply an offset.
7593 :
7594 : To work around this problem, we pass a hint to create_mem_ref that
7595 : indicates which variable (if any) in aff is an IV based on a memory
7596 : object. Note that we only consider the candidate. If this is not
7597 : based on an object, the base of the reference is in some subexpression
7598 : of the use -- but these will use pointer types, so they are recognized
7599 : by the create_mem_ref heuristics anyway. */
7600 863169 : tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7601 863169 : tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7602 863169 : gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7603 863169 : tree type = use->mem_type;
7604 863169 : tree alias_ptr_type;
7605 863169 : if (use->type == USE_PTR_ADDRESS)
7606 795 : alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7607 : else
7608 : {
7609 862374 : gcc_assert (type == TREE_TYPE (*use->op_p));
7610 862374 : unsigned int align = get_object_alignment (*use->op_p);
7611 862374 : if (align != TYPE_ALIGN (type))
7612 34099 : type = build_aligned_type (type, align);
7613 862374 : alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7614 : }
7615 1726338 : tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7616 863169 : iv, base_hint, data->speed);
7617 :
7618 863169 : if (use->type == USE_PTR_ADDRESS)
7619 : {
7620 795 : ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7621 795 : ref = fold_convert (get_use_type (use), ref);
7622 795 : ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7623 : true, GSI_SAME_STMT);
7624 : }
7625 : else
7626 : {
7627 : /* When we end up confused enough and have no suitable base but
7628 : stuffed everything to index2 use a LEA for the address and
7629 : create a plain MEM_REF to avoid basing a memory reference
7630 : on address zero which create_mem_ref_raw does as fallback. */
7631 862374 : if (TREE_CODE (ref) == TARGET_MEM_REF
7632 862374 : && TMR_INDEX2 (ref) != NULL_TREE
7633 872917 : && integer_zerop (TREE_OPERAND (ref, 0)))
7634 : {
7635 20 : ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7636 20 : ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7637 : true, GSI_SAME_STMT);
7638 20 : ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7639 : }
7640 862374 : copy_ref_info (ref, *use->op_p);
7641 : }
7642 :
7643 863169 : *use->op_p = ref;
7644 863169 : }
7645 :
7646 : /* Rewrites USE (the condition such that one of the arguments is an iv) using
7647 : candidate CAND. */
7648 :
7649 : static void
7650 598688 : rewrite_use_compare (struct ivopts_data *data,
7651 : struct iv_use *use, struct iv_cand *cand)
7652 : {
7653 598688 : tree comp, op, bound;
7654 598688 : gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7655 598688 : enum tree_code compare;
7656 598688 : struct iv_group *group = data->vgroups[use->group_id];
7657 598688 : class cost_pair *cp = get_group_iv_cost (data, group, cand);
7658 :
7659 598688 : bound = cp->value;
7660 598688 : if (bound)
7661 : {
7662 391609 : tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7663 391609 : tree var_type = TREE_TYPE (var);
7664 391609 : gimple_seq stmts;
7665 :
7666 391609 : if (dump_file && (dump_flags & TDF_DETAILS))
7667 : {
7668 58 : fprintf (dump_file, "Replacing exit test: ");
7669 58 : print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7670 : }
7671 391609 : compare = cp->comp;
7672 391609 : bound = unshare_expr (fold_convert (var_type, bound));
7673 391609 : op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7674 391609 : if (stmts)
7675 179939 : gsi_insert_seq_on_edge_immediate (
7676 179939 : loop_preheader_edge (data->current_loop),
7677 : stmts);
7678 :
7679 391609 : gcond *cond_stmt = as_a <gcond *> (use->stmt);
7680 391609 : gimple_cond_set_lhs (cond_stmt, var);
7681 391609 : gimple_cond_set_code (cond_stmt, compare);
7682 391609 : gimple_cond_set_rhs (cond_stmt, op);
7683 391609 : return;
7684 : }
7685 :
7686 : /* The induction variable elimination failed; just express the original
7687 : giv. */
7688 207079 : comp = get_computation_at (data, use->stmt, use, cand);
7689 207079 : gcc_assert (comp != NULL_TREE);
7690 207079 : gcc_assert (use->op_p != NULL);
7691 207079 : *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7692 207079 : SSA_NAME_VAR (*use->op_p),
7693 : true, GSI_SAME_STMT);
7694 : }
7695 :
7696 : /* Rewrite the groups using the selected induction variables. */
7697 :
7698 : static void
7699 500194 : rewrite_groups (struct ivopts_data *data)
7700 : {
7701 500194 : unsigned i, j;
7702 :
7703 2302562 : for (i = 0; i < data->vgroups.length (); i++)
7704 : {
7705 1802368 : struct iv_group *group = data->vgroups[i];
7706 1802368 : struct iv_cand *cand = group->selected;
7707 :
7708 1802368 : gcc_assert (cand);
7709 :
7710 1802368 : if (group->type == USE_NONLINEAR_EXPR)
7711 : {
7712 1245078 : for (j = 0; j < group->vuses.length (); j++)
7713 : {
7714 622539 : rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7715 622539 : update_stmt (group->vuses[j]->stmt);
7716 : }
7717 : }
7718 1179829 : else if (address_p (group->type))
7719 : {
7720 1444310 : for (j = 0; j < group->vuses.length (); j++)
7721 : {
7722 863169 : rewrite_use_address (data, group->vuses[j], cand);
7723 863169 : update_stmt (group->vuses[j]->stmt);
7724 : }
7725 : }
7726 : else
7727 : {
7728 598688 : gcc_assert (group->type == USE_COMPARE);
7729 :
7730 2401056 : for (j = 0; j < group->vuses.length (); j++)
7731 : {
7732 598688 : rewrite_use_compare (data, group->vuses[j], cand);
7733 598688 : update_stmt (group->vuses[j]->stmt);
7734 : }
7735 : }
7736 : }
7737 500194 : }
7738 :
7739 : /* Removes the ivs that are not used after rewriting. */
7740 :
7741 : static void
7742 500194 : remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7743 : {
7744 500194 : unsigned j;
7745 500194 : bitmap_iterator bi;
7746 :
7747 : /* Figure out an order in which to release SSA DEFs so that we don't
7748 : release something that we'd have to propagate into a debug stmt
7749 : afterwards. */
7750 5508555 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7751 : {
7752 5008361 : struct version_info *info;
7753 :
7754 5008361 : info = ver_info (data, j);
7755 5008361 : if (info->iv
7756 4865564 : && !integer_zerop (info->iv->step)
7757 3203973 : && !info->inv_id
7758 3203973 : && !info->iv->nonlin_use
7759 7589795 : && !info->preserve_biv)
7760 : {
7761 2465537 : bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7762 :
7763 2465537 : tree def = info->iv->ssa_name;
7764 :
7765 3202645 : if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7766 : {
7767 737108 : imm_use_iterator imm_iter;
7768 737108 : use_operand_p use_p;
7769 737108 : gimple *stmt;
7770 737108 : int count = 0;
7771 :
7772 2183304 : FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7773 : {
7774 736547 : if (!gimple_debug_bind_p (stmt))
7775 621572 : continue;
7776 :
7777 : /* We just want to determine whether to do nothing
7778 : (count == 0), to substitute the computed
7779 : expression into a single use of the SSA DEF by
7780 : itself (count == 1), or to use a debug temp
7781 : because the SSA DEF is used multiple times or as
7782 : part of a larger expression (count > 1). */
7783 114975 : count++;
7784 114975 : if (gimple_debug_bind_get_value (stmt) != def)
7785 7598 : count++;
7786 :
7787 114975 : if (count > 1)
7788 : break;
7789 737108 : }
7790 :
7791 737108 : if (!count)
7792 663592 : continue;
7793 :
7794 94893 : struct iv_use dummy_use;
7795 94893 : struct iv_cand *best_cand = NULL, *cand;
7796 94893 : unsigned i, best_pref = 0, cand_pref;
7797 94893 : tree comp = NULL_TREE;
7798 :
7799 94893 : memset (&dummy_use, 0, sizeof (dummy_use));
7800 94893 : dummy_use.iv = info->iv;
7801 489004 : for (i = 0; i < data->vgroups.length () && i < 64; i++)
7802 : {
7803 394111 : cand = data->vgroups[i]->selected;
7804 394111 : if (cand == best_cand)
7805 162889 : continue;
7806 153413 : cand_pref = operand_equal_p (cand->iv->step,
7807 231222 : info->iv->step, 0)
7808 231222 : ? 4 : 0;
7809 231222 : cand_pref
7810 231222 : += TYPE_MODE (TREE_TYPE (cand->iv->base))
7811 231222 : == TYPE_MODE (TREE_TYPE (info->iv->base))
7812 231222 : ? 2 : 0;
7813 231222 : cand_pref
7814 462444 : += TREE_CODE (cand->iv->base) == INTEGER_CST
7815 231222 : ? 1 : 0;
7816 231222 : if (best_cand == NULL || best_pref < cand_pref)
7817 : {
7818 176774 : tree this_comp
7819 353548 : = get_debug_computation_at (data,
7820 176774 : SSA_NAME_DEF_STMT (def),
7821 : &dummy_use, cand);
7822 176774 : if (this_comp)
7823 : {
7824 394111 : best_cand = cand;
7825 394111 : best_pref = cand_pref;
7826 394111 : comp = this_comp;
7827 : }
7828 : }
7829 : }
7830 :
7831 94893 : if (!best_cand)
7832 21377 : continue;
7833 :
7834 73516 : comp = unshare_expr (comp);
7835 73516 : if (count > 1)
7836 : {
7837 23251 : tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7838 : /* FIXME: Is setting the mode really necessary? */
7839 23251 : if (SSA_NAME_VAR (def))
7840 13372 : SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7841 : else
7842 9879 : SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7843 23251 : gdebug *def_temp
7844 23251 : = gimple_build_debug_bind (vexpr, comp, NULL);
7845 23251 : gimple_stmt_iterator gsi;
7846 :
7847 23251 : if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7848 13559 : gsi = gsi_after_labels (gimple_bb
7849 13559 : (SSA_NAME_DEF_STMT (def)));
7850 : else
7851 9692 : gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7852 :
7853 23251 : gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7854 23251 : comp = vexpr;
7855 : }
7856 :
7857 355799 : FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7858 : {
7859 208767 : if (!gimple_debug_bind_p (stmt))
7860 80854 : continue;
7861 :
7862 383811 : FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7863 127949 : SET_USE (use_p, comp);
7864 :
7865 127913 : update_stmt (stmt);
7866 73516 : }
7867 : }
7868 : }
7869 : }
7870 500194 : }
7871 :
7872 : /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7873 : for hash_map::traverse. */
7874 :
7875 : bool
7876 483171 : free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7877 : {
7878 483171 : if (value)
7879 : {
7880 442910 : value->~tree_niter_desc ();
7881 442910 : free (value);
7882 : }
7883 483171 : return true;
7884 : }
7885 :
7886 : /* Frees data allocated by the optimization of a single loop. */
7887 :
7888 : static void
7889 867868 : free_loop_data (struct ivopts_data *data)
7890 : {
7891 867868 : unsigned i, j;
7892 867868 : bitmap_iterator bi;
7893 867868 : tree obj;
7894 :
7895 867868 : if (data->niters)
7896 : {
7897 954180 : data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7898 942018 : delete data->niters;
7899 471009 : data->niters = NULL;
7900 : }
7901 :
7902 5892193 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7903 : {
7904 5024325 : struct version_info *info;
7905 :
7906 5024325 : info = ver_info (data, i);
7907 5024325 : info->iv = NULL;
7908 5024325 : info->has_nonlin_use = false;
7909 5024325 : info->preserve_biv = false;
7910 5024325 : info->inv_id = 0;
7911 : }
7912 867868 : bitmap_clear (data->relevant);
7913 867868 : bitmap_clear (data->important_candidates);
7914 :
7915 2672864 : for (i = 0; i < data->vgroups.length (); i++)
7916 : {
7917 1804996 : struct iv_group *group = data->vgroups[i];
7918 :
7919 3892059 : for (j = 0; j < group->vuses.length (); j++)
7920 2087063 : free (group->vuses[j]);
7921 1804996 : group->vuses.release ();
7922 :
7923 1804996 : BITMAP_FREE (group->related_cands);
7924 19680490 : for (j = 0; j < group->n_map_members; j++)
7925 : {
7926 17875494 : if (group->cost_map[j].inv_vars)
7927 3741613 : BITMAP_FREE (group->cost_map[j].inv_vars);
7928 17875494 : if (group->cost_map[j].inv_exprs)
7929 2049220 : BITMAP_FREE (group->cost_map[j].inv_exprs);
7930 : }
7931 :
7932 1804996 : free (group->cost_map);
7933 1804996 : free (group);
7934 : }
7935 867868 : data->vgroups.truncate (0);
7936 :
7937 5473838 : for (i = 0; i < data->vcands.length (); i++)
7938 : {
7939 4605970 : struct iv_cand *cand = data->vcands[i];
7940 :
7941 4605970 : if (cand->inv_vars)
7942 74768 : BITMAP_FREE (cand->inv_vars);
7943 4605970 : if (cand->inv_exprs)
7944 100505 : BITMAP_FREE (cand->inv_exprs);
7945 4605970 : free (cand);
7946 : }
7947 867868 : data->vcands.truncate (0);
7948 :
7949 867868 : if (data->version_info_size < num_ssa_names)
7950 : {
7951 162 : data->version_info_size = 2 * num_ssa_names;
7952 162 : free (data->version_info);
7953 162 : data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7954 : }
7955 :
7956 867868 : data->max_inv_var_id = 0;
7957 867868 : data->max_inv_expr_id = 0;
7958 :
7959 867868 : FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7960 0 : SET_DECL_RTL (obj, NULL_RTX);
7961 :
7962 867868 : decl_rtl_to_reset.truncate (0);
7963 :
7964 867868 : data->inv_expr_tab->empty ();
7965 :
7966 867868 : data->iv_common_cand_tab->empty ();
7967 867868 : data->iv_common_cands.truncate (0);
7968 867868 : }
7969 :
7970 : /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7971 : loop tree. */
7972 :
7973 : static void
7974 240496 : tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7975 : {
7976 240496 : free_loop_data (data);
7977 240496 : free (data->version_info);
7978 240496 : BITMAP_FREE (data->relevant);
7979 240496 : BITMAP_FREE (data->important_candidates);
7980 :
7981 240496 : decl_rtl_to_reset.release ();
7982 240496 : data->vgroups.release ();
7983 240496 : data->vcands.release ();
7984 240496 : delete data->inv_expr_tab;
7985 240496 : data->inv_expr_tab = NULL;
7986 240496 : free_affine_expand_cache (&data->name_expansion_cache);
7987 240496 : if (data->base_object_map)
7988 162462 : delete data->base_object_map;
7989 240496 : delete data->iv_common_cand_tab;
7990 240496 : data->iv_common_cand_tab = NULL;
7991 240496 : data->iv_common_cands.release ();
7992 240496 : obstack_free (&data->iv_obstack, NULL);
7993 240496 : }
7994 :
7995 : /* Returns true if the loop body BODY includes any function calls. */
7996 :
7997 : static bool
7998 627372 : loop_body_includes_call (basic_block *body, unsigned num_nodes)
7999 : {
8000 627372 : gimple_stmt_iterator gsi;
8001 627372 : unsigned i;
8002 :
8003 2823754 : for (i = 0; i < num_nodes; i++)
8004 23557945 : for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
8005 : {
8006 18955082 : gimple *stmt = gsi_stmt (gsi);
8007 18955082 : if (is_gimple_call (stmt)
8008 281360 : && !gimple_call_internal_p (stmt)
8009 19171322 : && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
8010 : return true;
8011 : }
8012 : return false;
8013 : }
8014 :
8015 : /* Determine cost scaling factor for basic blocks in loop. */
8016 : #define COST_SCALING_FACTOR_BOUND (20)
8017 :
8018 : static void
8019 500815 : determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8020 : {
8021 500815 : int lfreq = data->current_loop->header->count.to_frequency (cfun);
8022 500815 : if (!data->speed || lfreq <= 0)
8023 : return;
8024 :
8025 : int max_freq = lfreq;
8026 2867923 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8027 : {
8028 2454194 : body[i]->aux = (void *)(intptr_t) 1;
8029 2454194 : if (max_freq < body[i]->count.to_frequency (cfun))
8030 102675 : max_freq = body[i]->count.to_frequency (cfun);
8031 : }
8032 413729 : if (max_freq > lfreq)
8033 : {
8034 65470 : int divisor, factor;
8035 : /* Check if scaling factor itself needs to be scaled by the bound. This
8036 : is to avoid overflow when scaling cost according to profile info. */
8037 65470 : if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8038 : {
8039 : divisor = max_freq;
8040 : factor = COST_SCALING_FACTOR_BOUND;
8041 : }
8042 : else
8043 : {
8044 49637 : divisor = lfreq;
8045 49637 : factor = 1;
8046 : }
8047 999283 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8048 : {
8049 933813 : int bfreq = body[i]->count.to_frequency (cfun);
8050 933813 : if (bfreq <= lfreq)
8051 516518 : continue;
8052 :
8053 417295 : body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8054 : }
8055 : }
8056 : }
8057 :
8058 : /* Find the doloop comparison use among the groups of DATA: a USE_COMPARE group whose single use is a GIMPLE_COND with an outgoing edge into the empty latch of the current loop. Set doloop_p on the first such group and return true; return false if no group qualifies. */
8059 :
8060 : static bool
8061 0 : find_doloop_use (struct ivopts_data *data)
8062 : {
8063 0 : struct loop *loop = data->current_loop;
8064 : /* Scan the groups in order and stop at the first one that qualifies. */
8065 0 : for (unsigned i = 0; i < data->vgroups.length (); i++)
8066 : {
8067 0 : struct iv_group *group = data->vgroups[i];
8068 0 : if (group->type == USE_COMPARE)
8069 : {
8070 0 : gcc_assert (group->vuses.length () == 1); /* A compare group is required to hold exactly one use. */
8071 0 : struct iv_use *use = group->vuses[0];
8072 0 : gimple *stmt = use->stmt;
8073 0 : if (gimple_code (stmt) == GIMPLE_COND)
8074 : {
8075 0 : basic_block bb = gimple_bb (stmt);
8076 0 : edge true_edge, false_edge;
8077 0 : extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8078 : /* Accept the comparison only if one of its two outgoing edges enters
8079 : the loop latch. The latch is additionally required to be empty for now. */
8080 0 : if ((loop->latch == true_edge->dest
8081 0 : || loop->latch == false_edge->dest)
8082 0 : && empty_block_p (loop->latch))
8083 : {
8084 0 : group->doloop_p = true;
8085 0 : if (dump_file && (dump_flags & TDF_DETAILS))
8086 : {
8087 0 : fprintf (dump_file, "Doloop cmp iv use: ");
8088 0 : print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8089 : }
8090 0 : return true;
8091 : }
8092 : }
8093 : }
8094 : }
8095 : /* No compare group matched the required shape. */
8096 : return false;
8097 : }
8098 :
8099 : /* For targets that support doloop, predict whether the later RTL doloop
8100 : transformation will be applied to the current loop of DATA. DATA->doloop_use_p is cleared first and is set
8101 : only when none of the checks below rules doloop out and find_doloop_use locates the doloop comparison use. */
8102 :
8103 : void
8104 500815 : analyze_and_mark_doloop_use (struct ivopts_data *data)
8105 : {
8106 500815 : data->doloop_use_p = false;
8107 : /* Nothing to predict unless flag_branch_on_count_reg is set. */
8108 500815 : if (!flag_branch_on_count_reg)
8109 : return;
8110 : /* NOTE(review): an unroll value of USHRT_MAX presumably marks a loop requested to be unrolled without a specific factor -- confirm against the documentation of the loop structure. */
8111 500815 : if (data->current_loop->unroll == USHRT_MAX)
8112 : return;
8113 : /* Give up when generic_predict_doloop_p rejects the loop. */
8114 500815 : if (!generic_predict_doloop_p (data))
8115 : return;
8116 : /* The generic prediction passed; flag the loop only if its doloop comparison use is found. */
8117 0 : if (find_doloop_use (data))
8118 : {
8119 0 : data->doloop_use_p = true;
8120 0 : if (dump_file && (dump_flags & TDF_DETAILS))
8121 : {
8122 0 : struct loop *loop = data->current_loop;
8123 0 : fprintf (dump_file,
8124 : "Predict loop %d can perform"
8125 : " doloop optimization later.\n",
8126 : loop->num);
8127 0 : flow_loop_dump (loop, dump_file, NULL, 1);
8128 : }
8129 : }
8130 : }
8131 :
8132 : /* Optimizes the induction variables of LOOP, using DATA as the working state. TOREMOVE is handed to remove_unused_ivs once the uses are rewritten. Returns true if find_optimal_iv_set produced a set (the loop is then rewritten), false if the function gave up earlier. */
8133 :
8134 : static bool
8135 627372 : tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8136 : bitmap toremove)
8137 : {
8138 627372 : bool changed = false;
8139 627372 : class iv_ca *iv_ca;
8140 627372 : edge exit = single_dom_exit (loop);
8141 627372 : basic_block *body;
8142 : /* DATA is required to hold no niters information on entry; the fields below are then set up for LOOP. */
8143 627372 : gcc_assert (!data->niters);
8144 627372 : data->current_loop = loop;
8145 627372 : data->loop_loc = find_loop_location (loop).get_location_t ();
8146 627372 : data->speed = optimize_loop_for_speed_p (loop);
8147 : /* With detailed dumps enabled, print the loop number, its source location when known, and, if single_dom_exit returned an edge, that edge and the last statement of its source block. */
8148 627372 : if (dump_file && (dump_flags & TDF_DETAILS))
8149 : {
8150 67 : fprintf (dump_file, "Processing loop %d", loop->num);
8151 67 : if (data->loop_loc != UNKNOWN_LOCATION)
8152 65 : fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8153 130 : LOCATION_LINE (data->loop_loc));
8154 67 : fprintf (dump_file, "\n");
8155 :
8156 67 : if (exit)
8157 : {
8158 57 : fprintf (dump_file, " single exit %d -> %d, exit condition ",
8159 57 : exit->src->index, exit->dest->index);
8160 114 : print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
8161 : 0, TDF_SLIM);
8162 57 : fprintf (dump_file, "\n");
8163 : }
8164 :
8165 67 : fprintf (dump_file, "\n");
8166 : }
8167 : /* Fetch the blocks of LOOP. BODY is released with free at the finish label. */
8168 627372 : body = get_loop_body (loop);
8169 627372 : data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8170 627372 : renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8171 : /* The flag requires both that EXIT is non-null and that loop_only_exit_p accepts it. */
8172 627372 : data->loop_single_exit_p
8173 627372 : = exit != NULL && loop_only_exit_p (loop, body, exit);
8174 :
8175 : /* For each ssa name determines whether it behaves as an induction variable
8176 : in some loop. Give up on LOOP if find_induction_variables returns false. */
8177 627372 : if (!find_induction_variables (data, body))
8178 126556 : goto finish;
8179 :
8180 : /* Finds interesting uses (item 1). Give up if they form more than MAX_CONSIDERED_GROUPS groups. */
8181 500816 : find_interesting_uses (data, body);
8182 500816 : if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8183 1 : goto finish;
8184 :
8185 : /* Determine cost scaling factor for basic blocks in loop. */
8186 500815 : determine_scaling_factor (data, body);
8187 :
8188 : /* Analyze doloop possibility and mark the doloop use if predicted. */
8189 500815 : analyze_and_mark_doloop_use (data);
8190 :
8191 : /* Finds candidates for the induction variables (item 2). */
8192 500815 : find_iv_candidates (data);
8193 :
8194 : /* Calculates the costs (item 3, part 1). */
8195 500815 : determine_iv_costs (data);
8196 500815 : determine_group_iv_costs (data);
8197 500815 : determine_set_costs (data);
8198 :
8199 : /* Find the optimal set of induction variables (item 3, part 2). */
8200 500815 : iv_ca = find_optimal_iv_set (data);
8201 : /* Cleanup basic block aux field. This is done before IV_CA is tested so that it also happens when no set was found. */
8202 3315141 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8203 2814326 : body[i]->aux = NULL;
8204 500815 : if (!iv_ca)
8205 621 : goto finish;
8206 500194 : changed = true;
8207 :
8208 : /* Create the new induction variables (item 4, part 1). */
8209 500194 : create_new_ivs (data, iv_ca);
8210 500194 : iv_ca_free (&iv_ca);
8211 :
8212 : /* Rewrite the uses (item 4, part 2). */
8213 500194 : rewrite_groups (data);
8214 :
8215 : /* Remove the ivs that are unused after rewriting. */
8216 500194 : remove_unused_ivs (data, toremove);
8217 : /* Every path, including the early exits above, frees BODY and calls free_loop_data. */
8218 627372 : finish:
8219 627372 : free (body);
8220 627372 : free_loop_data (data);
8221 :
8222 627372 : return changed;
8223 : }
8224 :
8225 : /* Main entry point. Optimizes induction variables in the loops of cfun, innermost loops first, then releases the definitions collected in the removal bitmap and resets the cached scev and niter information. */
8226 :
8227 : void
8228 240496 : tree_ssa_iv_optimize (void)
8229 : {
8230 240496 : struct ivopts_data data;
8231 240496 : auto_bitmap toremove;
8232 : /* DATA and TOREMOVE are shared by all loops processed below. */
8233 240496 : tree_ssa_iv_optimize_init (&data);
8234 240496 : mark_ssa_maybe_undefs ();
8235 :
8236 : /* Optimize the loops starting with the innermost ones. Loops for which dbg_cnt (ivopts_loop) returns false are skipped. */
8237 1348860 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8238 : {
8239 627372 : if (!dbg_cnt (ivopts_loop))
8240 0 : continue;
8241 :
8242 627372 : if (dump_file && (dump_flags & TDF_DETAILS))
8243 67 : flow_loop_dump (loop, dump_file, NULL, 1);
8244 : /* The boolean result is not used here; the bitmap TOREMOVE is passed along for every loop. */
8245 627372 : tree_ssa_iv_optimize_loop (&data, loop, toremove);
8246 240496 : }
8247 :
8248 : /* Remove eliminated IV defs. */
8249 240496 : release_defs_bitset (toremove);
8250 :
8251 : /* We have changed the structure of induction variables; it might happen
8252 : that definitions in the scev database refer to some of them that were
8253 : eliminated. */
8254 240496 : scev_reset_htab ();
8255 : /* Likewise niter and control-IV information. */
8256 240496 : free_numbers_of_iterations_estimates (cfun);
8257 :
8258 240496 : tree_ssa_iv_optimize_finalize (&data);
8259 240496 : }
8260 :
8261 : #include "gt-tree-ssa-loop-ivopts.h"
|