Line data Source code
1 : /* Induction variable optimizations.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* This pass tries to find the optimal set of induction variables for the loop.
21 : It optimizes just the basic linear induction variables (although adding
22 : support for other types should not be too hard). It includes the
23 : optimizations commonly known as strength reduction, induction variable
24 : coalescing and induction variable elimination. It does it in the
25 : following steps:
26 :
27 : 1) The interesting uses of induction variables are found. This includes
28 :
29 : -- uses of induction variables in non-linear expressions
30 : -- addresses of arrays
31 : -- comparisons of induction variables
32 :
33 : Note the interesting uses are categorized and handled in groups.
34 : Generally, address type uses are grouped together if their iv bases
35 : differ only in constant offset.
36 :
37 : 2) Candidates for the induction variables are found. This includes
38 :
39 : -- old induction variables
40 : -- the variables defined by expressions derived from the "interesting
41 : groups/uses" above
42 :
43 : 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 : cost function assigns a cost to sets of induction variables and consists
45 : of three parts:
46 :
47 : -- The group/use costs. Each of the interesting groups/uses chooses
48 : the best induction variable in the set and adds its cost to the sum.
49 : The cost reflects the time spent on modifying the induction variables
50 : value to be usable for the given purpose (adding base and offset for
51 : arrays, etc.).
52 : -- The variable costs. Each of the variables has a cost assigned that
53 : reflects the costs associated with incrementing the value of the
54 : variable. The original variables are somewhat preferred.
55 : -- The set cost. Depending on the size of the set, extra cost may be
56 : added to reflect register pressure.
57 :
58 : All the costs are defined in a machine-specific way, using the target
59 : hooks and machine descriptions to determine them.
60 :
61 : 4) The trees are transformed to use the new variables, the dead code is
62 : removed.
63 :
64 : All of this is done loop by loop. Doing it globally is theoretically
65 : possible, it might give a better performance and it might enable us
66 : to decide costs more precisely, but getting all the interactions right
67 : would be complicated.
68 :
69 : For the targets supporting low-overhead loops, IVOPTs has to take care of
70 : the loops which will probably be transformed in RTL doloop optimization,
71 : to try to make selected IV candidate set optimal. The process of doloop
72 : support includes:
73 :
74 : 1) Analyze whether the current loop will be transformed to doloop, find and
75 : mark its compare type IV use as doloop use (iv_group field doloop_p), and
76 : set flag doloop_use_p of ivopts_data to notify subsequent processing of
77 : doloop. See analyze_and_mark_doloop_use and its callees for the details.
78 : The target hook predict_doloop_p can be used for target specific checks.
79 :
80 : 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 : set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
82 : like biv. For cost determination between doloop IV cand and IV use, the
83 : target hooks doloop_cost_for_generic and doloop_cost_for_address are
84 : provided to add on extra costs for generic type and address type IV use.
85 : Zero cost is assigned to the pair between doloop IV cand and doloop IV
86 : use, and bound zero is set for IV elimination.
87 :
88 : 3) With the cost setting in step 2), the current cost model based IV
89 : selection algorithm will process as usual, pick up doloop dedicated IV if
90 : profitable. */
91 :
92 : #include "config.h"
93 : #include "system.h"
94 : #include "coretypes.h"
95 : #include "backend.h"
96 : #include "rtl.h"
97 : #include "tree.h"
98 : #include "gimple.h"
99 : #include "cfghooks.h"
100 : #include "tree-pass.h"
101 : #include "memmodel.h"
102 : #include "tm_p.h"
103 : #include "ssa.h"
104 : #include "expmed.h"
105 : #include "insn-config.h"
106 : #include "emit-rtl.h"
107 : #include "recog.h"
108 : #include "cgraph.h"
109 : #include "gimple-pretty-print.h"
110 : #include "alias.h"
111 : #include "fold-const.h"
112 : #include "stor-layout.h"
113 : #include "tree-eh.h"
114 : #include "gimplify.h"
115 : #include "gimple-iterator.h"
116 : #include "gimplify-me.h"
117 : #include "tree-cfg.h"
118 : #include "tree-ssa-loop-ivopts.h"
119 : #include "tree-ssa-loop-manip.h"
120 : #include "tree-ssa-loop-niter.h"
121 : #include "tree-ssa-loop.h"
122 : #include "explow.h"
123 : #include "expr.h"
124 : #include "tree-dfa.h"
125 : #include "tree-ssa.h"
126 : #include "cfgloop.h"
127 : #include "tree-scalar-evolution.h"
128 : #include "tree-affine.h"
129 : #include "tree-ssa-propagate.h"
130 : #include "tree-ssa-address.h"
131 : #include "builtins.h"
132 : #include "tree-vectorizer.h"
133 : #include "dbgcnt.h"
134 : #include "cfganal.h"
135 : #include "gimple-fold.h"
136 :
137 : /* For lang_hooks.types.type_for_mode. */
138 : #include "langhooks.h"
139 :
140 : /* FIXME: Expressions are expanded to RTL in this pass to determine the
141 : cost of different addressing modes. This should be moved to a TBD
142 : interface between the GIMPLE and RTL worlds. */
143 :
144 : /* The infinite cost. */
145 : #define INFTY 1000000000
146 :
147 : /* Returns the expected number of loop iterations for LOOP.
148 : The average trip count is computed from profile data if it
149 : exists. */
150 :
151 : static inline unsigned HOST_WIDE_INT
152 8735922 : avg_loop_niter (class loop *loop)
153 : {
154 8735922 : HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
155 8735922 : if (niter == -1)
156 : {
157 4947666 : niter = likely_max_stmt_executions_int (loop);
158 :
159 4947666 : if (niter == -1 || niter > param_avg_loop_niter)
160 4166915 : return param_avg_loop_niter;
161 : }
162 :
163 4569007 : return niter;
164 : }
165 :
166 : struct iv_use;
167 :
168 : /* Representation of the induction variable. */
169 : struct iv
170 : {
171 : tree base; /* Initial value of the iv. */
172 : tree base_object; /* A memory object to that the induction variable points. */
173 : tree step; /* Step of the iv (constant only). */
174 : tree ssa_name; /* The ssa name with the value. */
175 : struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */
176 : bool biv_p; /* Is it a biv? */
177 : bool no_overflow; /* True if the iv doesn't overflow. */
178 : bool have_address_use;/* For biv, indicate if it's used in any address
179 : type use. */
180 : };
181 :
182 : /* Per-ssa version information (induction variable descriptions, etc.). */
183 : struct version_info
184 : {
185 : tree name; /* The ssa name. */
186 : struct iv *iv; /* Induction variable description. */
187 : bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
188 : an expression that is not an induction variable. */
189 : bool preserve_biv; /* For the original biv, whether to preserve it. */
190 : unsigned inv_id; /* Id of an invariant. */
191 : };
192 :
/* Types of uses.  */
enum use_type
{
  /* Use in a nonlinear expression.  */
  USE_NONLINEAR_EXPR,
  /* Use is an address for an explicit memory reference.  */
  USE_REF_ADDRESS,
  /* Use is a pointer argument to a function in cases where the
     expansion of the function will turn the argument into a normal
     address.  */
  USE_PTR_ADDRESS,
  /* Use is a compare.  */
  USE_COMPARE
};
204 :
205 : /* Cost of a computation. */
206 : class comp_cost
207 : {
208 : public:
209 130744369 : comp_cost (): cost (0), complexity (0), scratch (0)
210 : {}
211 :
212 25213387 : comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
213 15192546 : : cost (cost), complexity (complexity), scratch (scratch)
214 14394155 : {}
215 :
216 : /* Returns true if COST is infinite. */
217 : bool infinite_cost_p ();
218 :
219 : /* Adds costs COST1 and COST2. */
220 : friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221 :
222 : /* Adds COST to the comp_cost. */
223 : comp_cost operator+= (comp_cost cost);
224 :
225 : /* Adds constant C to this comp_cost. */
226 : comp_cost operator+= (HOST_WIDE_INT c);
227 :
228 : /* Subtracts constant C to this comp_cost. */
229 : comp_cost operator-= (HOST_WIDE_INT c);
230 :
231 : /* Divide the comp_cost by constant C. */
232 : comp_cost operator/= (HOST_WIDE_INT c);
233 :
234 : /* Multiply the comp_cost by constant C. */
235 : comp_cost operator*= (HOST_WIDE_INT c);
236 :
237 : /* Subtracts costs COST1 and COST2. */
238 : friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239 :
240 : /* Subtracts COST from this comp_cost. */
241 : comp_cost operator-= (comp_cost cost);
242 :
243 : /* Returns true if COST1 is smaller than COST2. */
244 : friend bool operator< (comp_cost cost1, comp_cost cost2);
245 :
246 : /* Returns true if COST1 and COST2 are equal. */
247 : friend bool operator== (comp_cost cost1, comp_cost cost2);
248 :
249 : /* Returns true if COST1 is smaller or equal than COST2. */
250 : friend bool operator<= (comp_cost cost1, comp_cost cost2);
251 :
252 : int64_t cost; /* The runtime cost. */
253 : unsigned complexity; /* The estimate of the complexity of the code for
254 : the computation (in no concrete units --
255 : complexity field should be larger for more
256 : complex expressions and addressing modes). */
257 : int64_t scratch; /* Scratch used during cost computation. */
258 : };
259 :
260 : static const comp_cost no_cost;
261 : static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 :
263 : bool
264 1820712906 : comp_cost::infinite_cost_p ()
265 : {
266 1820712906 : return cost == INFTY;
267 : }
268 :
269 : comp_cost
270 243288727 : operator+ (comp_cost cost1, comp_cost cost2)
271 : {
272 243288727 : if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
273 1878723 : return infinite_cost;
274 :
275 241410004 : gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
276 241410004 : cost1.cost += cost2.cost;
277 241410004 : cost1.complexity += cost2.complexity;
278 :
279 241410004 : return cost1;
280 : }
281 :
282 : comp_cost
283 208118978 : operator- (comp_cost cost1, comp_cost cost2)
284 : {
285 208118978 : if (cost1.infinite_cost_p ())
286 0 : return infinite_cost;
287 :
288 208118978 : gcc_assert (!cost2.infinite_cost_p ());
289 208118978 : gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 :
291 208118978 : cost1.cost -= cost2.cost;
292 208118978 : cost1.complexity -= cost2.complexity;
293 :
294 208118978 : return cost1;
295 : }
296 :
297 : comp_cost
298 243288727 : comp_cost::operator+= (comp_cost cost)
299 : {
300 243288727 : *this = *this + cost;
301 243288727 : return *this;
302 : }
303 :
304 : comp_cost
305 859744855 : comp_cost::operator+= (HOST_WIDE_INT c)
306 : {
307 859744855 : if (c >= INFTY)
308 0 : this->cost = INFTY;
309 :
310 859744855 : if (infinite_cost_p ())
311 0 : return *this;
312 :
313 859744855 : gcc_assert (this->cost + c < infinite_cost.cost);
314 859744855 : this->cost += c;
315 :
316 859744855 : return *this;
317 : }
318 :
319 : comp_cost
320 542292 : comp_cost::operator-= (HOST_WIDE_INT c)
321 : {
322 542292 : if (infinite_cost_p ())
323 0 : return *this;
324 :
325 542292 : gcc_assert (this->cost - c < infinite_cost.cost);
326 542292 : this->cost -= c;
327 :
328 542292 : return *this;
329 : }
330 :
331 : comp_cost
332 0 : comp_cost::operator/= (HOST_WIDE_INT c)
333 : {
334 0 : gcc_assert (c != 0);
335 0 : if (infinite_cost_p ())
336 0 : return *this;
337 :
338 0 : this->cost /= c;
339 :
340 0 : return *this;
341 : }
342 :
343 : comp_cost
344 0 : comp_cost::operator*= (HOST_WIDE_INT c)
345 : {
346 0 : if (infinite_cost_p ())
347 0 : return *this;
348 :
349 0 : gcc_assert (this->cost * c < infinite_cost.cost);
350 0 : this->cost *= c;
351 :
352 0 : return *this;
353 : }
354 :
355 : comp_cost
356 208118978 : comp_cost::operator-= (comp_cost cost)
357 : {
358 208118978 : *this = *this - cost;
359 208118978 : return *this;
360 : }
361 :
362 : bool
363 182200644 : operator< (comp_cost cost1, comp_cost cost2)
364 : {
365 182200644 : if (cost1.cost == cost2.cost)
366 80276865 : return cost1.complexity < cost2.complexity;
367 :
368 101923779 : return cost1.cost < cost2.cost;
369 : }
370 :
371 : bool
372 3901954 : operator== (comp_cost cost1, comp_cost cost2)
373 : {
374 3901954 : return cost1.cost == cost2.cost
375 3901954 : && cost1.complexity == cost2.complexity;
376 : }
377 :
378 : bool
379 6410825 : operator<= (comp_cost cost1, comp_cost cost2)
380 : {
381 6410825 : return cost1 < cost2 || cost1 == cost2;
382 : }
383 :
384 : struct iv_inv_expr_ent;
385 :
386 : /* The candidate - cost pair. */
387 : class cost_pair
388 : {
389 : public:
390 : struct iv_cand *cand; /* The candidate. */
391 : comp_cost cost; /* The cost. */
392 : enum tree_code comp; /* For iv elimination, the comparison. */
393 : bitmap inv_vars; /* The list of invariant ssa_vars that have to be
394 : preserved when representing iv_use with iv_cand. */
395 : bitmap inv_exprs; /* The list of newly created invariant expressions
396 : when representing iv_use with iv_cand. */
397 : tree value; /* For final value elimination, the expression for
398 : the final value of the iv. For iv elimination,
399 : the new bound to compare with. */
400 : };
401 :
402 : /* Use. */
403 : struct iv_use
404 : {
405 : unsigned id; /* The id of the use. */
406 : unsigned group_id; /* The group id the use belongs to. */
407 : enum use_type type; /* Type of the use. */
408 : tree mem_type; /* The memory type to use when testing whether an
409 : address is legitimate, and what the address's
410 : cost is. */
411 : struct iv *iv; /* The induction variable it is based on. */
412 : gimple *stmt; /* Statement in that it occurs. */
413 : tree *op_p; /* The place where it occurs. */
414 :
415 : tree addr_base; /* Base address with const offset stripped. */
416 : poly_uint64 addr_offset;
417 : /* Const offset stripped from base address. */
418 : };
419 :
420 : /* Group of uses. */
421 : struct iv_group
422 : {
423 : /* The id of the group. */
424 : unsigned id;
425 : /* Uses of the group are of the same type. */
426 : enum use_type type;
427 : /* The set of "related" IV candidates, plus the important ones. */
428 : bitmap related_cands;
429 : /* Number of IV candidates in the cost_map. */
430 : unsigned n_map_members;
431 : /* The costs wrto the iv candidates. */
432 : class cost_pair *cost_map;
433 : /* The selected candidate for the group. */
434 : struct iv_cand *selected;
435 : /* To indicate this is a doloop use group. */
436 : bool doloop_p;
437 : /* Uses in the group. */
438 : vec<struct iv_use *> vuses;
439 : };
440 :
/* The position where the iv is computed.  */
enum iv_position
{
  /* At the end, just before the exit condition.  */
  IP_NORMAL,
  /* At the end of the latch block.  */
  IP_END,
  /* Immediately before a specific use.  */
  IP_BEFORE_USE,
  /* Immediately after a specific use.  */
  IP_AFTER_USE,
  /* The original biv.  */
  IP_ORIGINAL
};
450 :
451 : /* The induction variable candidate. */
452 : struct iv_cand
453 : {
454 : unsigned id; /* The number of the candidate. */
455 : bool important; /* Whether this is an "important" candidate, i.e. such
456 : that it should be considered by all uses. */
457 : bool involves_undefs; /* Whether the IV involves undefined values. */
458 : ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
459 : gimple *incremented_at;/* For original biv, the statement where it is
460 : incremented. */
461 : tree var_before; /* The variable used for it before increment. */
462 : tree var_after; /* The variable used for it after increment. */
463 : struct iv *iv; /* The value of the candidate. NULL for
464 : "pseudocandidate" used to indicate the possibility
465 : to replace the final value of an iv by direct
466 : computation of the value. */
467 : unsigned cost; /* Cost of the candidate. */
468 : unsigned cost_step; /* Cost of the candidate's increment operation. */
469 : struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
470 : where it is incremented. */
471 : bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
472 : iv_cand. */
473 : bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
474 : handle it as a new invariant expression which will
475 : be hoisted out of loop. */
476 : struct iv *orig_iv; /* The original iv if this cand is added from biv with
477 : smaller type. */
478 : bool doloop_p; /* Whether this is a doloop candidate. */
479 : };
480 :
481 : /* Hashtable entry for common candidate derived from iv uses. */
482 2618078 : class iv_common_cand
483 : {
484 : public:
485 : tree base;
486 : tree step;
487 : /* IV uses from which this common candidate is derived. */
488 : auto_vec<struct iv_use *> uses;
489 : hashval_t hash;
490 : };
491 :
492 : /* Hashtable helpers. */
493 :
494 : struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495 : {
496 : static inline hashval_t hash (const iv_common_cand *);
497 : static inline bool equal (const iv_common_cand *, const iv_common_cand *);
498 : };
499 :
500 : /* Hash function for possible common candidates. */
501 :
502 : inline hashval_t
503 9892429 : iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 : {
505 9892429 : return ccand->hash;
506 : }
507 :
508 : /* Hash table equality function for common candidates. */
509 :
510 : inline bool
511 11166474 : iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
512 : const iv_common_cand *ccand2)
513 : {
514 11166474 : return (ccand1->hash == ccand2->hash
515 1625457 : && operand_equal_p (ccand1->base, ccand2->base, 0)
516 1604505 : && operand_equal_p (ccand1->step, ccand2->step, 0)
517 12764087 : && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
518 1597613 : == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
519 : }
520 :
521 : /* Loop invariant expression hashtable entry. */
522 :
523 : struct iv_inv_expr_ent
524 : {
525 : /* Tree expression of the entry. */
526 : tree expr;
527 : /* Unique indentifier. */
528 : int id;
529 : /* Hash value. */
530 : hashval_t hash;
531 : };
532 :
533 : /* Sort iv_inv_expr_ent pair A and B by id field. */
534 :
535 : static int
536 5739 : sort_iv_inv_expr_ent (const void *a, const void *b)
537 : {
538 5739 : const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
539 5739 : const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 :
541 5739 : unsigned id1 = (*e1)->id;
542 5739 : unsigned id2 = (*e2)->id;
543 :
544 5739 : if (id1 < id2)
545 : return -1;
546 2669 : else if (id1 > id2)
547 : return 1;
548 : else
549 0 : return 0;
550 : }
551 :
552 : /* Hashtable helpers. */
553 :
554 : struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555 : {
556 : static inline hashval_t hash (const iv_inv_expr_ent *);
557 : static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
558 : };
559 :
560 : /* Return true if uses of type TYPE represent some form of address. */
561 :
562 : inline bool
563 8977127 : address_p (use_type type)
564 : {
565 8977127 : return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
566 : }
567 :
568 : /* Hash function for loop invariant expressions. */
569 :
570 : inline hashval_t
571 6719446 : iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 : {
573 6719446 : return expr->hash;
574 : }
575 :
576 : /* Hash table equality function for expressions. */
577 :
578 : inline bool
579 8075356 : iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
580 : const iv_inv_expr_ent *expr2)
581 : {
582 8075356 : return expr1->hash == expr2->hash
583 8075356 : && operand_equal_p (expr1->expr, expr2->expr, 0);
584 : }
585 :
586 : struct ivopts_data
587 : {
588 : /* The currently optimized loop. */
589 : class loop *current_loop;
590 : location_t loop_loc;
591 :
592 : /* Numbers of iterations for all exits of the current loop. */
593 : hash_map<edge, tree_niter_desc *> *niters;
594 :
595 : /* Number of registers used in it. */
596 : unsigned regs_used;
597 :
598 : /* The size of version_info array allocated. */
599 : unsigned version_info_size;
600 :
601 : /* The array of information for the ssa names. */
602 : struct version_info *version_info;
603 :
604 : /* The hashtable of loop invariant expressions created
605 : by ivopt. */
606 : hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607 :
608 : /* The bitmap of indices in version_info whose value was changed. */
609 : bitmap relevant;
610 :
611 : /* The uses of induction variables. */
612 : vec<iv_group *> vgroups;
613 :
614 : /* The candidates. */
615 : vec<iv_cand *> vcands;
616 :
617 : /* A bitmap of important candidates. */
618 : bitmap important_candidates;
619 :
620 : /* Cache used by tree_to_aff_combination_expand. */
621 : hash_map<tree, name_expansion *> *name_expansion_cache;
622 :
623 : /* The hashtable of common candidates derived from iv uses. */
624 : hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625 :
626 : /* The common candidates. */
627 : vec<iv_common_cand *> iv_common_cands;
628 :
629 : /* Hash map recording base object information of tree exp. */
630 : hash_map<tree, tree> *base_object_map;
631 :
632 : /* The maximum invariant variable id. */
633 : unsigned max_inv_var_id;
634 :
635 : /* The maximum invariant expression id. */
636 : unsigned max_inv_expr_id;
637 :
638 : /* Number of no_overflow BIVs which are not used in memory address. */
639 : unsigned bivs_not_used_in_addr;
640 :
641 : /* Obstack for iv structure. */
642 : struct obstack iv_obstack;
643 :
644 : /* Whether to consider just related and important candidates when replacing a
645 : use. */
646 : bool consider_all_candidates;
647 :
648 : /* Are we optimizing for speed? */
649 : bool speed;
650 :
651 : /* Whether the loop body includes any function calls. */
652 : bool body_includes_call;
653 :
654 : /* Whether the loop body can only be exited via single exit. */
655 : bool loop_single_exit_p;
656 :
657 : /* Whether the loop has doloop comparison use. */
658 : bool doloop_use_p;
659 : };
660 :
661 : /* An assignment of iv candidates to uses. */
662 :
663 : class iv_ca
664 : {
665 : public:
666 : /* The number of uses covered by the assignment. */
667 : unsigned upto;
668 :
669 : /* Number of uses that cannot be expressed by the candidates in the set. */
670 : unsigned bad_groups;
671 :
672 : /* Candidate assigned to a use, together with the related costs. */
673 : class cost_pair **cand_for_group;
674 :
675 : /* Number of times each candidate is used. */
676 : unsigned *n_cand_uses;
677 :
678 : /* The candidates used. */
679 : bitmap cands;
680 :
681 : /* The number of candidates in the set. */
682 : unsigned n_cands;
683 :
684 : /* The number of invariants needed, including both invariant variants and
685 : invariant expressions. */
686 : unsigned n_invs;
687 :
688 : /* Total cost of expressing uses. */
689 : comp_cost cand_use_cost;
690 :
691 : /* Total cost of candidates. */
692 : int64_t cand_cost;
693 :
694 : /* Number of times each invariant variable is used. */
695 : unsigned *n_inv_var_uses;
696 :
697 : /* Number of times each invariant expression is used. */
698 : unsigned *n_inv_expr_uses;
699 :
700 : /* Total cost of the assignment. */
701 : comp_cost cost;
702 : };
703 :
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  struct iv_group *group;       /* Changed group.  */
  class cost_pair *old_cp;      /* An old assignment (for rollback
                                   purposes).  */
  class cost_pair *new_cp;      /* A new assignment.  */
  struct iv_ca_delta *next;     /* Next change in the list.  */
};
720 :
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) param_iv_consider_all_candidates_bound)

/* If there are more iv occurrences than this, we just give up (it is
   quite unlikely that optimizing such a loop would help, and it would
   take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) param_iv_max_considered_uses)

/* If there are at most this number of ivs in the set, always try
   removing unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) param_iv_always_prune_cand_set_bound)
737 :
738 : /* The list of trees for that the decl_rtl field must be reset is stored
739 : here. */
740 :
741 : static vec<tree> decl_rtl_to_reset;
742 :
743 : static comp_cost force_expr_to_var_cost (tree, bool);
744 :
745 : /* The single loop exit if it dominates the latch, NULL otherwise. */
746 :
747 : edge
748 698812 : single_dom_exit (class loop *loop)
749 : {
750 698812 : edge exit = single_exit (loop);
751 :
752 698812 : if (!exit)
753 : return NULL;
754 :
755 466149 : if (!just_once_each_iteration_p (loop, exit->src))
756 : return NULL;
757 :
758 : return exit;
759 : }
760 :
761 : /* Dumps information about the induction variable IV to FILE. Don't dump
762 : variable's name if DUMP_NAME is FALSE. The information is dumped with
763 : preceding spaces indicated by INDENT_LEVEL. */
764 :
765 : void
766 1597 : dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767 : {
768 1597 : const char *p;
769 1597 : const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770 :
771 1597 : if (indent_level > 4)
772 : indent_level = 4;
773 1597 : p = spaces + 8 - (indent_level << 1);
774 :
775 1597 : fprintf (file, "%sIV struct:\n", p);
776 1597 : if (iv->ssa_name && dump_name)
777 : {
778 550 : fprintf (file, "%s SSA_NAME:\t", p);
779 550 : print_generic_expr (file, iv->ssa_name, TDF_SLIM);
780 550 : fprintf (file, "\n");
781 : }
782 :
783 1597 : fprintf (file, "%s Type:\t", p);
784 1597 : print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
785 1597 : fprintf (file, "\n");
786 :
787 1597 : fprintf (file, "%s Base:\t", p);
788 1597 : print_generic_expr (file, iv->base, TDF_SLIM);
789 1597 : fprintf (file, "\n");
790 :
791 1597 : fprintf (file, "%s Step:\t", p);
792 1597 : print_generic_expr (file, iv->step, TDF_SLIM);
793 1597 : fprintf (file, "\n");
794 :
795 1597 : if (iv->base_object)
796 : {
797 497 : fprintf (file, "%s Object:\t", p);
798 497 : print_generic_expr (file, iv->base_object, TDF_SLIM);
799 497 : fprintf (file, "\n");
800 : }
801 :
802 2887 : fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803 :
804 1597 : fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
805 1597 : p, iv->no_overflow ? "No-overflow" : "Overflow");
806 1597 : }
807 :
808 : /* Dumps information about the USE to FILE. */
809 :
810 : void
811 250 : dump_use (FILE *file, struct iv_use *use)
812 : {
813 250 : fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
814 250 : fprintf (file, " At stmt:\t");
815 250 : print_gimple_stmt (file, use->stmt, 0);
816 250 : fprintf (file, " At pos:\t");
817 250 : if (use->op_p)
818 160 : print_generic_expr (file, *use->op_p, TDF_SLIM);
819 250 : fprintf (file, "\n");
820 250 : dump_iv (file, use->iv, false, 2);
821 250 : }
822 :
823 : /* Dumps information about the uses to FILE. */
824 :
825 : void
826 67 : dump_groups (FILE *file, struct ivopts_data *data)
827 : {
828 67 : unsigned i, j;
829 67 : struct iv_group *group;
830 :
831 287 : for (i = 0; i < data->vgroups.length (); i++)
832 : {
833 220 : group = data->vgroups[i];
834 220 : fprintf (file, "Group %d:\n", group->id);
835 220 : if (group->type == USE_NONLINEAR_EXPR)
836 90 : fprintf (file, " Type:\tGENERIC\n");
837 130 : else if (group->type == USE_REF_ADDRESS)
838 56 : fprintf (file, " Type:\tREFERENCE ADDRESS\n");
839 74 : else if (group->type == USE_PTR_ADDRESS)
840 0 : fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
841 : else
842 : {
843 74 : gcc_assert (group->type == USE_COMPARE);
844 74 : fprintf (file, " Type:\tCOMPARE\n");
845 : }
846 470 : for (j = 0; j < group->vuses.length (); j++)
847 250 : dump_use (file, group->vuses[j]);
848 : }
849 67 : }
850 :
851 : /* Dumps information about induction variable candidate CAND to FILE. */
852 :
853 : void
854 797 : dump_cand (FILE *file, struct iv_cand *cand)
855 : {
856 797 : struct iv *iv = cand->iv;
857 :
858 797 : fprintf (file, "Candidate %d:\n", cand->id);
859 797 : if (cand->inv_vars)
860 : {
861 26 : fprintf (file, " Depend on inv.vars: ");
862 26 : dump_bitmap (file, cand->inv_vars);
863 : }
864 797 : if (cand->inv_exprs)
865 : {
866 0 : fprintf (file, " Depend on inv.exprs: ");
867 0 : dump_bitmap (file, cand->inv_exprs);
868 : }
869 :
870 797 : if (cand->var_before)
871 : {
872 687 : fprintf (file, " Var befor: ");
873 687 : print_generic_expr (file, cand->var_before, TDF_SLIM);
874 687 : fprintf (file, "\n");
875 : }
876 797 : if (cand->var_after)
877 : {
878 687 : fprintf (file, " Var after: ");
879 687 : print_generic_expr (file, cand->var_after, TDF_SLIM);
880 687 : fprintf (file, "\n");
881 : }
882 :
883 797 : switch (cand->pos)
884 : {
885 653 : case IP_NORMAL:
886 653 : fprintf (file, " Incr POS: before exit test\n");
887 653 : break;
888 :
889 0 : case IP_BEFORE_USE:
890 0 : fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
891 0 : break;
892 :
893 0 : case IP_AFTER_USE:
894 0 : fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
895 0 : break;
896 :
897 0 : case IP_END:
898 0 : fprintf (file, " Incr POS: at end\n");
899 0 : break;
900 :
901 144 : case IP_ORIGINAL:
902 144 : fprintf (file, " Incr POS: orig biv\n");
903 144 : break;
904 : }
905 :
906 797 : dump_iv (file, iv, false, 1);
907 797 : }
908 :
909 : /* Returns the info for ssa version VER. */
910 :
911 : static inline struct version_info *
912 116836149 : ver_info (struct ivopts_data *data, unsigned ver)
913 : {
914 116836149 : return data->version_info + ver;
915 : }
916 :
917 : /* Returns the info for ssa name NAME. */
918 :
919 : static inline struct version_info *
920 94611436 : name_info (struct ivopts_data *data, tree name)
921 : {
922 94611436 : return ver_info (data, SSA_NAME_VERSION (name));
923 : }
924 :
925 : /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
926 : emitted in LOOP. */
927 :
928 : static bool
929 33340143 : stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 : {
931 33340143 : basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 :
933 33340143 : gcc_assert (bb);
934 :
935 33340143 : if (sbb == loop->latch)
936 : return true;
937 :
938 33235362 : if (sbb != bb)
939 : return false;
940 :
941 19482527 : return stmt == last_nondebug_stmt (bb);
942 : }
943 :
944 : /* Returns true if STMT if after the place where the original induction
945 : variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
946 : if the positions are identical. */
947 :
948 : static bool
949 7866978 : stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 : {
951 7866978 : basic_block cand_bb = gimple_bb (cand->incremented_at);
952 7866978 : basic_block stmt_bb = gimple_bb (stmt);
953 :
954 7866978 : if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
955 : return false;
956 :
957 5437680 : if (stmt_bb != cand_bb)
958 : return true;
959 :
960 5178095 : if (true_if_equal
961 5178095 : && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
962 : return true;
963 5171540 : return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
964 : }
965 :
966 : /* Returns true if STMT if after the place where the induction variable
967 : CAND is incremented in LOOP. */
968 :
969 : static bool
970 42351243 : stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 : {
972 42351243 : switch (cand->pos)
973 : {
974 : case IP_END:
975 : return false;
976 :
977 33340143 : case IP_NORMAL:
978 33340143 : return stmt_after_ip_normal_pos (loop, stmt);
979 :
980 7856904 : case IP_ORIGINAL:
981 7856904 : case IP_AFTER_USE:
982 7856904 : return stmt_after_inc_pos (cand, stmt, false);
983 :
984 10074 : case IP_BEFORE_USE:
985 10074 : return stmt_after_inc_pos (cand, stmt, true);
986 :
987 0 : default:
988 0 : gcc_unreachable ();
989 : }
990 : }
991 :
992 : /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 :
994 : static tree
995 14606769 : contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 : {
997 14606769 : if (TREE_CODE (*tp) == SSA_NAME
998 14606769 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
999 : return *tp;
1000 :
1001 14606752 : if (!EXPR_P (*tp))
1002 10022615 : *walk_subtrees = 0;
1003 :
1004 : return NULL_TREE;
1005 : }
1006 :
1007 : /* Returns true if EXPR contains a ssa name that occurs in an
1008 : abnormal phi node. */
1009 :
1010 : bool
1011 7885419 : contains_abnormal_ssa_name_p (tree expr)
1012 : {
1013 7885419 : return walk_tree_without_duplicates
1014 7885419 : (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1015 : }
1016 :
1017 : /* Returns the structure describing number of iterations determined from
1018 : EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 :
1020 : static class tree_niter_desc *
1021 4368323 : niter_for_exit (struct ivopts_data *data, edge exit)
1022 : {
1023 4368323 : class tree_niter_desc *desc;
1024 4368323 : tree_niter_desc **slot;
1025 :
1026 4368323 : if (!data->niters)
1027 : {
1028 472130 : data->niters = new hash_map<edge, tree_niter_desc *>;
1029 472130 : slot = NULL;
1030 : }
1031 : else
1032 3896193 : slot = data->niters->get (exit);
1033 :
1034 4368323 : if (!slot)
1035 : {
1036 : /* Try to determine number of iterations. We cannot safely work with ssa
1037 : names that appear in phi nodes on abnormal edges, so that we do not
1038 : create overlapping life ranges for them (PR 27283). */
1039 484287 : desc = XNEW (class tree_niter_desc);
1040 484287 : ::new (static_cast<void*> (desc)) tree_niter_desc ();
1041 484287 : if (!number_of_iterations_exit (data->current_loop,
1042 : exit, desc, true)
1043 484287 : || contains_abnormal_ssa_name_p (desc->niter))
1044 : {
1045 39643 : desc->~tree_niter_desc ();
1046 39643 : XDELETE (desc);
1047 39643 : desc = NULL;
1048 : }
1049 484287 : data->niters->put (exit, desc);
1050 : }
1051 : else
1052 3884036 : desc = *slot;
1053 :
1054 4368323 : return desc;
1055 : }
1056 :
1057 : /* Returns the structure describing number of iterations determined from
1058 : single dominating exit of DATA->current_loop, or NULL if something
1059 : goes wrong. */
1060 :
1061 : static class tree_niter_desc *
1062 67 : niter_for_single_dom_exit (struct ivopts_data *data)
1063 : {
1064 67 : edge exit = single_dom_exit (data->current_loop);
1065 :
1066 67 : if (!exit)
1067 : return NULL;
1068 :
1069 57 : return niter_for_exit (data, exit);
1070 : }
1071 :
1072 : /* Initializes data structures used by the iv optimization pass, stored
1073 : in DATA. */
1074 :
1075 : static void
1076 240808 : tree_ssa_iv_optimize_init (struct ivopts_data *data)
1077 : {
1078 240808 : data->version_info_size = 2 * num_ssa_names;
1079 240808 : data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1080 240808 : data->relevant = BITMAP_ALLOC (NULL);
1081 240808 : data->important_candidates = BITMAP_ALLOC (NULL);
1082 240808 : data->max_inv_var_id = 0;
1083 240808 : data->max_inv_expr_id = 0;
1084 240808 : data->niters = NULL;
1085 240808 : data->vgroups.create (20);
1086 240808 : data->vcands.create (20);
1087 240808 : data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1088 240808 : data->name_expansion_cache = NULL;
1089 240808 : data->base_object_map = NULL;
1090 240808 : data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1091 240808 : data->iv_common_cands.create (20);
1092 240808 : decl_rtl_to_reset.create (20);
1093 240808 : gcc_obstack_init (&data->iv_obstack);
1094 240808 : }
1095 :
1096 : /* walk_tree callback for determine_base_object. */
1097 :
1098 : static tree
1099 19263585 : determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1100 : {
1101 19263585 : tree_code code = TREE_CODE (*tp);
1102 19263585 : tree obj = NULL_TREE;
1103 19263585 : if (code == ADDR_EXPR)
1104 : {
1105 1027724 : tree base = get_base_address (TREE_OPERAND (*tp, 0));
1106 1027724 : if (!base)
1107 0 : obj = *tp;
1108 1027724 : else if (TREE_CODE (base) != MEM_REF)
1109 1027696 : obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1110 : }
1111 18235861 : else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1112 1897619 : obj = fold_convert (ptr_type_node, *tp);
1113 :
1114 2925315 : if (!obj)
1115 : {
1116 16338270 : if (!EXPR_P (*tp))
1117 7154029 : *walk_subtrees = 0;
1118 :
1119 16338270 : return NULL_TREE;
1120 : }
1121 : /* Record special node for multiple base objects and stop. */
1122 2925315 : if (*static_cast<tree *> (wdata))
1123 : {
1124 4254 : *static_cast<tree *> (wdata) = integer_zero_node;
1125 4254 : return integer_zero_node;
1126 : }
1127 : /* Record the base object and continue looking. */
1128 2921061 : *static_cast<tree *> (wdata) = obj;
1129 2921061 : return NULL_TREE;
1130 : }
1131 :
1132 : /* Returns a memory object to that EXPR points with caching. Return NULL if we
1133 : are able to determine that it does not point to any such object; specially
1134 : return integer_zero_node if EXPR contains multiple base objects. */
1135 :
1136 : static tree
1137 10387274 : determine_base_object (struct ivopts_data *data, tree expr)
1138 : {
1139 10387274 : tree *slot, obj = NULL_TREE;
1140 10387274 : if (data->base_object_map)
1141 : {
1142 10224320 : if ((slot = data->base_object_map->get(expr)) != NULL)
1143 4744749 : return *slot;
1144 : }
1145 : else
1146 162954 : data->base_object_map = new hash_map<tree, tree>;
1147 :
1148 5642525 : (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1149 5642525 : data->base_object_map->put (expr, obj);
1150 5642525 : return obj;
1151 : }
1152 :
1153 : /* Allocates an induction variable with given initial value BASE and step STEP
1154 : for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1155 :
1156 : static struct iv *
1157 10387274 : alloc_iv (struct ivopts_data *data, tree base, tree step,
1158 : bool no_overflow = false)
1159 : {
1160 10387274 : tree expr = base;
1161 10387274 : struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1162 : sizeof (struct iv));
1163 10387274 : gcc_assert (step != NULL_TREE);
1164 :
1165 : /* Canonicalize the address expression in base if it were an unsigned
1166 : computation. That leads to more equalities being detected and results in:
1167 :
1168 : 1) More accurate cost can be computed for address expressions;
1169 : 2) Duplicate candidates won't be created for bases in different
1170 : forms, like &a[0] and &a.
1171 : 3) Duplicate candidates won't be created for IV expressions that differ
1172 : only in their sign. */
1173 10387274 : aff_tree comb;
1174 10387274 : STRIP_NOPS (expr);
1175 10387274 : expr = fold_convert (unsigned_type_for (TREE_TYPE (expr)), expr);
1176 10387274 : tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1177 10387274 : base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1178 :
1179 10387274 : iv->base = base;
1180 10387274 : iv->base_object = determine_base_object (data, base);
1181 10387274 : iv->step = step;
1182 10387274 : iv->biv_p = false;
1183 10387274 : iv->nonlin_use = NULL;
1184 10387274 : iv->ssa_name = NULL_TREE;
1185 10387274 : if (!no_overflow
1186 10387274 : && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1187 : base, step))
1188 : no_overflow = true;
1189 10387274 : iv->no_overflow = no_overflow;
1190 10387274 : iv->have_address_use = false;
1191 :
1192 20774548 : return iv;
1193 10387274 : }
1194 :
1195 : /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1196 : doesn't overflow. */
1197 :
1198 : static void
1199 4892459 : set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1200 : bool no_overflow)
1201 : {
1202 4892459 : struct version_info *info = name_info (data, iv);
1203 :
1204 4892459 : gcc_assert (!info->iv);
1205 :
1206 4892459 : bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1207 4892459 : info->iv = alloc_iv (data, base, step, no_overflow);
1208 4892459 : info->iv->ssa_name = iv;
1209 4892459 : }
1210 :
1211 : /* Finds induction variable declaration for VAR. */
1212 :
1213 : static struct iv *
1214 44357099 : get_iv (struct ivopts_data *data, tree var)
1215 : {
1216 44357099 : basic_block bb;
1217 44357099 : tree type = TREE_TYPE (var);
1218 :
1219 44357099 : if (!POINTER_TYPE_P (type)
1220 35289057 : && !INTEGRAL_TYPE_P (type))
1221 : return NULL;
1222 :
1223 38651339 : if (!name_info (data, var)->iv)
1224 : {
1225 17946959 : bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1226 :
1227 17946959 : if (!bb
1228 17946959 : || !flow_bb_inside_loop_p (data->current_loop, bb))
1229 : {
1230 800668 : if (POINTER_TYPE_P (type))
1231 316496 : type = sizetype;
1232 800668 : set_iv (data, var, var, build_int_cst (type, 0), true);
1233 : }
1234 : }
1235 :
1236 38651339 : return name_info (data, var)->iv;
1237 : }
1238 :
1239 : /* Return the first non-invariant ssa var found in EXPR. */
1240 :
1241 : static tree
1242 4074635 : extract_single_var_from_expr (tree expr)
1243 : {
1244 4074635 : int i, n;
1245 4074635 : tree tmp;
1246 4074635 : enum tree_code code;
1247 :
1248 4074635 : if (!expr || is_gimple_min_invariant (expr))
1249 3396142 : return NULL;
1250 :
1251 678493 : code = TREE_CODE (expr);
1252 678493 : if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1253 : {
1254 373578 : n = TREE_OPERAND_LENGTH (expr);
1255 747227 : for (i = 0; i < n; i++)
1256 : {
1257 373649 : tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1258 :
1259 373649 : if (tmp)
1260 : return tmp;
1261 : }
1262 : }
1263 304915 : return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1264 : }
1265 :
1266 : /* Finds basic ivs. */
1267 :
1268 : static bool
1269 629857 : find_bivs (struct ivopts_data *data)
1270 : {
1271 629857 : gphi *phi;
1272 629857 : affine_iv iv;
1273 629857 : tree step, type, base, stop;
1274 629857 : bool found = false;
1275 629857 : class loop *loop = data->current_loop;
1276 629857 : gphi_iterator psi;
1277 :
1278 2350638 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1279 : {
1280 1720781 : phi = psi.phi ();
1281 :
1282 1720781 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1283 237 : continue;
1284 :
1285 1720544 : if (virtual_operand_p (PHI_RESULT (phi)))
1286 413309 : continue;
1287 :
1288 1307235 : if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1289 433028 : continue;
1290 :
1291 874207 : if (integer_zerop (iv.step))
1292 0 : continue;
1293 :
1294 874207 : step = iv.step;
1295 874207 : base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1296 : /* Stop expanding iv base at the first ssa var referred by iv step.
1297 : Ideally we should stop at any ssa var, because that's expensive
1298 : and unusual to happen, we just do it on the first one.
1299 :
1300 : See PR64705 for the rationale. */
1301 874207 : stop = extract_single_var_from_expr (step);
1302 874207 : base = expand_simple_operations (base, stop);
1303 874207 : if (contains_abnormal_ssa_name_p (base)
1304 874207 : || contains_abnormal_ssa_name_p (step))
1305 10 : continue;
1306 :
1307 874197 : type = TREE_TYPE (PHI_RESULT (phi));
1308 874197 : base = fold_convert (type, base);
1309 874197 : if (step)
1310 : {
1311 874197 : if (POINTER_TYPE_P (type))
1312 164762 : step = convert_to_ptrofftype (step);
1313 : else
1314 709435 : step = fold_convert (type, step);
1315 : }
1316 :
1317 874197 : set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1318 874197 : found = true;
1319 : }
1320 :
1321 629857 : return found;
1322 : }
1323 :
1324 : /* Marks basic ivs. */
1325 :
1326 : static void
1327 503696 : mark_bivs (struct ivopts_data *data)
1328 : {
1329 503696 : gphi *phi;
1330 503696 : gimple *def;
1331 503696 : tree var;
1332 503696 : struct iv *iv, *incr_iv;
1333 503696 : class loop *loop = data->current_loop;
1334 503696 : basic_block incr_bb;
1335 503696 : gphi_iterator psi;
1336 :
1337 503696 : data->bivs_not_used_in_addr = 0;
1338 1958873 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1339 : {
1340 1455177 : phi = psi.phi ();
1341 :
1342 1455177 : iv = get_iv (data, PHI_RESULT (phi));
1343 1455177 : if (!iv)
1344 580980 : continue;
1345 :
1346 874197 : var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1347 874197 : def = SSA_NAME_DEF_STMT (var);
1348 : /* Don't mark iv peeled from other one as biv. */
1349 875777 : if (def
1350 874197 : && gimple_code (def) == GIMPLE_PHI
1351 876909 : && gimple_bb (def) == loop->header)
1352 1580 : continue;
1353 :
1354 872617 : incr_iv = get_iv (data, var);
1355 872617 : if (!incr_iv)
1356 1143 : continue;
1357 :
1358 : /* If the increment is in the subloop, ignore it. */
1359 871474 : incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1360 871474 : if (incr_bb->loop_father != data->current_loop
1361 871474 : || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1362 0 : continue;
1363 :
1364 871474 : iv->biv_p = true;
1365 871474 : incr_iv->biv_p = true;
1366 871474 : if (iv->no_overflow)
1367 580608 : data->bivs_not_used_in_addr++;
1368 871474 : if (incr_iv->no_overflow)
1369 572086 : data->bivs_not_used_in_addr++;
1370 : }
1371 503696 : }
1372 :
1373 : /* Checks whether STMT defines a linear induction variable and stores its
1374 : parameters to IV. */
1375 :
1376 : static bool
1377 12536987 : find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1378 : {
1379 12536987 : tree lhs, stop;
1380 12536987 : class loop *loop = data->current_loop;
1381 :
1382 12536987 : iv->base = NULL_TREE;
1383 12536987 : iv->step = NULL_TREE;
1384 :
1385 12536987 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
1386 : return false;
1387 :
1388 10516968 : lhs = gimple_assign_lhs (stmt);
1389 10516968 : if (TREE_CODE (lhs) != SSA_NAME)
1390 : return false;
1391 :
1392 18810018 : if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1393 : return false;
1394 :
1395 : /* Stop expanding iv base at the first ssa var referred by iv step.
1396 : Ideally we should stop at any ssa var, because that's expensive
1397 : and unusual to happen, we just do it on the first one.
1398 :
1399 : See PR64705 for the rationale. */
1400 2826779 : stop = extract_single_var_from_expr (iv->step);
1401 2826779 : iv->base = expand_simple_operations (iv->base, stop);
1402 2826779 : if (contains_abnormal_ssa_name_p (iv->base)
1403 2826779 : || contains_abnormal_ssa_name_p (iv->step))
1404 6 : return false;
1405 :
1406 : /* If STMT could throw, then do not consider STMT as defining a GIV.
1407 : While this will suppress optimizations, we cannot safely delete this
1408 : GIV and associated statements, even if it appears it is not used. */
1409 2826773 : if (stmt_could_throw_p (cfun, stmt))
1410 : return false;
1411 :
1412 : return true;
1413 : }
1414 :
1415 : /* Finds general ivs in statement STMT. */
1416 :
1417 : static void
1418 12536987 : find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1419 : {
1420 12536987 : affine_iv iv;
1421 :
1422 12536987 : if (!find_givs_in_stmt_scev (data, stmt, &iv))
1423 9710222 : return;
1424 :
1425 2826765 : set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1426 : }
1427 :
1428 : /* Finds general ivs in basic block BB. */
1429 :
1430 : static void
1431 2798138 : find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1432 : {
1433 2798138 : gimple_stmt_iterator bsi;
1434 :
1435 27132171 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1436 21535895 : if (!is_gimple_debug (gsi_stmt (bsi)))
1437 12536987 : find_givs_in_stmt (data, gsi_stmt (bsi));
1438 2798138 : }
1439 :
1440 : /* Finds general ivs. */
1441 :
1442 : static void
1443 503696 : find_givs (struct ivopts_data *data, basic_block *body)
1444 : {
1445 503696 : class loop *loop = data->current_loop;
1446 503696 : unsigned i;
1447 :
1448 3301834 : for (i = 0; i < loop->num_nodes; i++)
1449 2798138 : find_givs_in_bb (data, body[i]);
1450 503696 : }
1451 :
1452 : /* For each ssa name defined in LOOP determines whether it is an induction
1453 : variable and if so, its initial value and step. */
1454 :
1455 : static bool
1456 629857 : find_induction_variables (struct ivopts_data *data, basic_block *body)
1457 : {
1458 629857 : unsigned i;
1459 629857 : bitmap_iterator bi;
1460 :
1461 629857 : if (!find_bivs (data))
1462 : return false;
1463 :
1464 503696 : find_givs (data, body);
1465 503696 : mark_bivs (data);
1466 :
1467 503696 : if (dump_file && (dump_flags & TDF_DETAILS))
1468 : {
1469 67 : class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1470 :
1471 67 : if (niter)
1472 : {
1473 51 : fprintf (dump_file, " number of iterations ");
1474 51 : print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1475 51 : if (!integer_zerop (niter->may_be_zero))
1476 : {
1477 1 : fprintf (dump_file, "; zero if ");
1478 1 : print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1479 : }
1480 51 : fprintf (dump_file, "\n");
1481 67 : };
1482 :
1483 67 : fprintf (dump_file, "\n<Induction Vars>:\n");
1484 819 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1485 : {
1486 752 : struct version_info *info = ver_info (data, i);
1487 752 : if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1488 550 : dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1489 : }
1490 : }
1491 :
1492 : return true;
1493 : }
1494 :
1495 : /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1496 : For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1497 : is the const offset stripped from IV base and MEM_TYPE is the type
1498 : of the memory being addressed. For uses of other types, ADDR_BASE
1499 : and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1500 :
1501 : static struct iv_use *
1502 2084817 : record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1503 : gimple *stmt, enum use_type type, tree mem_type,
1504 : tree addr_base, poly_uint64 addr_offset)
1505 : {
1506 2084817 : struct iv_use *use = XCNEW (struct iv_use);
1507 :
1508 2084817 : use->id = group->vuses.length ();
1509 2084817 : use->group_id = group->id;
1510 2084817 : use->type = type;
1511 2084817 : use->mem_type = mem_type;
1512 2084817 : use->iv = iv;
1513 2084817 : use->stmt = stmt;
1514 2084817 : use->op_p = use_p;
1515 2084817 : use->addr_base = addr_base;
1516 2084817 : use->addr_offset = addr_offset;
1517 :
1518 2084817 : group->vuses.safe_push (use);
1519 2084817 : return use;
1520 : }
1521 :
1522 : /* Checks whether OP is a loop-level invariant and if so, records it.
1523 : NONLINEAR_USE is true if the invariant is used in a way we do not
1524 : handle specially. */
1525 :
1526 : static void
1527 22574170 : record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1528 : {
1529 22574170 : basic_block bb;
1530 22574170 : struct version_info *info;
1531 :
1532 22574170 : if (TREE_CODE (op) != SSA_NAME
1533 22574170 : || virtual_operand_p (op))
1534 : return;
1535 :
1536 21395159 : bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1537 21395159 : if (bb
1538 21395159 : && flow_bb_inside_loop_p (data->current_loop, bb))
1539 : return;
1540 :
1541 3825879 : info = name_info (data, op);
1542 3825879 : info->name = op;
1543 3825879 : info->has_nonlin_use |= nonlinear_use;
1544 3825879 : if (!info->inv_id)
1545 1336137 : info->inv_id = ++data->max_inv_var_id;
1546 3825879 : bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1547 : }
1548 :
1549 : /* Record a group of TYPE. */
1550 :
1551 : static struct iv_group *
1552 1808881 : record_group (struct ivopts_data *data, enum use_type type)
1553 : {
1554 1808881 : struct iv_group *group = XCNEW (struct iv_group);
1555 :
1556 1808881 : group->id = data->vgroups.length ();
1557 1808881 : group->type = type;
1558 1808881 : group->related_cands = BITMAP_ALLOC (NULL);
1559 1808881 : group->vuses.create (1);
1560 1808881 : group->doloop_p = false;
1561 :
1562 1808881 : data->vgroups.safe_push (group);
1563 1808881 : return group;
1564 : }
1565 :
1566 : /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1567 : New group will be created if there is no existing group for the use.
1568 : MEM_TYPE is the type of memory being addressed, or NULL if this
1569 : isn't an address reference. */
1570 :
1571 : static struct iv_use *
1572 2084817 : record_group_use (struct ivopts_data *data, tree *use_p,
1573 : struct iv *iv, gimple *stmt, enum use_type type,
1574 : tree mem_type)
1575 : {
1576 2084817 : tree addr_base = NULL;
1577 2084817 : struct iv_group *group = NULL;
1578 2084817 : poly_uint64 addr_offset = 0;
1579 :
1580 : /* Record non address type use in a new group. */
1581 2084817 : if (address_p (type))
1582 : {
1583 858709 : unsigned int i;
1584 :
1585 858709 : gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1586 858709 : tree addr_toffset;
1587 858709 : split_constant_offset (iv->base, &addr_base, &addr_toffset);
1588 858709 : addr_offset = int_cst_value (addr_toffset);
1589 1616084 : for (i = 0; i < data->vgroups.length (); i++)
1590 : {
1591 1086117 : struct iv_use *use;
1592 :
1593 1086117 : group = data->vgroups[i];
1594 1086117 : use = group->vuses[0];
1595 1086117 : if (!address_p (use->type))
1596 333510 : continue;
1597 :
1598 : /* Check if it has the same stripped base and step. */
1599 752607 : if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1600 397397 : && operand_equal_p (iv->step, use->iv->step, OEP_ASSUME_WRAPV)
1601 1146906 : && operand_equal_p (addr_base, use->addr_base, OEP_ASSUME_WRAPV))
1602 : break;
1603 : }
1604 1717418 : if (i == data->vgroups.length ())
1605 529967 : group = NULL;
1606 : }
1607 :
1608 858709 : if (!group)
1609 1756075 : group = record_group (data, type);
1610 :
1611 2084817 : return record_use (group, use_p, iv, stmt, type, mem_type,
1612 2084817 : addr_base, addr_offset);
1613 : }
1614 :
1615 : /* Checks whether the use OP is interesting and if so, records it. */
1616 :
1617 : static struct iv_use *
1618 7229367 : find_interesting_uses_op (struct ivopts_data *data, tree op)
1619 : {
1620 7229367 : struct iv *iv;
1621 7229367 : gimple *stmt;
1622 7229367 : struct iv_use *use;
1623 :
1624 7229367 : if (TREE_CODE (op) != SSA_NAME)
1625 : return NULL;
1626 :
1627 5821394 : iv = get_iv (data, op);
1628 5821394 : if (!iv)
1629 : return NULL;
1630 :
1631 2518792 : if (iv->nonlin_use)
1632 : {
1633 198075 : gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1634 : return iv->nonlin_use;
1635 : }
1636 :
1637 2320717 : if (integer_zerop (iv->step))
1638 : {
1639 1692951 : record_invariant (data, op, true);
1640 1692951 : return NULL;
1641 : }
1642 :
1643 627766 : stmt = SSA_NAME_DEF_STMT (op);
1644 627766 : gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1645 :
1646 627766 : use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1647 627766 : iv->nonlin_use = use;
1648 627766 : return use;
1649 : }
1650 :
/* Indicate how compare type iv_use can be handled.  This is the result
   type of extract_cond_operands.  */
enum comp_iv_rewrite
{
  /* The initial classification in extract_cond_operands, kept when none of
     the cases below applies.  */
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1664 :
1665 : /* Given a condition in statement STMT, checks whether it is a compare
1666 : of an induction variable and an invariant. If this is the case,
1667 : CONTROL_VAR is set to location of the iv, BOUND to the location of
1668 : the invariant, IV_VAR and IV_BOUND are set to the corresponding
1669 : induction variable descriptions, and true is returned. If this is not
1670 : the case, CONTROL_VAR and BOUND are set to the arguments of the
1671 : condition and false is returned. */
1672 :
1673 : static enum comp_iv_rewrite
1674 7448181 : extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1675 : tree **control_var, tree **bound,
1676 : struct iv **iv_var, struct iv **iv_bound)
1677 : {
1678 : /* The objects returned when COND has constant operands. */
1679 7448181 : static struct iv const_iv;
1680 7448181 : static tree zero;
1681 7448181 : tree *op0 = &zero, *op1 = &zero;
1682 7448181 : struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1683 7448181 : enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1684 :
1685 7448181 : if (gimple_code (stmt) == GIMPLE_COND)
1686 : {
1687 7181885 : gcond *cond_stmt = as_a <gcond *> (stmt);
1688 7181885 : op0 = gimple_cond_lhs_ptr (cond_stmt);
1689 7181885 : op1 = gimple_cond_rhs_ptr (cond_stmt);
1690 : }
1691 : else
1692 : {
1693 266296 : op0 = gimple_assign_rhs1_ptr (stmt);
1694 266296 : op1 = gimple_assign_rhs2_ptr (stmt);
1695 : }
1696 :
1697 7448181 : zero = integer_zero_node;
1698 7448181 : const_iv.step = integer_zero_node;
1699 :
1700 7448181 : if (TREE_CODE (*op0) == SSA_NAME)
1701 7448024 : iv0 = get_iv (data, *op0);
1702 7448181 : if (TREE_CODE (*op1) == SSA_NAME)
1703 3355255 : iv1 = get_iv (data, *op1);
1704 :
1705 : /* If both sides of comparison are IVs. We can express ivs on both end. */
1706 7448181 : if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1707 : {
1708 89408 : rewrite_type = COMP_IV_EXPR_2;
1709 89408 : goto end;
1710 : }
1711 :
1712 : /* If none side of comparison is IV. */
1713 5781129 : if ((!iv0 || integer_zerop (iv0->step))
1714 8698924 : && (!iv1 || integer_zerop (iv1->step)))
1715 944116 : goto end;
1716 :
1717 : /* Control variable may be on the other side. */
1718 6414657 : if (!iv0 || integer_zerop (iv0->step))
1719 : {
1720 : std::swap (op0, op1);
1721 : std::swap (iv0, iv1);
1722 : }
1723 : /* If one side is IV and the other side isn't loop invariant. */
1724 6414657 : if (!iv1)
1725 : rewrite_type = COMP_IV_EXPR;
1726 : /* If one side is IV and the other side is loop invariant. */
1727 5434176 : else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1728 : rewrite_type = COMP_IV_ELIM;
1729 :
1730 7448181 : end:
1731 7448181 : if (control_var)
1732 7448181 : *control_var = op0;
1733 7448181 : if (iv_var)
1734 1540432 : *iv_var = iv0;
1735 7448181 : if (bound)
1736 7448181 : *bound = op1;
1737 7448181 : if (iv_bound)
1738 7448181 : *iv_bound = iv1;
1739 :
1740 7448181 : return rewrite_type;
1741 : }
1742 :
1743 : /* Checks whether the condition in STMT is interesting and if so,
1744 : records it. */
1745 :
1746 : static void
1747 1540432 : find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1748 : {
1749 1540432 : tree *var_p, *bound_p;
1750 1540432 : struct iv *var_iv, *bound_iv;
1751 1540432 : enum comp_iv_rewrite ret;
1752 :
1753 1540432 : ret = extract_cond_operands (data, stmt,
1754 : &var_p, &bound_p, &var_iv, &bound_iv);
1755 1540432 : if (ret == COMP_IV_NA)
1756 : {
1757 944116 : find_interesting_uses_op (data, *var_p);
1758 944116 : find_interesting_uses_op (data, *bound_p);
1759 944116 : return;
1760 : }
1761 :
1762 596316 : record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1763 : /* Record compare type iv_use for iv on the other side of comparison. */
1764 596316 : if (ret == COMP_IV_EXPR_2)
1765 2026 : record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1766 : }
1767 :
1768 : /* Returns the outermost loop EXPR is obviously invariant in
1769 : relative to the loop LOOP, i.e. if all its operands are defined
1770 : outside of the returned loop. Returns NULL if EXPR is not
1771 : even obviously invariant in LOOP. */
1772 :
1773 : class loop *
1774 247907 : outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1775 : {
1776 247907 : basic_block def_bb;
1777 247907 : unsigned i, len;
1778 :
1779 247907 : if (is_gimple_min_invariant (expr))
1780 37245 : return current_loops->tree_root;
1781 :
1782 210662 : if (TREE_CODE (expr) == SSA_NAME)
1783 : {
1784 129265 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1785 129265 : if (def_bb)
1786 : {
1787 80328 : if (flow_bb_inside_loop_p (loop, def_bb))
1788 : return NULL;
1789 160644 : return superloop_at_depth (loop,
1790 105447 : loop_depth (def_bb->loop_father) + 1);
1791 : }
1792 :
1793 48937 : return current_loops->tree_root;
1794 : }
1795 :
1796 81397 : if (!EXPR_P (expr))
1797 : return NULL;
1798 :
1799 81397 : unsigned maxdepth = 0;
1800 81397 : len = TREE_OPERAND_LENGTH (expr);
1801 211580 : for (i = 0; i < len; i++)
1802 : {
1803 130201 : class loop *ivloop;
1804 130201 : if (!TREE_OPERAND (expr, i))
1805 0 : continue;
1806 :
1807 130201 : ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1808 130201 : if (!ivloop)
1809 : return NULL;
1810 228245 : maxdepth = MAX (maxdepth, loop_depth (ivloop));
1811 : }
1812 :
1813 81379 : return superloop_at_depth (loop, maxdepth);
1814 : }
1815 :
1816 : /* Returns true if expression EXPR is obviously invariant in LOOP,
1817 : i.e. if all its operands are defined outside of the LOOP. LOOP
1818 : should not be the function body. */
1819 :
1820 : bool
1821 12164459 : expr_invariant_in_loop_p (class loop *loop, tree expr)
1822 : {
1823 12164459 : basic_block def_bb;
1824 12164459 : unsigned i, len;
1825 :
1826 12164459 : gcc_assert (loop_depth (loop) > 0);
1827 :
1828 12164459 : if (is_gimple_min_invariant (expr))
1829 : return true;
1830 :
1831 8520948 : if (TREE_CODE (expr) == SSA_NAME)
1832 : {
1833 8092857 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1834 8092857 : if (def_bb
1835 8092857 : && flow_bb_inside_loop_p (loop, def_bb))
1836 : return false;
1837 :
1838 4131191 : return true;
1839 : }
1840 :
1841 428091 : if (!EXPR_P (expr))
1842 : return false;
1843 :
1844 428088 : len = TREE_OPERAND_LENGTH (expr);
1845 917890 : for (i = 0; i < len; i++)
1846 538758 : if (TREE_OPERAND (expr, i)
1847 538758 : && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1848 : return false;
1849 :
1850 : return true;
1851 : }
1852 :
1853 : /* Given expression EXPR which computes inductive values with respect
1854 : to loop recorded in DATA, this function returns biv from which EXPR
1855 : is derived by tracing definition chains of ssa variables in EXPR. Returns NULL when no such biv is found. */
1856 :
1857 : static struct iv*
1858 870986 : find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1859 : {
1860 1407345 : struct iv *iv;
1861 1407345 : unsigned i, n;
1862 1407345 : tree e2, e1;
1863 1407345 : enum tree_code code;
1864 1407345 : gimple *stmt;
1865 :
1866 1407345 : if (expr == NULL_TREE)
1867 : return NULL;
1868 :
1869 1407032 : if (is_gimple_min_invariant (expr))
1870 : return NULL;
1871 :
1872 1122980 : code = TREE_CODE (expr);
1873 1122980 : if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) /* Compound expression: return the first biv found among its operands. */
1874 : {
1875 21629 : n = TREE_OPERAND_LENGTH (expr);
1876 23737 : for (i = 0; i < n; i++)
1877 : {
1878 23214 : iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1879 23214 : if (iv)
1880 : return iv;
1881 : }
1882 : }
1883 :
1884 : /* Stop if it's not ssa name. */
1885 1101874 : if (code != SSA_NAME)
1886 : return NULL;
1887 :
1888 1100733 : iv = get_iv (data, expr);
1889 1100733 : if (!iv || integer_zerop (iv->step)) /* No iv, or an iv with zero step: nothing to derive from. */
1890 46603 : return NULL;
1891 1054130 : else if (iv->biv_p) /* EXPR itself is a biv. */
1892 : return iv;
1893 :
1894 783224 : stmt = SSA_NAME_DEF_STMT (expr);
1895 783224 : if (gphi *phi = dyn_cast <gphi *> (stmt))
1896 : {
1897 1864 : ssa_op_iter iter;
1898 1864 : use_operand_p use_p;
1899 1864 : basic_block phi_bb = gimple_bb (phi);
1900 :
1901 : /* Skip loop header PHI that doesn't define biv. */
1902 1864 : if (phi_bb->loop_father == data->current_loop)
1903 : return NULL;
1904 :
1905 0 : if (virtual_operand_p (gimple_phi_result (phi)))
1906 : return NULL;
1907 :
1908 0 : FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE) /* Return the first biv found through any PHI argument. */
1909 : {
1910 0 : tree use = USE_FROM_PTR (use_p);
1911 0 : iv = find_deriving_biv_for_expr (data, use);
1912 0 : if (iv)
1913 : return iv;
1914 : }
1915 : return NULL;
1916 : }
1917 781360 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
1918 : return NULL;
1919 :
1920 781360 : e1 = gimple_assign_rhs1 (stmt);
1921 781360 : code = gimple_assign_rhs_code (stmt);
1922 781360 : if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) /* Plain copy or single-operand rhs: trace through it. */
1923 : return find_deriving_biv_for_expr (data, e1);
1924 :
1925 771295 : switch (code)
1926 : {
1927 573943 : case MULT_EXPR:
1928 573943 : case PLUS_EXPR:
1929 573943 : case MINUS_EXPR:
1930 573943 : case POINTER_PLUS_EXPR:
1931 : /* Increments, decrements and multiplications by a constant
1932 : are simple. The second operand is searched first, then the first one through the fallthrough. */
1933 573943 : e2 = gimple_assign_rhs2 (stmt);
1934 573943 : iv = find_deriving_biv_for_expr (data, e2);
1935 573943 : if (iv)
1936 : return iv;
1937 526294 : gcc_fallthrough ();
1938 :
1939 526294 : CASE_CONVERT:
1940 : /* Casts are simple. */
1941 526294 : return find_deriving_biv_for_expr (data, e1);
1942 :
1943 : default:
1944 : break;
1945 : }
1946 :
1947 : return NULL;
1948 : }
1949 :
1950 : /* Record that BIV, together with its predecessor and successor, is used in
1951 : address type uses. A predecessor or successor is another biv of the same integral type and equal step whose base differs from the base of BIV by exactly one step. Nothing is done unless BIV is a not yet marked, no-overflow biv with nonzero step. Each biv marked here decrements DATA->bivs_not_used_in_addr. */
1952 :
1953 : static void
1954 597270 : record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1955 : {
1956 597270 : unsigned i;
1957 597270 : tree type, base_1, base_2;
1958 597270 : bitmap_iterator bi;
1959 :
1960 594347 : if (!biv || !biv->biv_p || integer_zerop (biv->step)
1961 1191617 : || biv->have_address_use || !biv->no_overflow)
1962 332256 : return;
1963 :
1964 528667 : type = TREE_TYPE (biv->base);
1965 528667 : if (!INTEGRAL_TYPE_P (type))
1966 : return;
1967 :
1968 265014 : biv->have_address_use = true;
1969 265014 : data->bivs_not_used_in_addr--;
1970 265014 : base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step); /* Base a successor of BIV would have. */
1971 2440336 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1972 : {
1973 2175322 : struct iv *iv = ver_info (data, i)->iv;
1974 :
1975 1962291 : if (!iv || !iv->biv_p || integer_zerop (iv->step)
1976 3078445 : || iv->have_address_use || !iv->no_overflow)
1977 1882674 : continue;
1978 :
1979 292648 : if (type != TREE_TYPE (iv->base)
1980 292648 : || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1981 31187 : continue;
1982 :
1983 261461 : if (!operand_equal_p (biv->step, iv->step, 0))
1984 5794 : continue;
1985 :
1986 255667 : base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step); /* IV advanced by one step; equal to the base of BIV when IV is its predecessor. */
1987 255667 : if (operand_equal_p (base_1, iv->base, 0)
1988 255667 : || operand_equal_p (base_2, biv->base, 0))
1989 : {
1990 228902 : iv->have_address_use = true;
1991 228902 : data->bivs_not_used_in_addr--;
1992 : }
1993 : }
1994 : }
1995 :
1996 : /* Accumulates the steps of indices into DATA and replaces their values with the
1997 : initial ones. Returns false when the value of the index cannot be determined.
1998 : Callback for for_each_index. DATA points to a struct ifs_ivopts_data, whose STEP field holds the running sum in sizetype. */
1999 :
2000 : struct ifs_ivopts_data
2001 : {
2002 : struct ivopts_data *ivopts_data; /* Pass data; supplies the current loop and the bivs_not_used_in_addr counter. */
2003 : gimple *stmt; /* Statement handed to convert_affine_scev. */
2004 : tree step; /* Sum of the steps found so far. */
2005 : };
2006 :
2007 : static bool
2008 2218954 : idx_find_step (tree base, tree *idx, void *data)
2009 : {
2010 2218954 : struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2011 2218954 : struct iv *iv;
2012 2218954 : bool use_overflow_semantics = false;
2013 2218954 : tree step, iv_base, iv_step, lbound, off;
2014 2218954 : class loop *loop = dta->ivopts_data->current_loop;
2015 :
2016 : /* If base is a component ref, require that the offset of the reference
2017 : be invariant. */
2018 2218954 : if (TREE_CODE (base) == COMPONENT_REF)
2019 : {
2020 78 : off = component_ref_field_offset (base);
2021 78 : return expr_invariant_in_loop_p (loop, off);
2022 : }
2023 :
2024 : /* If base is array, first check whether we will be able to move the
2025 : reference out of the loop (in order to take its address in strength
2026 : reduction). In order for this to work we need both lower bound
2027 : and step to be loop invariants. */
2028 2218876 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2029 : {
2030 : /* Moreover, for a range, the size needs to be invariant as well. */
2031 521025 : if (TREE_CODE (base) == ARRAY_RANGE_REF
2032 521025 : && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2033 : return false;
2034 :
2035 521025 : step = array_ref_element_size (base);
2036 521025 : lbound = array_ref_low_bound (base);
2037 :
2038 521025 : if (!expr_invariant_in_loop_p (loop, step)
2039 521025 : || !expr_invariant_in_loop_p (loop, lbound))
2040 3150 : return false;
2041 : }
2042 :
2043 2215726 : if (TREE_CODE (*idx) != SSA_NAME) /* A non-SSA index contributes no step. */
2044 : return true;
2045 :
2046 1802998 : iv = get_iv (dta->ivopts_data, *idx);
2047 1802998 : if (!iv)
2048 : return false;
2049 :
2050 : /* XXX We produce for a base of *D42 with iv->base being &x[0]
2051 : *&x[0], which is not folded and does not trigger the
2052 : ARRAY_REF path below. */
2053 1165880 : *idx = iv->base;
2054 :
2055 1165880 : if (integer_zerop (iv->step))
2056 : return true;
2057 :
2058 875202 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2059 : {
2060 306904 : step = array_ref_element_size (base);
2061 :
2062 : /* We only handle addresses whose step is an integer constant. */
2063 306904 : if (TREE_CODE (step) != INTEGER_CST)
2064 : return false;
2065 : }
2066 : else
2067 : /* The step for pointer arithmetics already is 1 byte. */
2068 568298 : step = size_one_node;
2069 :
2070 875185 : iv_base = iv->base;
2071 875185 : iv_step = iv->step;
2072 875185 : if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2073 : use_overflow_semantics = true;
2074 :
2075 875185 : if (!convert_affine_scev (dta->ivopts_data->current_loop,
2076 : sizetype, &iv_base, &iv_step, dta->stmt,
2077 : use_overflow_semantics))
2078 : {
2079 : /* The index might wrap. */
2080 : return false;
2081 : }
2082 :
2083 871868 : step = fold_build2 (MULT_EXPR, sizetype, step, iv_step); /* Element size times the converted iv step. */
2084 871868 : dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2085 :
2086 871868 : if (dta->ivopts_data->bivs_not_used_in_addr) /* Only while some biv is still unmarked: mark the biv this index derives from. */
2087 : {
2088 597270 : if (!iv->biv_p)
2089 273829 : iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2090 :
2091 597270 : record_biv_for_address_use (dta->ivopts_data, iv);
2092 : }
2093 : return true;
2094 : }
2095 :
2096 : /* Records use in index IDX. Callback for for_each_index. Ivopts data
2097 : object is passed to it in VDATA. For an array reference BASE, operands 2 and 3 of BASE are recorded as well when present. Always returns true. */
2098 :
2099 : static bool
2100 1811652 : idx_record_use (tree base, tree *idx,
2101 : void *vdata)
2102 : {
2103 1811652 : struct ivopts_data *data = (struct ivopts_data *) vdata;
2104 1811652 : find_interesting_uses_op (data, *idx);
2105 1811652 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2106 : {
2107 231646 : if (TREE_OPERAND (base, 2))
2108 5502 : find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2109 231646 : if (TREE_OPERAND (base, 3))
2110 16796 : find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2111 : }
2112 1811652 : return true;
2113 : }
2114 :
2115 : /* If we can prove that TOP = cst * BOT for some constant cst,
2116 : store cst to MUL and return true. Otherwise return false.
2117 : The returned value is always sign-extended, regardless of the
2118 : signedness of TOP and BOT. DATA supplies the name expansion cache used when TOP and BOT are expanded into affine combinations. */
2119 :
2120 : static bool
2121 17195595 : constant_multiple_of (tree top, tree bot, widest_int *mul,
2122 : struct ivopts_data *data)
2123 : {
2124 34391190 : aff_tree aff_top, aff_bot;
2125 17195595 : tree_to_aff_combination_expand (top, TREE_TYPE (top), &aff_top,
2126 : &data->name_expansion_cache);
2127 17195595 : tree_to_aff_combination_expand (bot, TREE_TYPE (bot), &aff_bot,
2128 : &data->name_expansion_cache);
2129 :
2130 17195595 : poly_widest_int poly_mul;
2131 17195595 : if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul)
2132 17195595 : && poly_mul.is_constant (mul)) /* NOTE(review): presumably stores the multiple to MUL only when it is a plain constant -- confirm against poly_int. */
2133 14254477 : return true;
2134 :
2135 : return false;
2136 17195595 : }
2137 :
2138 : /* Return true if memory reference REF with step STEP may be unaligned. */
2139 :
2140 : static bool
2141 0 : may_be_unaligned_p (tree ref, tree step)
2142 : {
2143 : /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2144 : thus they are not misaligned. */
2145 0 : if (TREE_CODE (ref) == TARGET_MEM_REF)
2146 : return false;
2147 :
2148 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (ref)); /* Required alignment: the larger of the type alignment and the mode alignment. */
2149 0 : if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2150 0 : align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2151 :
2152 0 : unsigned HOST_WIDE_INT bitpos;
2153 0 : unsigned int ref_align;
2154 0 : get_object_alignment_1 (ref, &ref_align, &bitpos);
2155 0 : if (ref_align < align /* The reference itself must be known to be aligned at least to ALIGN, */
2156 0 : || (bitpos % align) != 0 /* at a bit position that is a multiple of ALIGN, */
2157 0 : || (bitpos % BITS_PER_UNIT) != 0) /* and on a byte boundary. */
2158 : return true;
2159 :
2160 0 : unsigned int trailing_zeros = tree_ctz (step);
2161 0 : if (trailing_zeros < HOST_BITS_PER_INT /* Keeps the shift below within range. */
2162 0 : && (1U << trailing_zeros) * BITS_PER_UNIT < align) /* The power of two given by the trailing zeros of STEP, in bits, is smaller than ALIGN. */
2163 : return true;
2164 :
2165 : return false;
2166 : }
2167 :
2168 : /* Return true if EXPR may be non-addressable. BIT_FIELD_REF, COMPONENT_REF, ARRAY_REF, ARRAY_RANGE_REF and VIEW_CONVERT_EXPR are followed down to operand 0; tree codes that are not listed below are reported as addressable. */
2169 :
2170 : bool
2171 13094699 : may_be_nonaddressable_p (tree expr)
2172 : {
2173 13973739 : switch (TREE_CODE (expr))
2174 : {
2175 9311163 : case VAR_DECL:
2176 : /* Check if it's a register variable. */
2177 9311163 : return DECL_HARD_REGISTER (expr);
2178 :
2179 : case TARGET_MEM_REF:
2180 : /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2181 : target, thus they are always addressable. */
2182 : return false;
2183 :
2184 1960433 : case MEM_REF:
2185 : /* Likewise for MEM_REFs, modulo the storage order. */
2186 1960433 : return REF_REVERSE_STORAGE_ORDER (expr);
2187 :
2188 80 : case BIT_FIELD_REF:
2189 80 : if (REF_REVERSE_STORAGE_ORDER (expr))
2190 : return true;
2191 80 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2192 :
2193 1247729 : case COMPONENT_REF:
2194 1247729 : if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2195 : return true;
2196 1247729 : return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1)) /* The field itself may be marked non-addressable. */
2197 1247729 : || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2198 :
2199 857339 : case ARRAY_REF:
2200 857339 : case ARRAY_RANGE_REF:
2201 857339 : if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2202 : return true;
2203 857339 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2204 :
2205 21631 : case VIEW_CONVERT_EXPR:
2206 : /* This kind of view-conversions may wrap non-addressable objects
2207 : and make them look addressable. After some processing the
2208 : non-addressability may be uncovered again, causing ADDR_EXPRs
2209 : of inappropriate objects to be built. */
2210 21631 : if (is_gimple_reg (TREE_OPERAND (expr, 0))
2211 21631 : || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2212 : return true;
2213 21621 : return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2214 :
2215 : CASE_CONVERT:
2216 : return true;
2217 :
2218 : default:
2219 : break;
2220 : }
2221 :
2222 : return false;
2223 : }
2224 :
2225 : /* Finds addresses in *OP_P inside STMT. On success a USE_REF_ADDRESS group use is recorded for *OP_P; on any failure the indices of *OP_P are recorded individually through idx_record_use instead. */
2226 :
2227 : static void
2228 2699088 : find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2229 : tree *op_p)
2230 : {
2231 2699088 : tree base = *op_p, step = size_zero_node;
2232 2699088 : struct iv *civ;
2233 2699088 : struct ifs_ivopts_data ifs_ivopts_data;
2234 :
2235 : /* Do not play with volatile memory references. A bit too conservative,
2236 : perhaps, but safe. */
2237 5398176 : if (gimple_has_volatile_ops (stmt))
2238 7529 : goto fail;
2239 :
2240 : /* Ignore bitfields for now. Not really something terribly complicated
2241 : to handle. TODO. */
2242 2691559 : if (TREE_CODE (base) == BIT_FIELD_REF)
2243 94993 : goto fail;
2244 :
2245 2596566 : base = unshare_expr (base); /* Work on a copy; *OP_P itself is left untouched by the substitutions below. */
2246 :
2247 2596566 : if (TREE_CODE (base) == TARGET_MEM_REF)
2248 : {
2249 314754 : tree type = build_pointer_type (TREE_TYPE (base));
2250 314754 : tree astep;
2251 :
2252 314754 : if (TMR_BASE (base)
2253 314754 : && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2254 : {
2255 292572 : civ = get_iv (data, TMR_BASE (base));
2256 292572 : if (!civ)
2257 257384 : goto fail;
2258 :
2259 35188 : TMR_BASE (base) = civ->base;
2260 35188 : step = civ->step;
2261 : }
2262 57370 : if (TMR_INDEX2 (base)
2263 57370 : && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2264 : {
2265 14056 : civ = get_iv (data, TMR_INDEX2 (base));
2266 14056 : if (!civ)
2267 4981 : goto fail;
2268 :
2269 9075 : TMR_INDEX2 (base) = civ->base;
2270 9075 : step = civ->step; /* NOTE(review): this overwrites, rather than adds to, the step taken from TMR_BASE above -- confirm that is intended. */
2271 : }
2272 52389 : if (TMR_INDEX (base)
2273 52389 : && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2274 : {
2275 52389 : civ = get_iv (data, TMR_INDEX (base));
2276 52389 : if (!civ)
2277 52389 : goto fail;
2278 :
2279 0 : TMR_INDEX (base) = civ->base;
2280 0 : astep = civ->step;
2281 :
2282 0 : if (astep)
2283 : {
2284 0 : if (TMR_STEP (base)) /* Scale the index step by the multiplier of the reference. */
2285 0 : astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2286 :
2287 0 : step = fold_build2 (PLUS_EXPR, type, step, astep);
2288 : }
2289 : }
2290 :
2291 0 : if (integer_zerop (step)) /* No component of the address varies. */
2292 0 : goto fail;
2293 0 : base = tree_mem_ref_addr (type, base);
2294 : }
2295 : else
2296 : {
2297 2281812 : ifs_ivopts_data.ivopts_data = data;
2298 2281812 : ifs_ivopts_data.stmt = stmt;
2299 2281812 : ifs_ivopts_data.step = size_zero_node;
2300 2281812 : if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2301 2281812 : || integer_zerop (ifs_ivopts_data.step))
2302 1411912 : goto fail;
2303 869900 : step = ifs_ivopts_data.step;
2304 :
2305 : /* Check that the base expression is addressable. This needs
2306 : to be done after substituting bases of IVs into it. */
2307 869900 : if (may_be_nonaddressable_p (base))
2308 776 : goto fail;
2309 :
2310 : /* Moreover, on strict alignment platforms, check that it is
2311 : sufficiently aligned. */
2312 869124 : if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2313 : goto fail;
2314 :
2315 869124 : base = build_fold_addr_expr (base);
2316 :
2317 : /* Substituting bases of IVs into the base expression might
2318 : have caused folding opportunities. */
2319 869124 : if (TREE_CODE (base) == ADDR_EXPR)
2320 : {
2321 460370 : tree *ref = &TREE_OPERAND (base, 0);
2322 1584232 : while (handled_component_p (*ref)) /* Descend to the innermost reference. */
2323 663492 : ref = &TREE_OPERAND (*ref, 0);
2324 460370 : if (TREE_CODE (*ref) == MEM_REF)
2325 : {
2326 301496 : tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2327 : TREE_OPERAND (*ref, 0),
2328 : TREE_OPERAND (*ref, 1));
2329 301496 : if (tem)
2330 0 : *ref = tem;
2331 : }
2332 : }
2333 : }
2334 :
2335 869124 : civ = alloc_iv (data, base, step);
2336 : /* Fail if base object of this memory reference is unknown. */
2337 869124 : if (civ->base_object == NULL_TREE)
2338 11210 : goto fail;
2339 :
2340 857914 : record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2341 857914 : return;
2342 :
2343 1841174 : fail:
2344 1841174 : for_each_index (op_p, idx_record_use, data); /* Fall back to recording each index of the original operand. */
2345 : }
2346 :
2347 : /* Finds and records invariants used in STMT. Every SSA use operand of STMT (a PHI or an ordinary statement) is passed to record_invariant with false as its last argument. */
2348 :
2349 : static void
2350 15392372 : find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2351 : {
2352 15392372 : ssa_op_iter iter;
2353 15392372 : use_operand_p use_p;
2354 15392372 : tree op;
2355 :
2356 51275134 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2357 : {
2358 20490390 : op = USE_FROM_PTR (use_p);
2359 20490390 : record_invariant (data, op, false);
2360 : }
2361 15392372 : }
2362 :
2363 : /* CALL calls an internal function. If operand *OP_P will become an
2364 : address when the call is expanded, return the type of the memory
2365 : being addressed, otherwise return null. Only argument 0 of the load and store functions listed below qualifies: loads give the type of the lhs of CALL, stores the type of the stored-value argument. */
2366 :
2367 : static tree
2368 1783 : get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2369 : {
2370 1783 : switch (gimple_call_internal_fn (call))
2371 : {
2372 371 : case IFN_MASK_LOAD:
2373 371 : case IFN_MASK_LOAD_LANES:
2374 371 : case IFN_MASK_LEN_LOAD_LANES:
2375 371 : case IFN_LEN_LOAD:
2376 371 : case IFN_MASK_LEN_LOAD:
2377 371 : if (op_p == gimple_call_arg_ptr (call, 0)) /* OP_P is compared by address against the slot of argument 0. */
2378 371 : return TREE_TYPE (gimple_call_lhs (call));
2379 : return NULL_TREE;
2380 :
2381 424 : case IFN_MASK_STORE:
2382 424 : case IFN_MASK_STORE_LANES:
2383 424 : case IFN_MASK_LEN_STORE_LANES:
2384 424 : case IFN_LEN_STORE:
2385 424 : case IFN_MASK_LEN_STORE:
2386 424 : {
2387 424 : if (op_p == gimple_call_arg_ptr (call, 0))
2388 : {
2389 424 : internal_fn ifn = gimple_call_internal_fn (call);
2390 424 : int index = internal_fn_stored_value_index (ifn); /* Position of the stored value among the call arguments. */
2391 424 : return TREE_TYPE (gimple_call_arg (call, index));
2392 : }
2393 : return NULL_TREE;
2394 : }
2395 :
2396 : default:
2397 : return NULL_TREE;
2398 : }
2399 : }
2400 :
2401 : /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2402 : Return true if the operand will become an address when STMT
2403 : is expanded and record the associated address use if so. Currently only calls to internal functions are recognized, via get_mem_type_for_internal_fn. */
2404 :
2405 : static bool
2406 1704570 : find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2407 : struct iv *iv)
2408 : {
2409 : /* Fail if base object of this memory reference is unknown. */
2410 1704570 : if (iv->base_object == NULL_TREE)
2411 : return false;
2412 :
2413 642426 : tree mem_type = NULL_TREE;
2414 642426 : if (gcall *call = dyn_cast <gcall *> (stmt))
2415 121575 : if (gimple_call_internal_p (call))
2416 1783 : mem_type = get_mem_type_for_internal_fn (call, op_p);
2417 1783 : if (mem_type)
2418 : {
2419 795 : iv = alloc_iv (data, iv->base, iv->step); /* A fresh iv with the same base and step is recorded as a USE_PTR_ADDRESS use. */
2420 795 : record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2421 795 : return true;
2422 : }
2423 : return false;
2424 : }
2425 :
2426 : /* Finds interesting uses of induction variables in the statement STMT. Invariants of STMT are recorded first. Conditions and comparison assignments are handed to find_interesting_uses_cond; single-rhs assignments are handed to find_interesting_uses_address for references and to find_interesting_uses_op for gimple values; statements that define an iv with nonzero step are skipped. For everything else each SSA use that has an iv is recorded through find_address_like_use or find_interesting_uses_op. */
2427 :
2428 : static void
2429 15392372 : find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2430 : {
2431 15392372 : struct iv *iv;
2432 15392372 : tree op, *lhs, *rhs;
2433 15392372 : ssa_op_iter iter;
2434 15392372 : use_operand_p use_p;
2435 15392372 : enum tree_code code;
2436 :
2437 15392372 : find_invariants_stmt (data, stmt);
2438 :
2439 15392372 : if (gimple_code (stmt) == GIMPLE_COND)
2440 : {
2441 1456284 : find_interesting_uses_cond (data, stmt);
2442 9011350 : return;
2443 : }
2444 :
2445 13936088 : if (is_gimple_assign (stmt))
2446 : {
2447 10516968 : lhs = gimple_assign_lhs_ptr (stmt);
2448 10516968 : rhs = gimple_assign_rhs1_ptr (stmt);
2449 :
2450 10516968 : if (TREE_CODE (*lhs) == SSA_NAME)
2451 : {
2452 : /* If the statement defines an induction variable, the uses are not
2453 : interesting by themselves. */
2454 :
2455 9405009 : iv = get_iv (data, *lhs);
2456 :
2457 9405009 : if (iv && !integer_zerop (iv->step))
2458 : return;
2459 : }
2460 :
2461 8174339 : code = gimple_assign_rhs_code (stmt);
2462 8174339 : if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2463 8174339 : && (REFERENCE_CLASS_P (*rhs)
2464 1257910 : || is_gimple_val (*rhs)))
2465 : {
2466 2797808 : if (REFERENCE_CLASS_P (*rhs)) /* Load: the rhs is a memory reference. */
2467 1735425 : find_interesting_uses_address (data, stmt, rhs);
2468 : else
2469 1062383 : find_interesting_uses_op (data, *rhs);
2470 :
2471 2797808 : if (REFERENCE_CLASS_P (*lhs)) /* Store: the lhs is a memory reference. */
2472 963663 : find_interesting_uses_address (data, stmt, lhs);
2473 2797808 : return;
2474 : }
2475 5376531 : else if (TREE_CODE_CLASS (code) == tcc_comparison)
2476 : {
2477 84148 : find_interesting_uses_cond (data, stmt);
2478 84148 : return;
2479 : }
2480 :
2481 : /* TODO -- we should also handle address uses of type
2482 :
2483 : memory = call (whatever);
2484 :
2485 : and
2486 :
2487 : call (memory). */
2488 : }
2489 :
2490 8711503 : if (gimple_code (stmt) == GIMPLE_PHI /* A loop header PHI that defines an iv with nonzero step is not interesting by itself. */
2491 8711503 : && gimple_bb (stmt) == data->current_loop->header)
2492 : {
2493 1455177 : iv = get_iv (data, PHI_RESULT (stmt));
2494 :
2495 1455177 : if (iv && !integer_zerop (iv->step))
2496 : return;
2497 : }
2498 :
2499 26327379 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2500 : {
2501 10652767 : op = USE_FROM_PTR (use_p);
2502 :
2503 10652767 : if (TREE_CODE (op) != SSA_NAME)
2504 517483 : continue;
2505 :
2506 10135284 : iv = get_iv (data, op);
2507 10135284 : if (!iv)
2508 8430714 : continue;
2509 :
2510 1704570 : if (!find_address_like_use (data, stmt, use_p->use, iv)) /* Prefer an address-like use; otherwise record a plain operand use. */
2511 1703775 : find_interesting_uses_op (data, op);
2512 : }
2513 : }
2514 :
2515 : /* Finds interesting uses of induction variables outside of loops
2516 : on loop exit edge EXIT. The PHI nodes of the destination block of EXIT are scanned, and the argument each one receives over EXIT is recorded unless it is a virtual operand. */
2517 :
2518 : static void
2519 887274 : find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2520 : {
2521 887274 : gphi *phi;
2522 887274 : gphi_iterator psi;
2523 887274 : tree def;
2524 :
2525 1981935 : for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2526 : {
2527 1094661 : phi = psi.phi ();
2528 1094661 : def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2529 2097744 : if (!virtual_operand_p (def))
2530 539420 : find_interesting_uses_op (data, def);
2531 : }
2532 887274 : }
2533 :
2534 : /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2535 : mode for memory reference represented by USE. The check builds a (reg + OFFSET) address and asks memory_address_addr_space_p whether it is valid for the memory mode and address space of USE. */
2536 :
2537 : static GTY (()) vec<rtx, va_gc> *addr_list; /* Cached PLUS address templates, indexed by address space * MAX_MACHINE_MODE + memory mode. */
2538 :
2539 : static bool
2540 218118 : addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2541 : {
2542 218118 : rtx reg, addr;
2543 218118 : unsigned list_index;
2544 218118 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2545 218118 : machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2546 :
2547 218118 : list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2548 218118 : if (list_index >= vec_safe_length (addr_list)) /* Grow the cache, zero-filled, so that the slot exists. */
2549 10145 : vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2550 :
2551 218118 : addr = (*addr_list)[list_index];
2552 218118 : if (!addr) /* First query for this address space and mode: create and cache the template. */
2553 : {
2554 13248 : addr_mode = targetm.addr_space.address_mode (as);
2555 13248 : reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2556 13248 : addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2557 13248 : (*addr_list)[list_index] = addr;
2558 : }
2559 : else
2560 204870 : addr_mode = GET_MODE (addr);
2561 :
2562 218118 : XEXP (addr, 1) = gen_int_mode (offset, addr_mode); /* Plug OFFSET into the second operand of the cached template. */
2563 218118 : return (memory_address_addr_space_p (mem_mode, addr, as));
2564 : }
2565 :
2566 : /* Comparison function to sort group in ascending order of addr_offset. A and B point to elements of a vector of struct iv_use pointers; the result is that of compare_sizes_for_sort on the two addr_offset values. */
2567 :
2568 : static int
2569 3120962 : group_compare_offset (const void *a, const void *b)
2570 : {
2571 3120962 : const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2572 3120962 : const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2573 :
2574 3120962 : return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2575 : }
2576 :
2577 : /* Check if small groups should be split. Return true if no group
2578 : contains more than two uses with distinct addr_offsets. Return
2579 : false otherwise. We want to split such groups because:
2580 :
2581 : 1) Small groups don't have much benefit and may interfere with
2582 : general candidate selection.
2583 : 2) Size for problem with only small groups is usually small and
2584 : general algorithm can handle it well.
2585 :
2586 : TODO -- Above claim may not hold when we want to merge memory
2587 : accesses with consecutive addresses. As a side effect, the uses of every group holding more than one use are sorted in ascending order of addr_offset. */
2588 :
2589 : static bool
2590 503696 : split_small_address_groups_p (struct ivopts_data *data)
2591 : {
2592 503696 : unsigned int i, j, distinct = 1;
2593 503696 : struct iv_use *pre;
2594 503696 : struct iv_group *group;
2595 :
2596 2098669 : for (i = 0; i < data->vgroups.length (); i++)
2597 : {
2598 1594973 : group = data->vgroups[i];
2599 1594973 : if (group->vuses.length () == 1)
2600 1456343 : continue;
2601 :
2602 138630 : gcc_assert (address_p (group->type));
2603 138630 : if (group->vuses.length () == 2) /* Two uses: a single compare-and-swap sorts them. */
2604 : {
2605 79147 : if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2606 79147 : group->vuses[1]->addr_offset) > 0)
2607 19110 : std::swap (group->vuses[0], group->vuses[1]);
2608 : }
2609 : else
2610 59483 : group->vuses.qsort (group_compare_offset);
2611 :
2612 138630 : if (distinct > 2) /* The answer is already known; later groups are still sorted above. */
2613 14017 : continue;
2614 :
2615 124613 : distinct = 1;
2616 1785886 : for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++) /* Count distinct offsets in the sorted uses, stopping at three. */
2617 : {
2618 190913 : if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2619 : {
2620 133467 : pre = group->vuses[j];
2621 133467 : distinct++;
2622 : }
2623 :
2624 190913 : if (distinct > 2)
2625 : break;
2626 : }
2627 : }
2628 :
2629 503696 : return (distinct <= 2);
2630 : }
2631 :
2632 : /* For each group of address type uses, this function further groups
2633 : these uses according to the maximum offset supported by target's
2634 : [base + offset] addressing mode. Uses that are split off are moved to a single new group per original group; the id and group_id of every use that stays are set. */
2635 :
2636 : static void
2637 503696 : split_address_groups (struct ivopts_data *data)
2638 : {
2639 503696 : unsigned int i, j;
2640 : /* True if every use with an offset different from the first use is to be split off, regardless of the addressing mode. */
2641 503696 : bool split_p = split_small_address_groups_p (data);
2642 :
2643 2151475 : for (i = 0; i < data->vgroups.length (); i++) /* The length is re-read each iteration, so groups appended by record_group are visited too. */
2644 : {
2645 1647779 : struct iv_group *new_group = NULL;
2646 1647779 : struct iv_group *group = data->vgroups[i];
2647 1647779 : struct iv_use *use = group->vuses[0];
2648 :
2649 1647779 : use->id = 0;
2650 1647779 : use->group_id = group->id;
2651 1647779 : if (group->vuses.length () == 1)
2652 1503580 : continue;
2653 :
2654 144199 : gcc_assert (address_p (use->type));
2655 :
2656 1982572 : for (j = 1; j < group->vuses.length ();)
2657 : {
2658 334793 : struct iv_use *next = group->vuses[j];
2659 334793 : poly_int64 offset = next->addr_offset - use->addr_offset;
2660 :
2661 : /* Split group if asked to, or the offset against the first
2662 : use can't fit in offset part of addressing mode. IV uses
2663 : having the same offset are still kept in one group. */
2664 393650 : if (maybe_ne (offset, 0)
2665 334793 : && (split_p || !addr_offset_valid_p (use, offset)))
2666 : {
2667 58857 : if (!new_group)
2668 52806 : new_group = record_group (data, group->type);
2669 58857 : group->vuses.ordered_remove (j); /* J is not advanced: the following use moves into slot J. */
2670 58857 : new_group->vuses.safe_push (next);
2671 58857 : continue;
2672 : }
2673 :
2674 275936 : next->id = j;
2675 275936 : next->group_id = group->id;
2676 275936 : j++;
2677 : }
2678 : }
2679 503696 : }
2680 :
2681 : /* Finds uses of the induction variables that are interesting. BODY is indexed from 0 up to the num_nodes of the current loop. For each of these blocks the edges leaving the loop, the PHI nodes and the non-debug statements are scanned; afterwards the address groups are split and, when detailed dumping is enabled, the groups are dumped. */
2682 :
2683 : static void
2684 503696 : find_interesting_uses (struct ivopts_data *data, basic_block *body)
2685 : {
2686 503696 : basic_block bb;
2687 503696 : gimple_stmt_iterator bsi;
2688 503696 : unsigned i;
2689 503696 : edge e;
2690 :
2691 3301834 : for (i = 0; i < data->current_loop->num_nodes; i++)
2692 : {
2693 2798138 : edge_iterator ei;
2694 2798138 : bb = body[i];
2695 :
2696 7137769 : FOR_EACH_EDGE (e, ei, bb->succs)
2697 4339631 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) /* Only edges leaving the loop to a block other than the function exit block. */
2698 4339631 : && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2699 887274 : find_interesting_uses_outside (data, e);
2700 :
2701 5653523 : for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2702 2855385 : find_interesting_uses_stmt (data, gsi_stmt (bsi));
2703 27132171 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2704 21535895 : if (!is_gimple_debug (gsi_stmt (bsi))) /* Debug statements are ignored. */
2705 12536987 : find_interesting_uses_stmt (data, gsi_stmt (bsi));
2706 : }
2707 :
2708 503696 : split_address_groups (data);
2709 :
2710 503696 : if (dump_file && (dump_flags & TDF_DETAILS))
2711 : {
2712 67 : fprintf (dump_file, "\n<IV Groups>:\n");
2713 67 : dump_groups (dump_file, data);
2714 67 : fprintf (dump_file, "\n");
2715 : }
2716 503696 : }
2717 :
2718 : /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2719 : is true, assume we are inside an address. If TOP_COMPREF is true, assume
2720 : we are at the top-level of the processed address. */
2721 :
2722 : static tree
2723 3400705 : strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2724 : poly_int64 *offset)
2725 : {
2726 3400705 : tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2727 3400705 : enum tree_code code;
2728 3400705 : tree type, orig_type = TREE_TYPE (expr);
2729 3400705 : poly_int64 off0, off1;
2730 3400705 : HOST_WIDE_INT st;
2731 3400705 : tree orig_expr = expr;
2732 :
2733 3400705 : STRIP_NOPS (expr);
2734 :
2735 3400705 : type = TREE_TYPE (expr);
2736 3400705 : code = TREE_CODE (expr);
2737 3400705 : *offset = 0;
2738 :
2739 3400705 : switch (code)
2740 : {
2741 625436 : case POINTER_PLUS_EXPR:
2742 625436 : case PLUS_EXPR:
2743 625436 : case MINUS_EXPR:
2744 625436 : op0 = TREE_OPERAND (expr, 0);
2745 625436 : op1 = TREE_OPERAND (expr, 1);
2746 :
2747 625436 : op0 = strip_offset_1 (op0, false, false, &off0);
2748 625436 : op1 = strip_offset_1 (op1, false, false, &off1);
2749 :
2750 625436 : *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2751 625436 : if (op0 == TREE_OPERAND (expr, 0)
2752 625436 : && op1 == TREE_OPERAND (expr, 1))
2753 : return orig_expr;
2754 :
2755 386349 : if (integer_zerop (op1))
2756 : expr = op0;
2757 3179 : else if (integer_zerop (op0))
2758 : {
2759 602 : if (code == MINUS_EXPR)
2760 : {
2761 602 : if (TYPE_OVERFLOW_UNDEFINED (type))
2762 : {
2763 0 : type = unsigned_type_for (type);
2764 0 : op1 = fold_convert (type, op1);
2765 : }
2766 602 : expr = fold_build1 (NEGATE_EXPR, type, op1);
2767 : }
2768 : else
2769 : expr = op1;
2770 : }
2771 : else
2772 : {
2773 2577 : if (TYPE_OVERFLOW_UNDEFINED (type))
2774 : {
2775 0 : type = unsigned_type_for (type);
2776 0 : if (code == POINTER_PLUS_EXPR)
2777 0 : code = PLUS_EXPR;
2778 0 : op0 = fold_convert (type, op0);
2779 0 : op1 = fold_convert (type, op1);
2780 : }
2781 2577 : expr = fold_build2 (code, type, op0, op1);
2782 : }
2783 :
2784 386349 : return fold_convert (orig_type, expr);
2785 :
2786 222532 : case MULT_EXPR:
2787 222532 : op1 = TREE_OPERAND (expr, 1);
2788 222532 : if (!cst_and_fits_in_hwi (op1))
2789 : return orig_expr;
2790 :
2791 182831 : op0 = TREE_OPERAND (expr, 0);
2792 182831 : op0 = strip_offset_1 (op0, false, false, &off0);
2793 182831 : if (op0 == TREE_OPERAND (expr, 0))
2794 : return orig_expr;
2795 :
2796 7244 : *offset = off0 * int_cst_value (op1);
2797 7244 : if (integer_zerop (op0))
2798 : expr = op0;
2799 : else
2800 : {
2801 7244 : if (TYPE_OVERFLOW_UNDEFINED (type))
2802 : {
2803 0 : type = unsigned_type_for (type);
2804 0 : op0 = fold_convert (type, op0);
2805 0 : op1 = fold_convert (type, op1);
2806 : }
2807 7244 : expr = fold_build2 (MULT_EXPR, type, op0, op1);
2808 : }
2809 :
2810 7244 : return fold_convert (orig_type, expr);
2811 :
2812 11 : case ARRAY_REF:
2813 11 : case ARRAY_RANGE_REF:
2814 11 : if (!inside_addr)
2815 : return orig_expr;
2816 :
2817 11 : step = array_ref_element_size (expr);
2818 11 : if (!cst_and_fits_in_hwi (step))
2819 : break;
2820 :
2821 11 : st = int_cst_value (step);
2822 11 : op1 = TREE_OPERAND (expr, 1);
2823 11 : op1 = strip_offset_1 (op1, false, false, &off1);
2824 11 : *offset = off1 * st;
2825 :
2826 11 : if (top_compref
2827 11 : && integer_zerop (op1))
2828 : {
2829 : /* Strip the component reference completely. */
2830 9 : op0 = TREE_OPERAND (expr, 0);
2831 9 : op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2832 9 : *offset += off0;
2833 9 : return op0;
2834 : }
2835 : break;
2836 :
2837 1 : case COMPONENT_REF:
2838 1 : {
2839 1 : tree field;
2840 :
2841 1 : if (!inside_addr)
2842 : return orig_expr;
2843 :
2844 1 : tmp = component_ref_field_offset (expr);
2845 1 : field = TREE_OPERAND (expr, 1);
2846 1 : if (top_compref
2847 1 : && cst_and_fits_in_hwi (tmp)
2848 2 : && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2849 : {
2850 1 : HOST_WIDE_INT boffset, abs_off;
2851 :
2852 : /* Strip the component reference completely. */
2853 1 : op0 = TREE_OPERAND (expr, 0);
2854 1 : op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2855 1 : boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2856 1 : abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2857 1 : if (boffset < 0)
2858 0 : abs_off = -abs_off;
2859 :
2860 1 : *offset = off0 + int_cst_value (tmp) + abs_off;
2861 1 : return op0;
2862 : }
2863 : }
2864 : break;
2865 :
2866 319371 : case ADDR_EXPR:
2867 319371 : op0 = TREE_OPERAND (expr, 0);
2868 319371 : op0 = strip_offset_1 (op0, true, true, &off0);
2869 319371 : *offset += off0;
2870 :
2871 319371 : if (op0 == TREE_OPERAND (expr, 0))
2872 : return orig_expr;
2873 :
2874 10 : expr = build_fold_addr_expr (op0);
2875 10 : return fold_convert (orig_type, expr);
2876 :
2877 : case MEM_REF:
2878 : /* ??? Offset operand? */
2879 : inside_addr = false;
2880 : break;
2881 :
2882 2233352 : default:
2883 2233352 : if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2884 869539 : return build_int_cst (orig_type, 0);
2885 : return orig_expr;
2886 : }
2887 :
2888 : /* Default handling of expressions for that we want to recurse into
2889 : the first operand. */
2890 4 : op0 = TREE_OPERAND (expr, 0);
2891 4 : op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2892 4 : *offset += off0;
2893 :
2894 4 : if (op0 == TREE_OPERAND (expr, 0)
2895 4 : && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2896 : return orig_expr;
2897 :
2898 1 : expr = copy_node (expr);
2899 1 : TREE_OPERAND (expr, 0) = op0;
2900 1 : if (op1)
2901 1 : TREE_OPERAND (expr, 1) = op1;
2902 :
2903 : /* Inside address, we might strip the top level component references,
2904 : thus changing type of the expression. Handling of ADDR_EXPR
2905 : will fix that. */
2906 1 : expr = fold_convert (orig_type, expr);
2907 :
2908 1 : return expr;
2909 : }
2910 :
2911 : /* Strips constant offsets from EXPR and stores them to *OFFSET; returns the expression that strip_offset_1 leaves after stripping. This simply calls strip_offset_1 with both of its boolean arguments false and assigns its poly_int64 result to the poly_uint64 *OFFSET. */
2912 :
2913 : static tree
2914 1647606 : strip_offset (tree expr, poly_uint64 *offset)
2915 : {
2916 1647606 : poly_int64 off;
2917 1647606 : tree core = strip_offset_1 (expr, false, false, &off);
2918 1647606 : *offset = off; /* Signed poly_int64 stored into the unsigned poly_uint64 result. */
2919 1647606 : return core;
2920 : }
2921 :
2922 : /* Returns variant of TYPE that can be used as base for different uses.
2923 : For pointer types and for non-pointer types that are not TYPE_UNSIGNED we return unsigned_type_for (TYPE), which avoids problems
2924 : with overflows; a non-pointer type that is already unsigned is returned unchanged. */
2925 :
2926 : static tree
2927 8064180 : generic_type_for (tree type)
2928 : {
2929 8064180 : if (POINTER_TYPE_P (type))
2930 1425455 : return unsigned_type_for (type);
2931 :
2932 6638725 : if (TYPE_UNSIGNED (type))
2933 : return type;
2934 :
2935 3102490 : return unsigned_type_for (type);
2936 : }
2937 :
2938 : /* Private data for walk_tree, passed to find_inv_vars_cb through its void * argument. */
2939 :
2940 : struct walk_tree_data
2941 : {
2942 : bitmap *inv_vars; /* Where the callback sets inv_id bits; *inv_vars is allocated by the callback when still NULL. */
2943 : struct ivopts_data *idata; /* Pass data used by the callback for name_info, set_iv and record_invariant. */
2944 : };
2945 :
2946 : /* Callback function for walk_tree, it records the invariant referenced
2947 : by *EXPR_P, if any. DATA is a walk_tree_data: when *EXPR_P is an SSA_NAME whose version info has a nonzero inv_id and no nonlinear use, its inv_id bit is set in *DATA->inv_vars (allocated here on first use). Always returns NULL_TREE, so this callback never stops the walk. */
2948 :
2949 : static tree
2950 34408536 : find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2951 : {
2952 34408536 : tree op = *expr_p;
2953 34408536 : struct version_info *info;
2954 34408536 : struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2955 :
2956 34408536 : if (TREE_CODE (op) != SSA_NAME)
2957 : return NULL_TREE;
2958 :
2959 8059543 : info = name_info (wdata->idata, op);
2960 : /* Because we expand simple operations when finding IVs, a loop invariant
2961 : variable that isn't referred to by the original loop could be used now.
2962 : Record such invariant variables here: a name with no iv whose defining statement has no block or lies outside the current loop gets a zero-step iv and is recorded as an invariant. */
2963 8059543 : if (!info->iv)
2964 : {
2965 390829 : struct ivopts_data *idata = wdata->idata;
2966 390829 : basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2967 :
2968 390829 : if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2969 : {
2970 390829 : tree steptype = TREE_TYPE (op);
2971 390829 : if (POINTER_TYPE_P (steptype))
2972 196764 : steptype = sizetype; /* The zero step of a pointer is built in sizetype. */
2973 390829 : set_iv (idata, op, op, build_int_cst (steptype, 0), true);
2974 390829 : record_invariant (idata, op, false);
2975 : }
2976 : }
2977 8059543 : if (!info->inv_id || info->has_nonlin_use)
2978 : return NULL_TREE;
2979 :
2980 6690625 : if (!*wdata->inv_vars)
2981 5181447 : *wdata->inv_vars = BITMAP_ALLOC (NULL);
2982 6690625 : bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2983 :
2984 6690625 : return NULL_TREE;
2985 : }
2986 :
2987 : /* Records invariants in *EXPR_P. INV_VARS points to the bitmap in which
2988 : they are stored; nothing is done when INV_VARS is NULL. The walk uses find_inv_vars_cb, which allocates *INV_VARS if needed. */
2989 :
2990 : static inline void
2991 27773206 : find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2992 : {
2993 27773206 : struct walk_tree_data wdata;
2994 :
2995 27773206 : if (!inv_vars)
2996 11810639 : return;
2997 :
2998 15962567 : wdata.idata = data;
2999 15962567 : wdata.inv_vars = inv_vars;
3000 15962567 : walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3001 : }
3002 :
3003 : /* Get entry from invariant expr hash table for INV_EXPR. A new entry
3004 : will be recorded if it doesn't exist yet. Returns NULL, recording nothing, when INV_EXPR (after STRIP_NOPS) is a poly_int constant or an SSA_NAME. Given the two exprs below:
3005 : inv_expr + cst1, inv_expr + cst2
3006 : it's hard to decide whether the constant part should be stripped
3007 : or not. We choose not to strip it based on the facts below:
3008 : 1) We need to count ADD cost for the constant part if it's stripped,
3009 : which isn't always trivial where this function is called.
3010 : 2) Stripping the constant away may conflict with the following loop
3011 : invariant hoisting pass.
3012 : 3) Not stripping the constant away results in more invariant exprs,
3013 : which usually leads to a decision preferring lower reg pressure. */
3014 :
3015 : static iv_inv_expr_ent *
3016 2650676 : get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3017 : {
3018 2650676 : STRIP_NOPS (inv_expr);
3019 :
3020 2650676 : if (poly_int_tree_p (inv_expr)
3021 2650676 : || TREE_CODE (inv_expr) == SSA_NAME)
3022 : return NULL;
3023 :
3024 : /* Don't strip constant part away as we used to. */
3025 :
3026 : /* Look INV_EXPR up in DATA->inv_expr_tab, inserting it if missing, and return pointer to its iv_inv_expr_ent. */
3027 2561589 : struct iv_inv_expr_ent ent;
3028 2561589 : ent.expr = inv_expr;
3029 2561589 : ent.hash = iterative_hash_expr (inv_expr, 0);
3030 2561589 : struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3031 :
3032 2561589 : if (!*slot)
3033 : {
3034 1153280 : *slot = XNEW (struct iv_inv_expr_ent);
3035 1153280 : (*slot)->expr = inv_expr;
3036 1153280 : (*slot)->hash = ent.hash;
3037 1153280 : (*slot)->id = ++data->max_inv_expr_id; /* Pre-increment: the id assigned is the new value of max_inv_expr_id. */
3038 : }
3039 :
3040 2561589 : return *slot;
3041 : }
3042 :
3043 :
3044 : /* Callback for walk_tree. Return *TP if it is an SSA_NAME for which ssa_name_maybe_undef_p holds and
3045 : ssa_name_any_use_dominates_bb_p is false for the basic block passed as BB_, i.e., a name
3046 : unsuitable for ivopts candidates because it potentially involves undefined behavior. Otherwise return NULL, and stop descending into *TP when it is not an expression (EXPR_P). */
3047 :
3048 : static tree
3049 15404358 : find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3050 : {
3051 15404358 : basic_block bb = (basic_block) bb_;
3052 15404358 : if (TREE_CODE (*tp) == SSA_NAME
3053 2257711 : && ssa_name_maybe_undef_p (*tp)
3054 15412973 : && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3055 3080 : return *tp;
3056 15401278 : if (!EXPR_P (*tp))
3057 10425629 : *walk_subtrees = 0;
3058 : return NULL;
3059 : }
3060 :
3061 : /* Adds a candidate BASE + STEP * i, or reuses an already recorded candidate with the same position, increment statement, base and step. IMPORTANT and DOLOOP are or'ed into the candidate's flags and the
3062 : position is POS. If USE is not NULL, the candidate is set as related to
3063 : USE's group. BASE and STEP must both be non-NULL (this is asserted). INCREMENTED_AT and ORIG_IV are stored in a newly created candidate. Returns the candidate, or NULL (adding nothing) when -fkeep-gc-roots-live is in effect and BASE has pointer type, or when BASE
3064 : involves a possibly undefined SSA name and POS is not IP_ORIGINAL. */
3065 :
3066 : static struct iv_cand *
3067 9042201 : add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3068 : enum iv_position pos, struct iv_use *use,
3069 : gimple *incremented_at, struct iv *orig_iv = NULL,
3070 : bool doloop = false)
3071 : {
3072 9042201 : unsigned i;
3073 9042201 : struct iv_cand *cand = NULL;
3074 9042201 : tree type, orig_type;
3075 :
3076 9042201 : gcc_assert (base && step);
3077 :
3078 : /* -fkeep-gc-roots-live means that we have to keep a real pointer
3079 : live, but the ivopts code may replace a real pointer with one
3080 : pointing before or after the memory block that is then adjusted
3081 : into the memory block during the loop. FIXME: It would likely be
3082 : better to actually force the pointer live and still use ivopts;
3083 : for example, it would be enough to write the pointer into memory
3084 : and keep it there until after the loop. */
3085 9042201 : if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3086 : return NULL;
3087 :
3088 : /* If BASE contains undefined SSA names make sure we only record
3089 : the original IV, and never mark it important. */
3090 8936056 : bool involves_undefs = false;
3091 8936056 : if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3092 : {
3093 3080 : if (pos != IP_ORIGINAL)
3094 : return NULL;
3095 : important = false;
3096 : involves_undefs = true;
3097 : }
3098 :
3099 : /* For non-original variables, make sure their values are computed in a type
3100 : that does not invoke undefined behavior on overflows (since in general,
3101 : we cannot prove that these induction variables are non-wrapping). */
3102 8932976 : if (pos != IP_ORIGINAL)
3103 : {
3104 8064180 : orig_type = TREE_TYPE (base);
3105 8064180 : type = generic_type_for (orig_type);
3106 8064180 : if (type != orig_type)
3107 : {
3108 4527945 : base = fold_convert (type, base);
3109 4527945 : step = fold_convert (type, step);
3110 : }
3111 : }
3112 :
3113 44720815 : for (i = 0; i < data->vcands.length (); i++) /* Linear search for an equivalent existing candidate; I stays at its index on a match. */
3114 : {
3115 40095919 : cand = data->vcands[i];
3116 :
3117 40095919 : if (cand->pos != pos)
3118 9814430 : continue;
3119 :
3120 30281489 : if (cand->incremented_at != incremented_at
3121 29792438 : || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3122 0 : && cand->ainc_use != use))
3123 489051 : continue;
3124 :
3125 29792438 : if (operand_equal_p (base, cand->iv->base, 0)
3126 9455698 : && operand_equal_p (step, cand->iv->step, 0)
3127 35480971 : && (TYPE_PRECISION (TREE_TYPE (base))
3128 5688533 : == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3129 : break;
3130 : }
3131 :
3132 17866694 : if (i == data->vcands.length ()) /* No match found: create a new candidate whose id is its index in vcands. */
3133 : {
3134 4624896 : cand = XCNEW (struct iv_cand);
3135 4624896 : cand->id = i;
3136 4624896 : cand->iv = alloc_iv (data, base, step);
3137 4624896 : cand->pos = pos;
3138 4624896 : if (pos != IP_ORIGINAL)
3139 : {
3140 3755884 : if (doloop)
3141 0 : cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3142 : else
3143 3755884 : cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3144 3755884 : cand->var_after = cand->var_before;
3145 : }
3146 4624896 : cand->important = important;
3147 4624896 : cand->involves_undefs = involves_undefs;
3148 4624896 : cand->incremented_at = incremented_at;
3149 4624896 : cand->doloop_p = doloop;
3150 4624896 : data->vcands.safe_push (cand);
3151 :
3152 4624896 : if (!poly_int_tree_p (step))
3153 : {
3154 182468 : find_inv_vars (data, &step, &cand->inv_vars);
3155 :
3156 182468 : iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3157 : /* Share bitmap between inv_vars and inv_exprs for cand: when STEP has an invariant-expression entry, the inv_vars bitmap (if any) is cleared and reused to hold just that entry's id. */
3158 182468 : if (inv_expr != NULL)
3159 : {
3160 100518 : cand->inv_exprs = cand->inv_vars;
3161 100518 : cand->inv_vars = NULL;
3162 100518 : if (cand->inv_exprs)
3163 83397 : bitmap_clear (cand->inv_exprs);
3164 : else
3165 17121 : cand->inv_exprs = BITMAP_ALLOC (NULL);
3166 :
3167 100518 : bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3168 : }
3169 : }
3170 :
3171 4624896 : if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3172 0 : cand->ainc_use = use;
3173 : else
3174 4624896 : cand->ainc_use = NULL;
3175 :
3176 4624896 : cand->orig_iv = orig_iv;
3177 4624896 : if (dump_file && (dump_flags & TDF_DETAILS))
3178 686 : dump_cand (dump_file, cand);
3179 : }
3180 :
3181 8933347 : cand->important |= important;
3182 8933347 : cand->doloop_p |= doloop;
3183 :
3184 : /* Relate candidate to the group for which it is added. */
3185 8933347 : if (use)
3186 2500970 : bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3187 :
3188 : return cand;
3189 : }
3190 :
3191 : /* Returns true if incrementing the induction variable at the end of the LOOP
3192 : is allowed.
3193 :
3194 : The purpose is to avoid splitting the latch edge with a biv increment, thus
3195 : creating a jump, possibly confusing other optimization passes and leaving
3196 : less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3197 : available (so we do not have a better alternative), or if the block returned by ip_end_pos
3198 : is already nonempty; and never when the last statement of that block ends the basic block. */
3199 :
3200 : static bool
3201 7949747 : allow_ip_end_pos_p (class loop *loop)
3202 : {
3203 : /* Do not allow IP_END when creating the IV would need to split the
3204 : latch edge as that makes all IP_NORMAL invalid. */
3205 7949747 : auto pos = gsi_last_bb (ip_end_pos (loop));
3206 7949747 : if (!gsi_end_p (pos) && stmt_ends_bb_p (*pos))
3207 : return false;
3208 :
3209 7949747 : if (!ip_normal_pos (loop))
3210 : return true;
3211 :
3212 7852801 : if (!empty_block_p (ip_end_pos (loop)))
3213 : return true;
3214 :
3215 : return false;
3216 : }
3217 :
3218 : /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3219 : Important field is set to IMPORTANT. A pre-increment/decrement form is added at IP_BEFORE_USE (with base BASE - STEP) and a post-increment/decrement form at IP_AFTER_USE, each only when the corresponding USE_*_PRE_* / USE_*_POST_* test holds for the mode of USE's memory type and the mode size equals STEP (or -STEP for the decrement forms). */
3220 :
3221 : static void
3222 582770 : add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3223 : bool important, struct iv_use *use)
3224 : {
3225 582770 : basic_block use_bb = gimple_bb (use->stmt);
3226 582770 : machine_mode mem_mode;
3227 582770 : unsigned HOST_WIDE_INT cstepi;
3228 :
3229 : /* If we insert the increment in any position other than the standard
3230 : ones, we must ensure that it is incremented once per iteration.
3231 : It must not be in an inner nested loop, or one side of an if
3232 : statement. We also give up if the use statement can throw internally or STEP is not a constant fitting in a HOST_WIDE_INT. */
3233 582770 : if (use_bb->loop_father != data->current_loop
3234 581384 : || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3235 554501 : || stmt_can_throw_internal (cfun, use->stmt)
3236 1133450 : || !cst_and_fits_in_hwi (step))
3237 61952 : return;
3238 :
3239 520818 : cstepi = int_cst_value (step); /* Stored as unsigned, so -cstepi below is the negated step modulo the HOST_WIDE_INT width. */
3240 :
3241 520818 : mem_mode = TYPE_MODE (use->mem_type);
3242 : if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3243 : || USE_STORE_PRE_INCREMENT (mem_mode))
3244 : && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3245 : || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3246 : || USE_STORE_PRE_DECREMENT (mem_mode))
3247 : && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3248 : {
3249 : enum tree_code code = MINUS_EXPR;
3250 : tree new_base;
3251 : tree new_step = step;
3252 :
3253 : if (POINTER_TYPE_P (TREE_TYPE (base))) /* For pointers, BASE - STEP is expressed as POINTER_PLUS_EXPR of the negated step. */
3254 : {
3255 : new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3256 : code = POINTER_PLUS_EXPR;
3257 : }
3258 : else
3259 : new_step = fold_convert (TREE_TYPE (base), new_step);
3260 : new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3261 : add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3262 : use->stmt);
3263 : }
3264 : if (((USE_LOAD_POST_INCREMENT (mem_mode)
3265 : || USE_STORE_POST_INCREMENT (mem_mode))
3266 : && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3267 : || ((USE_LOAD_POST_DECREMENT (mem_mode)
3268 : || USE_STORE_POST_DECREMENT (mem_mode))
3269 : && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3270 : {
3271 : add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3272 : use->stmt);
3273 : }
3274 : }
3275 :
3276 : /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT.
3277 : If USE is not NULL, the candidate is set as related to
3278 : it. ORIG_IV and DOLOOP are passed on to add_candidate_1. The candidate is added at IP_NORMAL when the loop has a normal position, and at IP_END
3279 : when the loop has an end position, allow_ip_end_pos_p holds and DOLOOP is false. */
3280 :
3281 : static void
3282 6990867 : add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3283 : struct iv_use *use, struct iv *orig_iv = NULL,
3284 : bool doloop = false)
3285 : {
3286 6990867 : if (ip_normal_pos (data->current_loop))
3287 6908688 : add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3288 : doloop);
3289 : /* Exclude doloop candidate here since it requires decrement then comparison
3290 : and jump, the IP_END position doesn't match. */
3291 6990867 : if (!doloop && ip_end_pos (data->current_loop)
3292 13981734 : && allow_ip_end_pos_p (data->current_loop))
3293 272937 : add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3294 6990867 : }
3295 :
3296 : /* Adds standard iv candidates: important candidates with base 0 and step 1 in int and, when the conditions below hold, in long and long long. */
3297 :
3298 : static void
3299 503695 : add_standard_iv_candidates (struct ivopts_data *data)
3300 : {
3301 503695 : add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3302 :
3303 : /* The same for long if it is wider than int and still fast enough, i.e. no wider than a word. */
3304 503695 : if (TYPE_PRECISION
3305 503695 : (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3306 503695 : && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3307 455804 : add_candidate (data, build_int_cst (long_integer_type_node, 0),
3308 : build_int_cst (long_integer_type_node, 1), true, NULL);
3309 :
3310 : /* The same for long long if it is wider than long and still fast enough, i.e. no wider than a word. */
3311 503695 : if (TYPE_PRECISION
3312 503695 : (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3313 551574 : && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3314 12 : add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3315 : build_int_cst (long_long_integer_type_node, 1), true, NULL);
3316 503695 : }
3317 :
3318 :
3319 : /* Adds candidates based on the old induction variable IV: one with IV's base and step (widened to sizetype in the address-use case below), one with the same step and initial value zero, and, when IV's ssa_name is defined by a PHI, an IP_ORIGINAL candidate that keeps the original variable. All are marked important. */
3320 :
3321 : static void
3322 1742787 : add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3323 : {
3324 1742787 : gimple *phi;
3325 1742787 : tree def;
3326 1742787 : struct iv_cand *cand;
3327 :
3328 : /* Check if this biv is used in address type use: it must not overflow, have an address use, and be of an integral type narrower than sizetype. */
3329 1152533 : if (iv->no_overflow && iv->have_address_use
3330 493916 : && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3331 2236703 : && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3332 : {
3333 280553 : tree base = fold_convert (sizetype, iv->base);
3334 280553 : tree step = fold_convert (sizetype, iv->step);
3335 :
3336 : /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3337 280553 : add_candidate (data, base, step, true, NULL, iv);
3338 : /* Add iv cand of the original type only if it has nonlinear use. */
3339 280553 : if (iv->nonlin_use)
3340 28463 : add_candidate (data, iv->base, iv->step, true, NULL);
3341 : }
3342 : else
3343 1462234 : add_candidate (data, iv->base, iv->step, true, NULL);
3344 :
3345 : /* The same, but with initial value zero (a sizetype zero when the base is a pointer). */
3346 1742787 : if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3347 326310 : add_candidate (data, size_int (0), iv->step, true, NULL);
3348 : else
3349 1416477 : add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3350 : iv->step, true, NULL);
3351 :
3352 1742787 : phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3353 1742787 : if (gimple_code (phi) == GIMPLE_PHI)
3354 : {
3355 : /* Additionally record the possibility of leaving the original iv
3356 : untouched. */
3357 871471 : def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3358 : /* Don't add candidate if it's from another PHI node because
3359 : it's an affine iv appearing in the form of PEELED_CHREC. */
3360 871471 : phi = SSA_NAME_DEF_STMT (def);
3361 871471 : if (gimple_code (phi) != GIMPLE_PHI)
3362 : {
3363 1742942 : cand = add_candidate_1 (data,
3364 : iv->base, iv->step, true, IP_ORIGINAL, NULL,
3365 871471 : SSA_NAME_DEF_STMT (def));
3366 871471 : if (cand) /* add_candidate_1 may return NULL; only then is there nothing to bind. */
3367 : {
3368 869167 : cand->var_before = iv->ssa_name;
3369 869167 : cand->var_after = def;
3370 : }
3371 : }
3372 : else
3373 0 : gcc_assert (gimple_bb (phi) == data->current_loop->header);
3374 : }
3375 1742787 : }
3376 :
3377 : /* Adds candidates based on the old induction variables: every version in DATA->relevant whose iv is a biv with a step that is not integer zero. */
3378 :
3379 : static void
3380 503695 : add_iv_candidate_for_bivs (struct ivopts_data *data)
3381 : {
3382 503695 : unsigned i;
3383 503695 : struct iv *iv;
3384 503695 : bitmap_iterator bi;
3385 :
3386 5455421 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3387 : {
3388 4951726 : iv = ver_info (data, i)->iv;
3389 4951726 : if (iv && iv->biv_p && !integer_zerop (iv->step))
3390 1742787 : add_iv_candidate_for_biv (data, iv);
3391 : }
3392 503695 : }
3393 :
3394 : /* Record common candidate {BASE, STEP} derived from USE in the hashtable DATA->iv_common_cand_tab. A new entry is created (and also pushed to DATA->iv_common_cands) if the pair is not present yet; USE, which must not be NULL, is appended to the entry's list of uses. */
3395 :
3396 : static void
3397 4164756 : record_common_cand (struct ivopts_data *data, tree base,
3398 : tree step, struct iv_use *use)
3399 : {
3400 4164756 : class iv_common_cand ent;
3401 4164756 : class iv_common_cand **slot;
3402 :
3403 4164756 : ent.base = base;
3404 4164756 : ent.step = step;
3405 4164756 : ent.hash = iterative_hash_expr (base, 0);
3406 4164756 : ent.hash = iterative_hash_expr (step, ent.hash); /* The hash combines BASE and STEP. */
3407 :
3408 4164756 : slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3409 4164756 : if (*slot == NULL)
3410 : {
3411 2618078 : *slot = new iv_common_cand ();
3412 2618078 : (*slot)->base = base;
3413 2618078 : (*slot)->step = step;
3414 2618078 : (*slot)->uses.create (8);
3415 2618078 : (*slot)->hash = ent.hash;
3416 2618078 : data->iv_common_cands.safe_push ((*slot));
3417 : }
3418 :
3419 4164756 : gcc_assert (use != NULL);
3420 4164756 : (*slot)->uses.safe_push (use);
3421 4164756 : return;
3422 4164756 : }
3423 :
3424 : /* Comparison function used to sort common candidates. P1 and P2 point to iv_common_cand pointers; the result orders candidates with more uses first. */
3425 :
3426 : static int
3427 19122608 : common_cand_cmp (const void *p1, const void *p2)
3428 : {
3429 19122608 : unsigned n1, n2;
3430 19122608 : const class iv_common_cand *const *const ccand1
3431 : = (const class iv_common_cand *const *)p1;
3432 19122608 : const class iv_common_cand *const *const ccand2
3433 : = (const class iv_common_cand *const *)p2;
3434 :
3435 19122608 : n1 = (*ccand1)->uses.length ();
3436 19122608 : n2 = (*ccand2)->uses.length ();
3437 19122608 : return n2 - n1; /* NOTE(review): unsigned subtraction converted to int; gives the expected sign only while the counts differ by less than INT_MAX. */
3438 : }
3439 :
3440 : /* Adds IV candidates based on the common candidates recorded, for those derived from more than one use, and relates each new candidate to the groups of the uses it was derived from. The recorded common candidates are released afterwards. */
3441 :
3442 : static void
3443 503695 : add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3444 : {
3445 503695 : unsigned i, j;
3446 503695 : struct iv_cand *cand_1, *cand_2;
3447 :
3448 503695 : data->iv_common_cands.qsort (common_cand_cmp);
3449 1462575 : for (i = 0; i < data->iv_common_cands.length (); i++)
3450 : {
3451 1446766 : class iv_common_cand *ptr = data->iv_common_cands[i];
3452 :
3453 : /* Only add IV candidate if it's derived from multiple uses. The vector was just sorted by decreasing number of uses, so we can stop at the first entry with at most one use. */
3454 1446766 : if (ptr->uses.length () <= 1)
3455 : break;
3456 :
3457 958880 : cand_1 = NULL;
3458 958880 : cand_2 = NULL;
3459 958880 : if (ip_normal_pos (data->current_loop))
3460 944113 : cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3461 : false, IP_NORMAL, NULL, NULL);
3462 :
3463 958880 : if (ip_end_pos (data->current_loop)
3464 958880 : && allow_ip_end_pos_p (data->current_loop))
3465 44992 : cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3466 : false, IP_END, NULL, NULL);
3467 :
3468 : /* Bind deriving uses and the new candidates. */
3469 3464438 : for (j = 0; j < ptr->uses.length (); j++)
3470 : {
3471 2505558 : struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3472 2505558 : if (cand_1)
3473 2431123 : bitmap_set_bit (group->related_cands, cand_1->id);
3474 2505558 : if (cand_2)
3475 134692 : bitmap_set_bit (group->related_cands, cand_2->id);
3476 : }
3477 : }
3478 :
3479 : /* Release data since it is useless from this point. */
3480 503695 : data->iv_common_cand_tab->empty ();
3481 503695 : data->iv_common_cands.truncate (0);
3482 503695 : }
3483 :
3484 : /* Adds candidates based on the value of USE's iv, and records common candidates ({base, step} pairs that other uses may share) derived from it. USE is dereferenced unconditionally, so it must not be NULL. */
3485 :
3486 : static void
3487 1647775 : add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3488 : {
3489 1647775 : poly_uint64 offset;
3490 1647775 : tree base;
3491 1647775 : struct iv *iv = use->iv;
3492 1647775 : tree basetype = TREE_TYPE (iv->base);
3493 :
3494 : /* Don't add candidate for iv_use with non integer, pointer or non-mode
3495 : precision types, instead, add candidate for the corresponding scev in
3496 : the type for_mode returns for the base type's mode and signedness. See PR93674 for more info. */
3497 774339 : if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3498 2421956 : || !type_has_mode_precision_p (basetype))
3499 : {
3500 169 : basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3501 169 : TYPE_UNSIGNED (basetype));
3502 169 : add_candidate (data, fold_convert (basetype, iv->base),
3503 : fold_convert (basetype, iv->step), false, NULL);
3504 169 : return;
3505 : }
3506 :
3507 1647606 : add_candidate (data, iv->base, iv->step, false, use);
3508 :
3509 : /* Record common candidate for use in case it can be shared by others. */
3510 1647606 : record_common_cand (data, iv->base, iv->step, use);
3511 :
3512 : /* Record common candidate with initial value zero (of sizetype when the base is a pointer). */
3513 1647606 : basetype = TREE_TYPE (iv->base);
3514 1647606 : if (POINTER_TYPE_P (basetype))
3515 774181 : basetype = sizetype;
3516 1647606 : record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3517 :
3518 : /* Compare the cost of an address with an unscaled index with the cost of
3519 : an address with a scaled index and add candidate if useful. NOTE(review): the USE != NULL tests here and below are redundant, USE was already dereferenced above. */
3520 1647606 : poly_int64 step;
3521 1647606 : if (use != NULL
3522 1647606 : && poly_int_tree_p (iv->step, &step)
3523 1411725 : && address_p (use->type))
3524 : {
3525 532600 : poly_int64 new_step;
3526 532600 : unsigned int fact = preferred_mem_scale_factor
3527 532600 : (use->iv->base,
3528 532600 : TYPE_MODE (use->mem_type),
3529 532600 : optimize_loop_for_speed_p (data->current_loop));
3530 :
3531 532600 : if (fact != 1
3532 532600 : && multiple_p (step, fact, &new_step))
3533 0 : add_candidate (data, size_int (0),
3534 0 : wide_int_to_tree (sizetype, new_step),
3535 : true, NULL);
3536 : }
3537 :
3538 : /* Record common candidate with constant offset stripped in base.
3539 : Like the use itself, we also add candidate directly for it. */
3540 1647606 : base = strip_offset (iv->base, &offset);
3541 1647606 : if (maybe_ne (offset, 0U) || base != iv->base)
3542 : {
3543 869544 : record_common_cand (data, base, iv->step, use);
3544 869544 : add_candidate (data, base, iv->step, false, use);
3545 : }
3546 :
3547 : /* Record common candidate with base_object removed in base: for a POINTER_PLUS_EXPR base this is its second operand, with the step converted to sizetype. */
3548 1647606 : base = iv->base;
3549 1647606 : STRIP_NOPS (base);
3550 1647606 : if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3551 : {
3552 0 : tree step = iv->step;
3553 :
3554 0 : STRIP_NOPS (step);
3555 0 : base = TREE_OPERAND (base, 1);
3556 0 : step = fold_convert (sizetype, step);
3557 0 : record_common_cand (data, base, step, use);
3558 : /* Also record common candidate with offset stripped. */
3559 0 : tree alt_base, alt_offset;
3560 0 : split_constant_offset (base, &alt_base, &alt_offset);
3561 0 : if (!integer_zerop (alt_offset))
3562 0 : record_common_cand (data, alt_base, step, use);
3563 : }
3564 :
3565 : /* At last, add auto-incremental candidates. Make such variables
3566 : important since other iv uses with same base object may be based
3567 : on it. */
3568 1647606 : if (use != NULL && address_p (use->type))
3569 582770 : add_autoinc_candidates (data, iv->base, iv->step, true, use);
3570 : }
3571 :
3572 : /* Adds candidates based on the uses: first for the leading use of every group in DATA->vgroups, then the candidates derived from the common candidates recorded along the way. */
3573 :
3574 : static void
3575 503695 : add_iv_candidate_for_groups (struct ivopts_data *data)
3576 : {
3577 503695 : unsigned i;
3578 :
3579 : /* Only add candidate for the first use in group. */
3580 2151470 : for (i = 0; i < data->vgroups.length (); i++)
3581 : {
3582 1647775 : struct iv_group *group = data->vgroups[i];
3583 :
3584 1647775 : gcc_assert (group->vuses[0] != NULL);
3585 1647775 : add_iv_candidate_for_use (data, group->vuses[0]);
3586 : }
3587 503695 : add_iv_candidate_derived_from_uses (data);
3588 503695 : }
3589 :
3590 : /* Record important candidates in DATA->important_candidates and add them to the related_cands bitmaps of all groups. Also sets DATA->consider_all_candidates according to whether the number of candidates is within CONSIDER_ALL_CANDIDATES_BOUND. */
3591 :
3592 : static void
3593 503695 : record_important_candidates (struct ivopts_data *data)
3594 : {
3595 503695 : unsigned i;
3596 503695 : struct iv_group *group;
3597 :
3598 5128591 : for (i = 0; i < data->vcands.length (); i++)
3599 : {
3600 4624896 : struct iv_cand *cand = data->vcands[i];
3601 :
3602 4624896 : if (cand->important)
3603 3703540 : bitmap_set_bit (data->important_candidates, i);
3604 : }
3605 :
3606 503695 : data->consider_all_candidates = (data->vcands.length ()
3607 503695 : <= CONSIDER_ALL_CANDIDATES_BOUND);
3608 :
3609 : /* Add important candidates to groups' related_cands bitmaps. */
3610 2151470 : for (i = 0; i < data->vgroups.length (); i++)
3611 : {
3612 1647775 : group = data->vgroups[i];
3613 1647775 : bitmap_ior_into (group->related_cands, data->important_candidates);
3614 : }
3615 503695 : }
3616 :
3617 : /* Allocates the data structure mapping the (group, candidate) pairs to costs.
3618 : If consider_all_candidates is true, every group gets a zero-initialized array with one entry per candidate; otherwise
3619 : every group gets an array whose size is the number of its related candidates rounded up to a power of two (1 when there are none). */
3620 :
3621 : static void
3622 503695 : alloc_use_cost_map (struct ivopts_data *data)
3623 : {
3624 503695 : unsigned i, size, s;
3625 :
3626 2151470 : for (i = 0; i < data->vgroups.length (); i++)
3627 : {
3628 1647775 : struct iv_group *group = data->vgroups[i];
3629 :
3630 1647775 : if (data->consider_all_candidates)
3631 1638074 : size = data->vcands.length ();
3632 : else
3633 : {
3634 9701 : s = bitmap_count_bits (group->related_cands);
3635 :
3636 : /* Round up to the power of two, so that moduling by it is fast. */
3637 19402 : size = s ? (1 << ceil_log2 (s)) : 1;
3638 : }
3639 :
3640 1647775 : group->n_map_members = size;
3641 1647775 : group->cost_map = XCNEWVEC (class cost_pair, size);
3642 : }
3643 503695 : }
3644 :
3645 : /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3646 : on invariants INV_VARS and invariant expressions INV_EXPRS, that the value used in expressing it is
3647 : VALUE, and in case of iv elimination the comparison operator is COMP. An infinite COST is not recorded; INV_VARS and INV_EXPRS are freed instead. */
3648 :
3649 : static void
3650 17744892 : set_group_iv_cost (struct ivopts_data *data,
3651 : struct iv_group *group, struct iv_cand *cand,
3652 : comp_cost cost, bitmap inv_vars, tree value,
3653 : enum tree_code comp, bitmap inv_exprs)
3654 : {
3655 17744892 : unsigned i, s;
3656 :
3657 17744892 : if (cost.infinite_cost_p ())
3658 : {
3659 6132179 : BITMAP_FREE (inv_vars);
3660 6132179 : BITMAP_FREE (inv_exprs);
3661 6132179 : return;
3662 : }
3663 :
3664 11612713 : if (data->consider_all_candidates) /* The cost map is indexed directly by candidate id. */
3665 : {
3666 11469195 : group->cost_map[cand->id].cand = cand;
3667 11469195 : group->cost_map[cand->id].cost = cost;
3668 11469195 : group->cost_map[cand->id].inv_vars = inv_vars;
3669 11469195 : group->cost_map[cand->id].inv_exprs = inv_exprs;
3670 11469195 : group->cost_map[cand->id].value = value;
3671 11469195 : group->cost_map[cand->id].comp = comp;
3672 11469195 : return;
3673 : }
3674 :
3675 : /* n_map_members is a power of two, so this computes modulo. Starting from that slot, take the first entry with no candidate, wrapping around to the start of the map. */
3676 143518 : s = cand->id & (group->n_map_members - 1);
3677 152845 : for (i = s; i < group->n_map_members; i++)
3678 152806 : if (!group->cost_map[i].cand)
3679 143479 : goto found;
3680 53 : for (i = 0; i < s; i++)
3681 53 : if (!group->cost_map[i].cand)
3682 39 : goto found;
3683 :
3684 0 : gcc_unreachable (); /* Reached only if every slot of the map is already occupied. */
3685 :
3686 143518 : found:
3687 143518 : group->cost_map[i].cand = cand;
3688 143518 : group->cost_map[i].cost = cost;
3689 143518 : group->cost_map[i].inv_vars = inv_vars;
3690 143518 : group->cost_map[i].inv_exprs = inv_exprs;
3691 143518 : group->cost_map[i].value = value;
3692 143518 : group->cost_map[i].comp = comp;
3693 : }
3694 :
3695 : /* Gets cost of (GROUP, CAND) pair. */
3696 :
3697 : static class cost_pair *
3698 203250505 : get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3699 : struct iv_cand *cand)
3700 : {
3701 203250505 : unsigned i, s;
3702 203250505 : class cost_pair *ret;
3703 :
3704 203250505 : if (!cand)
3705 : return NULL;
3706 :
3707 197438070 : if (data->consider_all_candidates)
3708 : {
3709 183676108 : ret = group->cost_map + cand->id;
3710 183676108 : if (!ret->cand)
3711 : return NULL;
3712 :
3713 108499003 : return ret;
3714 : }
3715 :
3716 : /* n_map_members is a power of two, so this computes modulo. */
3717 13761962 : s = cand->id & (group->n_map_members - 1);
3718 18681715 : for (i = s; i < group->n_map_members; i++)
3719 18619792 : if (group->cost_map[i].cand == cand)
3720 : return group->cost_map + i;
3721 10364098 : else if (group->cost_map[i].cand == NULL)
3722 : return NULL;
3723 191482 : for (i = 0; i < s; i++)
3724 171249 : if (group->cost_map[i].cand == cand)
3725 : return group->cost_map + i;
3726 169165 : else if (group->cost_map[i].cand == NULL)
3727 : return NULL;
3728 :
3729 : return NULL;
3730 : }
3731 :
3732 : /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3733 : static rtx
3734 41428 : produce_memory_decl_rtl (tree obj, int *regno)
3735 : {
3736 41428 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3737 41428 : machine_mode address_mode = targetm.addr_space.address_mode (as);
3738 41428 : rtx x;
3739 :
3740 41428 : gcc_assert (obj);
3741 41428 : if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3742 : {
3743 41428 : const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3744 41428 : x = gen_rtx_SYMBOL_REF (address_mode, name);
3745 41428 : SET_SYMBOL_REF_DECL (x, obj);
3746 41428 : x = gen_rtx_MEM (DECL_MODE (obj), x);
3747 41428 : set_mem_addr_space (x, as);
3748 41428 : targetm.encode_section_info (obj, x, true);
3749 : }
3750 : else
3751 : {
3752 0 : x = gen_raw_REG (address_mode, (*regno)++);
3753 0 : x = gen_rtx_MEM (DECL_MODE (obj), x);
3754 0 : set_mem_addr_space (x, as);
3755 : }
3756 :
3757 41428 : return x;
3758 : }
3759 :
3760 : /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3761 : walk_tree. DATA contains the actual fake register number. */
3762 :
3763 : static tree
3764 579992 : prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3765 : {
3766 579992 : tree obj = NULL_TREE;
3767 579992 : rtx x = NULL_RTX;
3768 579992 : int *regno = (int *) data;
3769 :
3770 579992 : switch (TREE_CODE (*expr_p))
3771 : {
3772 165712 : case ADDR_EXPR:
3773 165712 : for (expr_p = &TREE_OPERAND (*expr_p, 0);
3774 165712 : handled_component_p (*expr_p);
3775 0 : expr_p = &TREE_OPERAND (*expr_p, 0))
3776 0 : continue;
3777 165712 : obj = *expr_p;
3778 165712 : if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3779 0 : x = produce_memory_decl_rtl (obj, regno);
3780 : break;
3781 :
3782 0 : case SSA_NAME:
3783 0 : *ws = 0;
3784 0 : obj = SSA_NAME_VAR (*expr_p);
3785 : /* Defer handling of anonymous SSA_NAMEs to the expander. */
3786 0 : if (!obj)
3787 : return NULL_TREE;
3788 0 : if (!DECL_RTL_SET_P (obj))
3789 0 : x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3790 : break;
3791 :
3792 165712 : case VAR_DECL:
3793 165712 : case PARM_DECL:
3794 165712 : case RESULT_DECL:
3795 165712 : *ws = 0;
3796 165712 : obj = *expr_p;
3797 :
3798 165712 : if (DECL_RTL_SET_P (obj))
3799 : break;
3800 :
3801 0 : if (DECL_MODE (obj) == BLKmode)
3802 0 : x = produce_memory_decl_rtl (obj, regno);
3803 : else
3804 0 : x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3805 :
3806 : break;
3807 :
3808 : default:
3809 : break;
3810 : }
3811 :
3812 0 : if (x)
3813 : {
3814 0 : decl_rtl_to_reset.safe_push (obj);
3815 0 : SET_DECL_RTL (obj, x);
3816 : }
3817 :
3818 : return NULL_TREE;
3819 : }
3820 :
3821 : /* Predict whether the given loop will be transformed in the RTL
3822 : doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3823 : This is only for target independent checks, see targetm.predict_doloop_p
3824 : for the target dependent ones.
3825 :
3826 : Note that according to some initial investigation, some checks like costly
3827 : niter check and invalid stmt scanning don't have much gains among general
3828 : cases, so keep this as simple as possible first.
3829 :
3830 : Some RTL specific checks seems unable to be checked in gimple, if any new
3831 : checks or easy checks _are_ missing here, please add them. */
3832 :
3833 : static bool
3834 503695 : generic_predict_doloop_p (struct ivopts_data *data)
3835 : {
3836 503695 : class loop *loop = data->current_loop;
3837 :
3838 : /* Call target hook for target dependent checks. */
3839 503695 : if (!targetm.predict_doloop_p (loop))
3840 : {
3841 503695 : if (dump_file && (dump_flags & TDF_DETAILS))
3842 67 : fprintf (dump_file, "Predict doloop failure due to"
3843 : " target specific checks.\n");
3844 503695 : return false;
3845 : }
3846 :
3847 : /* Similar to doloop_optimize, check iteration description to know it's
3848 : suitable or not. Keep it as simple as possible, feel free to extend it
3849 : if you find any multiple exits cases matter. */
3850 0 : edge exit = single_dom_exit (loop);
3851 0 : class tree_niter_desc *niter_desc;
3852 0 : if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3853 : {
3854 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3855 0 : fprintf (dump_file, "Predict doloop failure due to"
3856 : " unexpected niters.\n");
3857 0 : return false;
3858 : }
3859 :
3860 : /* Similar to doloop_optimize, check whether iteration count too small
3861 : and not profitable. */
3862 0 : HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3863 0 : if (est_niter == -1)
3864 0 : est_niter = get_likely_max_loop_iterations_int (loop);
3865 0 : if (est_niter >= 0 && est_niter < 3)
3866 : {
3867 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3868 0 : fprintf (dump_file,
3869 : "Predict doloop failure due to"
3870 : " too few iterations (%u).\n",
3871 : (unsigned int) est_niter);
3872 0 : return false;
3873 : }
3874 :
3875 : return true;
3876 : }
3877 :
3878 : /* Determines cost of the computation of EXPR. */
3879 :
3880 : static unsigned
3881 248568 : computation_cost (tree expr, bool speed)
3882 : {
3883 248568 : rtx_insn *seq;
3884 248568 : rtx rslt;
3885 248568 : tree type = TREE_TYPE (expr);
3886 248568 : unsigned cost;
3887 : /* Avoid using hard regs in ways which may be unsupported. */
3888 248568 : int regno = LAST_VIRTUAL_REGISTER + 1;
3889 248568 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
3890 248568 : enum node_frequency real_frequency = node->frequency;
3891 :
3892 248568 : node->frequency = NODE_FREQUENCY_NORMAL;
3893 248568 : crtl->maybe_hot_insn_p = speed;
3894 248568 : walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
3895 248568 : start_sequence ();
3896 248568 : rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3897 248568 : seq = end_sequence ();
3898 248568 : default_rtl_profile ();
3899 248568 : node->frequency = real_frequency;
3900 :
3901 248568 : cost = seq_cost (seq, speed);
3902 248568 : if (MEM_P (rslt))
3903 0 : cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3904 0 : TYPE_ADDR_SPACE (type), speed);
3905 248568 : else if (!REG_P (rslt))
3906 497136 : cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3907 :
3908 248568 : return cost;
3909 : }
3910 :
3911 : /* Returns variable containing the value of candidate CAND at statement AT. */
3912 :
3913 : static tree
3914 18450136 : var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3915 : {
3916 18450136 : if (stmt_after_increment (loop, cand, stmt))
3917 4742168 : return cand->var_after;
3918 : else
3919 13707968 : return cand->var_before;
3920 : }
3921 :
3922 : /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3923 : same precision that is at least as wide as the precision of TYPE, stores
3924 : BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3925 : type of A and B. */
3926 :
3927 : static tree
3928 14252010 : determine_common_wider_type (tree *a, tree *b)
3929 : {
3930 14252010 : tree wider_type = NULL;
3931 14252010 : tree suba, subb;
3932 14252010 : tree atype = TREE_TYPE (*a);
3933 :
3934 14252010 : if (CONVERT_EXPR_P (*a))
3935 : {
3936 7975814 : suba = TREE_OPERAND (*a, 0);
3937 7975814 : wider_type = TREE_TYPE (suba);
3938 7975814 : if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3939 : return atype;
3940 : }
3941 : else
3942 : return atype;
3943 :
3944 7957628 : if (CONVERT_EXPR_P (*b))
3945 : {
3946 1590368 : subb = TREE_OPERAND (*b, 0);
3947 1590368 : if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3948 : return atype;
3949 : }
3950 : else
3951 : return atype;
3952 :
3953 1506752 : *a = suba;
3954 1506752 : *b = subb;
3955 1506752 : return wider_type;
3956 : }
3957 :
3958 : /* Determines the expression by which USE is expressed from induction variable
3959 : CAND at statement AT in DATA's current loop. The expression is stored in
3960 : two parts in a decomposed form: the invariant part in AFF_INV and the
3961 : variant part in AFF_VAR. If PRAT is non-null, store in it the ratio RAT
3962 : for which USE.step == RAT * CAND.step (see the formula in the body).
3963 : Returns false if USE cannot be expressed using CAND. */
3964 :
3965 : static bool
3966 17192820 : get_computation_aff_1 (struct ivopts_data *data, gimple *at, struct iv_use *use,
3967 : struct iv_cand *cand, class aff_tree *aff_inv,
3968 : class aff_tree *aff_var, widest_int *prat = NULL)
3969 : {
3970 17192820 : tree ubase = use->iv->base, ustep = use->iv->step;
3971 17192820 : tree cbase = cand->iv->base, cstep = cand->iv->step;
3972 17192820 : tree common_type, uutype, var, cstep_common;
3973 17192820 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3974 17192820 : aff_tree aff_cbase;
3975 17192820 : widest_int rat;
3976 :
3977 : /* The candidate must be at least as precise as the use to express its values. */
3978 17192820 : if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3979 : return false;
3980 :
3981 17192035 : var = var_at_stmt (data->current_loop, cand, at);
3982 17192035 : uutype = unsigned_type_for (utype);
3983 :
3984 : /* If the conversion is not a no-op (the candidate is wider), perform it. */
3985 17192035 : if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3986 : {
3987 266382 : if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3988 1651143 : && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3989 : {
3990 37072 : tree inner_base, inner_step, inner_type;
3991 37072 : inner_base = TREE_OPERAND (cbase, 0);
3992 37072 : if (CONVERT_EXPR_P (cstep))
3993 4503 : inner_step = TREE_OPERAND (cstep, 0);
3994 : else
3995 : inner_step = cstep;
3996 :
3997 37072 : inner_type = TREE_TYPE (inner_base);
3998 : /* If candidate is added from a biv whose type is smaller than
3999 : ctype, we know both candidate and the biv won't overflow.
4000 : In this case, it's safe to skip the conversion in candidate.
4001 : As an example, (unsigned short)((unsigned long)A) is equal to
4002 : (unsigned short)A, if A has a type no larger than short. */
4003 37072 : if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4004 : {
4005 36112 : cbase = inner_base;
4006 36112 : cstep = inner_step;
4007 : }
4008 : }
4009 1614071 : cbase = fold_convert (uutype, cbase);
4010 1614071 : cstep = fold_convert (uutype, cstep);
4011 1614071 : var = fold_convert (uutype, var);
4012 : }
4013 :
4014 : /* Ratio is 1 when computing the value of biv cand by itself.
4015 : We can't rely on constant_multiple_of in this case because the
4016 : use is created after the original biv is selected. The call
4017 : could fail because of inconsistent fold behavior. See PR68021
4018 : for more information. */
4019 17192035 : if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4020 : {
4021 5326 : gcc_assert (is_gimple_assign (use->stmt));
4022 5326 : gcc_assert (use->iv->ssa_name == cand->var_after);
4023 5326 : gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4024 5326 : rat = 1;
4025 : }
4026 17186709 : else if (!constant_multiple_of (ustep, cstep, &rat, data))
4027 : return false;
4028 :
4029 14252010 : if (prat)
4030 12772483 : *prat = rat;
4031 :
4032 : /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4033 : type, we achieve better folding by computing their difference in this
4034 : wider type, and cast the result to UUTYPE. We do not need to worry about
4035 : overflows, as all the arithmetic will in the end be performed in UUTYPE
4036 : anyway. */
4037 14252010 : common_type = determine_common_wider_type (&ubase, &cbase);
4038 :
4039 : /* use = ubase - ratio * cbase + ratio * var; AFF_INV collects the first two terms and AFF_VAR the last. */
4040 14252010 : tree_to_aff_combination (ubase, common_type, aff_inv);
4041 14252010 : tree_to_aff_combination (cbase, common_type, &aff_cbase);
4042 14252010 : tree_to_aff_combination (var, uutype, aff_var);
4043 :
4044 : /* We need to shift the value if we are after the increment: add one step to the candidate base. */
4045 14252010 : if (stmt_after_increment (data->current_loop, cand, at))
4046 : {
4047 3226961 : aff_tree cstep_aff;
4048 :
4049 3226961 : if (common_type != uutype)
4050 839289 : cstep_common = fold_convert (common_type, cstep);
4051 : else
4052 : cstep_common = cstep;
4053 :
4054 3226961 : tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4055 3226961 : aff_combination_add (&aff_cbase, &cstep_aff);
4056 3226961 : }
4057 :
4058 14252010 : aff_combination_scale (&aff_cbase, -rat);
4059 14252010 : aff_combination_add (aff_inv, &aff_cbase);
4060 14252010 : if (common_type != uutype)
4061 9632390 : aff_combination_convert (aff_inv, uutype);
4062 :
4063 14252010 : aff_combination_scale (aff_var, rat);
4064 14252010 : return true;
4065 17192820 : }
4066 :
4067 : /* Determines the expression by that USE is expressed from induction variable
4068 : CAND at statement AT in DATA's current loop. The expression is stored in a
4069 : decomposed form into AFF. Returns false if USE cannot be expressed using
4070 : CAND. */
4071 :
4072 : static bool
4073 1242644 : get_computation_aff (struct ivopts_data *data, gimple *at, struct iv_use *use,
4074 : struct iv_cand *cand, class aff_tree *aff)
4075 : {
4076 1242644 : aff_tree aff_var;
4077 :
4078 1242644 : if (!get_computation_aff_1 (data, at, use, cand, aff, &aff_var))
4079 : return false;
4080 :
4081 1134874 : aff_combination_add (aff, &aff_var);
4082 1134874 : return true;
4083 1242644 : }
4084 :
4085 : /* Return the type of USE. */
4086 :
4087 : static tree
4088 1011102 : get_use_type (struct iv_use *use)
4089 : {
4090 1011102 : tree base_type = TREE_TYPE (use->iv->base);
4091 1011102 : tree type;
4092 :
4093 1011102 : if (use->type == USE_REF_ADDRESS)
4094 : {
4095 : /* The base_type may be a void pointer. Create a pointer type based on
4096 : the mem_ref instead. */
4097 0 : type = build_pointer_type (TREE_TYPE (*use->op_p));
4098 0 : gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4099 : == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4100 : }
4101 : else
4102 : type = base_type;
4103 :
4104 1011102 : return type;
4105 : }
4106 :
4107 : /* Determines the expression by that USE is expressed from induction variable
4108 : CAND at statement AT in DATA's current loop. The computation is
4109 : unshared. */
4110 :
4111 : static tree
4112 384157 : get_computation_at (struct ivopts_data *data, gimple *at,
4113 : struct iv_use *use, struct iv_cand *cand)
4114 : {
4115 384157 : aff_tree aff;
4116 384157 : tree type = get_use_type (use);
4117 :
4118 384157 : if (!get_computation_aff (data, at, use, cand, &aff))
4119 : return NULL_TREE;
4120 276387 : unshare_aff_combination (&aff);
4121 276387 : return fold_convert (type, aff_combination_to_tree (&aff));
4122 384157 : }
4123 :
4124 : /* Like get_computation_at, but try harder, even if the computation
4125 : is more expensive. Intended for debug stmts. Returns NULL_TREE on failure. */
4126 :
4127 : static tree
4128 179522 : get_debug_computation_at (struct ivopts_data *data, gimple *at,
4129 : struct iv_use *use, struct iv_cand *cand)
4130 : {
4131 179522 : if (tree ret = get_computation_at (data, at, use, cand))
4132 : return ret;
4133 :
4134 107770 : tree ubase = use->iv->base, ustep = use->iv->step;
4135 107770 : tree cbase = cand->iv->base, cstep = cand->iv->step;
4136 107770 : tree var;
4137 107770 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4138 107770 : widest_int rat;
4139 :
4140 : /* This fallback only handles a use that is strictly narrower than the candidate. */
4141 107770 : if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4142 : return NULL_TREE;
4143 :
4144 : /* Try to handle the case that get_computation_at doesn't:
4145 : try to express
4146 : use = ubase + (var - cbase) / ratio, where CAND.step == ratio * USE.step. */
4147 8886 : if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4148 : &rat, data))
4149 : return NULL_TREE;
4150 :
4151 7793 : bool neg_p = false;
4152 7793 : if (wi::neg_p (rat))
4153 : {
4154 1057 : if (TYPE_UNSIGNED (ctype))
4155 : return NULL_TREE;
4156 0 : neg_p = true;
4157 0 : rat = wi::neg (rat);
4158 : }
4159 :
4160 : /* If both IVs can wrap around and CAND doesn't have a power of two step,
4161 : it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4162 : the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4163 : uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4164 : ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4165 6736 : if (!use->iv->no_overflow
4166 62 : && !cand->iv->no_overflow
4167 6785 : && !integer_pow2p (cstep))
4168 : return NULL_TREE;
4169 :
4170 6722 : int bits = wi::exact_log2 (rat); /* Set to floor_log2 (rat) + 1 below when RAT is not a power of two. */
4171 6722 : if (bits == -1)
4172 663 : bits = wi::floor_log2 (rat) + 1;
4173 6722 : if (!cand->iv->no_overflow
4174 6722 : && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4175 : return NULL_TREE;
4176 :
4177 6722 : var = var_at_stmt (data->current_loop, cand, at);
4178 :
4179 6722 : if (POINTER_TYPE_P (ctype)) /* Do the arithmetic on a pointer candidate in the corresponding unsigned type. */
4180 : {
4181 120 : ctype = unsigned_type_for (ctype);
4182 120 : cbase = fold_convert (ctype, cbase);
4183 120 : cstep = fold_convert (ctype, cstep);
4184 120 : var = fold_convert (ctype, var);
4185 : }
4186 :
4187 6722 : if (stmt_after_increment (data->current_loop, cand, at)) /* Undo the increment that already happened at AT. */
4188 70 : var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4189 : unshare_expr (cstep));
4190 :
4191 6722 : var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4192 6722 : var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4193 : wide_int_to_tree (TREE_TYPE (var), rat));
4194 6722 : if (POINTER_TYPE_P (utype))
4195 : {
4196 0 : var = fold_convert (sizetype, var);
4197 0 : if (neg_p)
4198 0 : var = fold_build1 (NEGATE_EXPR, sizetype, var);
4199 0 : var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4200 : }
4201 : else
4202 : {
4203 6722 : var = fold_convert (utype, var);
4204 13444 : var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4205 : ubase, var);
4206 : }
4207 : return var;
4208 107770 : }
4209 :
4210 : /* Adjust the cost COST for being in loop setup rather than loop body.
4211 : If we're optimizing for space, the loop setup overhead is constant;
4212 : if we're optimizing for speed, amortize it over the per-iteration cost.
4213 : If ROUND_UP_P is true, the result is round up rather than to zero when
4214 : optimizing for speed. */
4215 : static int64_t
4216 10370914 : adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4217 : bool round_up_p = false)
4218 : {
4219 10370914 : if (cost == INFTY)
4220 : return cost;
4221 10370914 : else if (optimize_loop_for_speed_p (data->current_loop))
4222 : {
4223 8735855 : uint64_t niters = avg_loop_niter (data->current_loop);
4224 8735855 : if (niters > (uint64_t) cost)
4225 13406997 : return (round_up_p && cost != 0) ? 1 : 0;
4226 1850946 : return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4227 : }
4228 : else
4229 : return cost;
4230 : }
4231 :
4232 : /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4233 : EXPR operand holding the shift. COST0 and COST1 are the costs for
4234 : calculating the operands of EXPR. Returns true if successful, and returns
4235 : the cost in COST. */
4236 :
4237 : static bool
4238 1445212 : get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4239 : comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4240 : {
4241 1445212 : comp_cost res;
4242 1445212 : tree op1 = TREE_OPERAND (expr, 1);
4243 1445212 : tree cst = TREE_OPERAND (mult, 1);
4244 1445212 : tree multop = TREE_OPERAND (mult, 0);
4245 1445212 : int m = exact_log2 (int_cst_value (cst));
4246 4335126 : int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4247 1445212 : int as_cost, sa_cost;
4248 1445212 : bool mult_in_op1;
4249 :
4250 1445212 : if (!(m >= 0 && m < maxm))
4251 : return false;
4252 :
4253 959365 : STRIP_NOPS (op1);
4254 959365 : mult_in_op1 = operand_equal_p (op1, mult, 0);
4255 :
4256 959365 : as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4257 :
4258 : /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4259 : use that in preference to a shift insn followed by an add insn. */
4260 959365 : sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4261 959365 : ? shiftadd_cost (speed, mode, m)
4262 : : (mult_in_op1
4263 145085 : ? shiftsub1_cost (speed, mode, m)
4264 26262 : : shiftsub0_cost (speed, mode, m)));
4265 :
4266 959365 : res = comp_cost (MIN (as_cost, sa_cost), 0);
4267 1736776 : res += (mult_in_op1 ? cost0 : cost1);
4268 :
4269 959365 : STRIP_NOPS (multop);
4270 959365 : if (!is_gimple_val (multop))
4271 489071 : res += force_expr_to_var_cost (multop, speed);
4272 :
4273 959365 : *cost = res;
4274 959365 : return true;
4275 : }
4276 :
4277 : /* Estimates cost of forcing expression EXPR into a variable. SPEED selects the speed (true) or size (false) costs. */
4278 :
4279 : static comp_cost
4280 28869463 : force_expr_to_var_cost (tree expr, bool speed)
4281 : {
4282 28869463 : static bool costs_initialized = false; /* The three tables below are filled in on the first call only. */
4283 28869463 : static unsigned integer_cost [2]; /* Each table is indexed by SPEED. */
4284 28869463 : static unsigned symbol_cost [2];
4285 28869463 : static unsigned address_cost [2];
4286 28869463 : tree op0, op1;
4287 28869463 : comp_cost cost0, cost1, cost;
4288 28869463 : machine_mode mode;
4289 28869463 : scalar_int_mode int_mode;
4290 :
4291 28869463 : if (!costs_initialized)
4292 : {
4293 41428 : tree type = build_pointer_type (integer_type_node);
4294 41428 : tree var, addr;
4295 41428 : rtx x;
4296 41428 : int i;
4297 :
4298 41428 : var = create_tmp_var_raw (integer_type_node, "test_var"); /* Static dummy variable used to measure symbol and address costs. */
4299 41428 : TREE_STATIC (var) = 1;
4300 41428 : x = produce_memory_decl_rtl (var, NULL);
4301 41428 : SET_DECL_RTL (var, x);
4302 :
4303 41428 : addr = build1 (ADDR_EXPR, type, var);
4304 :
4305 :
4306 165712 : for (i = 0; i < 2; i++)
4307 : {
4308 82856 : integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4309 : 2000), i);
4310 :
4311 82856 : symbol_cost[i] = computation_cost (addr, i) + 1;
4312 :
4313 82856 : address_cost[i]
4314 82856 : = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4315 82856 : if (dump_file && (dump_flags & TDF_DETAILS))
4316 : {
4317 105 : fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4318 70 : fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4319 70 : fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4320 70 : fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4321 70 : fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4322 70 : fprintf (dump_file, "\n");
4323 : }
4324 : }
4325 :
4326 41428 : costs_initialized = true;
4327 : }
4328 :
4329 28869463 : STRIP_NOPS (expr);
4330 :
4331 28869463 : if (SSA_VAR_P (expr)) /* Already a variable: nothing to force. */
4332 5413832 : return no_cost;
4333 :
4334 23455631 : if (is_gimple_min_invariant (expr)) /* Invariants use the precomputed integer / symbol / address costs. */
4335 : {
4336 13941404 : if (poly_int_tree_p (expr))
4337 11845243 : return comp_cost (integer_cost [speed], 0);
4338 :
4339 2096161 : if (TREE_CODE (expr) == ADDR_EXPR)
4340 : {
4341 2096161 : tree obj = TREE_OPERAND (expr, 0);
4342 :
4343 2096161 : if (VAR_P (obj)
4344 : || TREE_CODE (obj) == PARM_DECL
4345 : || TREE_CODE (obj) == RESULT_DECL)
4346 2032084 : return comp_cost (symbol_cost [speed], 0);
4347 : }
4348 :
4349 64077 : return comp_cost (address_cost [speed], 0);
4350 : }
4351 :
4352 9514227 : switch (TREE_CODE (expr)) /* First switch: pick the operands OP0 / OP1 whose own costs are added. */
4353 : {
4354 8133983 : case POINTER_PLUS_EXPR:
4355 8133983 : case PLUS_EXPR:
4356 8133983 : case MINUS_EXPR:
4357 8133983 : case MULT_EXPR:
4358 8133983 : case EXACT_DIV_EXPR:
4359 8133983 : case TRUNC_DIV_EXPR:
4360 8133983 : case BIT_AND_EXPR:
4361 8133983 : case BIT_IOR_EXPR:
4362 8133983 : case LSHIFT_EXPR:
4363 8133983 : case RSHIFT_EXPR:
4364 8133983 : op0 = TREE_OPERAND (expr, 0);
4365 8133983 : op1 = TREE_OPERAND (expr, 1);
4366 8133983 : STRIP_NOPS (op0);
4367 8133983 : STRIP_NOPS (op1);
4368 8133983 : break;
4369 :
4370 1380204 : CASE_CONVERT:
4371 1380204 : case NEGATE_EXPR:
4372 1380204 : case BIT_NOT_EXPR:
4373 1380204 : op0 = TREE_OPERAND (expr, 0);
4374 1380204 : STRIP_NOPS (op0);
4375 1380204 : op1 = NULL_TREE;
4376 1380204 : break;
4377 : /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case a
4378 : COND_EXPR is introduced for the IV base. Better cost estimation for
4379 : this COND_EXPR and for tcc_comparison still needs to be supported. */
4380 0 : case COND_EXPR:
4381 0 : op0 = TREE_OPERAND (expr, 1);
4382 0 : STRIP_NOPS (op0);
4383 0 : op1 = TREE_OPERAND (expr, 2);
4384 0 : STRIP_NOPS (op1);
4385 0 : break;
4386 0 : case LT_EXPR:
4387 0 : case LE_EXPR:
4388 0 : case GT_EXPR:
4389 0 : case GE_EXPR:
4390 0 : case EQ_EXPR:
4391 0 : case NE_EXPR:
4392 0 : case UNORDERED_EXPR:
4393 0 : case ORDERED_EXPR:
4394 0 : case UNLT_EXPR:
4395 0 : case UNLE_EXPR:
4396 0 : case UNGT_EXPR:
4397 0 : case UNGE_EXPR:
4398 0 : case UNEQ_EXPR:
4399 0 : case LTGT_EXPR:
4400 0 : case MAX_EXPR:
4401 0 : case MIN_EXPR:
4402 0 : op0 = TREE_OPERAND (expr, 0);
4403 0 : STRIP_NOPS (op0);
4404 0 : op1 = TREE_OPERAND (expr, 1);
4405 0 : STRIP_NOPS (op1);
4406 0 : break;
4407 :
4408 40 : default:
4409 : /* Any other expression code: just an arbitrary value, FIXME. */
4410 40 : return comp_cost (target_spill_cost[speed], 0);
4411 : }
4412 :
4413 9514187 : if (op0 == NULL_TREE
4414 9514187 : || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4415 4498697 : cost0 = no_cost;
4416 : else
4417 5015490 : cost0 = force_expr_to_var_cost (op0, speed);
4418 :
4419 9514187 : if (op1 == NULL_TREE
4420 8133983 : || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4421 8736959 : cost1 = no_cost;
4422 : else
4423 777228 : cost1 = force_expr_to_var_cost (op1, speed);
4424 :
4425 9514187 : mode = TYPE_MODE (TREE_TYPE (expr));
4426 9514187 : switch (TREE_CODE (expr)) /* Second switch: cost of the operation itself. */
4427 : {
4428 5713577 : case POINTER_PLUS_EXPR:
4429 5713577 : case PLUS_EXPR:
4430 5713577 : case MINUS_EXPR:
4431 5713577 : case NEGATE_EXPR:
4432 5713577 : cost = comp_cost (add_cost (speed, mode), 0);
4433 5713577 : if (TREE_CODE (expr) != NEGATE_EXPR)
4434 : {
4435 5572138 : tree mult = NULL_TREE;
4436 5572138 : comp_cost sa_cost;
4437 5572138 : if (TREE_CODE (op1) == MULT_EXPR)
4438 : mult = op1;
4439 5171492 : else if (TREE_CODE (op0) == MULT_EXPR)
4440 : mult = op0;
4441 :
4442 : if (mult != NULL_TREE
4443 4612773 : && is_a <scalar_int_mode> (mode, &int_mode)
4444 1694376 : && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4445 1445212 : && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4446 : speed, &sa_cost))
4447 959365 : return sa_cost;
4448 : }
4449 : break;
4450 :
4451 1225870 : CASE_CONVERT:
4452 1225870 : {
4453 1225870 : tree inner_mode, outer_mode;
4454 1225870 : outer_mode = TREE_TYPE (expr);
4455 1225870 : inner_mode = TREE_TYPE (op0);
4456 1225870 : cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4457 1225870 : TYPE_MODE (inner_mode), speed), 0);
4458 : }
4459 1225870 : break;
4460 :
4461 2462618 : case MULT_EXPR:
4462 2462618 : if (cst_and_fits_in_hwi (op0))
4463 0 : cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4464 0 : mode, speed), 0);
4465 2462618 : else if (cst_and_fits_in_hwi (op1))
4466 2009907 : cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4467 2009907 : mode, speed), 0);
4468 : else
4469 452711 : return comp_cost (target_spill_cost [speed], 0);
4470 : break;
4471 :
4472 53112 : case EXACT_DIV_EXPR:
4473 53112 : case TRUNC_DIV_EXPR:
4474 : /* Division by power of two is usually cheap, so charge it like an
4475 : addition. Anything else gets the spill cost. */
4476 53112 : if (integer_pow2p (TREE_OPERAND (expr, 1)))
4477 53112 : cost = comp_cost (add_cost (speed, mode), 0);
4478 : else
4479 0 : cost = comp_cost (target_spill_cost[speed], 0);
4480 : break;
4481 :
4482 59010 : case BIT_AND_EXPR:
4483 59010 : case BIT_IOR_EXPR:
4484 59010 : case BIT_NOT_EXPR:
4485 59010 : case LSHIFT_EXPR:
4486 59010 : case RSHIFT_EXPR:
4487 59010 : cost = comp_cost (add_cost (speed, mode), 0);
4488 59010 : break;
4489 0 : case COND_EXPR: /* The operation cost is the cost of computing the condition (operand 0). */
4490 0 : op0 = TREE_OPERAND (expr, 0);
4491 0 : STRIP_NOPS (op0);
4492 0 : if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4493 0 : || CONSTANT_CLASS_P (op0))
4494 0 : cost = no_cost;
4495 : else
4496 0 : cost = force_expr_to_var_cost (op0, speed);
4497 : break;
4498 0 : case LT_EXPR:
4499 0 : case LE_EXPR:
4500 0 : case GT_EXPR:
4501 0 : case GE_EXPR:
4502 0 : case EQ_EXPR:
4503 0 : case NE_EXPR:
4504 0 : case UNORDERED_EXPR:
4505 0 : case ORDERED_EXPR:
4506 0 : case UNLT_EXPR:
4507 0 : case UNLE_EXPR:
4508 0 : case UNGT_EXPR:
4509 0 : case UNGE_EXPR:
4510 0 : case UNEQ_EXPR:
4511 0 : case LTGT_EXPR:
4512 0 : case MAX_EXPR:
4513 0 : case MIN_EXPR:
4514 : /* Simply use the add cost for now; FIXME if there is a more accurate
4515 : way to evaluate this cost. */
4516 0 : cost = comp_cost (add_cost (speed, mode), 0);
4517 0 : break;
4518 :
4519 0 : default:
4520 0 : gcc_unreachable ();
4521 : }
4522 :
4523 8102111 : cost += cost0;
4524 8102111 : cost += cost1;
4525 8102111 : return cost;
4526 : }
4527 :
4528 : /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4529 : invariants the computation depends on. */
4530 :
4531 : static comp_cost
4532 24628495 : force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4533 : {
4534 24628495 : if (!expr)
4535 2040821 : return no_cost;
4536 :
4537 22587674 : find_inv_vars (data, &expr, inv_vars);
4538 22587674 : return force_expr_to_var_cost (expr, data->speed);
4539 : }
4540 :
4541 : /* Returns cost of auto-modifying address expression in shape base + offset.
4542 : AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4543 : address expression. The address expression has ADDR_MODE in addr space
4544 : AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4545 : speed or size. */
4546 :
4547 : enum ainc_type /* Kinds of auto-modifying address; the first four index ainc_cost_data::costs. */
4548 : {
4549 : AINC_PRE_INC, /* Pre increment. */
4550 : AINC_PRE_DEC, /* Pre decrement. */
4551 : AINC_POST_INC, /* Post increment. */
4552 : AINC_POST_DEC, /* Post decrement. */
4553 : AINC_NONE /* No auto increment; also the number of auto increment types. */
4554 : };
4555 :
4556 : struct ainc_cost_data /* Per-kind address costs of auto-modifying addresses. */
4557 : {
4558 : int64_t costs[AINC_NONE]; /* One entry per ainc_type enumerator other than AINC_NONE. */
4559 : };
4560 :
4561 : static comp_cost
4562 1759476 : get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4563 : machine_mode addr_mode, machine_mode mem_mode,
4564 : addr_space_t as, bool speed)
4565 : {
4566 1759476 : if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4567 : && !USE_STORE_PRE_DECREMENT (mem_mode)
4568 : && !USE_LOAD_POST_DECREMENT (mem_mode)
4569 : && !USE_STORE_POST_DECREMENT (mem_mode)
4570 : && !USE_LOAD_PRE_INCREMENT (mem_mode)
4571 : && !USE_STORE_PRE_INCREMENT (mem_mode)
4572 : && !USE_LOAD_POST_INCREMENT (mem_mode)
4573 : && !USE_STORE_POST_INCREMENT (mem_mode))
4574 1759476 : return infinite_cost;
4575 :
4576 : static vec<ainc_cost_data *> ainc_cost_data_list;
4577 : unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4578 : if (idx >= ainc_cost_data_list.length ())
4579 : {
4580 : unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4581 :
4582 : gcc_assert (nsize > idx);
4583 : ainc_cost_data_list.safe_grow_cleared (nsize, true);
4584 : }
4585 :
4586 : ainc_cost_data *data = ainc_cost_data_list[idx];
4587 : if (data == NULL)
4588 : {
4589 : rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4590 :
4591 : data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4592 : data->costs[AINC_PRE_DEC] = INFTY;
4593 : data->costs[AINC_POST_DEC] = INFTY;
4594 : data->costs[AINC_PRE_INC] = INFTY;
4595 : data->costs[AINC_POST_INC] = INFTY;
4596 : if (USE_LOAD_PRE_DECREMENT (mem_mode)
4597 : || USE_STORE_PRE_DECREMENT (mem_mode))
4598 : {
4599 : rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4600 :
4601 : if (memory_address_addr_space_p (mem_mode, addr, as))
4602 : data->costs[AINC_PRE_DEC]
4603 : = address_cost (addr, mem_mode, as, speed);
4604 : }
4605 : if (USE_LOAD_POST_DECREMENT (mem_mode)
4606 : || USE_STORE_POST_DECREMENT (mem_mode))
4607 : {
4608 : rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4609 :
4610 : if (memory_address_addr_space_p (mem_mode, addr, as))
4611 : data->costs[AINC_POST_DEC]
4612 : = address_cost (addr, mem_mode, as, speed);
4613 : }
4614 : if (USE_LOAD_PRE_INCREMENT (mem_mode)
4615 : || USE_STORE_PRE_INCREMENT (mem_mode))
4616 : {
4617 : rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4618 :
4619 : if (memory_address_addr_space_p (mem_mode, addr, as))
4620 : data->costs[AINC_PRE_INC]
4621 : = address_cost (addr, mem_mode, as, speed);
4622 : }
4623 : if (USE_LOAD_POST_INCREMENT (mem_mode)
4624 : || USE_STORE_POST_INCREMENT (mem_mode))
4625 : {
4626 : rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4627 :
4628 : if (memory_address_addr_space_p (mem_mode, addr, as))
4629 : data->costs[AINC_POST_INC]
4630 : = address_cost (addr, mem_mode, as, speed);
4631 : }
4632 : ainc_cost_data_list[idx] = data;
4633 : }
4634 :
4635 : poly_int64 msize = GET_MODE_SIZE (mem_mode);
4636 : if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4637 : return comp_cost (data->costs[AINC_POST_INC], 0);
4638 : if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4639 : return comp_cost (data->costs[AINC_POST_DEC], 0);
4640 : if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4641 : return comp_cost (data->costs[AINC_PRE_INC], 0);
4642 : if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4643 : return comp_cost (data->costs[AINC_PRE_DEC], 0);
4644 :
4645 : return infinite_cost;
4646 : }
4647 :
4648 : /* Return cost of computing USE's address expression by using CAND.
4649 : AFF_INV and AFF_VAR represent invariant and variant parts of the
4650 : address expression, respectively. If AFF_INV is simple, store
4651 : the loop invariant variables which are depended by it in INV_VARS;
4652 : if AFF_INV is complicated, handle it as a new invariant expression
4653 : and record it in INV_EXPR. RATIO indicates multiple times between
4654 : steps of USE and CAND. If CAN_AUTOINC is nonNULL, store boolean
4655 : value to it indicating if this is an auto-increment address. */
4656 :
4657 : static comp_cost
4658 5507337 : get_address_cost (struct ivopts_data *data, struct iv_use *use,
4659 : struct iv_cand *cand, aff_tree *aff_inv,
4660 : aff_tree *aff_var, HOST_WIDE_INT ratio,
4661 : bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4662 : bool *can_autoinc, bool speed)
4663 : {
4664 5507337 : rtx addr;
4665 5507337 : bool simple_inv = true;
4666 5507337 : tree comp_inv = NULL_TREE, type = aff_var->type;
4667 5507337 : comp_cost var_cost = no_cost, cost = no_cost;
4668 5507337 : struct mem_address parts = {NULL_TREE, integer_one_node,
4669 5507337 : NULL_TREE, NULL_TREE, NULL_TREE};
4670 5507337 : machine_mode addr_mode = TYPE_MODE (type);
4671 5507337 : machine_mode mem_mode = TYPE_MODE (use->mem_type);
4672 5507337 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4673 : /* Only true if ratio != 1. */
4674 5507337 : bool ok_with_ratio_p = false;
4675 5507337 : bool ok_without_ratio_p = false;
4676 5507337 : code_helper code = ERROR_MARK;
4677 :
4678 5507337 : if (use->type == USE_PTR_ADDRESS)
4679 : {
4680 4496 : gcall *call = as_a<gcall *> (use->stmt);
4681 4496 : gcc_assert (gimple_call_internal_p (call));
4682 4496 : code = gimple_call_internal_fn (call);
4683 : }
4684 :
4685 5507337 : if (!aff_combination_const_p (aff_inv))
4686 : {
4687 3678739 : parts.index = integer_one_node;
4688 : /* Addressing mode "base + index". */
4689 3678739 : ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4690 3678739 : if (ratio != 1)
4691 : {
4692 2788344 : parts.step = wide_int_to_tree (type, ratio);
4693 : /* Addressing mode "base + index << scale". */
4694 2788344 : ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4695 2788344 : if (!ok_with_ratio_p)
4696 1704148 : parts.step = NULL_TREE;
4697 : }
4698 2594543 : if (ok_with_ratio_p || ok_without_ratio_p)
4699 : {
4700 3678739 : if (maybe_ne (aff_inv->offset, 0))
4701 : {
4702 2405523 : parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4703 : /* Addressing mode "base + index [<< scale] + offset". */
4704 2405523 : if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4705 487 : parts.offset = NULL_TREE;
4706 : else
4707 2405036 : aff_inv->offset = 0;
4708 : }
4709 :
4710 3678739 : move_fixed_address_to_symbol (&parts, aff_inv);
4711 : /* Base is fixed address and is moved to symbol part. */
4712 3678739 : if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4713 445482 : parts.base = NULL_TREE;
4714 :
4715 : /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4716 3678739 : if (parts.symbol != NULL_TREE
4717 3678739 : && !valid_mem_ref_p (mem_mode, as, &parts, code))
4718 : {
4719 6544 : aff_combination_add_elt (aff_inv, parts.symbol, 1);
4720 6544 : parts.symbol = NULL_TREE;
4721 : /* Reset SIMPLE_INV since symbol address needs to be computed
4722 : outside of address expression in this case. */
4723 6544 : simple_inv = false;
4724 : /* Symbol part is moved back to base part, it can't be NULL. */
4725 6544 : parts.base = integer_one_node;
4726 : }
4727 : }
4728 : else
4729 0 : parts.index = NULL_TREE;
4730 : }
4731 : else
4732 : {
4733 1828598 : poly_int64 ainc_step;
4734 1828598 : if (can_autoinc
4735 1828598 : && ratio == 1
4736 3657188 : && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4737 : {
4738 1759476 : poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4739 :
4740 1759476 : if (stmt_after_increment (data->current_loop, cand, use->stmt))
4741 : ainc_offset += ainc_step;
4742 1759476 : cost = get_address_cost_ainc (ainc_step, ainc_offset,
4743 : addr_mode, mem_mode, as, speed);
4744 1759476 : if (!cost.infinite_cost_p ())
4745 : {
4746 0 : *can_autoinc = true;
4747 0 : return cost;
4748 : }
4749 1759476 : cost = no_cost;
4750 : }
4751 1828598 : if (!aff_combination_zero_p (aff_inv))
4752 : {
4753 1009580 : parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4754 : /* Addressing mode "base + offset". */
4755 1009580 : if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4756 44 : parts.offset = NULL_TREE;
4757 : else
4758 1009536 : aff_inv->offset = 0;
4759 : }
4760 : }
4761 :
4762 1835142 : if (simple_inv)
4763 5500793 : simple_inv = (aff_inv == NULL
4764 8739755 : || aff_combination_const_p (aff_inv)
4765 8733211 : || aff_combination_singleton_var_p (aff_inv));
4766 5507337 : if (!aff_combination_zero_p (aff_inv))
4767 3239048 : comp_inv = aff_combination_to_tree (aff_inv);
4768 3239048 : if (comp_inv != NULL_TREE)
4769 3239048 : cost = force_var_cost (data, comp_inv, inv_vars);
4770 5507337 : if (ratio != 1 && parts.step == NULL_TREE)
4771 1704156 : var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4772 5507337 : if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4773 44 : var_cost += add_cost (speed, addr_mode);
4774 :
4775 5507337 : if (comp_inv && inv_expr && !simple_inv)
4776 : {
4777 750262 : *inv_expr = get_loop_invariant_expr (data, comp_inv);
4778 : /* Clear depends on. */
4779 750262 : if (*inv_expr != NULL && inv_vars && *inv_vars)
4780 431720 : bitmap_clear (*inv_vars);
4781 :
4782 : /* Cost of small invariant expression adjusted against loop niters
4783 : is usually zero, which makes it difficult to be differentiated
4784 : from candidate based on loop invariant variables. Secondly, the
4785 : generated invariant expression may not be hoisted out of loop by
4786 : following pass. We penalize the cost by rounding up in order to
4787 : neutralize such effects. */
4788 750262 : cost.cost = adjust_setup_cost (data, cost.cost, true);
4789 750262 : cost.scratch = cost.cost;
4790 : }
4791 :
4792 5507337 : cost += var_cost;
4793 5507337 : addr = addr_for_mem_ref (&parts, as, false);
4794 5507337 : gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4795 5507337 : cost += address_cost (addr, mem_mode, as, speed);
4796 :
4797 5507337 : if (parts.symbol != NULL_TREE)
4798 499581 : cost.complexity += 1;
4799 : /* Don't increase the complexity of adding a scaled index if it's
4800 : the only kind of index that the target allows. */
4801 5507337 : if (parts.step != NULL_TREE && ok_without_ratio_p)
4802 1084196 : cost.complexity += 1;
4803 5507337 : if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4804 3239004 : cost.complexity += 1;
4805 5507337 : if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4806 3414572 : cost.complexity += 1;
4807 :
4808 : return cost;
4809 : }
4810 :
4811 : /* Scale (multiply) the computed COST (except scratch part that should be
4812 : hoisted out a loop) by header->frequency / AT->frequency, which makes
4813 : expected cost more accurate. */
4814 :
4815 : static comp_cost
4816 12772483 : get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4817 : {
4818 12772483 : if (data->speed
4819 12772483 : && data->current_loop->header->count.to_frequency (cfun) > 0)
4820 : {
4821 11195143 : basic_block bb = gimple_bb (at);
4822 11195143 : gcc_assert (cost.scratch <= cost.cost);
4823 11195143 : int scale_factor = (int)(intptr_t) bb->aux;
4824 11195143 : if (scale_factor == 1)
4825 10634220 : return cost;
4826 :
4827 560923 : int64_t scaled_cost
4828 560923 : = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4829 :
4830 560923 : if (dump_file && (dump_flags & TDF_DETAILS))
4831 93 : fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4832 : "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4833 : 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4834 :
4835 : cost.cost = scaled_cost;
4836 : }
4837 :
4838 2138263 : return cost;
4839 : }
4840 :
4841 : /* Determines the cost of the computation by that USE is expressed
4842 : from induction variable CAND. If ADDRESS_P is true, we just need
4843 : to create an address from it, otherwise we want to get it into
4844 : register. A set of invariants we depend on is stored in INV_VARS.
4845 : If CAN_AUTOINC is nonnull, use it to record whether autoinc
4846 : addressing is likely. If INV_EXPR is nonnull, record invariant
4847 : expr entry in it. */
4848 :
4849 : static comp_cost
4850 20118285 : get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4851 : struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4852 : bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4853 : {
4854 20118285 : gimple *at = use->stmt;
4855 20118285 : tree ubase = use->iv->base, cbase = cand->iv->base;
4856 20118285 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4857 20118285 : tree comp_inv = NULL_TREE;
4858 20118285 : HOST_WIDE_INT ratio, aratio;
4859 20118285 : comp_cost cost;
4860 20118285 : widest_int rat;
4861 40236570 : aff_tree aff_inv, aff_var;
4862 20118285 : bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4863 :
4864 20118285 : if (inv_vars)
4865 17687486 : *inv_vars = NULL;
4866 20118285 : if (can_autoinc)
4867 8723957 : *can_autoinc = false;
4868 20118285 : if (inv_expr)
4869 19699494 : *inv_expr = NULL;
4870 :
4871 : /* Check if we have enough precision to express the values of use. */
4872 20118285 : if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4873 3060053 : return infinite_cost;
4874 :
4875 17058232 : if (address_p
4876 17058232 : || (use->iv->base_object
4877 2076848 : && cand->iv->base_object
4878 1002994 : && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4879 991563 : && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4880 : {
4881 : /* Do not try to express address of an object with computation based
4882 : on address of a different object. This may cause problems in rtl
4883 : level alias analysis (that does not expect this to be happening,
4884 : as this is illegal in C), and would be unlikely to be useful
4885 : anyway. */
4886 7869988 : if (use->iv->base_object
4887 7869988 : && cand->iv->base_object
4888 12048350 : && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4889 1452709 : return infinite_cost;
4890 : }
4891 :
4892 15605523 : if (!get_computation_aff_1 (data, at, use, cand, &aff_inv, &aff_var, &rat)
4893 15605523 : || !wi::fits_shwi_p (rat))
4894 2833040 : return infinite_cost;
4895 :
4896 12772483 : ratio = rat.to_shwi ();
4897 12772483 : if (address_p)
4898 : {
4899 5507337 : cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4900 : inv_vars, inv_expr, can_autoinc, speed);
4901 5507337 : cost = get_scaled_computation_cost_at (data, at, cost);
4902 : /* For doloop IV cand, add on the extra cost. */
4903 5507337 : cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4904 5507337 : return cost;
4905 : }
4906 :
4907 7265146 : bool simple_inv = (aff_combination_const_p (&aff_inv)
4908 2010941 : || aff_combination_singleton_var_p (&aff_inv));
4909 7265146 : tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4910 7265146 : aff_combination_convert (&aff_inv, signed_type);
4911 7265146 : if (!aff_combination_zero_p (&aff_inv))
4912 5224325 : comp_inv = aff_combination_to_tree (&aff_inv);
4913 :
4914 7265146 : cost = force_var_cost (data, comp_inv, inv_vars);
4915 7265146 : if (comp_inv && inv_expr && !simple_inv)
4916 : {
4917 1404100 : *inv_expr = get_loop_invariant_expr (data, comp_inv);
4918 : /* Clear depends on. */
4919 1404100 : if (*inv_expr != NULL && inv_vars && *inv_vars)
4920 873329 : bitmap_clear (*inv_vars);
4921 :
4922 1404100 : cost.cost = adjust_setup_cost (data, cost.cost);
4923 : /* Record setup cost in scratch field. */
4924 1404100 : cost.scratch = cost.cost;
4925 : }
4926 : /* Cost of constant integer can be covered when adding invariant part to
4927 : variant part. */
4928 5861046 : else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4929 3213359 : cost = no_cost;
4930 :
4931 : /* Need type narrowing to represent use with cand. */
4932 7265146 : if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4933 : {
4934 798391 : machine_mode outer_mode = TYPE_MODE (utype);
4935 798391 : machine_mode inner_mode = TYPE_MODE (ctype);
4936 798391 : cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4937 : }
4938 :
4939 : /* Turn a + i * (-c) into a - i * c. */
4940 7265146 : if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4941 1848333 : aratio = -ratio;
4942 : else
4943 : aratio = ratio;
4944 :
4945 7265146 : if (ratio != 1)
4946 2746006 : cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4947 :
4948 : /* TODO: We may also need to check if we can compute a + i * 4 in one
4949 : instruction. */
4950 : /* Need to add up the invariant and variant parts. */
4951 7265146 : if (comp_inv && !integer_zerop (comp_inv))
4952 10441702 : cost += add_cost (speed, TYPE_MODE (utype));
4953 :
4954 7265146 : cost = get_scaled_computation_cost_at (data, at, cost);
4955 :
4956 : /* For doloop IV cand, add on the extra cost. */
4957 7265146 : if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4958 0 : cost += targetm.doloop_cost_for_generic;
4959 :
4960 7265146 : return cost;
4961 20118285 : }
4962 :
4963 : /* Determines cost of computing the use in GROUP with CAND in a generic
4964 : expression. */
4965 :
4966 : static bool
4967 5544203 : determine_group_iv_cost_generic (struct ivopts_data *data,
4968 : struct iv_group *group, struct iv_cand *cand)
4969 : {
4970 5544203 : comp_cost cost;
4971 5544203 : iv_inv_expr_ent *inv_expr = NULL;
4972 5544203 : bitmap inv_vars = NULL, inv_exprs = NULL;
4973 5544203 : struct iv_use *use = group->vuses[0];
4974 :
4975 : /* The simple case first -- if we need to express value of the preserved
4976 : original biv, the cost is 0. This also prevents us from counting the
4977 : cost of increment twice -- once at this use and once in the cost of
4978 : the candidate. */
4979 5544203 : if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4980 57406 : cost = no_cost;
4981 : /* If the IV candidate involves undefined SSA values and is not the
4982 : same IV as on the USE avoid using that candidate here. */
4983 5486797 : else if (cand->involves_undefs
4984 5486797 : && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
4985 218 : return false;
4986 : else
4987 5486579 : cost = get_computation_cost (data, use, cand, false,
4988 : &inv_vars, NULL, &inv_expr);
4989 :
4990 5543985 : if (inv_expr)
4991 : {
4992 988994 : inv_exprs = BITMAP_ALLOC (NULL);
4993 988994 : bitmap_set_bit (inv_exprs, inv_expr->id);
4994 : }
4995 5543985 : set_group_iv_cost (data, group, cand, cost, inv_vars,
4996 : NULL_TREE, ERROR_MARK, inv_exprs);
4997 5543985 : return !cost.infinite_cost_p ();
4998 : }
4999 :
5000 : /* Determines cost of computing uses in GROUP with CAND in addresses. */
5001 :
5002 : static bool
5003 6293158 : determine_group_iv_cost_address (struct ivopts_data *data,
5004 : struct iv_group *group, struct iv_cand *cand)
5005 : {
5006 6293158 : unsigned i;
5007 6293158 : bitmap inv_vars = NULL, inv_exprs = NULL;
5008 6293158 : bool can_autoinc;
5009 6293158 : iv_inv_expr_ent *inv_expr = NULL;
5010 6293158 : struct iv_use *use = group->vuses[0];
5011 6293158 : comp_cost sum_cost = no_cost, cost;
5012 :
5013 6293158 : cost = get_computation_cost (data, use, cand, true,
5014 : &inv_vars, &can_autoinc, &inv_expr);
5015 :
5016 6293158 : if (inv_expr)
5017 : {
5018 461881 : inv_exprs = BITMAP_ALLOC (NULL);
5019 461881 : bitmap_set_bit (inv_exprs, inv_expr->id);
5020 : }
5021 6293158 : sum_cost = cost;
5022 6293158 : if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5023 : {
5024 0 : if (can_autoinc)
5025 0 : sum_cost -= cand->cost_step;
5026 : /* If we generated the candidate solely for exploiting autoincrement
5027 : opportunities, and it turns out it can't be used, set the cost to
5028 : infinity to make sure we ignore it. */
5029 0 : else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5030 0 : sum_cost = infinite_cost;
5031 : }
5032 :
5033 : /* Compute and add costs for rest uses of this group. */
5034 8305166 : for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5035 : {
5036 2012008 : struct iv_use *next = group->vuses[i];
5037 :
5038 : /* TODO: We could skip computing cost for sub iv_use when it has the
5039 : same cost as the first iv_use, but the cost really depends on the
5040 : offset and where the iv_use is. */
5041 2012008 : cost = get_computation_cost (data, next, cand, true,
5042 : NULL, &can_autoinc, &inv_expr);
5043 2012008 : if (inv_expr)
5044 : {
5045 288152 : if (!inv_exprs)
5046 90 : inv_exprs = BITMAP_ALLOC (NULL);
5047 :
5048 : /* Uses in a group can share setup code,
5049 : so only add setup cost once. */
5050 288152 : if (bitmap_bit_p (inv_exprs, inv_expr->id))
5051 287765 : cost -= cost.scratch;
5052 : else
5053 387 : bitmap_set_bit (inv_exprs, inv_expr->id);
5054 : }
5055 2012008 : sum_cost += cost;
5056 : }
5057 6293158 : set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5058 : NULL_TREE, ERROR_MARK, inv_exprs);
5059 :
5060 6293158 : return !sum_cost.infinite_cost_p ();
5061 : }
5062 :
5063 : /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5064 : and stores it to VAL. */
5065 :
5066 : static void
5067 3806062 : cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5068 : class tree_niter_desc *desc, aff_tree *val)
5069 : {
5070 11418186 : aff_tree step, delta, nit;
5071 3806062 : struct iv *iv = cand->iv;
5072 3806062 : tree type = TREE_TYPE (iv->base);
5073 3806062 : tree niter = desc->niter;
5074 3806062 : bool after_adjust = stmt_after_increment (loop, cand, at);
5075 3806062 : tree steptype;
5076 :
5077 3806062 : if (POINTER_TYPE_P (type))
5078 108344 : steptype = sizetype;
5079 : else
5080 3697718 : steptype = unsigned_type_for (type);
5081 :
5082 : /* If AFTER_ADJUST is required, the code below generates the equivalent
5083 : of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5084 : BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5085 : SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5086 : doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5087 : class for common idioms that we know are safe. */
5088 3806062 : if (after_adjust
5089 3612845 : && desc->control.no_overflow
5090 3605183 : && integer_onep (desc->control.step)
5091 960451 : && (desc->cmp == LT_EXPR
5092 39794 : || desc->cmp == NE_EXPR)
5093 4766513 : && TREE_CODE (desc->bound) == SSA_NAME)
5094 : {
5095 509633 : if (integer_onep (desc->control.base))
5096 : {
5097 376418 : niter = desc->bound;
5098 376418 : after_adjust = false;
5099 : }
5100 133215 : else if (TREE_CODE (niter) == MINUS_EXPR
5101 133215 : && integer_onep (TREE_OPERAND (niter, 1)))
5102 : {
5103 71848 : niter = TREE_OPERAND (niter, 0);
5104 71848 : after_adjust = false;
5105 : }
5106 : }
5107 :
5108 3806062 : tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5109 3806062 : aff_combination_convert (&step, steptype);
5110 3806062 : tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5111 3806062 : aff_combination_convert (&nit, steptype);
5112 3806062 : aff_combination_mult (&nit, &step, &delta);
5113 3806062 : if (after_adjust)
5114 3164579 : aff_combination_add (&delta, &step);
5115 :
5116 3806062 : tree_to_aff_combination (iv->base, type, val);
5117 3806062 : if (!POINTER_TYPE_P (type))
5118 3697718 : aff_combination_convert (val, steptype);
5119 3806062 : aff_combination_add (val, &delta);
5120 3806062 : }
5121 :
5122 : /* Returns period of induction variable iv. */
5123 :
5124 : static tree
5125 4076837 : iv_period (struct iv *iv)
5126 : {
5127 4076837 : tree step = iv->step, period, type;
5128 4076837 : tree pow2div;
5129 :
5130 4076837 : gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5131 :
5132 4076837 : type = unsigned_type_for (TREE_TYPE (step));
5133 : /* Period of the iv is lcm (step, type_range)/step -1,
5134 : i.e., N*type_range/step - 1. Since type range is power
5135 : of two, N == (step >> num_of_ending_zeros_binary (step),
5136 : so the final result is
5137 :
5138 : (type_range >> num_of_ending_zeros_binary (step)) - 1
5139 :
5140 : */
5141 4076837 : pow2div = num_ending_zeros (step);
5142 :
5143 12230511 : period = build_low_bits_mask (type,
5144 4076837 : (TYPE_PRECISION (type)
5145 4076837 : - tree_to_uhwi (pow2div)));
5146 :
5147 4076837 : return period;
5148 : }
5149 :
5150 : /* Returns the comparison operator used when eliminating the iv USE. */
5151 :
5152 : static enum tree_code
5153 3806062 : iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5154 : {
5155 3806062 : class loop *loop = data->current_loop;
5156 3806062 : basic_block ex_bb;
5157 3806062 : edge exit;
5158 :
5159 3806062 : ex_bb = gimple_bb (use->stmt);
5160 3806062 : exit = EDGE_SUCC (ex_bb, 0);
5161 3806062 : if (flow_bb_inside_loop_p (loop, exit->dest))
5162 2861327 : exit = EDGE_SUCC (ex_bb, 1);
5163 :
5164 3806062 : return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5165 : }
5166 :
5167 : /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5168 : we only detect the situation that BASE = SOMETHING + OFFSET, where the
5169 : calculation is performed in non-wrapping type.
5170 :
5171 : TODO: More generally, we could test for the situation that
5172 : BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5173 : This would require knowing the sign of OFFSET. */
5174 :
5175 : static bool
5176 477 : difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5177 : {
5178 477 : enum tree_code code;
5179 477 : tree e1, e2;
5180 1431 : aff_tree aff_e1, aff_e2, aff_offset;
5181 :
5182 477 : if (!nowrap_type_p (TREE_TYPE (base)))
5183 : return false;
5184 :
5185 477 : base = expand_simple_operations (base);
5186 :
5187 477 : if (TREE_CODE (base) == SSA_NAME)
5188 : {
5189 476 : gimple *stmt = SSA_NAME_DEF_STMT (base);
5190 :
5191 476 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
5192 : return false;
5193 :
5194 18 : code = gimple_assign_rhs_code (stmt);
5195 18 : if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5196 : return false;
5197 :
5198 5 : e1 = gimple_assign_rhs1 (stmt);
5199 5 : e2 = gimple_assign_rhs2 (stmt);
5200 : }
5201 : else
5202 : {
5203 1 : code = TREE_CODE (base);
5204 1 : if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5205 : return false;
5206 0 : e1 = TREE_OPERAND (base, 0);
5207 0 : e2 = TREE_OPERAND (base, 1);
5208 : }
5209 :
5210 : /* Use affine expansion as deeper inspection to prove the equality. */
5211 5 : tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5212 : &aff_e2, &data->name_expansion_cache);
5213 5 : tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5214 : &aff_offset, &data->name_expansion_cache);
5215 5 : aff_combination_scale (&aff_offset, -1);
5216 5 : switch (code)
5217 : {
5218 3 : case PLUS_EXPR:
5219 3 : aff_combination_add (&aff_e2, &aff_offset);
5220 3 : if (aff_combination_zero_p (&aff_e2))
5221 : return true;
5222 :
5223 1 : tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5224 : &aff_e1, &data->name_expansion_cache);
5225 1 : aff_combination_add (&aff_e1, &aff_offset);
5226 1 : return aff_combination_zero_p (&aff_e1);
5227 :
5228 2 : case POINTER_PLUS_EXPR:
5229 2 : aff_combination_add (&aff_e2, &aff_offset);
5230 2 : return aff_combination_zero_p (&aff_e2);
5231 :
5232 : default:
5233 : return false;
5234 : }
5235 477 : }
5236 :
5237 : /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5238 : comparison with CAND. NITER describes the number of iterations of
5239 : the loops. If successful, the comparison in COMP_P is altered accordingly.
5240 :
5241 : We aim to handle the following situation:
5242 :
5243 : sometype *base, *p;
5244 : int a, b, i;
5245 :
5246 : i = a;
5247 : p = p_0 = base + a;
5248 :
5249 : do
5250 : {
5251 : bla (*p);
5252 : p++;
5253 : i++;
5254 : }
5255 : while (i < b);
5256 :
5257 : Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5258 : We aim to optimize this to
5259 :
5260 : p = p_0 = base + a;
5261 : do
5262 : {
5263 : bla (*p);
5264 : p++;
5265 : }
5266 : while (p < p_0 - a + b);
5267 :
5268 : This preserves the correctness, since the pointer arithmetics does not
5269 : overflow. More precisely:
5270 :
5271 : 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5272 : overflow in computing it or the values of p.
5273 : 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5274 : overflow. To prove this, we use the fact that p_0 = base + a. */
5275 :
5276 : static bool
5277 205535 : iv_elimination_compare_lt (struct ivopts_data *data,
5278 : struct iv_cand *cand, enum tree_code *comp_p,
5279 : class tree_niter_desc *niter)
5280 : {
5281 205535 : tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5282 616605 : class aff_tree nit, tmpa, tmpb;
5283 205535 : enum tree_code comp;
5284 205535 : HOST_WIDE_INT step;
5285 :
5286 : /* We need to know that the candidate induction variable does not overflow.
5287 : While more complex analysis may be used to prove this, for now just
5288 : check that the variable appears in the original program and that it
5289 : is computed in a type that guarantees no overflows. */
5290 205535 : cand_type = TREE_TYPE (cand->iv->base);
5291 205535 : if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5292 183528 : return false;
5293 :
5294 : /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5295 : the calculation of the BOUND could overflow, making the comparison
5296 : invalid. */
5297 22007 : if (!data->loop_single_exit_p)
5298 : return false;
5299 :
5300 : /* We need to be able to decide whether candidate is increasing or decreasing
5301 : in order to choose the right comparison operator. */
5302 15473 : if (!cst_and_fits_in_hwi (cand->iv->step))
5303 : return false;
5304 15473 : step = int_cst_value (cand->iv->step);
5305 :
5306 : /* Check that the number of iterations matches the expected pattern:
5307 : a + 1 > b ? 0 : b - a - 1. */
5308 15473 : mbz = niter->may_be_zero;
5309 15473 : if (TREE_CODE (mbz) == GT_EXPR)
5310 : {
5311 : /* Handle a + 1 > b. */
5312 1715 : tree op0 = TREE_OPERAND (mbz, 0);
5313 1715 : if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5314 : {
5315 794 : a = TREE_OPERAND (op0, 0);
5316 794 : b = TREE_OPERAND (mbz, 1);
5317 : }
5318 : else
5319 921 : return false;
5320 : }
5321 13758 : else if (TREE_CODE (mbz) == LT_EXPR)
5322 : {
5323 4620 : tree op1 = TREE_OPERAND (mbz, 1);
5324 :
5325 : /* Handle b < a + 1. */
5326 4620 : if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5327 : {
5328 82 : a = TREE_OPERAND (op1, 0);
5329 82 : b = TREE_OPERAND (mbz, 0);
5330 : }
5331 : else
5332 4538 : return false;
5333 : }
5334 : else
5335 : return false;
5336 :
5337 : /* Expected number of iterations is B - A - 1. Check that it matches
5338 : the actual number, i.e., that B - A - NITER = 1. */
5339 876 : tree_to_aff_combination (niter->niter, nit_type, &nit);
5340 876 : tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5341 876 : tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5342 876 : aff_combination_scale (&nit, -1);
5343 876 : aff_combination_scale (&tmpa, -1);
5344 876 : aff_combination_add (&tmpb, &tmpa);
5345 876 : aff_combination_add (&tmpb, &nit);
5346 876 : if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5347 399 : return false;
5348 :
5349 : /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5350 : overflow. */
5351 477 : offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5352 : cand->iv->step,
5353 : fold_convert (TREE_TYPE (cand->iv->step), a));
5354 477 : if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5355 : return false;
5356 :
5357 : /* Determine the new comparison operator. */
5358 4 : comp = step < 0 ? GT_EXPR : LT_EXPR;
5359 4 : if (*comp_p == NE_EXPR)
5360 4 : *comp_p = comp;
5361 0 : else if (*comp_p == EQ_EXPR)
5362 0 : *comp_p = invert_tree_comparison (comp, false);
5363 : else
5364 0 : gcc_unreachable ();
5365 :
5366 : return true;
5367 205535 : }
5368 :
5369 : /* Check whether it is possible to express the condition in USE by comparison
5370 : of candidate CAND. If so, store the value compared with to BOUND, and the
5371 : comparison operator to COMP. */
5372 :
5373 : static bool
5374 4915682 : may_eliminate_iv (struct ivopts_data *data,
5375 : struct iv_use *use, struct iv_cand *cand, tree *bound,
5376 : enum tree_code *comp)
5377 : {
5378 4915682 : basic_block ex_bb;
5379 4915682 : edge exit;
5380 4915682 : tree period;
5381 4915682 : class loop *loop = data->current_loop;
5382 4915682 : aff_tree bnd;
5383 4915682 : class tree_niter_desc *desc = NULL;
5384 :
5385 4915682 : if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5386 : return false;
5387 :
5388 : /* For now works only for exits that dominate the loop latch.
5389 : TODO: extend to other conditions inside loop body. */
5390 4721069 : ex_bb = gimple_bb (use->stmt);
5391 4721069 : if (use->stmt != last_nondebug_stmt (ex_bb)
5392 4614831 : || gimple_code (use->stmt) != GIMPLE_COND
5393 9333735 : || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5394 235102 : return false;
5395 :
5396 4485967 : exit = EDGE_SUCC (ex_bb, 0);
5397 4485967 : if (flow_bb_inside_loop_p (loop, exit->dest))
5398 3386547 : exit = EDGE_SUCC (ex_bb, 1);
5399 4485967 : if (flow_bb_inside_loop_p (loop, exit->dest))
5400 : return false;
5401 :
5402 4368266 : desc = niter_for_exit (data, exit);
5403 4368266 : if (!desc)
5404 : return false;
5405 :
5406 : /* Determine whether we can use the variable to test the exit condition.
5407 : This is the case iff the period of the induction variable is greater
5408 : than the number of iterations for which the exit condition is true. */
5409 4076837 : period = iv_period (cand->iv);
5410 :
5411 : /* If the number of iterations is constant, compare against it directly. */
5412 4076837 : if (TREE_CODE (desc->niter) == INTEGER_CST)
5413 : {
5414 : /* See cand_value_at. */
5415 2656173 : if (stmt_after_increment (loop, cand, use->stmt))
5416 : {
5417 2600510 : if (!tree_int_cst_lt (desc->niter, period))
5418 : return false;
5419 : }
5420 : else
5421 : {
5422 55663 : if (tree_int_cst_lt (period, desc->niter))
5423 : return false;
5424 : }
5425 : }
5426 :
5427 : /* If not, and if this is the only possible exit of the loop, see whether
5428 : we can get a conservative estimate on the number of iterations of the
5429 : entire loop and compare against that instead. */
5430 : else
5431 : {
5432 1420664 : widest_int period_value, max_niter;
5433 :
5434 1420664 : max_niter = desc->max;
5435 1420664 : if (stmt_after_increment (loop, cand, use->stmt))
5436 1213622 : max_niter += 1;
5437 1420664 : period_value = wi::to_widest (period);
5438 1420664 : if (wi::gtu_p (max_niter, period_value))
5439 : {
5440 : /* See if we can take advantage of inferred loop bound
5441 : information. */
5442 475271 : if (data->loop_single_exit_p)
5443 : {
5444 282922 : if (!max_loop_iterations (loop, &max_niter))
5445 : return false;
5446 : /* The loop bound is already adjusted by adding 1. */
5447 282922 : if (wi::gtu_p (max_niter, period_value))
5448 : return false;
5449 : }
5450 : else
5451 : return false;
5452 : }
5453 1420664 : }
5454 :
5455 : /* For doloop IV cand, the bound would be zero. It's safe whether
5456 : may_be_zero set or not. */
5457 3806062 : if (cand->doloop_p)
5458 : {
5459 0 : *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5460 0 : *comp = iv_elimination_compare (data, use);
5461 0 : return true;
5462 : }
5463 :
5464 3806062 : cand_value_at (loop, cand, use->stmt, desc, &bnd);
5465 :
5466 3806062 : *bound = fold_convert (TREE_TYPE (cand->iv->base),
5467 : aff_combination_to_tree (&bnd));
5468 3806062 : *comp = iv_elimination_compare (data, use);
5469 :
5470 : /* It is unlikely that computing the number of iterations using division
5471 : would be more profitable than keeping the original induction variable. */
5472 3806062 : bool cond_overflow_p;
5473 3806062 : if (expression_expensive_p (*bound, &cond_overflow_p))
5474 : return false;
5475 :
5476 : /* Sometimes, it is possible to handle the situation that the number of
5477 : iterations may be zero unless additional assumptions by using <
5478 : instead of != in the exit condition.
5479 :
5480 : TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5481 : base the exit condition on it. However, that is often too
5482 : expensive. */
5483 3797187 : if (!integer_zerop (desc->may_be_zero))
5484 205535 : return iv_elimination_compare_lt (data, cand, comp, desc);
5485 :
5486 : return true;
5487 4915682 : }
5488 :
5489 : /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5490 : be copied, if it is used in the loop body and DATA->body_includes_call. */
5491 :
5492 : static int
5493 8332197 : parm_decl_cost (struct ivopts_data *data, tree bound)
5494 : {
5495 8332197 : tree sbound = bound;
5496 8332197 : STRIP_NOPS (sbound);
5497 :
5498 8332197 : if (TREE_CODE (sbound) == SSA_NAME
5499 2890368 : && SSA_NAME_IS_DEFAULT_DEF (sbound)
5500 151797 : && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5501 8481954 : && data->body_includes_call)
5502 36701 : return COSTS_N_INSNS (1);
5503 :
5504 : return 0;
5505 : }
5506 :
/* Determines cost of computing the use in GROUP with CAND in a condition.

   Two strategies are costed and the cheaper one is recorded through
   set_group_iv_cost:
     - "elimination": replace the condition by a comparison of CAND with a
       loop-invariant bound (only tried when extract_cond_operands reports
       COMP_IV_ELIM and may_eliminate_iv succeeds);
     - "express": compute the original compared value from CAND via
       get_computation_cost.
   Only the first use of GROUP (vuses[0]) is examined.  Returns true iff the
   recorded cost is finite.  */

static bool
determine_group_iv_cost_cond (struct ivopts_data *data,
                              struct iv_group *group, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap inv_exprs = NULL;
  bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
  /* ELIM_COST stays infinite unless elimination is found possible below.  */
  comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
  enum comp_iv_rewrite rewrite_type;
  iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;
  struct iv_use *use = group->vuses[0];

  /* Extract condition operands.  */
  rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
                                        &bound_cst, NULL, &cmp_iv);
  gcc_assert (rewrite_type != COMP_IV_NA);

  /* Try iv elimination.  */
  if (rewrite_type == COMP_IV_ELIM
      && may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &inv_vars_elim);
      /* A bound that is free to compute may still need a copy if it is a
         parameter (see parm_decl_cost); an INTEGER_CST bound is treated
         as free.  */
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         inv_vars_elim will have 'base' and 'n' set, which implies that both
         'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear inv_vars_elim and set
         inv_expr_elim instead.  */
      if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
        {
          inv_expr_elim = get_loop_invariant_expr (data, bound);
          bitmap_clear (inv_vars_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost -= 1;

  /* Cost of expressing the use with CAND without elimination.  When the
     other operand of the comparison is itself an iv (CMP_IV non-null), the
     invariants of its base are added to the express set as well.  */
  express_cost = get_computation_cost (data, use, cand, false,
                                       &inv_vars_express, NULL,
                                       &inv_expr_express);
  if (cmp_iv != NULL)
    find_inv_vars (data, &cmp_iv->base, &inv_vars_express);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost += bound_cost;

  /* Choose the better approach, preferring the eliminated IV.  The bitmap
     of the chosen approach is moved into INV_VARS and its local pointer is
     reset to NULL, so that only the bitmap of the rejected approach is
     freed at the end.  */
  if (elim_cost <= express_cost)
    {
      cost = elim_cost;
      inv_vars = inv_vars_elim;
      inv_vars_elim = NULL;
      inv_expr = inv_expr_elim;
      /* For doloop candidate/use pair, adjust to zero cost.  */
      if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
        cost = no_cost;
    }
  else
    {
      cost = express_cost;
      inv_vars = inv_vars_express;
      inv_vars_express = NULL;
      /* No elimination: forget the bound and comparison computed above.  */
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr = inv_expr_express;
    }

  /* Record the single invariant expression (if any) as a one-bit set.  */
  if (inv_expr)
    {
      inv_exprs = BITMAP_ALLOC (NULL);
      bitmap_set_bit (inv_exprs, inv_expr->id);
    }
  /* NOTE(review): INV_VARS and INV_EXPRS are not freed here, so
     set_group_iv_cost presumably takes ownership of them -- confirm.  */
  set_group_iv_cost (data, group, cand, cost,
                     inv_vars, bound, comp, inv_exprs);

  if (inv_vars_elim)
    BITMAP_FREE (inv_vars_elim);
  if (inv_vars_express)
    BITMAP_FREE (inv_vars_express);

  return !cost.infinite_cost_p ();
}
5616 :
5617 : /* Determines cost of computing uses in GROUP with CAND. Returns false
5618 : if USE cannot be represented with CAND. */
5619 :
5620 : static bool
5621 17745110 : determine_group_iv_cost (struct ivopts_data *data,
5622 : struct iv_group *group, struct iv_cand *cand)
5623 : {
5624 17745110 : switch (group->type)
5625 : {
5626 5544203 : case USE_NONLINEAR_EXPR:
5627 5544203 : return determine_group_iv_cost_generic (data, group, cand);
5628 :
5629 6293158 : case USE_REF_ADDRESS:
5630 6293158 : case USE_PTR_ADDRESS:
5631 6293158 : return determine_group_iv_cost_address (data, group, cand);
5632 :
5633 5907749 : case USE_COMPARE:
5634 5907749 : return determine_group_iv_cost_cond (data, group, cand);
5635 :
5636 0 : default:
5637 0 : gcc_unreachable ();
5638 : }
5639 : }
5640 :
5641 : /* Return true if get_computation_cost indicates that autoincrement is
5642 : a possibility for the pair of USE and CAND, false otherwise. */
5643 :
5644 : static bool
5645 1283916 : autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5646 : struct iv_cand *cand)
5647 : {
5648 1283916 : if (!address_p (use->type))
5649 : return false;
5650 :
5651 418791 : bool can_autoinc = false;
5652 418791 : get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5653 418791 : return can_autoinc;
5654 : }
5655 :
5656 : /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5657 : use that allows autoincrement, and set their AINC_USE if possible. */
5658 :
5659 : static void
5660 503695 : set_autoinc_for_original_candidates (struct ivopts_data *data)
5661 : {
5662 503695 : unsigned i, j;
5663 :
5664 5128591 : for (i = 0; i < data->vcands.length (); i++)
5665 : {
5666 4624896 : struct iv_cand *cand = data->vcands[i];
5667 4624896 : struct iv_use *closest_before = NULL;
5668 4624896 : struct iv_use *closest_after = NULL;
5669 4624896 : if (cand->pos != IP_ORIGINAL)
5670 3755884 : continue;
5671 :
5672 3826934 : for (j = 0; j < data->vgroups.length (); j++)
5673 : {
5674 2957922 : struct iv_group *group = data->vgroups[j];
5675 2957922 : struct iv_use *use = group->vuses[0];
5676 2957922 : unsigned uid = gimple_uid (use->stmt);
5677 :
5678 2957922 : if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5679 1174443 : continue;
5680 :
5681 1783479 : if (uid < gimple_uid (cand->incremented_at)
5682 1783479 : && (closest_before == NULL
5683 374806 : || uid > gimple_uid (closest_before->stmt)))
5684 : closest_before = use;
5685 :
5686 1783479 : if (uid > gimple_uid (cand->incremented_at)
5687 1783479 : && (closest_after == NULL
5688 67351 : || uid < gimple_uid (closest_after->stmt)))
5689 : closest_after = use;
5690 : }
5691 :
5692 869012 : if (closest_before != NULL
5693 869012 : && autoinc_possible_for_pair (data, closest_before, cand))
5694 0 : cand->ainc_use = closest_before;
5695 869012 : else if (closest_after != NULL
5696 869012 : && autoinc_possible_for_pair (data, closest_after, cand))
5697 0 : cand->ainc_use = closest_after;
5698 : }
5699 503695 : }
5700 :
5701 : /* Relate compare use with all candidates. */
5702 :
5703 : static void
5704 299 : relate_compare_use_with_all_cands (struct ivopts_data *data)
5705 : {
5706 299 : unsigned i, count = data->vcands.length ();
5707 10000 : for (i = 0; i < data->vgroups.length (); i++)
5708 : {
5709 9701 : struct iv_group *group = data->vgroups[i];
5710 :
5711 9701 : if (group->type == USE_COMPARE)
5712 2185 : bitmap_set_range (group->related_cands, 0, count);
5713 : }
5714 299 : }
5715 :
5716 : /* If PREFERRED_MODE is suitable and profitable, use the preferred
5717 : PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1. */
5718 :
5719 : static tree
5720 0 : compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5721 : const widest_int &iterations_max)
5722 : {
5723 0 : tree ntype = TREE_TYPE (niter);
5724 0 : tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5725 0 : if (!pref_type)
5726 0 : return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5727 : build_int_cst (ntype, 1));
5728 :
5729 0 : gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5730 :
5731 0 : int prec = TYPE_PRECISION (ntype);
5732 0 : int pref_prec = TYPE_PRECISION (pref_type);
5733 :
5734 0 : tree base;
5735 :
5736 : /* Check if the PREFERRED_MODED is able to present niter. */
5737 0 : if (pref_prec > prec
5738 0 : || wi::ltu_p (iterations_max,
5739 0 : widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5740 : UNSIGNED)))
5741 : {
5742 : /* No wrap, it is safe to use preferred type after niter + 1. */
5743 0 : if (wi::ltu_p (iterations_max,
5744 0 : widest_int::from (wi::max_value (prec, UNSIGNED),
5745 : UNSIGNED)))
5746 : {
5747 : /* This could help to optimize "-1 +1" pair when niter looks
5748 : like "n-1": n is in original mode. "base = (n - 1) + 1"
5749 : in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n. */
5750 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5751 : build_int_cst (ntype, 1));
5752 0 : base = fold_convert (pref_type, base);
5753 : }
5754 :
5755 : /* To avoid wrap, convert niter to preferred type before plus 1. */
5756 : else
5757 : {
5758 0 : niter = fold_convert (pref_type, niter);
5759 0 : base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5760 : build_int_cst (pref_type, 1));
5761 : }
5762 : }
5763 : else
5764 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5765 : build_int_cst (ntype, 1));
5766 : return base;
5767 : }
5768 :
5769 : /* Add one doloop dedicated IV candidate:
5770 : - Base is (may_be_zero ? 1 : (niter + 1)).
5771 : - Step is -1. */
5772 :
5773 : static void
5774 0 : add_iv_candidate_for_doloop (struct ivopts_data *data)
5775 : {
5776 0 : tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5777 0 : gcc_assert (niter_desc && niter_desc->assumptions);
5778 :
5779 0 : tree niter = niter_desc->niter;
5780 0 : tree ntype = TREE_TYPE (niter);
5781 0 : gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5782 :
5783 0 : tree may_be_zero = niter_desc->may_be_zero;
5784 0 : if (may_be_zero && integer_zerop (may_be_zero))
5785 : may_be_zero = NULL_TREE;
5786 0 : if (may_be_zero)
5787 : {
5788 0 : if (COMPARISON_CLASS_P (may_be_zero))
5789 : {
5790 0 : niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5791 : build_int_cst (ntype, 0),
5792 : rewrite_to_non_trapping_overflow (niter));
5793 : }
5794 : /* Don't try to obtain the iteration count expression when may_be_zero is
5795 : integer_nonzerop (actually iteration count is one) or else. */
5796 : else
5797 : return;
5798 : }
5799 :
5800 0 : machine_mode mode = TYPE_MODE (ntype);
5801 0 : machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5802 :
5803 0 : tree base;
5804 0 : if (mode != pref_mode)
5805 : {
5806 0 : base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5807 0 : ntype = TREE_TYPE (base);
5808 : }
5809 : else
5810 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5811 : build_int_cst (ntype, 1));
5812 :
5813 :
5814 0 : add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5815 : }
5816 :
5817 : /* Finds the candidates for the induction variables. */
5818 :
5819 : static void
5820 503695 : find_iv_candidates (struct ivopts_data *data)
5821 : {
5822 : /* Add commonly used ivs. */
5823 503695 : add_standard_iv_candidates (data);
5824 :
5825 : /* Add doloop dedicated ivs. */
5826 503695 : if (data->doloop_use_p)
5827 0 : add_iv_candidate_for_doloop (data);
5828 :
5829 : /* Add old induction variables. */
5830 503695 : add_iv_candidate_for_bivs (data);
5831 :
5832 : /* Add induction variables derived from uses. */
5833 503695 : add_iv_candidate_for_groups (data);
5834 :
5835 503695 : set_autoinc_for_original_candidates (data);
5836 :
5837 : /* Record the important candidates. */
5838 503695 : record_important_candidates (data);
5839 :
5840 : /* Relate compare iv_use with all candidates. */
5841 503695 : if (!data->consider_all_candidates)
5842 299 : relate_compare_use_with_all_cands (data);
5843 :
5844 503695 : if (dump_file && (dump_flags & TDF_DETAILS))
5845 : {
5846 67 : unsigned i;
5847 :
5848 67 : fprintf (dump_file, "\n<Important Candidates>:\t");
5849 820 : for (i = 0; i < data->vcands.length (); i++)
5850 686 : if (data->vcands[i]->important)
5851 492 : fprintf (dump_file, " %d,", data->vcands[i]->id);
5852 67 : fprintf (dump_file, "\n");
5853 :
5854 67 : fprintf (dump_file, "\n<Group, Cand> Related:\n");
5855 287 : for (i = 0; i < data->vgroups.length (); i++)
5856 : {
5857 220 : struct iv_group *group = data->vgroups[i];
5858 :
5859 220 : if (group->related_cands)
5860 : {
5861 220 : fprintf (dump_file, " Group %d:\t", group->id);
5862 220 : dump_bitmap (dump_file, group->related_cands);
5863 : }
5864 : }
5865 67 : fprintf (dump_file, "\n");
5866 : }
5867 503695 : }
5868 :
5869 : /* Determines costs of computing use of iv with an iv candidate. */
5870 :
5871 : static void
5872 503695 : determine_group_iv_costs (struct ivopts_data *data)
5873 : {
5874 503695 : unsigned i, j;
5875 503695 : struct iv_cand *cand;
5876 503695 : struct iv_group *group;
5877 503695 : bitmap to_clear = BITMAP_ALLOC (NULL);
5878 :
5879 503695 : alloc_use_cost_map (data);
5880 :
5881 2151470 : for (i = 0; i < data->vgroups.length (); i++)
5882 : {
5883 1647775 : group = data->vgroups[i];
5884 :
5885 1647775 : if (data->consider_all_candidates)
5886 : {
5887 19038449 : for (j = 0; j < data->vcands.length (); j++)
5888 : {
5889 17390674 : cand = data->vcands[j];
5890 17390674 : determine_group_iv_cost (data, group, cand);
5891 : }
5892 : }
5893 : else
5894 : {
5895 9701 : bitmap_iterator bi;
5896 :
5897 364137 : EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5898 : {
5899 354436 : cand = data->vcands[j];
5900 354436 : if (!determine_group_iv_cost (data, group, cand))
5901 210918 : bitmap_set_bit (to_clear, j);
5902 : }
5903 :
5904 : /* Remove the candidates for that the cost is infinite from
5905 : the list of related candidates. */
5906 9701 : bitmap_and_compl_into (group->related_cands, to_clear);
5907 9701 : bitmap_clear (to_clear);
5908 : }
5909 : }
5910 :
5911 503695 : BITMAP_FREE (to_clear);
5912 :
5913 503695 : if (dump_file && (dump_flags & TDF_DETAILS))
5914 : {
5915 67 : bitmap_iterator bi;
5916 :
5917 : /* Dump invariant variables. */
5918 67 : fprintf (dump_file, "\n<Invariant Vars>:\n");
5919 1041 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5920 : {
5921 974 : struct version_info *info = ver_info (data, i);
5922 974 : if (info->inv_id)
5923 : {
5924 222 : fprintf (dump_file, "Inv %d:\t", info->inv_id);
5925 222 : print_generic_expr (dump_file, info->name, TDF_SLIM);
5926 222 : fprintf (dump_file, "%s\n",
5927 222 : info->has_nonlin_use ? "" : "\t(eliminable)");
5928 : }
5929 : }
5930 :
5931 : /* Dump invariant expressions. */
5932 67 : fprintf (dump_file, "\n<Invariant Expressions>:\n");
5933 67 : auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5934 :
5935 439 : for (hash_table<iv_inv_expr_hasher>::iterator it
5936 506 : = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5937 372 : ++it)
5938 372 : list.safe_push (*it);
5939 :
5940 67 : list.qsort (sort_iv_inv_expr_ent);
5941 :
5942 439 : for (i = 0; i < list.length (); ++i)
5943 : {
5944 372 : fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5945 372 : print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5946 372 : fprintf (dump_file, "\n");
5947 : }
5948 :
5949 67 : fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5950 :
5951 287 : for (i = 0; i < data->vgroups.length (); i++)
5952 : {
5953 220 : group = data->vgroups[i];
5954 :
5955 220 : fprintf (dump_file, "Group %d:\n", i);
5956 220 : fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5957 2982 : for (j = 0; j < group->n_map_members; j++)
5958 : {
5959 3856 : if (!group->cost_map[j].cand
5960 2762 : || group->cost_map[j].cost.infinite_cost_p ())
5961 1094 : continue;
5962 :
5963 1668 : fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5964 1668 : group->cost_map[j].cand->id,
5965 : group->cost_map[j].cost.cost,
5966 1668 : group->cost_map[j].cost.complexity);
5967 1668 : if (!group->cost_map[j].inv_exprs
5968 1668 : || bitmap_empty_p (group->cost_map[j].inv_exprs))
5969 1168 : fprintf (dump_file, "NIL;\t");
5970 : else
5971 500 : bitmap_print (dump_file,
5972 : group->cost_map[j].inv_exprs, "", ";\t");
5973 1668 : if (!group->cost_map[j].inv_vars
5974 1668 : || bitmap_empty_p (group->cost_map[j].inv_vars))
5975 1347 : fprintf (dump_file, "NIL;\n");
5976 : else
5977 321 : bitmap_print (dump_file,
5978 : group->cost_map[j].inv_vars, "", "\n");
5979 : }
5980 :
5981 220 : fprintf (dump_file, "\n");
5982 : }
5983 67 : fprintf (dump_file, "\n");
5984 67 : }
5985 503695 : }
5986 :
5987 : /* Determines cost of the candidate CAND. */
5988 :
5989 : static void
5990 4624896 : determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5991 : {
5992 4624896 : comp_cost cost_base;
5993 4624896 : int64_t cost, cost_step;
5994 4624896 : tree base;
5995 :
5996 4624896 : gcc_assert (cand->iv != NULL);
5997 :
5998 : /* There are two costs associated with the candidate -- its increment
5999 : and its initialization. The second is almost negligible for any loop
6000 : that rolls enough, so we take it just very little into account. */
6001 :
6002 4624896 : base = cand->iv->base;
6003 4624896 : cost_base = force_var_cost (data, base, NULL);
6004 : /* It will be exceptional that the iv register happens to be initialized with
6005 : the proper value at no cost. In general, there will at least be a regcopy
6006 : or a const set. */
6007 4624896 : if (cost_base.cost == 0)
6008 3665401 : cost_base.cost = COSTS_N_INSNS (1);
6009 : /* Doloop decrement should be considered as zero cost. */
6010 4624896 : if (cand->doloop_p)
6011 : cost_step = 0;
6012 : else
6013 4624896 : cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6014 4624896 : cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6015 :
6016 : /* Prefer the original ivs unless we may gain something by replacing it.
6017 : The reason is to make debugging simpler; so this is not relevant for
6018 : artificial ivs created by other optimization passes. */
6019 4624896 : if ((cand->pos != IP_ORIGINAL
6020 869012 : || !SSA_NAME_VAR (cand->var_before)
6021 437021 : || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6022 : /* Prefer doloop as well. */
6023 5147489 : && !cand->doloop_p)
6024 4278477 : cost++;
6025 :
6026 : /* Prefer not to insert statements into latch unless there are some
6027 : already (so that we do not create unnecessary jumps). */
6028 4624896 : if (cand->pos == IP_END
6029 4624896 : && empty_block_p (ip_end_pos (data->current_loop)))
6030 1931 : cost++;
6031 :
6032 4624896 : cand->cost = cost;
6033 4624896 : cand->cost_step = cost_step;
6034 4624896 : }
6035 :
6036 : /* Determines costs of computation of the candidates. */
6037 :
6038 : static void
6039 503695 : determine_iv_costs (struct ivopts_data *data)
6040 : {
6041 503695 : unsigned i;
6042 :
6043 503695 : if (dump_file && (dump_flags & TDF_DETAILS))
6044 : {
6045 67 : fprintf (dump_file, "<Candidate Costs>:\n");
6046 67 : fprintf (dump_file, " cand\tcost\n");
6047 : }
6048 :
6049 5128591 : for (i = 0; i < data->vcands.length (); i++)
6050 : {
6051 4624896 : struct iv_cand *cand = data->vcands[i];
6052 :
6053 4624896 : determine_iv_cost (data, cand);
6054 :
6055 4624896 : if (dump_file && (dump_flags & TDF_DETAILS))
6056 686 : fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6057 : }
6058 :
6059 503695 : if (dump_file && (dump_flags & TDF_DETAILS))
6060 67 : fprintf (dump_file, "\n");
6061 503695 : }
6062 :
6063 : /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6064 : induction variables. Note N_INVS includes both invariant variables and
6065 : invariant expressions. */
6066 :
6067 : static unsigned
6068 419531887 : ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6069 : unsigned n_cands)
6070 : {
6071 419531887 : unsigned cost;
6072 419531887 : unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6073 419531887 : unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6074 419531887 : bool speed = data->speed;
6075 :
6076 : /* If there is a call in the loop body, the call-clobbered registers
6077 : are not available for loop invariants. */
6078 419531887 : if (data->body_includes_call)
6079 92377062 : available_regs = available_regs - target_clobbered_regs;
6080 :
6081 : /* If we have enough registers. */
6082 419531887 : if (regs_needed + target_res_regs < available_regs)
6083 : cost = n_new;
6084 : /* If close to running out of registers, try to preserve them. */
6085 183647817 : else if (regs_needed <= available_regs)
6086 51034529 : cost = target_reg_cost [speed] * regs_needed;
6087 : /* If we run out of available registers but the number of candidates
6088 : does not, we penalize extra registers using target_spill_cost. */
6089 132613288 : else if (n_cands <= available_regs)
6090 118538811 : cost = target_reg_cost [speed] * available_regs
6091 118538811 : + target_spill_cost [speed] * (regs_needed - available_regs);
6092 : /* If the number of candidates runs out available registers, we penalize
6093 : extra candidate registers using target_spill_cost * 2. Because it is
6094 : more expensive to spill induction variable than invariant. */
6095 : else
6096 14074477 : cost = target_reg_cost [speed] * available_regs
6097 14074477 : + target_spill_cost [speed] * (n_cands - available_regs) * 2
6098 14074477 : + target_spill_cost [speed] * (regs_needed - n_cands);
6099 :
6100 : /* Finally, add the number of candidates, so that we prefer eliminating
6101 : induction variables if possible. */
6102 419531887 : return cost + n_cands;
6103 : }
6104 :
6105 : /* For each size of the induction variable set determine the penalty. */
6106 :
6107 : static void
6108 503695 : determine_set_costs (struct ivopts_data *data)
6109 : {
6110 503695 : unsigned j, n;
6111 503695 : gphi *phi;
6112 503695 : gphi_iterator psi;
6113 503695 : tree op;
6114 503695 : class loop *loop = data->current_loop;
6115 503695 : bitmap_iterator bi;
6116 :
6117 503695 : if (dump_file && (dump_flags & TDF_DETAILS))
6118 : {
6119 67 : fprintf (dump_file, "<Global Costs>:\n");
6120 67 : fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6121 67 : fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6122 67 : fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6123 67 : fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6124 : }
6125 :
6126 503695 : n = 0;
6127 1958868 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6128 : {
6129 1455173 : phi = psi.phi ();
6130 1455173 : op = PHI_RESULT (phi);
6131 :
6132 2910346 : if (virtual_operand_p (op))
6133 308940 : continue;
6134 :
6135 1146233 : if (get_iv (data, op))
6136 874194 : continue;
6137 :
6138 503315 : if (!POINTER_TYPE_P (TREE_TYPE (op))
6139 503174 : && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6140 101841 : continue;
6141 :
6142 170198 : n++;
6143 : }
6144 :
6145 5540784 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6146 : {
6147 5037089 : struct version_info *info = ver_info (data, j);
6148 :
6149 5037089 : if (info->inv_id && info->has_nonlin_use)
6150 510610 : n++;
6151 : }
6152 :
6153 503695 : data->regs_used = n;
6154 503695 : if (dump_file && (dump_flags & TDF_DETAILS))
6155 67 : fprintf (dump_file, " regs_used %d\n", n);
6156 :
6157 503695 : if (dump_file && (dump_flags & TDF_DETAILS))
6158 : {
6159 67 : fprintf (dump_file, " cost for size:\n");
6160 67 : fprintf (dump_file, " ivs\tcost\n");
6161 2144 : for (j = 0; j <= 2 * target_avail_regs; j++)
6162 2077 : fprintf (dump_file, " %d\t%d\n", j,
6163 : ivopts_estimate_reg_pressure (data, 0, j));
6164 67 : fprintf (dump_file, "\n");
6165 : }
6166 503695 : }
6167 :
6168 : /* Returns true if A is a cheaper cost pair than B. */
6169 :
6170 : static bool
6171 82926755 : cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6172 : {
6173 82926755 : if (!a)
6174 : return false;
6175 :
6176 77615168 : if (!b)
6177 : return true;
6178 :
6179 74409445 : if (a->cost < b->cost)
6180 : return true;
6181 :
6182 55031884 : if (b->cost < a->cost)
6183 : return false;
6184 :
6185 : /* In case the costs are the same, prefer the cheaper candidate. */
6186 31540402 : if (a->cand->cost < b->cand->cost)
6187 : return true;
6188 :
6189 : return false;
6190 : }
6191 :
6192 : /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6193 : for more expensive, equal and cheaper respectively. */
6194 :
6195 : static int
6196 29199987 : compare_cost_pair (class cost_pair *a, class cost_pair *b)
6197 : {
6198 29199987 : if (cheaper_cost_pair (a, b))
6199 : return -1;
6200 23021656 : if (cheaper_cost_pair (b, a))
6201 14885167 : return 1;
6202 :
6203 : return 0;
6204 : }
6205 :
6206 : /* Returns candidate by that USE is expressed in IVS. */
6207 :
6208 : static class cost_pair *
6209 279930583 : iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6210 : {
6211 279930583 : return ivs->cand_for_group[group->id];
6212 : }
6213 :
6214 : /* Computes the cost field of IVS structure. */
6215 :
6216 : static void
6217 419529562 : iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6218 : {
6219 419529562 : comp_cost cost = ivs->cand_use_cost;
6220 :
6221 419529562 : cost += ivs->cand_cost;
6222 419529562 : cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6223 419529562 : ivs->cost = cost;
6224 419529562 : }
6225 :
6226 : /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6227 : and IVS. */
6228 :
6229 : static void
6230 574771904 : iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6231 : {
6232 574771904 : bitmap_iterator bi;
6233 574771904 : unsigned iid;
6234 :
6235 574771904 : if (!invs)
6236 456264624 : return;
6237 :
6238 118507280 : gcc_assert (n_inv_uses != NULL);
6239 204409413 : EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6240 : {
6241 85902133 : n_inv_uses[iid]--;
6242 85902133 : if (n_inv_uses[iid] == 0)
6243 63517468 : ivs->n_invs--;
6244 : }
6245 : }
6246 :
6247 : /* Set USE not to be expressed by any candidate in IVS. */
6248 :
6249 : static void
6250 208118978 : iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6251 : struct iv_group *group)
6252 : {
6253 208118978 : unsigned gid = group->id, cid;
6254 208118978 : class cost_pair *cp;
6255 :
6256 208118978 : cp = ivs->cand_for_group[gid];
6257 208118978 : if (!cp)
6258 : return;
6259 208118978 : cid = cp->cand->id;
6260 :
6261 208118978 : ivs->bad_groups++;
6262 208118978 : ivs->cand_for_group[gid] = NULL;
6263 208118978 : ivs->n_cand_uses[cid]--;
6264 :
6265 208118978 : if (ivs->n_cand_uses[cid] == 0)
6266 : {
6267 79266974 : bitmap_clear_bit (ivs->cands, cid);
6268 79266974 : if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6269 79266974 : ivs->n_cands--;
6270 79266974 : ivs->cand_cost -= cp->cand->cost;
6271 79266974 : iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6272 79266974 : iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6273 : }
6274 :
6275 208118978 : ivs->cand_use_cost -= cp->cost;
6276 208118978 : iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6277 208118978 : iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6278 208118978 : iv_ca_recount_cost (data, ivs);
6279 : }
6280 :
6281 : /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6282 : IVS. */
6283 :
6284 : static void
6285 584150208 : iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6286 : {
6287 584150208 : bitmap_iterator bi;
6288 584150208 : unsigned iid;
6289 :
6290 584150208 : if (!invs)
6291 464506598 : return;
6292 :
6293 119643610 : gcc_assert (n_inv_uses != NULL);
6294 206514558 : EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6295 : {
6296 86870948 : n_inv_uses[iid]++;
6297 86870948 : if (n_inv_uses[iid] == 1)
6298 64408799 : ivs->n_invs++;
6299 : }
6300 : }
6301 :
6302 : /* Set cost pair for GROUP in set IVS to CP. */
6303 :
6304 : static void
6305 224398349 : iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6306 : struct iv_group *group, class cost_pair *cp)
6307 : {
6308 224398349 : unsigned gid = group->id, cid;
6309 :
6310 224398349 : if (ivs->cand_for_group[gid] == cp)
6311 : return;
6312 :
6313 211410584 : if (ivs->cand_for_group[gid])
6314 195851610 : iv_ca_set_no_cp (data, ivs, group);
6315 :
6316 211410584 : if (cp)
6317 : {
6318 211410584 : cid = cp->cand->id;
6319 :
6320 211410584 : ivs->bad_groups--;
6321 211410584 : ivs->cand_for_group[gid] = cp;
6322 211410584 : ivs->n_cand_uses[cid]++;
6323 211410584 : if (ivs->n_cand_uses[cid] == 1)
6324 : {
6325 80664520 : bitmap_set_bit (ivs->cands, cid);
6326 80664520 : if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6327 80664520 : ivs->n_cands++;
6328 80664520 : ivs->cand_cost += cp->cand->cost;
6329 80664520 : iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6330 80664520 : iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6331 : }
6332 :
6333 211410584 : ivs->cand_use_cost += cp->cost;
6334 211410584 : iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6335 211410584 : iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6336 211410584 : iv_ca_recount_cost (data, ivs);
6337 : }
6338 : }
6339 :
6340 : /* Extend set IVS by expressing USE by some of the candidates in it
6341 : if possible. Consider all important candidates if candidates in
6342 : set IVS don't give any result. */
6343 :
6344 : static void
6345 3292844 : iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6346 : struct iv_group *group)
6347 : {
6348 3292844 : class cost_pair *best_cp = NULL, *cp;
6349 3292844 : bitmap_iterator bi;
6350 3292844 : unsigned i;
6351 3292844 : struct iv_cand *cand;
6352 :
6353 3292844 : gcc_assert (ivs->upto >= group->id);
6354 3292844 : ivs->upto++;
6355 3292844 : ivs->bad_groups++;
6356 :
6357 6204028 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6358 : {
6359 2911184 : cand = data->vcands[i];
6360 2911184 : cp = get_group_iv_cost (data, group, cand);
6361 2911184 : if (cheaper_cost_pair (cp, best_cp))
6362 2022095 : best_cp = cp;
6363 : }
6364 :
6365 3292844 : if (best_cp == NULL)
6366 : {
6367 11804152 : EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6368 : {
6369 10460091 : cand = data->vcands[i];
6370 10460091 : cp = get_group_iv_cost (data, group, cand);
6371 10460091 : if (cheaper_cost_pair (cp, best_cp))
6372 2409722 : best_cp = cp;
6373 : }
6374 : }
6375 :
6376 3292844 : iv_ca_set_cp (data, ivs, group, best_cp);
6377 3292844 : }
6378 :
6379 : /* Get cost for assignment IVS. */
6380 :
6381 : static comp_cost
6382 82609813 : iv_ca_cost (class iv_ca *ivs)
6383 : {
6384 : /* This was a conditional expression but it triggered a bug in
6385 : Sun C 5.5. */
6386 0 : if (ivs->bad_groups)
6387 87121 : return infinite_cost;
6388 : else
6389 82522692 : return ivs->cost;
6390 : }
6391 :
6392 : /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6393 : than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6394 : respectively. */
6395 :
6396 : static int
6397 38849434 : iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6398 : struct iv_group *group, class cost_pair *old_cp,
6399 : class cost_pair *new_cp)
6400 : {
6401 38849434 : gcc_assert (old_cp && new_cp && old_cp != new_cp);
6402 38849434 : unsigned old_n_invs = ivs->n_invs;
6403 38849434 : iv_ca_set_cp (data, ivs, group, new_cp);
6404 38849434 : unsigned new_n_invs = ivs->n_invs;
6405 38849434 : iv_ca_set_cp (data, ivs, group, old_cp);
6406 :
6407 38849434 : return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6408 : }
6409 :
6410 : /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6411 : it before NEXT. */
6412 :
6413 : static struct iv_ca_delta *
6414 48588855 : iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6415 : class cost_pair *new_cp, struct iv_ca_delta *next)
6416 : {
6417 0 : struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6418 :
6419 48588855 : change->group = group;
6420 48588855 : change->old_cp = old_cp;
6421 48588855 : change->new_cp = new_cp;
6422 48588855 : change->next = next;
6423 :
6424 48588855 : return change;
6425 : }
6426 :
6427 : /* Joins two lists of changes L1 and L2. Destructive -- old lists
6428 : are rewritten. */
6429 :
6430 : static struct iv_ca_delta *
6431 8162278 : iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6432 : {
6433 8162278 : struct iv_ca_delta *last;
6434 :
6435 0 : if (!l2)
6436 : return l1;
6437 :
6438 0 : if (!l1)
6439 : return l2;
6440 :
6441 3500488 : for (last = l1; last->next; last = last->next)
6442 1129468 : continue;
6443 2371020 : last->next = l2;
6444 :
6445 2371020 : return l1;
6446 1129468 : }
6447 :
6448 : /* Reverse the list of changes DELTA, forming the inverse to it. */
6449 :
6450 : static struct iv_ca_delta *
6451 0 : iv_ca_delta_reverse (struct iv_ca_delta *delta)
6452 : {
6453 0 : struct iv_ca_delta *act, *next, *prev = NULL;
6454 :
6455 161487444 : for (act = delta; act; act = next)
6456 : {
6457 91276940 : next = act->next;
6458 91276940 : act->next = prev;
6459 91276940 : prev = act;
6460 :
6461 91276940 : std::swap (act->old_cp, act->new_cp);
6462 : }
6463 :
6464 0 : return prev;
6465 : }
6466 :
6467 : /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6468 : reverted instead. */
6469 :
6470 : static void
6471 73989221 : iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6472 : struct iv_ca_delta *delta, bool forward)
6473 : {
6474 73989221 : class cost_pair *from, *to;
6475 73989221 : struct iv_ca_delta *act;
6476 :
6477 73989221 : if (!forward)
6478 73989221 : delta = iv_ca_delta_reverse (delta);
6479 :
6480 170155818 : for (act = delta; act; act = act->next)
6481 : {
6482 96166597 : from = act->old_cp;
6483 96166597 : to = act->new_cp;
6484 96166597 : gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6485 96166597 : iv_ca_set_cp (data, ivs, act->group, to);
6486 : }
6487 :
6488 73989221 : if (!forward)
6489 73989221 : iv_ca_delta_reverse (delta);
6490 73989221 : }
6491 :
6492 : /* Returns true if CAND is used in IVS. */
6493 :
6494 : static bool
6495 29126542 : iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6496 : {
6497 29126542 : return ivs->n_cand_uses[cand->id] > 0;
6498 : }
6499 :
6500 : /* Returns number of induction variable candidates in the set IVS. */
6501 :
6502 : static unsigned
6503 12690108 : iv_ca_n_cands (class iv_ca *ivs)
6504 : {
6505 12690108 : return ivs->n_cands;
6506 : }
6507 :
6508 : /* Free the list of changes DELTA. */
6509 :
6510 : static void
6511 43635468 : iv_ca_delta_free (struct iv_ca_delta **delta)
6512 : {
6513 43635468 : struct iv_ca_delta *act, *next;
6514 :
6515 92224323 : for (act = *delta; act; act = next)
6516 : {
6517 48588855 : next = act->next;
6518 48588855 : free (act);
6519 : }
6520 :
6521 43635468 : *delta = NULL;
6522 43635468 : }
6523 :
6524 : /* Allocates new iv candidates assignment. */
6525 :
6526 : static class iv_ca *
6527 1007390 : iv_ca_new (struct ivopts_data *data)
6528 : {
6529 1007390 : class iv_ca *nw = XNEW (class iv_ca);
6530 :
6531 1007390 : nw->upto = 0;
6532 1007390 : nw->bad_groups = 0;
6533 2014780 : nw->cand_for_group = XCNEWVEC (class cost_pair *,
6534 : data->vgroups.length ());
6535 2014780 : nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6536 1007390 : nw->cands = BITMAP_ALLOC (NULL);
6537 1007390 : nw->n_cands = 0;
6538 1007390 : nw->n_invs = 0;
6539 1007390 : nw->cand_use_cost = no_cost;
6540 1007390 : nw->cand_cost = 0;
6541 1007390 : nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6542 1007390 : nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6543 1007390 : nw->cost = no_cost;
6544 :
6545 1007390 : return nw;
6546 : }
6547 :
6548 : /* Free memory occupied by the set IVS. */
6549 :
6550 : static void
6551 1007390 : iv_ca_free (class iv_ca **ivs)
6552 : {
6553 1007390 : free ((*ivs)->cand_for_group);
6554 1007390 : free ((*ivs)->n_cand_uses);
6555 1007390 : BITMAP_FREE ((*ivs)->cands);
6556 1007390 : free ((*ivs)->n_inv_var_uses);
6557 1007390 : free ((*ivs)->n_inv_expr_uses);
6558 1007390 : free (*ivs);
6559 1007390 : *ivs = NULL;
6560 1007390 : }
6561 :
6562 : /* Dumps IVS to FILE. */
6563 :
6564 : static void
6565 248 : iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6566 : {
6567 248 : unsigned i;
6568 248 : comp_cost cost = iv_ca_cost (ivs);
6569 :
6570 248 : fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6571 : cost.complexity);
6572 248 : fprintf (file, " reg_cost: %d\n",
6573 : ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6574 248 : fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6575 : "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6576 : ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6577 248 : bitmap_print (file, ivs->cands, " candidates: ","\n");
6578 :
6579 1285 : for (i = 0; i < ivs->upto; i++)
6580 : {
6581 1037 : struct iv_group *group = data->vgroups[i];
6582 1037 : class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6583 1037 : if (cp)
6584 1037 : fprintf (file, " group:%d --> iv_cand:%d, cost=("
6585 1037 : "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6586 : cp->cost.cost, cp->cost.complexity);
6587 : else
6588 0 : fprintf (file, " group:%d --> ??\n", group->id);
6589 : }
6590 :
6591 248 : const char *pref = "";
6592 248 : fprintf (file, " invariant variables: ");
6593 1438 : for (i = 1; i <= data->max_inv_var_id; i++)
6594 942 : if (ivs->n_inv_var_uses[i])
6595 : {
6596 133 : fprintf (file, "%s%d", pref, i);
6597 133 : pref = ", ";
6598 : }
6599 :
6600 248 : pref = "";
6601 248 : fprintf (file, "\n invariant expressions: ");
6602 2486 : for (i = 1; i <= data->max_inv_expr_id; i++)
6603 1990 : if (ivs->n_inv_expr_uses[i])
6604 : {
6605 303 : fprintf (file, "%s%d", pref, i);
6606 303 : pref = ", ";
6607 : }
6608 :
6609 248 : fprintf (file, "\n\n");
6610 248 : }
6611 :
6612 : /* Try changing candidate in IVS to CAND for each use. Return cost of the
6613 : new set, and store differences in DELTA. Number of induction variables
6614 : in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6615 : the function will try to find a solution with mimimal iv candidates. */
6616 :
6617 : static comp_cost
6618 21751753 : iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6619 : struct iv_cand *cand, struct iv_ca_delta **delta,
6620 : unsigned *n_ivs, bool min_ncand)
6621 : {
6622 21751753 : unsigned i;
6623 21751753 : comp_cost cost;
6624 21751753 : struct iv_group *group;
6625 21751753 : class cost_pair *old_cp, *new_cp;
6626 :
6627 21751753 : *delta = NULL;
6628 121030943 : for (i = 0; i < ivs->upto; i++)
6629 : {
6630 99279190 : group = data->vgroups[i];
6631 99279190 : old_cp = iv_ca_cand_for_group (ivs, group);
6632 :
6633 99279190 : if (old_cp
6634 99279190 : && old_cp->cand == cand)
6635 9061645 : continue;
6636 :
6637 90217545 : new_cp = get_group_iv_cost (data, group, cand);
6638 90217545 : if (!new_cp)
6639 35025271 : continue;
6640 :
6641 55192274 : if (!min_ncand)
6642 : {
6643 38849434 : int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6644 : /* Skip if new_cp depends on more invariants. */
6645 38849434 : if (cmp_invs > 0)
6646 9649447 : continue;
6647 :
6648 29199987 : int cmp_cost = compare_cost_pair (new_cp, old_cp);
6649 : /* Skip if new_cp is not cheaper. */
6650 29199987 : if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6651 22624315 : continue;
6652 : }
6653 :
6654 22918512 : *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6655 : }
6656 :
6657 21751753 : iv_ca_delta_commit (data, ivs, *delta, true);
6658 21751753 : cost = iv_ca_cost (ivs);
6659 21751753 : if (n_ivs)
6660 12690108 : *n_ivs = iv_ca_n_cands (ivs);
6661 21751753 : iv_ca_delta_commit (data, ivs, *delta, false);
6662 :
6663 21751753 : return cost;
6664 : }
6665 :
6666 : /* Try narrowing set IVS by removing CAND. Return the cost of
6667 : the new set and store the differences in DELTA. START is
6668 : the candidate with which we start narrowing. */
6669 :
6670 : static comp_cost
6671 15123798 : iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6672 : struct iv_cand *cand, struct iv_cand *start,
6673 : struct iv_ca_delta **delta)
6674 : {
6675 15123798 : unsigned i, ci;
6676 15123798 : struct iv_group *group;
6677 15123798 : class cost_pair *old_cp, *new_cp, *cp;
6678 15123798 : bitmap_iterator bi;
6679 15123798 : struct iv_cand *cnd;
6680 15123798 : comp_cost cost, best_cost, acost;
6681 :
6682 15123798 : *delta = NULL;
6683 80433774 : for (i = 0; i < data->vgroups.length (); i++)
6684 : {
6685 75242553 : group = data->vgroups[i];
6686 :
6687 75242553 : old_cp = iv_ca_cand_for_group (ivs, group);
6688 75242553 : if (old_cp->cand != cand)
6689 53396711 : continue;
6690 :
6691 21845842 : best_cost = iv_ca_cost (ivs);
6692 : /* Start narrowing with START. */
6693 21845842 : new_cp = get_group_iv_cost (data, group, start);
6694 :
6695 21845842 : if (data->consider_all_candidates)
6696 : {
6697 92623299 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6698 : {
6699 72000012 : if (ci == cand->id || (start && ci == start->id))
6700 35478269 : continue;
6701 :
6702 36521743 : cnd = data->vcands[ci];
6703 :
6704 36521743 : cp = get_group_iv_cost (data, group, cnd);
6705 36521743 : if (!cp)
6706 21879792 : continue;
6707 :
6708 14641951 : iv_ca_set_cp (data, ivs, group, cp);
6709 14641951 : acost = iv_ca_cost (ivs);
6710 :
6711 14641951 : if (acost < best_cost)
6712 : {
6713 1949526 : best_cost = acost;
6714 1949526 : new_cp = cp;
6715 : }
6716 : }
6717 : }
6718 : else
6719 : {
6720 4982112 : EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6721 : {
6722 3759557 : if (ci == cand->id || (start && ci == start->id))
6723 2068955 : continue;
6724 :
6725 1690602 : cnd = data->vcands[ci];
6726 :
6727 1690602 : cp = get_group_iv_cost (data, group, cnd);
6728 1690602 : if (!cp)
6729 0 : continue;
6730 :
6731 1690602 : iv_ca_set_cp (data, ivs, group, cp);
6732 1690602 : acost = iv_ca_cost (ivs);
6733 :
6734 1690602 : if (acost < best_cost)
6735 : {
6736 43334 : best_cost = acost;
6737 43334 : new_cp = cp;
6738 : }
6739 : }
6740 : }
6741 : /* Restore to old cp for use. */
6742 21845842 : iv_ca_set_cp (data, ivs, group, old_cp);
6743 :
6744 21845842 : if (!new_cp)
6745 : {
6746 9932577 : iv_ca_delta_free (delta);
6747 9932577 : return infinite_cost;
6748 : }
6749 :
6750 11913265 : *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6751 : }
6752 :
6753 5191221 : iv_ca_delta_commit (data, ivs, *delta, true);
6754 5191221 : cost = iv_ca_cost (ivs);
6755 5191221 : iv_ca_delta_commit (data, ivs, *delta, false);
6756 :
6757 5191221 : return cost;
6758 : }
6759 :
6760 : /* Try optimizing the set of candidates IVS by removing candidates different
6761 : from to EXCEPT_CAND from it. Return cost of the new set, and store
6762 : differences in DELTA. */
6763 :
6764 : static comp_cost
6765 9198998 : iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6766 : struct iv_cand *except_cand, struct iv_ca_delta **delta)
6767 : {
6768 9198998 : bitmap_iterator bi;
6769 9198998 : struct iv_ca_delta *act_delta, *best_delta;
6770 9198998 : unsigned i;
6771 9198998 : comp_cost best_cost, acost;
6772 9198998 : struct iv_cand *cand;
6773 :
6774 9198998 : best_delta = NULL;
6775 9198998 : best_cost = iv_ca_cost (ivs);
6776 :
6777 30774008 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6778 : {
6779 21575010 : cand = data->vcands[i];
6780 :
6781 21575010 : if (cand == except_cand)
6782 6451212 : continue;
6783 :
6784 15123798 : acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6785 :
6786 15123798 : if (acost < best_cost)
6787 : {
6788 2552713 : best_cost = acost;
6789 2552713 : iv_ca_delta_free (&best_delta);
6790 2552713 : best_delta = act_delta;
6791 : }
6792 : else
6793 12571085 : iv_ca_delta_free (&act_delta);
6794 : }
6795 :
6796 9198998 : if (!best_delta)
6797 : {
6798 6826798 : *delta = NULL;
6799 6826798 : return best_cost;
6800 : }
6801 :
6802 : /* Recurse to possibly remove other unnecessary ivs. */
6803 2372200 : iv_ca_delta_commit (data, ivs, best_delta, true);
6804 2372200 : best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6805 2372200 : iv_ca_delta_commit (data, ivs, best_delta, false);
6806 2372200 : *delta = iv_ca_delta_join (best_delta, *delta);
6807 2372200 : return best_cost;
6808 : }
6809 :
6810 : /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6811 : cheaper local cost for GROUP than BEST_CP. Return pointer to
6812 : the corresponding cost_pair, otherwise just return BEST_CP. */
6813 :
6814 : static class cost_pair*
6815 29250855 : cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6816 : unsigned int cand_idx, struct iv_cand *old_cand,
6817 : class cost_pair *best_cp)
6818 : {
6819 29250855 : struct iv_cand *cand;
6820 29250855 : class cost_pair *cp;
6821 :
6822 29250855 : gcc_assert (old_cand != NULL && best_cp != NULL);
6823 29250855 : if (cand_idx == old_cand->id)
6824 : return best_cp;
6825 :
6826 26423062 : cand = data->vcands[cand_idx];
6827 26423062 : cp = get_group_iv_cost (data, group, cand);
6828 26423062 : if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6829 : return cp;
6830 :
6831 : return best_cp;
6832 : }
6833 :
6834 : /* Try breaking local optimal fixed-point for IVS by replacing candidates
6835 : which are used by more than one iv uses. For each of those candidates,
6836 : this function tries to represent iv uses under that candidate using
6837 : other ones with lower local cost, then tries to prune the new set.
6838 : If the new set has lower cost, It returns the new cost after recording
6839 : candidate replacement in list DELTA. */
6840 :
6841 : static comp_cost
6842 1006152 : iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6843 : struct iv_ca_delta **delta)
6844 : {
6845 1006152 : bitmap_iterator bi, bj;
6846 1006152 : unsigned int i, j, k;
6847 1006152 : struct iv_cand *cand;
6848 1006152 : comp_cost orig_cost, acost;
6849 1006152 : struct iv_ca_delta *act_delta, *tmp_delta;
6850 1006152 : class cost_pair *old_cp, *best_cp = NULL;
6851 :
6852 1006152 : *delta = NULL;
6853 1006152 : orig_cost = iv_ca_cost (ivs);
6854 :
6855 2339545 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6856 : {
6857 1362781 : if (ivs->n_cand_uses[i] == 1
6858 1026418 : || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6859 342162 : continue;
6860 :
6861 1020619 : cand = data->vcands[i];
6862 :
6863 1020619 : act_delta = NULL;
6864 : /* Represent uses under current candidate using other ones with
6865 : lower local cost. */
6866 5216809 : for (j = 0; j < ivs->upto; j++)
6867 : {
6868 4196190 : struct iv_group *group = data->vgroups[j];
6869 4196190 : old_cp = iv_ca_cand_for_group (ivs, group);
6870 :
6871 4196190 : if (old_cp->cand != cand)
6872 1368397 : continue;
6873 :
6874 2827793 : best_cp = old_cp;
6875 2827793 : if (data->consider_all_candidates)
6876 31979156 : for (k = 0; k < data->vcands.length (); k++)
6877 29158696 : best_cp = cheaper_cost_with_cand (data, group, k,
6878 : old_cp->cand, best_cp);
6879 : else
6880 99492 : EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6881 92159 : best_cp = cheaper_cost_with_cand (data, group, k,
6882 : old_cp->cand, best_cp);
6883 :
6884 2827793 : if (best_cp == old_cp)
6885 1338083 : continue;
6886 :
6887 1489710 : act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6888 : }
6889 : /* No need for further prune. */
6890 1020619 : if (!act_delta)
6891 234885 : continue;
6892 :
6893 : /* Prune the new candidate set. */
6894 785734 : iv_ca_delta_commit (data, ivs, act_delta, true);
6895 785734 : acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6896 785734 : iv_ca_delta_commit (data, ivs, act_delta, false);
6897 785734 : act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6898 :
6899 785734 : if (acost < orig_cost)
6900 : {
6901 29388 : *delta = act_delta;
6902 29388 : return acost;
6903 : }
6904 : else
6905 756346 : iv_ca_delta_free (&act_delta);
6906 : }
6907 :
6908 976764 : return orig_cost;
6909 : }
6910 :
6911 : /* Tries to extend the sets IVS in the best possible way in order to
6912 : express the GROUP. If ORIGINALP is true, prefer candidates from
6913 : the original set of IVs, otherwise favor important candidates not
6914 : based on any memory object. */
6915 :
6916 : static bool
6917 3292844 : try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6918 : struct iv_group *group, bool originalp)
6919 : {
6920 3292844 : comp_cost best_cost, act_cost;
6921 3292844 : unsigned i;
6922 3292844 : bitmap_iterator bi;
6923 3292844 : struct iv_cand *cand;
6924 3292844 : struct iv_ca_delta *best_delta = NULL, *act_delta;
6925 3292844 : class cost_pair *cp;
6926 :
6927 3292844 : iv_ca_add_group (data, ivs, group);
6928 3292844 : best_cost = iv_ca_cost (ivs);
6929 3292844 : cp = iv_ca_cand_for_group (ivs, group);
6930 3292844 : if (cp)
6931 : {
6932 3205723 : best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6933 3205723 : iv_ca_set_no_cp (data, ivs, group);
6934 : }
6935 :
6936 : /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6937 : first try important candidates not based on any memory object. Only if
6938 : this fails, try the specific ones. Rationale -- in loops with many
6939 : variables the best choice often is to use just one generic biv. If we
6940 : added here many ivs specific to the uses, the optimization algorithm later
6941 : would be likely to get stuck in a local minimum, thus causing us to create
6942 : too many ivs. The approach from few ivs to more seems more likely to be
6943 : successful -- starting from few ivs, replacing an expensive use by a
6944 : specific iv should always be a win. */
6945 30691798 : EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6946 : {
6947 27398954 : cand = data->vcands[i];
6948 :
6949 27398954 : if (originalp && cand->pos !=IP_ORIGINAL)
6950 10799905 : continue;
6951 :
6952 13699477 : if (!originalp && cand->iv->base_object != NULL_TREE)
6953 2498194 : continue;
6954 :
6955 14100855 : if (iv_ca_cand_used_p (ivs, cand))
6956 1517946 : continue;
6957 :
6958 12582909 : cp = get_group_iv_cost (data, group, cand);
6959 12582909 : if (!cp)
6960 3628271 : continue;
6961 :
6962 8954638 : iv_ca_set_cp (data, ivs, group, cp);
6963 8954638 : act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6964 : true);
6965 8954638 : iv_ca_set_no_cp (data, ivs, group);
6966 8954638 : act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6967 :
6968 8954638 : if (act_cost < best_cost)
6969 : {
6970 396508 : best_cost = act_cost;
6971 :
6972 396508 : iv_ca_delta_free (&best_delta);
6973 396508 : best_delta = act_delta;
6974 : }
6975 : else
6976 8558130 : iv_ca_delta_free (&act_delta);
6977 : }
6978 :
6979 3292844 : if (best_cost.infinite_cost_p ())
6980 : {
6981 692590 : for (i = 0; i < group->n_map_members; i++)
6982 : {
6983 628977 : cp = group->cost_map + i;
6984 628977 : cand = cp->cand;
6985 628977 : if (!cand)
6986 521970 : continue;
6987 :
6988 : /* Already tried this. */
6989 107007 : if (cand->important)
6990 : {
6991 0 : if (originalp && cand->pos == IP_ORIGINAL)
6992 0 : continue;
6993 0 : if (!originalp && cand->iv->base_object == NULL_TREE)
6994 0 : continue;
6995 : }
6996 :
6997 107007 : if (iv_ca_cand_used_p (ivs, cand))
6998 0 : continue;
6999 :
7000 107007 : act_delta = NULL;
7001 107007 : iv_ca_set_cp (data, ivs, group, cp);
7002 107007 : act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7003 107007 : iv_ca_set_no_cp (data, ivs, group);
7004 107007 : act_delta = iv_ca_delta_add (group,
7005 : iv_ca_cand_for_group (ivs, group),
7006 : cp, act_delta);
7007 :
7008 107007 : if (act_cost < best_cost)
7009 : {
7010 65117 : best_cost = act_cost;
7011 :
7012 65117 : if (best_delta)
7013 2742 : iv_ca_delta_free (&best_delta);
7014 65117 : best_delta = act_delta;
7015 : }
7016 : else
7017 41890 : iv_ca_delta_free (&act_delta);
7018 : }
7019 : }
7020 :
7021 3292844 : iv_ca_delta_commit (data, ivs, best_delta, true);
7022 3292844 : iv_ca_delta_free (&best_delta);
7023 :
7024 3292844 : return !best_cost.infinite_cost_p ();
7025 : }
7026 :
7027 : /* Finds an initial assignment of candidates to uses. */
7028 :
7029 : static class iv_ca *
7030 1007390 : get_initial_solution (struct ivopts_data *data, bool originalp)
7031 : {
7032 1007390 : unsigned i;
7033 1007390 : class iv_ca *ivs = iv_ca_new (data);
7034 :
7035 4298996 : for (i = 0; i < data->vgroups.length (); i++)
7036 3292844 : if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7037 : {
7038 1238 : iv_ca_free (&ivs);
7039 1238 : return NULL;
7040 : }
7041 :
7042 : return ivs;
7043 : }
7044 :
7045 : /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
7046 : points to a bool variable, this function tries to break local
7047 : optimal fixed-point by replacing candidates in IVS if it's true. */
7048 :
7049 : static bool
7050 1492025 : try_improve_iv_set (struct ivopts_data *data,
7051 : class iv_ca *ivs, bool *try_replace_p)
7052 : {
7053 1492025 : unsigned i, n_ivs;
7054 1492025 : comp_cost acost, best_cost = iv_ca_cost (ivs);
7055 1492025 : struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7056 1492025 : struct iv_cand *cand;
7057 :
7058 : /* Try extending the set of induction variables by one. */
7059 16410705 : for (i = 0; i < data->vcands.length (); i++)
7060 : {
7061 14918680 : cand = data->vcands[i];
7062 :
7063 14918680 : if (iv_ca_cand_used_p (ivs, cand))
7064 2228572 : continue;
7065 :
7066 12690108 : acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7067 12690108 : if (!act_delta)
7068 7645348 : continue;
7069 :
7070 : /* If we successfully added the candidate and the set is small enough,
7071 : try optimizing it by removing other candidates. */
7072 5044760 : if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7073 : {
7074 5004344 : iv_ca_delta_commit (data, ivs, act_delta, true);
7075 5004344 : acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7076 5004344 : iv_ca_delta_commit (data, ivs, act_delta, false);
7077 5004344 : act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7078 : }
7079 :
7080 5044760 : if (acost < best_cost)
7081 : {
7082 581803 : best_cost = acost;
7083 581803 : iv_ca_delta_free (&best_delta);
7084 581803 : best_delta = act_delta;
7085 : }
7086 : else
7087 4462957 : iv_ca_delta_free (&act_delta);
7088 : }
7089 :
7090 1492025 : if (!best_delta)
7091 : {
7092 : /* Try removing the candidates from the set instead. */
7093 1036720 : best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7094 :
7095 1036720 : if (!best_delta && *try_replace_p)
7096 : {
7097 1006152 : *try_replace_p = false;
7098 : /* So far candidate selecting algorithm tends to choose fewer IVs
7099 : so that it can handle cases in which loops have many variables
7100 : but the best choice is often to use only one general biv. One
7101 : weakness is it can't handle opposite cases, in which different
7102 : candidates should be chosen with respect to each use. To solve
7103 : the problem, we replace candidates in a manner described by the
7104 : comments of iv_ca_replace, thus give general algorithm a chance
7105 : to break local optimal fixed-point in these cases. */
7106 1006152 : best_cost = iv_ca_replace (data, ivs, &best_delta);
7107 : }
7108 :
7109 1036720 : if (!best_delta)
7110 : return false;
7111 : }
7112 :
7113 485873 : iv_ca_delta_commit (data, ivs, best_delta, true);
7114 485873 : iv_ca_delta_free (&best_delta);
7115 971746 : return best_cost == iv_ca_cost (ivs);
7116 : }
7117 :
7118 : /* Attempts to find the optimal set of induction variables. We do simple
7119 : greedy heuristic -- we try to replace at most one candidate in the selected
7120 : solution and remove the unused ivs while this improves the cost. */
7121 :
7122 : static class iv_ca *
7123 1007390 : find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7124 : {
7125 1007390 : class iv_ca *set;
7126 1007390 : bool try_replace_p = true;
7127 :
7128 : /* Get the initial solution. */
7129 1007390 : set = get_initial_solution (data, originalp);
7130 1007390 : if (!set)
7131 : {
7132 1238 : if (dump_file && (dump_flags & TDF_DETAILS))
7133 0 : fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7134 1238 : return NULL;
7135 : }
7136 :
7137 1006152 : if (dump_file && (dump_flags & TDF_DETAILS))
7138 : {
7139 134 : fprintf (dump_file, "Initial set of candidates:\n");
7140 134 : iv_ca_dump (data, dump_file, set);
7141 : }
7142 :
7143 1492025 : while (try_improve_iv_set (data, set, &try_replace_p))
7144 : {
7145 485873 : if (dump_file && (dump_flags & TDF_DETAILS))
7146 : {
7147 114 : fprintf (dump_file, "Improved to:\n");
7148 114 : iv_ca_dump (data, dump_file, set);
7149 : }
7150 : }
7151 :
7152 : /* If the set has infinite_cost, it can't be optimal. */
7153 2012304 : if (iv_ca_cost (set).infinite_cost_p ())
7154 : {
7155 0 : if (dump_file && (dump_flags & TDF_DETAILS))
7156 0 : fprintf (dump_file,
7157 : "Overflow to infinite cost in try_improve_iv_set.\n");
7158 0 : iv_ca_free (&set);
7159 : }
7160 1006152 : return set;
7161 : }
7162 :
7163 : static class iv_ca *
7164 503695 : find_optimal_iv_set (struct ivopts_data *data)
7165 : {
7166 503695 : unsigned i;
7167 503695 : comp_cost cost, origcost;
7168 503695 : class iv_ca *set, *origset;
7169 :
7170 : /* Determine the cost based on a strategy that starts with original IVs,
7171 : and try again using a strategy that prefers candidates not based
7172 : on any IVs. */
7173 503695 : origset = find_optimal_iv_set_1 (data, true);
7174 503695 : set = find_optimal_iv_set_1 (data, false);
7175 :
7176 503695 : if (!origset && !set)
7177 : return NULL;
7178 :
7179 503076 : origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7180 503076 : cost = set ? iv_ca_cost (set) : infinite_cost;
7181 :
7182 503076 : if (dump_file && (dump_flags & TDF_DETAILS))
7183 : {
7184 67 : fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7185 : origcost.cost, origcost.complexity);
7186 67 : fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7187 : cost.cost, cost.complexity);
7188 : }
7189 :
7190 : /* Choose the one with the best cost. */
7191 503076 : if (origcost <= cost)
7192 : {
7193 467744 : if (set)
7194 467744 : iv_ca_free (&set);
7195 467744 : set = origset;
7196 : }
7197 35332 : else if (origset)
7198 35332 : iv_ca_free (&origset);
7199 :
7200 2148241 : for (i = 0; i < data->vgroups.length (); i++)
7201 : {
7202 1645165 : struct iv_group *group = data->vgroups[i];
7203 1645165 : group->selected = iv_ca_cand_for_group (set, group)->cand;
7204 : }
7205 :
7206 503076 : return set;
7207 : }
7208 :
7209 : /* Creates a new induction variable corresponding to CAND. */
7210 :
7211 : static void
7212 678231 : create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7213 : {
7214 678231 : gimple_stmt_iterator incr_pos;
7215 678231 : tree base;
7216 678231 : struct iv_use *use;
7217 678231 : struct iv_group *group;
7218 678231 : bool after = false;
7219 :
7220 678231 : gcc_assert (cand->iv != NULL);
7221 :
7222 678231 : switch (cand->pos)
7223 : {
7224 466416 : case IP_NORMAL:
7225 466416 : incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7226 466416 : break;
7227 :
7228 10208 : case IP_END:
7229 10208 : incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7230 10208 : after = true;
7231 10208 : gcc_assert (gsi_end_p (incr_pos) || !stmt_ends_bb_p (*incr_pos));
7232 : break;
7233 :
7234 0 : case IP_AFTER_USE:
7235 0 : after = true;
7236 : /* fall through */
7237 0 : case IP_BEFORE_USE:
7238 0 : incr_pos = gsi_for_stmt (cand->incremented_at);
7239 0 : break;
7240 :
7241 201607 : case IP_ORIGINAL:
7242 : /* Mark that the iv is preserved. */
7243 201607 : name_info (data, cand->var_before)->preserve_biv = true;
7244 201607 : name_info (data, cand->var_after)->preserve_biv = true;
7245 :
7246 : /* Rewrite the increment so that it uses var_before directly. */
7247 201607 : use = find_interesting_uses_op (data, cand->var_after);
7248 201607 : group = data->vgroups[use->group_id];
7249 201607 : group->selected = cand;
7250 201607 : return;
7251 : }
7252 :
7253 476624 : gimple_add_tmp_var (cand->var_before);
7254 :
7255 476624 : base = unshare_expr (cand->iv->base);
7256 :
7257 : /* The step computation could invoke UB when the loop does not iterate.
7258 : Avoid inserting it on the preheader in its native form but rewrite
7259 : it to a well-defined form. This also helps masking SCEV issues
7260 : which freely re-associates the IV computations when building up
7261 : CHRECs without much regard for signed overflow invoking UB. */
7262 476624 : gimple_seq stmts = NULL;
7263 476624 : tree step = force_gimple_operand (unshare_expr (cand->iv->step), &stmts,
7264 : true, NULL_TREE);
7265 476624 : if (stmts)
7266 : {
7267 143082 : for (auto gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
7268 95188 : if (gimple_needing_rewrite_undefined (gsi_stmt (gsi)))
7269 10725 : rewrite_to_defined_unconditional (&gsi);
7270 47894 : gsi_insert_seq_on_edge_immediate
7271 47894 : (loop_preheader_edge (data->current_loop), stmts);
7272 : }
7273 :
7274 476624 : create_iv (base, PLUS_EXPR, step,
7275 : cand->var_before, data->current_loop,
7276 : &incr_pos, after, &cand->var_before, &cand->var_after);
7277 : }
7278 :
7279 : /* Creates new induction variables described in SET. */
7280 :
7281 : static void
7282 503076 : create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7283 : {
7284 503076 : unsigned i;
7285 503076 : struct iv_cand *cand;
7286 503076 : bitmap_iterator bi;
7287 :
7288 1181307 : EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7289 : {
7290 678231 : cand = data->vcands[i];
7291 678231 : create_new_iv (data, cand);
7292 : }
7293 :
7294 503076 : if (dump_file && (dump_flags & TDF_DETAILS))
7295 : {
7296 67 : fprintf (dump_file, "Selected IV set for loop %d",
7297 67 : data->current_loop->num);
7298 67 : if (data->loop_loc != UNKNOWN_LOCATION)
7299 65 : fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7300 130 : LOCATION_LINE (data->loop_loc));
7301 67 : fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " avg niters",
7302 : avg_loop_niter (data->current_loop));
7303 67 : fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7304 178 : EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7305 : {
7306 111 : cand = data->vcands[i];
7307 111 : dump_cand (dump_file, cand);
7308 : }
7309 67 : fprintf (dump_file, "\n");
7310 : }
7311 503076 : }
7312 :
7313 : /* Rewrites USE (definition of iv used in a nonlinear expression)
7314 : using candidate CAND. */
7315 :
7316 : static void
7317 626150 : rewrite_use_nonlinear_expr (struct ivopts_data *data,
7318 : struct iv_use *use, struct iv_cand *cand)
7319 : {
7320 626150 : gassign *ass;
7321 626150 : gimple_stmt_iterator bsi;
7322 626150 : tree comp, type = get_use_type (use), tgt;
7323 :
7324 : /* An important special case -- if we are asked to express value of
7325 : the original iv by itself, just exit; there is no need to
7326 : introduce a new computation (that might also need casting the
7327 : variable to unsigned and back). */
7328 626150 : if (cand->pos == IP_ORIGINAL
7329 335233 : && cand->incremented_at == use->stmt)
7330 : {
7331 201607 : tree op = NULL_TREE;
7332 201607 : enum tree_code stmt_code;
7333 :
7334 201607 : gcc_assert (is_gimple_assign (use->stmt));
7335 201607 : gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7336 :
7337 : /* Check whether we may leave the computation unchanged.
7338 : This is the case only if it does not rely on other
7339 : computations in the loop -- otherwise, the computation
7340 : we rely upon may be removed in remove_unused_ivs,
7341 : thus leading to ICE. */
7342 201607 : stmt_code = gimple_assign_rhs_code (use->stmt);
7343 201607 : if (stmt_code == PLUS_EXPR
7344 201607 : || stmt_code == MINUS_EXPR
7345 201607 : || stmt_code == POINTER_PLUS_EXPR)
7346 : {
7347 197628 : if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7348 195781 : op = gimple_assign_rhs2 (use->stmt);
7349 1847 : else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7350 : op = gimple_assign_rhs1 (use->stmt);
7351 : }
7352 :
7353 196281 : if (op != NULL_TREE)
7354 : {
7355 196281 : if (expr_invariant_in_loop_p (data->current_loop, op))
7356 281497 : return;
7357 181 : if (TREE_CODE (op) == SSA_NAME)
7358 : {
7359 181 : struct iv *iv = get_iv (data, op);
7360 181 : if (iv != NULL && integer_zerop (iv->step))
7361 : return;
7362 : }
7363 : }
7364 : }
7365 :
7366 429869 : switch (gimple_code (use->stmt))
7367 : {
7368 127663 : case GIMPLE_PHI:
7369 127663 : tgt = PHI_RESULT (use->stmt);
7370 :
7371 : /* If we should keep the biv, do not replace it. */
7372 127663 : if (name_info (data, tgt)->preserve_biv)
7373 : return;
7374 :
7375 42447 : bsi = gsi_after_labels (gimple_bb (use->stmt));
7376 42447 : break;
7377 :
7378 302206 : case GIMPLE_ASSIGN:
7379 302206 : tgt = gimple_assign_lhs (use->stmt);
7380 302206 : bsi = gsi_for_stmt (use->stmt);
7381 302206 : break;
7382 :
7383 0 : default:
7384 0 : gcc_unreachable ();
7385 : }
7386 :
7387 1033959 : aff_tree aff_inv, aff_var;
7388 344653 : if (!get_computation_aff_1 (data, use->stmt, use, cand, &aff_inv, &aff_var))
7389 0 : gcc_unreachable ();
7390 :
7391 344653 : unshare_aff_combination (&aff_inv);
7392 344653 : unshare_aff_combination (&aff_var);
7393 : /* Prefer CSE opportunity than loop invariant by adding offset at last
7394 : so that iv_uses have different offsets can be CSEed. */
7395 689306 : poly_widest_int offset = aff_inv.offset;
7396 344653 : aff_inv.offset = 0;
7397 :
7398 344653 : gimple_seq stmt_list = NULL, seq = NULL;
7399 344653 : tree comp_op1 = aff_combination_to_tree (&aff_inv);
7400 344653 : tree comp_op2 = aff_combination_to_tree (&aff_var);
7401 344653 : gcc_assert (comp_op1 && comp_op2);
7402 :
7403 344653 : comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7404 344653 : gimple_seq_add_seq (&stmt_list, seq);
7405 344653 : comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7406 344653 : gimple_seq_add_seq (&stmt_list, seq);
7407 :
7408 344653 : if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7409 : std::swap (comp_op1, comp_op2);
7410 :
7411 344653 : if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7412 : {
7413 0 : comp = fold_build_pointer_plus (comp_op1,
7414 : fold_convert (sizetype, comp_op2));
7415 0 : comp = fold_build_pointer_plus (comp,
7416 : wide_int_to_tree (sizetype, offset));
7417 : }
7418 : else
7419 : {
7420 344653 : comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7421 : fold_convert (TREE_TYPE (comp_op1), comp_op2));
7422 344653 : comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7423 : wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7424 : }
7425 :
7426 344653 : comp = fold_convert (type, comp);
7427 344653 : comp = force_gimple_operand (comp, &seq, false, NULL);
7428 344653 : gimple_seq_add_seq (&stmt_list, seq);
7429 344653 : if (gimple_code (use->stmt) != GIMPLE_PHI
7430 : /* We can't allow re-allocating the stmt as it might be pointed
7431 : to still. */
7432 344653 : && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7433 302206 : >= gimple_num_ops (gsi_stmt (bsi))))
7434 : {
7435 8481 : comp = force_gimple_operand (comp, &seq, true, NULL);
7436 8481 : gimple_seq_add_seq (&stmt_list, seq);
7437 8481 : if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7438 : {
7439 0 : duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7440 : /* As this isn't a plain copy we have to reset alignment
7441 : information. */
7442 0 : if (SSA_NAME_PTR_INFO (comp))
7443 0 : mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7444 : }
7445 : }
7446 :
7447 344653 : gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7448 344653 : if (gimple_code (use->stmt) == GIMPLE_PHI)
7449 : {
7450 42447 : ass = gimple_build_assign (tgt, comp);
7451 42447 : gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7452 :
7453 42447 : bsi = gsi_for_stmt (use->stmt);
7454 42447 : remove_phi_node (&bsi, false);
7455 : }
7456 : else
7457 : {
7458 302206 : gimple_assign_set_rhs_from_tree (&bsi, comp);
7459 302206 : use->stmt = gsi_stmt (bsi);
7460 : }
7461 : }
7462 :
7463 : /* Performs a peephole optimization to reorder the iv update statement with
7464 : a mem ref to enable instruction combining in later phases. The mem ref uses
7465 : the iv value before the update, so the reordering transformation requires
7466 : adjustment of the offset. CAND is the selected IV_CAND.
7467 :
7468 : Example:
7469 :
7470 : t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7471 : iv2 = iv1 + 1;
7472 :
7473 : if (t < val) (1)
7474 : goto L;
7475 : goto Head;
7476 :
7477 :
7478 : directly propagating t over to (1) will introduce overlapping live range
7479 : thus increase register pressure. This peephole transform it into:
7480 :
7481 :
7482 : iv2 = iv1 + 1;
7483 : t = MEM_REF (base, iv2, 8, 8);
7484 : if (t < val)
7485 : goto L;
7486 : goto Head;
7487 : */
7488 :
7489 : static void
7490 858487 : adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7491 : {
7492 858487 : tree var_after;
7493 858487 : gimple *iv_update, *stmt;
7494 858487 : basic_block bb;
7495 858487 : gimple_stmt_iterator gsi, gsi_iv;
7496 :
7497 858487 : if (cand->pos != IP_NORMAL)
7498 856302 : return;
7499 :
7500 660622 : var_after = cand->var_after;
7501 660622 : iv_update = SSA_NAME_DEF_STMT (var_after);
7502 :
7503 660622 : bb = gimple_bb (iv_update);
7504 660622 : gsi = gsi_last_nondebug_bb (bb);
7505 660622 : stmt = gsi_stmt (gsi);
7506 :
7507 : /* Only handle conditional statement for now. */
7508 660622 : if (gimple_code (stmt) != GIMPLE_COND)
7509 : return;
7510 :
7511 660622 : gsi_prev_nondebug (&gsi);
7512 660622 : stmt = gsi_stmt (gsi);
7513 660622 : if (stmt != iv_update)
7514 : return;
7515 :
7516 531993 : gsi_prev_nondebug (&gsi);
7517 531993 : if (gsi_end_p (gsi))
7518 : return;
7519 :
7520 528822 : stmt = gsi_stmt (gsi);
7521 528822 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
7522 : return;
7523 :
7524 528661 : if (stmt != use->stmt)
7525 : return;
7526 :
7527 5032 : if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7528 : return;
7529 :
7530 2185 : if (dump_file && (dump_flags & TDF_DETAILS))
7531 : {
7532 0 : fprintf (dump_file, "Reordering \n");
7533 0 : print_gimple_stmt (dump_file, iv_update, 0);
7534 0 : print_gimple_stmt (dump_file, use->stmt, 0);
7535 0 : fprintf (dump_file, "\n");
7536 : }
7537 :
7538 2185 : gsi = gsi_for_stmt (use->stmt);
7539 2185 : gsi_iv = gsi_for_stmt (iv_update);
7540 2185 : gsi_move_before (&gsi_iv, &gsi);
7541 :
7542 2185 : cand->pos = IP_BEFORE_USE;
7543 2185 : cand->incremented_at = use->stmt;
7544 : }
7545 :
7546 : /* Return the alias pointer type that should be used for a MEM_REF
7547 : associated with USE, which has type USE_PTR_ADDRESS. */
7548 :
7549 : static tree
7550 795 : get_alias_ptr_type_for_ptr_address (iv_use *use)
7551 : {
7552 795 : gcall *call = as_a <gcall *> (use->stmt);
7553 795 : switch (gimple_call_internal_fn (call))
7554 : {
7555 795 : case IFN_MASK_LOAD:
7556 795 : case IFN_MASK_STORE:
7557 795 : case IFN_MASK_LOAD_LANES:
7558 795 : case IFN_MASK_STORE_LANES:
7559 795 : case IFN_MASK_LEN_LOAD_LANES:
7560 795 : case IFN_MASK_LEN_STORE_LANES:
7561 795 : case IFN_LEN_LOAD:
7562 795 : case IFN_LEN_STORE:
7563 795 : case IFN_MASK_LEN_LOAD:
7564 795 : case IFN_MASK_LEN_STORE:
7565 : /* The second argument contains the correct alias type. */
7566 795 : gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7567 795 : return TREE_TYPE (gimple_call_arg (call, 1));
7568 :
7569 0 : default:
7570 0 : gcc_unreachable ();
7571 : }
7572 : }
7573 :
7574 :
7575 : /* Rewrites USE (address that is an iv) using candidate CAND. */
7576 :
7577 : static void
7578 858487 : rewrite_use_address (struct ivopts_data *data,
7579 : struct iv_use *use, struct iv_cand *cand)
7580 : {
7581 858487 : aff_tree aff;
7582 858487 : bool ok;
7583 :
7584 858487 : adjust_iv_update_pos (cand, use);
7585 858487 : ok = get_computation_aff (data, use->stmt, use, cand, &aff);
7586 858487 : gcc_assert (ok);
7587 858487 : unshare_aff_combination (&aff);
7588 :
7589 : /* To avoid undefined overflow problems, all IV candidates use unsigned
7590 : integer types. The drawback is that this makes it impossible for
7591 : create_mem_ref to distinguish an IV that is based on a memory object
7592 : from one that represents simply an offset.
7593 :
7594 : To work around this problem, we pass a hint to create_mem_ref that
7595 : indicates which variable (if any) in aff is an IV based on a memory
7596 : object. Note that we only consider the candidate. If this is not
7597 : based on an object, the base of the reference is in some subexpression
7598 : of the use -- but these will use pointer types, so they are recognized
7599 : by the create_mem_ref heuristics anyway. */
7600 858487 : tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7601 858487 : tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7602 858487 : gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7603 858487 : tree type = use->mem_type;
7604 858487 : tree alias_ptr_type;
7605 858487 : if (use->type == USE_PTR_ADDRESS)
7606 795 : alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7607 : else
7608 : {
7609 857692 : gcc_assert (type == TREE_TYPE (*use->op_p));
7610 857692 : unsigned int align = get_object_alignment (*use->op_p);
7611 857692 : if (align != TYPE_ALIGN (type))
7612 33997 : type = build_aligned_type (type, align);
7613 857692 : alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7614 : }
7615 1716974 : tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7616 858487 : iv, base_hint, data->speed);
7617 :
7618 858487 : if (use->type == USE_PTR_ADDRESS)
7619 : {
7620 795 : ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7621 795 : ref = fold_convert (get_use_type (use), ref);
7622 795 : ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7623 : true, GSI_SAME_STMT);
7624 : }
7625 : else
7626 : {
7627 : /* When we end up confused enough and have no suitable base but
7628 : stuffed everything to index2 use a LEA for the address and
7629 : create a plain MEM_REF to avoid basing a memory reference
7630 : on address zero which create_mem_ref_raw does as fallback. */
7631 857692 : if (TREE_CODE (ref) == TARGET_MEM_REF
7632 857692 : && TMR_INDEX2 (ref) != NULL_TREE
7633 868235 : && integer_zerop (TREE_OPERAND (ref, 0)))
7634 : {
7635 20 : ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7636 20 : ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7637 : true, GSI_SAME_STMT);
7638 20 : ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7639 : }
7640 857692 : copy_ref_info (ref, *use->op_p);
7641 : }
7642 :
7643 858487 : *use->op_p = ref;
7644 858487 : }
7645 :
7646 : /* Rewrites USE (the condition such that one of the arguments is an iv) using
7647 : candidate CAND. */
7648 :
7649 : static void
7650 597527 : rewrite_use_compare (struct ivopts_data *data,
7651 : struct iv_use *use, struct iv_cand *cand)
7652 : {
7653 597527 : tree comp, op, bound;
7654 597527 : gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7655 597527 : enum tree_code compare;
7656 597527 : struct iv_group *group = data->vgroups[use->group_id];
7657 597527 : class cost_pair *cp = get_group_iv_cost (data, group, cand);
7658 :
7659 597527 : bound = cp->value;
7660 597527 : if (bound)
7661 : {
7662 392892 : tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7663 392892 : tree var_type = TREE_TYPE (var);
7664 392892 : gimple_seq stmts;
7665 :
7666 392892 : if (dump_file && (dump_flags & TDF_DETAILS))
7667 : {
7668 58 : fprintf (dump_file, "Replacing exit test: ");
7669 58 : print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7670 : }
7671 392892 : compare = cp->comp;
7672 392892 : bound = unshare_expr (fold_convert (var_type, bound));
7673 392892 : op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7674 392892 : if (stmts)
7675 180756 : gsi_insert_seq_on_edge_immediate (
7676 180756 : loop_preheader_edge (data->current_loop),
7677 : stmts);
7678 :
7679 392892 : gcond *cond_stmt = as_a <gcond *> (use->stmt);
7680 392892 : gimple_cond_set_lhs (cond_stmt, var);
7681 392892 : gimple_cond_set_code (cond_stmt, compare);
7682 392892 : gimple_cond_set_rhs (cond_stmt, op);
7683 392892 : return;
7684 : }
7685 :
7686 : /* The induction variable elimination failed; just express the original
7687 : giv. */
7688 204635 : comp = get_computation_at (data, use->stmt, use, cand);
7689 204635 : gcc_assert (comp != NULL_TREE);
7690 204635 : gcc_assert (use->op_p != NULL);
7691 204635 : *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7692 204635 : SSA_NAME_VAR (*use->op_p),
7693 : true, GSI_SAME_STMT);
7694 : }
7695 :
7696 : /* Rewrite the groups using the selected induction variables. */
7697 :
7698 : static void
7699 503076 : rewrite_groups (struct ivopts_data *data)
7700 : {
7701 503076 : unsigned i, j;
7702 :
7703 2309343 : for (i = 0; i < data->vgroups.length (); i++)
7704 : {
7705 1806267 : struct iv_group *group = data->vgroups[i];
7706 1806267 : struct iv_cand *cand = group->selected;
7707 :
7708 1806267 : gcc_assert (cand);
7709 :
7710 1806267 : if (group->type == USE_NONLINEAR_EXPR)
7711 : {
7712 1252300 : for (j = 0; j < group->vuses.length (); j++)
7713 : {
7714 626150 : rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7715 626150 : update_stmt (group->vuses[j]->stmt);
7716 : }
7717 : }
7718 1180117 : else if (address_p (group->type))
7719 : {
7720 1441077 : for (j = 0; j < group->vuses.length (); j++)
7721 : {
7722 858487 : rewrite_use_address (data, group->vuses[j], cand);
7723 858487 : update_stmt (group->vuses[j]->stmt);
7724 : }
7725 : }
7726 : else
7727 : {
7728 597527 : gcc_assert (group->type == USE_COMPARE);
7729 :
7730 2403794 : for (j = 0; j < group->vuses.length (); j++)
7731 : {
7732 597527 : rewrite_use_compare (data, group->vuses[j], cand);
7733 597527 : update_stmt (group->vuses[j]->stmt);
7734 : }
7735 : }
7736 : }
7737 503076 : }
7738 :
7739 : /* Removes the ivs that are not used after rewriting. */
7740 :
7741 : static void
7742 503076 : remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7743 : {
7744 503076 : unsigned j;
7745 503076 : bitmap_iterator bi;
7746 :
7747 : /* Figure out an order in which to release SSA DEFs so that we don't
7748 : release something that we'd have to propagate into a debug stmt
7749 : afterwards. */
7750 5524277 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7751 : {
7752 5021201 : struct version_info *info;
7753 :
7754 5021201 : info = ver_info (data, j);
7755 5021201 : if (info->iv
7756 4877395 : && !integer_zerop (info->iv->step)
7757 3210474 : && !info->inv_id
7758 3210474 : && !info->iv->nonlin_use
7759 7605525 : && !info->preserve_biv)
7760 : {
7761 2467933 : bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7762 :
7763 2467933 : tree def = info->iv->ssa_name;
7764 :
7765 3208887 : if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7766 : {
7767 740954 : imm_use_iterator imm_iter;
7768 740954 : use_operand_p use_p;
7769 740954 : gimple *stmt;
7770 740954 : int count = 0;
7771 :
7772 2199562 : FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7773 : {
7774 745968 : if (!gimple_debug_bind_p (stmt))
7775 627423 : continue;
7776 :
7777 : /* We just want to determine whether to do nothing
7778 : (count == 0), to substitute the computed
7779 : expression into a single use of the SSA DEF by
7780 : itself (count == 1), or to use a debug temp
7781 : because the SSA DEF is used multiple times or as
7782 : part of a larger expression (count > 1). */
7783 118545 : count++;
7784 118545 : if (gimple_debug_bind_get_value (stmt) != def)
7785 7441 : count++;
7786 :
7787 118545 : if (count > 1)
7788 : break;
7789 740954 : }
7790 :
7791 740954 : if (!count)
7792 664939 : continue;
7793 :
7794 97451 : struct iv_use dummy_use;
7795 97451 : struct iv_cand *best_cand = NULL, *cand;
7796 97451 : unsigned i, best_pref = 0, cand_pref;
7797 97451 : tree comp = NULL_TREE;
7798 :
7799 97451 : memset (&dummy_use, 0, sizeof (dummy_use));
7800 97451 : dummy_use.iv = info->iv;
7801 496153 : for (i = 0; i < data->vgroups.length () && i < 64; i++)
7802 : {
7803 398702 : cand = data->vgroups[i]->selected;
7804 398702 : if (cand == best_cand)
7805 165211 : continue;
7806 154608 : cand_pref = operand_equal_p (cand->iv->step,
7807 233491 : info->iv->step, 0)
7808 233491 : ? 4 : 0;
7809 233491 : cand_pref
7810 233491 : += TYPE_MODE (TREE_TYPE (cand->iv->base))
7811 233491 : == TYPE_MODE (TREE_TYPE (info->iv->base))
7812 233491 : ? 2 : 0;
7813 233491 : cand_pref
7814 466982 : += TREE_CODE (cand->iv->base) == INTEGER_CST
7815 233491 : ? 1 : 0;
7816 233491 : if (best_cand == NULL || best_pref < cand_pref)
7817 : {
7818 179522 : tree this_comp
7819 359044 : = get_debug_computation_at (data,
7820 179522 : SSA_NAME_DEF_STMT (def),
7821 : &dummy_use, cand);
7822 179522 : if (this_comp)
7823 : {
7824 398702 : best_cand = cand;
7825 398702 : best_pref = cand_pref;
7826 398702 : comp = this_comp;
7827 : }
7828 : }
7829 : }
7830 :
7831 97451 : if (!best_cand)
7832 21436 : continue;
7833 :
7834 76015 : comp = unshare_expr (comp);
7835 76015 : if (count > 1)
7836 : {
7837 24125 : tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7838 : /* FIXME: Is setting the mode really necessary? */
7839 24125 : if (SSA_NAME_VAR (def))
7840 14319 : SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7841 : else
7842 9806 : SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7843 24125 : gdebug *def_temp
7844 24125 : = gimple_build_debug_bind (vexpr, comp, NULL);
7845 24125 : gimple_stmt_iterator gsi;
7846 :
7847 24125 : if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7848 13515 : gsi = gsi_after_labels (gimple_bb
7849 13515 : (SSA_NAME_DEF_STMT (def)));
7850 : else
7851 10610 : gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7852 :
7853 24125 : gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7854 24125 : comp = vexpr;
7855 : }
7856 :
7857 363603 : FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7858 : {
7859 211573 : if (!gimple_debug_bind_p (stmt))
7860 83802 : continue;
7861 :
7862 383385 : FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7863 127807 : SET_USE (use_p, comp);
7864 :
7865 127771 : update_stmt (stmt);
7866 76015 : }
7867 : }
7868 : }
7869 : }
7870 503076 : }
7871 :
7872 : /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7873 : for hash_map::traverse. */
7874 :
7875 : bool
7876 484287 : free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7877 : {
7878 484287 : if (value)
7879 : {
7880 444644 : value->~tree_niter_desc ();
7881 444644 : free (value);
7882 : }
7883 484287 : return true;
7884 : }
7885 :
7886 : /* Frees data allocated by the optimization of a single loop. */
7887 :
7888 : static void
7889 870665 : free_loop_data (struct ivopts_data *data)
7890 : {
7891 870665 : unsigned i, j;
7892 870665 : bitmap_iterator bi;
7893 870665 : tree obj;
7894 :
7895 870665 : if (data->niters)
7896 : {
7897 956417 : data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7898 944260 : delete data->niters;
7899 472130 : data->niters = NULL;
7900 : }
7901 :
7902 5907764 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7903 : {
7904 5037099 : struct version_info *info;
7905 :
7906 5037099 : info = ver_info (data, i);
7907 5037099 : info->iv = NULL;
7908 5037099 : info->has_nonlin_use = false;
7909 5037099 : info->preserve_biv = false;
7910 5037099 : info->inv_id = 0;
7911 : }
7912 870665 : bitmap_clear (data->relevant);
7913 870665 : bitmap_clear (data->important_candidates);
7914 :
7915 2679546 : for (i = 0; i < data->vgroups.length (); i++)
7916 : {
7917 1808881 : struct iv_group *group = data->vgroups[i];
7918 :
7919 3893698 : for (j = 0; j < group->vuses.length (); j++)
7920 2084817 : free (group->vuses[j]);
7921 1808881 : group->vuses.release ();
7922 :
7923 1808881 : BITMAP_FREE (group->related_cands);
7924 19674131 : for (j = 0; j < group->n_map_members; j++)
7925 : {
7926 17865250 : if (group->cost_map[j].inv_vars)
7927 3740976 : BITMAP_FREE (group->cost_map[j].inv_vars);
7928 17865250 : if (group->cost_map[j].inv_exprs)
7929 2053147 : BITMAP_FREE (group->cost_map[j].inv_exprs);
7930 : }
7931 :
7932 1808881 : free (group->cost_map);
7933 1808881 : free (group);
7934 : }
7935 870665 : data->vgroups.truncate (0);
7936 :
7937 5495561 : for (i = 0; i < data->vcands.length (); i++)
7938 : {
7939 4624896 : struct iv_cand *cand = data->vcands[i];
7940 :
7941 4624896 : if (cand->inv_vars)
7942 74756 : BITMAP_FREE (cand->inv_vars);
7943 4624896 : if (cand->inv_exprs)
7944 100518 : BITMAP_FREE (cand->inv_exprs);
7945 4624896 : free (cand);
7946 : }
7947 870665 : data->vcands.truncate (0);
7948 :
7949 870665 : if (data->version_info_size < num_ssa_names)
7950 : {
7951 162 : data->version_info_size = 2 * num_ssa_names;
7952 162 : free (data->version_info);
7953 162 : data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7954 : }
7955 :
7956 870665 : data->max_inv_var_id = 0;
7957 870665 : data->max_inv_expr_id = 0;
7958 :
7959 870665 : FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7960 0 : SET_DECL_RTL (obj, NULL_RTX);
7961 :
7962 870665 : decl_rtl_to_reset.truncate (0);
7963 :
7964 870665 : data->inv_expr_tab->empty ();
7965 :
7966 870665 : data->iv_common_cand_tab->empty ();
7967 870665 : data->iv_common_cands.truncate (0);
7968 870665 : }
7969 :
7970 : /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7971 : loop tree. */
7972 :
7973 : static void
7974 240808 : tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7975 : {
7976 240808 : free_loop_data (data);
7977 240808 : free (data->version_info);
7978 240808 : BITMAP_FREE (data->relevant);
7979 240808 : BITMAP_FREE (data->important_candidates);
7980 :
7981 240808 : decl_rtl_to_reset.release ();
7982 240808 : data->vgroups.release ();
7983 240808 : data->vcands.release ();
7984 240808 : delete data->inv_expr_tab;
7985 240808 : data->inv_expr_tab = NULL;
7986 240808 : free_affine_expand_cache (&data->name_expansion_cache);
7987 240808 : if (data->base_object_map)
7988 162954 : delete data->base_object_map;
7989 240808 : delete data->iv_common_cand_tab;
7990 240808 : data->iv_common_cand_tab = NULL;
7991 240808 : data->iv_common_cands.release ();
7992 240808 : obstack_free (&data->iv_obstack, NULL);
7993 240808 : }
7994 :
7995 : /* Returns true if the loop body BODY includes any function calls. */
7996 :
7997 : static bool
7998 629857 : loop_body_includes_call (basic_block *body, unsigned num_nodes)
7999 : {
8000 629857 : gimple_stmt_iterator gsi;
8001 629857 : unsigned i;
8002 :
8003 2835194 : for (i = 0; i < num_nodes; i++)
8004 23594843 : for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
8005 : {
8006 18975231 : gimple *stmt = gsi_stmt (gsi);
8007 18975231 : if (is_gimple_call (stmt)
8008 280154 : && !gimple_call_internal_p (stmt)
8009 19190103 : && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
8010 : return true;
8011 : }
8012 : return false;
8013 : }
8014 :
8015 : /* Determine cost scaling factor for basic blocks in loop. */
8016 : #define COST_SCALING_FACTOR_BOUND (20)
8017 :
8018 : static void
8019 503695 : determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8020 : {
8021 503695 : int lfreq = data->current_loop->header->count.to_frequency (cfun);
8022 503695 : if (!data->speed || lfreq <= 0)
8023 : return;
8024 :
8025 : int max_freq = lfreq;
8026 2855050 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8027 : {
8028 2438316 : body[i]->aux = (void *)(intptr_t) 1;
8029 2438316 : if (max_freq < body[i]->count.to_frequency (cfun))
8030 102614 : max_freq = body[i]->count.to_frequency (cfun);
8031 : }
8032 416734 : if (max_freq > lfreq)
8033 : {
8034 65502 : int divisor, factor;
8035 : /* Check if scaling factor itself needs to be scaled by the bound. This
8036 : is to avoid overflow when scaling cost according to profile info. */
8037 65502 : if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8038 : {
8039 : divisor = max_freq;
8040 : factor = COST_SCALING_FACTOR_BOUND;
8041 : }
8042 : else
8043 : {
8044 49672 : divisor = lfreq;
8045 49672 : factor = 1;
8046 : }
8047 991478 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8048 : {
8049 925976 : int bfreq = body[i]->count.to_frequency (cfun);
8050 925976 : if (bfreq <= lfreq)
8051 509433 : continue;
8052 :
8053 416543 : body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8054 : }
8055 : }
8056 : }
8057 :
8058 : /* Find doloop comparison use and set its doloop_p on if found. */
8059 :
8060 : static bool
8061 0 : find_doloop_use (struct ivopts_data *data)
8062 : {
8063 0 : struct loop *loop = data->current_loop;
8064 :
8065 0 : for (unsigned i = 0; i < data->vgroups.length (); i++)
8066 : {
8067 0 : struct iv_group *group = data->vgroups[i];
8068 0 : if (group->type == USE_COMPARE)
8069 : {
8070 0 : gcc_assert (group->vuses.length () == 1);
8071 0 : struct iv_use *use = group->vuses[0];
8072 0 : gimple *stmt = use->stmt;
8073 0 : if (gimple_code (stmt) == GIMPLE_COND)
8074 : {
8075 0 : basic_block bb = gimple_bb (stmt);
8076 0 : edge true_edge, false_edge;
8077 0 : extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8078 : /* This comparison is used for loop latch. Require latch is empty
8079 : for now. */
8080 0 : if ((loop->latch == true_edge->dest
8081 0 : || loop->latch == false_edge->dest)
8082 0 : && empty_block_p (loop->latch))
8083 : {
8084 0 : group->doloop_p = true;
8085 0 : if (dump_file && (dump_flags & TDF_DETAILS))
8086 : {
8087 0 : fprintf (dump_file, "Doloop cmp iv use: ");
8088 0 : print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8089 : }
8090 0 : return true;
8091 : }
8092 : }
8093 : }
8094 : }
8095 :
8096 : return false;
8097 : }
8098 :
8099 : /* For the targets which support doloop, to predict whether later RTL doloop
8100 : transformation will perform on this loop, further detect the doloop use and
8101 : mark the flag doloop_use_p if predicted. */
8102 :
8103 : void
8104 503695 : analyze_and_mark_doloop_use (struct ivopts_data *data)
8105 : {
8106 503695 : data->doloop_use_p = false;
8107 :
8108 503695 : if (!flag_branch_on_count_reg)
8109 : return;
8110 :
8111 503695 : if (data->current_loop->unroll == USHRT_MAX)
8112 : return;
8113 :
8114 503695 : if (!generic_predict_doloop_p (data))
8115 : return;
8116 :
8117 0 : if (find_doloop_use (data))
8118 : {
8119 0 : data->doloop_use_p = true;
8120 0 : if (dump_file && (dump_flags & TDF_DETAILS))
8121 : {
8122 0 : struct loop *loop = data->current_loop;
8123 0 : fprintf (dump_file,
8124 : "Predict loop %d can perform"
8125 : " doloop optimization later.\n",
8126 : loop->num);
8127 0 : flow_loop_dump (loop, dump_file, NULL, 1);
8128 : }
8129 : }
8130 : }
8131 :
8132 : /* Optimizes the LOOP. Returns true if anything changed. */
8133 :
8134 : static bool
8135 629857 : tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8136 : bitmap toremove)
8137 : {
8138 629857 : bool changed = false;
8139 629857 : class iv_ca *iv_ca;
8140 629857 : edge exit = single_dom_exit (loop);
8141 629857 : basic_block *body;
8142 :
8143 629857 : gcc_assert (!data->niters);
8144 629857 : data->current_loop = loop;
8145 629857 : data->loop_loc = find_loop_location (loop).get_location_t ();
8146 629857 : data->speed = optimize_loop_for_speed_p (loop);
8147 :
8148 629857 : if (dump_file && (dump_flags & TDF_DETAILS))
8149 : {
8150 67 : fprintf (dump_file, "Processing loop %d", loop->num);
8151 67 : if (data->loop_loc != UNKNOWN_LOCATION)
8152 65 : fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8153 130 : LOCATION_LINE (data->loop_loc));
8154 67 : fprintf (dump_file, "\n");
8155 :
8156 67 : if (exit)
8157 : {
8158 57 : fprintf (dump_file, " single exit %d -> %d, exit condition ",
8159 57 : exit->src->index, exit->dest->index);
8160 114 : print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
8161 : 0, TDF_SLIM);
8162 57 : fprintf (dump_file, "\n");
8163 : }
8164 :
8165 67 : fprintf (dump_file, "\n");
8166 : }
8167 :
8168 629857 : body = get_loop_body (loop);
8169 629857 : data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8170 629857 : renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8171 :
8172 629857 : data->loop_single_exit_p
8173 629857 : = exit != NULL && loop_only_exit_p (loop, body, exit);
8174 :
8175 : /* For each ssa name determines whether it behaves as an induction variable
8176 : in some loop. */
8177 629857 : if (!find_induction_variables (data, body))
8178 126161 : goto finish;
8179 :
8180 : /* Finds interesting uses (item 1). */
8181 503696 : find_interesting_uses (data, body);
8182 503696 : if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8183 1 : goto finish;
8184 :
8185 : /* Determine cost scaling factor for basic blocks in loop. */
8186 503695 : determine_scaling_factor (data, body);
8187 :
8188 : /* Analyze doloop possibility and mark the doloop use if predicted. */
8189 503695 : analyze_and_mark_doloop_use (data);
8190 :
8191 : /* Finds candidates for the induction variables (item 2). */
8192 503695 : find_iv_candidates (data);
8193 :
8194 : /* Calculates the costs (item 3, part 1). */
8195 503695 : determine_iv_costs (data);
8196 503695 : determine_group_iv_costs (data);
8197 503695 : determine_set_costs (data);
8198 :
8199 : /* Find the optimal set of induction variables (item 3, part 2). */
8200 503695 : iv_ca = find_optimal_iv_set (data);
8201 : /* Cleanup basic block aux field. */
8202 3301831 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8203 2798136 : body[i]->aux = NULL;
8204 503695 : if (!iv_ca)
8205 619 : goto finish;
8206 503076 : changed = true;
8207 :
8208 : /* Create the new induction variables (item 4, part 1). */
8209 503076 : create_new_ivs (data, iv_ca);
8210 503076 : iv_ca_free (&iv_ca);
8211 :
8212 : /* Rewrite the uses (item 4, part 2). */
8213 503076 : rewrite_groups (data);
8214 :
8215 : /* Remove the ivs that are unused after rewriting. */
8216 503076 : remove_unused_ivs (data, toremove);
8217 :
8218 629857 : finish:
8219 629857 : free (body);
8220 629857 : free_loop_data (data);
8221 :
8222 629857 : return changed;
8223 : }
8224 :
8225 : /* Main entry point. Optimizes induction variables in loops. */
8226 :
8227 : void
8228 240808 : tree_ssa_iv_optimize (void)
8229 : {
8230 240808 : struct ivopts_data data;
8231 240808 : auto_bitmap toremove;
8232 :
8233 240808 : tree_ssa_iv_optimize_init (&data);
8234 240808 : mark_ssa_maybe_undefs ();
8235 :
8236 : /* Optimize the loops starting with the innermost ones. */
8237 1352281 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8238 : {
8239 629857 : if (!dbg_cnt (ivopts_loop))
8240 0 : continue;
8241 :
8242 629857 : if (dump_file && (dump_flags & TDF_DETAILS))
8243 67 : flow_loop_dump (loop, dump_file, NULL, 1);
8244 :
8245 629857 : tree_ssa_iv_optimize_loop (&data, loop, toremove);
8246 240808 : }
8247 :
8248 : /* Remove eliminated IV defs. */
8249 240808 : release_defs_bitset (toremove);
8250 :
8251 : /* We have changed the structure of induction variables; it might happen
8252 : that definitions in the scev database refer to some of them that were
8253 : eliminated. */
8254 240808 : scev_reset_htab ();
8255 : /* Likewise niter and control-IV information. */
8256 240808 : free_numbers_of_iterations_estimates (cfun);
8257 :
8258 240808 : tree_ssa_iv_optimize_finalize (&data);
8259 240808 : }
8260 :
8261 : #include "gt-tree-ssa-loop-ivopts.h"
|