Line data Source code
1 : /* Induction variable optimizations.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* This pass tries to find the optimal set of induction variables for the loop.
21 : It optimizes just the basic linear induction variables (although adding
22 : support for other types should not be too hard). It includes the
23 : optimizations commonly known as strength reduction, induction variable
24 : coalescing and induction variable elimination. It does it in the
25 : following steps:
26 :
27 : 1) The interesting uses of induction variables are found. This includes
28 :
29 : -- uses of induction variables in non-linear expressions
30 : -- addresses of arrays
31 : -- comparisons of induction variables
32 :
33 : Note the interesting uses are categorized and handled in group.
34 : Generally, address type uses are grouped together if their iv bases
35 : are different in constant offset.
36 :
37 : 2) Candidates for the induction variables are found. This includes
38 :
39 : -- old induction variables
40 : -- the variables defined by expressions derived from the "interesting
41 : groups/uses" above
42 :
   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44 : cost function assigns a cost to sets of induction variables and consists
45 : of three parts:
46 :
47 : -- The group/use costs. Each of the interesting groups/uses chooses
48 : the best induction variable in the set and adds its cost to the sum.
49 : The cost reflects the time spent on modifying the induction variables
50 : value to be usable for the given purpose (adding base and offset for
51 : arrays, etc.).
52 : -- The variable costs. Each of the variables has a cost assigned that
53 : reflects the costs associated with incrementing the value of the
54 : variable. The original variables are somewhat preferred.
55 : -- The set cost. Depending on the size of the set, extra cost may be
56 : added to reflect register pressure.
57 :
58 : All the costs are defined in a machine-specific way, using the target
59 : hooks and machine descriptions to determine them.
60 :
61 : 4) The trees are transformed to use the new variables, the dead code is
62 : removed.
63 :
64 : All of this is done loop by loop. Doing it globally is theoretically
65 : possible, it might give a better performance and it might enable us
66 : to decide costs more precisely, but getting all the interactions right
67 : would be complicated.
68 :
69 : For the targets supporting low-overhead loops, IVOPTs has to take care of
70 : the loops which will probably be transformed in RTL doloop optimization,
71 : to try to make selected IV candidate set optimal. The process of doloop
72 : support includes:
73 :
   1) Analyze whether or not the current loop will be transformed into a
      doloop; find and mark its compare type IV use as doloop use (iv_group
      field doloop_p), and set flag doloop_use_p of ivopts_data to notify
      subsequent processing on doloop.  See analyze_and_mark_doloop_use and
      its callees for the details.
78 : The target hook predict_doloop_p can be used for target specific checks.
79 :
80 : 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 : set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
82 : like biv. For cost determination between doloop IV cand and IV use, the
83 : target hooks doloop_cost_for_generic and doloop_cost_for_address are
84 : provided to add on extra costs for generic type and address type IV use.
85 : Zero cost is assigned to the pair between doloop IV cand and doloop IV
86 : use, and bound zero is set for IV elimination.
87 :
88 : 3) With the cost setting in step 2), the current cost model based IV
89 : selection algorithm will process as usual, pick up doloop dedicated IV if
90 : profitable. */
91 :
92 : #include "config.h"
93 : #include "system.h"
94 : #include "coretypes.h"
95 : #include "backend.h"
96 : #include "rtl.h"
97 : #include "tree.h"
98 : #include "gimple.h"
99 : #include "cfghooks.h"
100 : #include "tree-pass.h"
101 : #include "memmodel.h"
102 : #include "tm_p.h"
103 : #include "ssa.h"
104 : #include "expmed.h"
105 : #include "insn-config.h"
106 : #include "emit-rtl.h"
107 : #include "recog.h"
108 : #include "cgraph.h"
109 : #include "gimple-pretty-print.h"
110 : #include "alias.h"
111 : #include "fold-const.h"
112 : #include "stor-layout.h"
113 : #include "tree-eh.h"
114 : #include "gimplify.h"
115 : #include "gimple-iterator.h"
116 : #include "gimplify-me.h"
117 : #include "tree-cfg.h"
118 : #include "tree-ssa-loop-ivopts.h"
119 : #include "tree-ssa-loop-manip.h"
120 : #include "tree-ssa-loop-niter.h"
121 : #include "tree-ssa-loop.h"
122 : #include "explow.h"
123 : #include "expr.h"
124 : #include "tree-dfa.h"
125 : #include "tree-ssa.h"
126 : #include "cfgloop.h"
127 : #include "tree-scalar-evolution.h"
128 : #include "tree-affine.h"
129 : #include "tree-ssa-propagate.h"
130 : #include "tree-ssa-address.h"
131 : #include "builtins.h"
132 : #include "tree-vectorizer.h"
133 : #include "dbgcnt.h"
134 : #include "cfganal.h"
135 : #include "gimple-fold.h"
136 :
137 : /* For lang_hooks.types.type_for_mode. */
138 : #include "langhooks.h"
139 :
140 : /* FIXME: Expressions are expanded to RTL in this pass to determine the
141 : cost of different addressing modes. This should be moved to a TBD
142 : interface between the GIMPLE and RTL worlds. */
143 :
144 : /* The infinite cost. */
145 : #define INFTY 1000000000
146 :
147 : /* Returns the expected number of loop iterations for LOOP.
148 : The average trip count is computed from profile data if it
149 : exists. */
150 :
151 : static inline unsigned HOST_WIDE_INT
152 8680039 : avg_loop_niter (class loop *loop)
153 : {
154 8680039 : HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
155 8680039 : if (niter == -1)
156 : {
157 4885380 : niter = likely_max_stmt_executions_int (loop);
158 :
159 4885380 : if (niter == -1 || niter > param_avg_loop_niter)
160 4103969 : return param_avg_loop_niter;
161 : }
162 :
163 4576070 : return niter;
164 : }
165 :
166 : struct iv_use;
167 :
/* Representation of the induction variable: a value that changes by a
   loop-invariant STEP each iteration, starting from BASE.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv (basic induction variable)?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};
181 :
/* Per-ssa version information (induction variable descriptions, etc.),
   indexed by SSA_NAME_VERSION; see ver_info/name_info accessors.  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant; 0 if none assigned yet.  */
};
192 :
/* Types of uses.  Address-type uses are distinguished from generic and
   compare uses because they are costed with addressing modes in mind.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_REF_ADDRESS,	/* Use is an address for an explicit memory
			   reference.  */
  USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
			   cases where the expansion of the function
			   will turn the argument into a normal address.  */
  USE_COMPARE		/* Use is a compare.  */
};
204 :
/* Cost of a computation.  The value INFTY in the COST field is a sentinel
   meaning "impossible / never profitable"; see infinite_cost_p.  */
class comp_cost
{
public:
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  Saturates to infinite_cost if either
     operand is infinite.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C to this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divide the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiply the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts costs COST1 and COST2.  COST2 must not be infinite.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  Comparison is
     lexicographic: first by cost, ties broken by complexity.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller or equal than COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int64_t cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int64_t scratch;	/* Scratch used during cost computation.  */
};
259 :
/* Zero cost, zero complexity: the identity element for cost addition.  */
static const comp_cost no_cost;
/* Sentinel value marking impossible/unprofitable computations; recognized
   via comp_cost::infinite_cost_p.  */
static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 :
263 : bool
264 1817580217 : comp_cost::infinite_cost_p ()
265 : {
266 1817580217 : return cost == INFTY;
267 : }
268 :
269 : comp_cost
270 242822522 : operator+ (comp_cost cost1, comp_cost cost2)
271 : {
272 242822522 : if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
273 1878993 : return infinite_cost;
274 :
275 240943529 : gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
276 240943529 : cost1.cost += cost2.cost;
277 240943529 : cost1.complexity += cost2.complexity;
278 :
279 240943529 : return cost1;
280 : }
281 :
282 : comp_cost
283 207768853 : operator- (comp_cost cost1, comp_cost cost2)
284 : {
285 207768853 : if (cost1.infinite_cost_p ())
286 0 : return infinite_cost;
287 :
288 207768853 : gcc_assert (!cost2.infinite_cost_p ());
289 207768853 : gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 :
291 207768853 : cost1.cost -= cost2.cost;
292 207768853 : cost1.complexity -= cost2.complexity;
293 :
294 207768853 : return cost1;
295 : }
296 :
297 : comp_cost
298 242822522 : comp_cost::operator+= (comp_cost cost)
299 : {
300 242822522 : *this = *this + cost;
301 242822522 : return *this;
302 : }
303 :
304 : comp_cost
305 858346558 : comp_cost::operator+= (HOST_WIDE_INT c)
306 : {
307 858346558 : if (c >= INFTY)
308 0 : this->cost = INFTY;
309 :
310 858346558 : if (infinite_cost_p ())
311 0 : return *this;
312 :
313 858346558 : gcc_assert (this->cost + c < infinite_cost.cost);
314 858346558 : this->cost += c;
315 :
316 858346558 : return *this;
317 : }
318 :
319 : comp_cost
320 541879 : comp_cost::operator-= (HOST_WIDE_INT c)
321 : {
322 541879 : if (infinite_cost_p ())
323 0 : return *this;
324 :
325 541879 : gcc_assert (this->cost - c < infinite_cost.cost);
326 541879 : this->cost -= c;
327 :
328 541879 : return *this;
329 : }
330 :
331 : comp_cost
332 0 : comp_cost::operator/= (HOST_WIDE_INT c)
333 : {
334 0 : gcc_assert (c != 0);
335 0 : if (infinite_cost_p ())
336 0 : return *this;
337 :
338 0 : this->cost /= c;
339 :
340 0 : return *this;
341 : }
342 :
343 : comp_cost
344 0 : comp_cost::operator*= (HOST_WIDE_INT c)
345 : {
346 0 : if (infinite_cost_p ())
347 0 : return *this;
348 :
349 0 : gcc_assert (this->cost * c < infinite_cost.cost);
350 0 : this->cost *= c;
351 :
352 0 : return *this;
353 : }
354 :
355 : comp_cost
356 207768853 : comp_cost::operator-= (comp_cost cost)
357 : {
358 207768853 : *this = *this - cost;
359 207768853 : return *this;
360 : }
361 :
362 : bool
363 181781342 : operator< (comp_cost cost1, comp_cost cost2)
364 : {
365 181781342 : if (cost1.cost == cost2.cost)
366 80111812 : return cost1.complexity < cost2.complexity;
367 :
368 101669530 : return cost1.cost < cost2.cost;
369 : }
370 :
371 : bool
372 3907215 : operator== (comp_cost cost1, comp_cost cost2)
373 : {
374 3907215 : return cost1.cost == cost2.cost
375 3907215 : && cost1.complexity == cost2.complexity;
376 : }
377 :
378 : bool
379 6409980 : operator<= (comp_cost cost1, comp_cost cost2)
380 : {
381 6409980 : return cost1 < cost2 || cost1 == cost2;
382 : }
383 :
384 : struct iv_inv_expr_ent;
385 :
/* The candidate - cost pair: the cost of expressing one use/group with a
   particular IV candidate, plus what must be preserved to do so.  */
class cost_pair
{
public:
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
			   preserved when representing iv_use with iv_cand.  */
  bitmap inv_exprs;	/* The list of newly created invariant expressions
			   when representing iv_use with iv_cand.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};
401 :
/* Use: one occurrence of an induction variable in the loop body.  Uses
   with the same stripped base address are collected into iv_groups.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  tree mem_type;	/* The memory type to use when testing whether an
			   address is legitimate, and what the address's
			   cost is.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  poly_uint64 addr_offset;
			/* Const offset stripped from base address.  */
};
419 :
/* Group of uses.  A group shares one selected candidate and one row of
   per-candidate costs; all members have the same use type.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs wrto the iv candidates.  */
  class cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* To indicate this is a doloop use group.  */
  bool doloop_p;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};
440 :
/* The position where the iv is computed (i.e. where its increment is
   inserted relative to the loop body).  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
450 :
/* The induction variable candidate: one IV the pass may materialize to
   serve some set of uses.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  bool involves_undefs;	/* Whether the IV involves undefined values.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use;/* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
			   iv_cand.  */
  bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
			   handle it as a new invariant expression which will
			   be hoisted out of loop.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
  bool doloop_p;	/* Whether this is a doloop candidate.  */
};
480 :
/* Hashtable entry for common candidate derived from iv uses: a (base, step)
   pair shared by several uses, used to seed shared candidates.  */
class iv_common_cand
{
public:
  tree base;		/* Common base of the derived candidate.  */
  tree step;		/* Common step of the derived candidate.  */
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;	/* Precomputed hash of (base, step).  */
};
491 :
/* Hashtable helpers for the table of common candidates; entries are
   deleted together with the table (delete_ptr_hash).  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};
499 :
500 : /* Hash function for possible common candidates. */
501 :
502 : inline hashval_t
503 9865097 : iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 : {
505 9865097 : return ccand->hash;
506 : }
507 :
508 : /* Hash table equality function for common candidates. */
509 :
510 : inline bool
511 11136736 : iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
512 : const iv_common_cand *ccand2)
513 : {
514 11136736 : return (ccand1->hash == ccand2->hash
515 1620467 : && operand_equal_p (ccand1->base, ccand2->base, 0)
516 1599523 : && operand_equal_p (ccand1->step, ccand2->step, 0)
517 12729440 : && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
518 1592704 : == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
519 : }
520 :
/* Loop invariant expression hashtable entry.  Invariant expressions are
   numbered so sets of them can be tracked in bitmaps.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier (index used in inv_exprs bitmaps).  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};
532 :
533 : /* Sort iv_inv_expr_ent pair A and B by id field. */
534 :
535 : static int
536 5737 : sort_iv_inv_expr_ent (const void *a, const void *b)
537 : {
538 5737 : const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
539 5737 : const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 :
541 5737 : unsigned id1 = (*e1)->id;
542 5737 : unsigned id2 = (*e2)->id;
543 :
544 5737 : if (id1 < id2)
545 : return -1;
546 2670 : else if (id1 > id2)
547 : return 1;
548 : else
549 0 : return 0;
550 : }
551 :
/* Hashtable helpers for the invariant-expression table; entries are
   freed with the table (free_ptr_hash).  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};
559 :
560 : /* Return true if uses of type TYPE represent some form of address. */
561 :
562 : inline bool
563 8947979 : address_p (use_type type)
564 : {
565 8947979 : return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
566 : }
567 :
568 : /* Hash function for loop invariant expressions. */
569 :
570 : inline hashval_t
571 6643332 : iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 : {
573 6643332 : return expr->hash;
574 : }
575 :
576 : /* Hash table equality function for expressions. */
577 :
578 : inline bool
579 7991439 : iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
580 : const iv_inv_expr_ent *expr2)
581 : {
582 7991439 : return expr1->hash == expr2->hash
583 7991439 : && operand_equal_p (expr1->expr, expr2->expr, 0);
584 : }
585 :
/* Global state of the IVOPTS pass for the loop currently being
   processed.  */
struct ivopts_data
{
  /* The currently optimized loop.  */
  class loop *current_loop;
  location_t loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by
     SSA_NAME_VERSION.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* Hash map recording base object information of tree exp.  */
  hash_map<tree, tree> *base_object_map;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;

  /* Whether the loop has doloop comparison use.  */
  bool doloop_use_p;
};
660 :
/* An assignment of iv candidates to uses: one point in the search space
   of the cost-based candidate-selection algorithm.  */

class iv_ca
{
public:
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  class cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variants and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  int64_t cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
703 :
/* Difference of two iv candidate assignments, kept as a linked list of
   per-group changes so a tentative move can be committed or rolled back.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  class cost_pair *old_cp;

  /* A new assignment.  */
  class cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};
720 :
/* Bound on number of candidates below that all candidates are considered
   (above it, only related and important candidates are tried per group).  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) param_iv_consider_all_candidates_bound)

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) param_iv_max_considered_uses)

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) param_iv_always_prune_cand_set_bound)

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

/* Forward declaration: cost of forcing EXPR into a variable.  */
static comp_cost force_expr_to_var_cost (tree, bool);
744 :
745 : /* The single loop exit if it dominates the latch, NULL otherwise. */
746 :
747 : edge
748 695341 : single_dom_exit (class loop *loop)
749 : {
750 695341 : edge exit = single_exit (loop);
751 :
752 695341 : if (!exit)
753 : return NULL;
754 :
755 463306 : if (!just_once_each_iteration_p (loop, exit->src))
756 : return NULL;
757 :
758 : return exit;
759 : }
760 :
/* Dumps information about the induction variable IV to FILE.  Don't dump
   variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  /* Cap indentation at 4 levels so P stays inside SPACES (2 chars/level).  */
  if (indent_level > 4)
    indent_level = 4;
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  /* The base object is only present for pointer-based IVs.  */
  if (iv->base_object)
    {
      fprintf (file, "%s Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}
807 :
/* Dumps information about the USE to FILE: its ids, the statement and
   operand position where it occurs, and its underlying IV.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
  fprintf (file, " At stmt:\t");
  print_gimple_stmt (file, use->stmt, 0);
  fprintf (file, " At pos:\t");
  /* op_p may be NULL, e.g. for uses not tied to a single operand.  */
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");
  dump_iv (file, use->iv, false, 2);
}
822 :
/* Dumps information about all the use groups in DATA to FILE, including
   every use of each group.  */

void
dump_groups (FILE *file, struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      fprintf (file, "Group %d:\n", group->id);
      if (group->type == USE_NONLINEAR_EXPR)
	fprintf (file, " Type:\tGENERIC\n");
      else if (group->type == USE_REF_ADDRESS)
	fprintf (file, " Type:\tREFERENCE ADDRESS\n");
      else if (group->type == USE_PTR_ADDRESS)
	fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
      else
	{
	  /* USE_COMPARE is the only remaining use type.  */
	  gcc_assert (group->type == USE_COMPARE);
	  fprintf (file, " Type:\tCOMPARE\n");
	}
      for (j = 0; j < group->vuses.length (); j++)
	dump_use (file, group->vuses[j]);
    }
}
850 :
/* Dumps information about induction variable candidate CAND to FILE:
   its dependences, before/after variables, increment position and IV.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->inv_vars)
    {
      fprintf (file, " Depend on inv.vars: ");
      dump_bitmap (file, cand->inv_vars);
    }
  if (cand->inv_exprs)
    {
      fprintf (file, " Depend on inv.exprs: ");
      dump_bitmap (file, cand->inv_exprs);
    }

  /* var_before/var_after are only set once the candidate has been
     materialized.  */
  if (cand->var_before)
    {
      fprintf (file, " Var befor: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, " Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, " Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, " Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, " Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}
908 :
909 : /* Returns the info for ssa version VER. */
910 :
911 : static inline struct version_info *
912 116346274 : ver_info (struct ivopts_data *data, unsigned ver)
913 : {
914 116346274 : return data->version_info + ver;
915 : }
916 :
917 : /* Returns the info for ssa name NAME. */
918 :
919 : static inline struct version_info *
920 94205498 : name_info (struct ivopts_data *data, tree name)
921 : {
922 94205498 : return ver_info (data, SSA_NAME_VERSION (name));
923 : }
924 :
925 : /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
926 : emitted in LOOP. */
927 :
928 : static bool
929 33319469 : stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 : {
931 33319469 : basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 :
933 33319469 : gcc_assert (bb);
934 :
935 33319469 : if (sbb == loop->latch)
936 : return true;
937 :
938 33213614 : if (sbb != bb)
939 : return false;
940 :
941 19353930 : return stmt == last_nondebug_stmt (bb);
942 : }
943 :
944 : /* Returns true if STMT if after the place where the original induction
945 : variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
946 : if the positions are identical. */
947 :
948 : static bool
949 7853962 : stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 : {
951 7853962 : basic_block cand_bb = gimple_bb (cand->incremented_at);
952 7853962 : basic_block stmt_bb = gimple_bb (stmt);
953 :
954 7853962 : if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
955 : return false;
956 :
957 5403349 : if (stmt_bb != cand_bb)
958 : return true;
959 :
960 5144727 : if (true_if_equal
961 5144727 : && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
962 : return true;
963 5138148 : return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
964 : }
965 :
966 : /* Returns true if STMT if after the place where the induction variable
967 : CAND is incremented in LOOP. */
968 :
969 : static bool
970 42332503 : stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 : {
972 42332503 : switch (cand->pos)
973 : {
974 : case IP_END:
975 : return false;
976 :
977 33319469 : case IP_NORMAL:
978 33319469 : return stmt_after_ip_normal_pos (loop, stmt);
979 :
980 7843852 : case IP_ORIGINAL:
981 7843852 : case IP_AFTER_USE:
982 7843852 : return stmt_after_inc_pos (cand, stmt, false);
983 :
984 10110 : case IP_BEFORE_USE:
985 10110 : return stmt_after_inc_pos (cand, stmt, true);
986 :
987 0 : default:
988 0 : gcc_unreachable ();
989 : }
990 : }
991 :
992 : /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 :
994 : static tree
995 14523262 : contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 : {
997 14523262 : if (TREE_CODE (*tp) == SSA_NAME
998 14523262 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
999 : return *tp;
1000 :
1001 14523245 : if (!EXPR_P (*tp))
1002 9976472 : *walk_subtrees = 0;
1003 :
1004 : return NULL_TREE;
1005 : }
1006 :
1007 : /* Returns true if EXPR contains a ssa name that occurs in an
1008 : abnormal phi node. */
1009 :
1010 : bool
1011 7857455 : contains_abnormal_ssa_name_p (tree expr)
1012 : {
1013 7857455 : return walk_tree_without_duplicates
1014 7857455 : (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1015 : }
1016 :
1017 : /* Returns the structure describing number of iterations determined from
1018 : EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 :
static class tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  class tree_niter_desc *desc;
  tree_niter_desc **slot;

  /* The per-exit cache is created lazily; a brand-new map trivially has
     no entry for EXIT.  */
  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (class tree_niter_desc);
      /* XNEW only provides raw storage; construct in place.  */
      ::new (static_cast<void*> (desc)) tree_niter_desc ();
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  desc->~tree_niter_desc ();
	  XDELETE (desc);
	  desc = NULL;
	}
      /* Cache the outcome, including the NULL failure case, so the
	 analysis is not repeated for this exit.  */
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
1056 :
1057 : /* Returns the structure describing number of iterations determined from
1058 : single dominating exit of DATA->current_loop, or NULL if something
1059 : goes wrong. */
1060 :
1061 : static class tree_niter_desc *
1062 67 : niter_for_single_dom_exit (struct ivopts_data *data)
1063 : {
1064 67 : edge exit = single_dom_exit (data->current_loop);
1065 :
1066 67 : if (!exit)
1067 : return NULL;
1068 :
1069 57 : return niter_for_exit (data, exit);
1070 : }
1071 :
1072 : /* Initializes data structures used by the iv optimization pass, stored
1073 : in DATA. */
1074 :
1075 : static void
1076 239985 : tree_ssa_iv_optimize_init (struct ivopts_data *data)
1077 : {
1078 239985 : data->version_info_size = 2 * num_ssa_names;
1079 239985 : data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1080 239985 : data->relevant = BITMAP_ALLOC (NULL);
1081 239985 : data->important_candidates = BITMAP_ALLOC (NULL);
1082 239985 : data->max_inv_var_id = 0;
1083 239985 : data->max_inv_expr_id = 0;
1084 239985 : data->niters = NULL;
1085 239985 : data->vgroups.create (20);
1086 239985 : data->vcands.create (20);
1087 239985 : data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1088 239985 : data->name_expansion_cache = NULL;
1089 239985 : data->base_object_map = NULL;
1090 239985 : data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1091 239985 : data->iv_common_cands.create (20);
1092 239985 : decl_rtl_to_reset.create (20);
1093 239985 : gcc_obstack_init (&data->iv_obstack);
1094 239985 : }
1095 :
1096 : /* walk_tree callback for determine_base_object. */
1097 :
static tree
determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
{
  tree_code code = TREE_CODE (*tp);
  tree obj = NULL_TREE;
  if (code == ADDR_EXPR)
    {
      /* &object is a base object itself, unless the base decays to a
	 MEM_REF, in which case the walk continues into the operands.  */
      tree base = get_base_address (TREE_OPERAND (*tp, 0));
      if (!base)
	obj = *tp;
      else if (TREE_CODE (base) != MEM_REF)
	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
    }
  else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
    obj = fold_convert (ptr_type_node, *tp);

  if (!obj)
    {
      /* Do not descend into non-expression nodes.  */
      if (!EXPR_P (*tp))
	*walk_subtrees = 0;

      return NULL_TREE;
    }
  /* Record special node for multiple base objects and stop.  */
  if (*static_cast<tree *> (wdata))
    {
      *static_cast<tree *> (wdata) = integer_zero_node;
      return integer_zero_node;
    }
  /* Record the base object and continue looking.  */
  *static_cast<tree *> (wdata) = obj;
  return NULL_TREE;
}
1131 :
1132 : /* Returns a memory object to that EXPR points with caching. Return NULL if we
1133 : are able to determine that it does not point to any such object; specially
1134 : return integer_zero_node if EXPR contains multiple base objects. */
1135 :
1136 : static tree
1137 10340581 : determine_base_object (struct ivopts_data *data, tree expr)
1138 : {
1139 10340581 : tree *slot, obj = NULL_TREE;
1140 10340581 : if (data->base_object_map)
1141 : {
1142 10178491 : if ((slot = data->base_object_map->get(expr)) != NULL)
1143 4731613 : return *slot;
1144 : }
1145 : else
1146 162090 : data->base_object_map = new hash_map<tree, tree>;
1147 :
1148 5608968 : (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1149 5608968 : data->base_object_map->put (expr, obj);
1150 5608968 : return obj;
1151 : }
1152 :
1153 : /* Allocates an induction variable with given initial value BASE and step STEP
1154 : for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1155 :
static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  /* ivs live on the pass-local obstack and are freed wholesale when the
     pass finishes.  */
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Canonicalize the address expression in base if it were an unsigned
     computation.  That leads to more equalities being detected and results in:

     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
	forms, like &a[0] and &a.
     3) Duplicate candidates won't be created for IV expressions that differ
	only in their sign.  */
  aff_tree comb;
  STRIP_NOPS (expr);
  expr = fold_convert (unsigned_type_for (TREE_TYPE (expr)), expr);
  tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
  base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));

  iv->base = base;
  iv->base_object = determine_base_object (data, base);
  iv->step = step;
  iv->biv_p = false;
  iv->nonlin_use = NULL;
  iv->ssa_name = NULL_TREE;
  /* Even when the caller could not prove NO_OVERFLOW, the iv may still
     be provably non-overflowing from its type, base and step.  */
  if (!no_overflow
      && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
			     base, step))
    no_overflow = true;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}
1194 :
1195 : /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1196 : doesn't overflow. */
1197 :
1198 : static void
1199 4871878 : set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1200 : bool no_overflow)
1201 : {
1202 4871878 : struct version_info *info = name_info (data, iv);
1203 :
1204 4871878 : gcc_assert (!info->iv);
1205 :
1206 4871878 : bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1207 4871878 : info->iv = alloc_iv (data, base, step, no_overflow);
1208 4871878 : info->iv->ssa_name = iv;
1209 4871878 : }
1210 :
1211 : /* Finds induction variable declaration for VAR. */
1212 :
static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  /* Only pointer and integral ssa names can be induction variables.  */
  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      /* VAR defined outside the loop (or without a defining block) is a
	 loop invariant; lazily record it as a degenerate iv with zero
	 step so later queries find it directly.  */
      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	{
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  set_iv (data, var, var, build_int_cst (type, 0), true);
	}
    }

  return name_info (data, var)->iv;
}
1238 :
1239 : /* Return the first non-invariant ssa var found in EXPR. */
1240 :
1241 : static tree
1242 4061969 : extract_single_var_from_expr (tree expr)
1243 : {
1244 4061969 : int i, n;
1245 4061969 : tree tmp;
1246 4061969 : enum tree_code code;
1247 :
1248 4061969 : if (!expr || is_gimple_min_invariant (expr))
1249 3384376 : return NULL;
1250 :
1251 677593 : code = TREE_CODE (expr);
1252 677593 : if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1253 : {
1254 373192 : n = TREE_OPERAND_LENGTH (expr);
1255 746455 : for (i = 0; i < n; i++)
1256 : {
1257 373263 : tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1258 :
1259 373263 : if (tmp)
1260 : return tmp;
1261 : }
1262 : }
1263 304401 : return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1264 : }
1265 :
1266 : /* Finds basic ivs. */
1267 :
static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  class loop *loop = data->current_loop;
  gphi_iterator psi;

  /* Basic ivs are defined by phi nodes in the loop header.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      /* Names on abnormal phis cannot be manipulated safely.  */
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      /* Only simple affine ivs (base + i * step) are handled.  */
      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred by iv step.
	 Ideally we should stop at any ssa var, because that's expensive
	 and unusual to happen, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  /* Pointer ivs step by a pointer offset type, others by their
	     own type.  */
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
1323 :
1324 : /* Marks basic ivs. */
1325 :
static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  class loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      /* VAR is the value flowing back into the phi over the latch edge,
	 i.e. the incremented variable.  */
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      /* Both the phi result and its latch-edge increment belong to the
	 same basic induction variable.  */
      iv->biv_p = true;
      incr_iv->biv_p = true;
      /* Track how many biv descriptions are known not to overflow.  */
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}
1372 :
1373 : /* Checks whether STMT defines a linear induction variable and stores its
1374 : parameters to IV. */
1375 :
static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  class loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  /* Only assignments to an ssa name can define a giv.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred by iv step.
     Ideally we should stop at any ssa var, because that's expensive
     and unusual to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (cfun, stmt))
    return false;

  return true;
}
1414 :
1415 : /* Finds general ivs in statement STMT. */
1416 :
1417 : static void
1418 12484596 : find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1419 : {
1420 12484596 : affine_iv iv;
1421 :
1422 12484596 : if (!find_givs_in_stmt_scev (data, stmt, &iv))
1423 9666000 : return;
1424 :
1425 2818596 : set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1426 : }
1427 :
1428 : /* Finds general ivs in basic block BB. */
1429 :
1430 : static void
1431 2799805 : find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1432 : {
1433 2799805 : gimple_stmt_iterator bsi;
1434 :
1435 27249823 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1436 21650213 : if (!is_gimple_debug (gsi_stmt (bsi)))
1437 12484596 : find_givs_in_stmt (data, gsi_stmt (bsi));
1438 2799805 : }
1439 :
1440 : /* Finds general ivs. */
1441 :
1442 : static void
1443 500071 : find_givs (struct ivopts_data *data, basic_block *body)
1444 : {
1445 500071 : class loop *loop = data->current_loop;
1446 500071 : unsigned i;
1447 :
1448 3299876 : for (i = 0; i < loop->num_nodes; i++)
1449 2799805 : find_givs_in_bb (data, body[i]);
1450 500071 : }
1451 :
1452 : /* For each ssa name defined in LOOP determines whether it is an induction
1453 : variable and if so, its initial value and step. */
1454 :
static bool
find_induction_variables (struct ivopts_data *data, basic_block *body)
{
  unsigned i;
  bitmap_iterator bi;

  /* Without at least one basic iv there is nothing for this pass to do.  */
  if (!find_bivs (data))
    return false;

  find_givs (data, body);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      class tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n");
	};

      fprintf (dump_file, "\n<Induction Vars>:\n");
      /* Dump every relevant iv that has a nonzero step.  */
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  struct version_info *info = ver_info (data, i);
	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
	}
    }

  return true;
}
1494 :
1495 : /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1496 : For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1497 : is the const offset stripped from IV base and MEM_TYPE is the type
1498 : of the memory being addressed. For uses of other types, ADDR_BASE
1499 : and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1500 :
static struct iv_use *
record_use (struct iv_group *group, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type type, tree mem_type,
	    tree addr_base, poly_uint64 addr_offset)
{
  struct iv_use *use = XCNEW (struct iv_use);

  /* The use id is its position inside the owning group.  */
  use->id = group->vuses.length ();
  use->group_id = group->id;
  use->type = type;
  use->mem_type = mem_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  group->vuses.safe_push (use);
  return use;
}
1521 :
1522 : /* Checks whether OP is a loop-level invariant and if so, records it.
1523 : NONLINEAR_USE is true if the invariant is used in a way we do not
1524 : handle specially. */
1525 :
1526 : static void
1527 22466066 : record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1528 : {
1529 22466066 : basic_block bb;
1530 22466066 : struct version_info *info;
1531 :
1532 22466066 : if (TREE_CODE (op) != SSA_NAME
1533 22466066 : || virtual_operand_p (op))
1534 : return;
1535 :
1536 21288706 : bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1537 21288706 : if (bb
1538 21288706 : && flow_bb_inside_loop_p (data->current_loop, bb))
1539 : return;
1540 :
1541 3802421 : info = name_info (data, op);
1542 3802421 : info->name = op;
1543 3802421 : info->has_nonlin_use |= nonlinear_use;
1544 3802421 : if (!info->inv_id)
1545 1326643 : info->inv_id = ++data->max_inv_var_id;
1546 3802421 : bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1547 : }
1548 :
1549 : /* Record a group of TYPE. */
1550 :
static struct iv_group *
record_group (struct ivopts_data *data, enum use_type type)
{
  struct iv_group *group = XCNEW (struct iv_group);

  /* The group id is its index in DATA->vgroups.  */
  group->id = data->vgroups.length ();
  group->type = type;
  group->related_cands = BITMAP_ALLOC (NULL);
  group->vuses.create (1);
  group->doloop_p = false;

  data->vgroups.safe_push (group);
  return group;
}
1565 :
1566 : /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1567 : New group will be created if there is no existing group for the use.
1568 : MEM_TYPE is the type of memory being addressed, or NULL if this
1569 : isn't an address reference. */
1570 :
static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type type,
		  tree mem_type)
{
  tree addr_base = NULL;
  struct iv_group *group = NULL;
  poly_uint64 addr_offset = 0;

  /* Record non address type use in a new group.  */
  if (address_p (type))
    {
      unsigned int i;

      gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
      tree addr_toffset;
      /* Split the base into stripped base + constant offset so address
	 uses differing only in the offset can share a group.  */
      split_constant_offset (iv->base, &addr_base, &addr_toffset);
      addr_offset = int_cst_value (addr_toffset);
      /* Look for an existing address group with a compatible first use.  */
      for (i = 0; i < data->vgroups.length (); i++)
	{
	  struct iv_use *use;

	  group = data->vgroups[i];
	  use = group->vuses[0];
	  if (!address_p (use->type))
	    continue;

	  /* Check if it has the same stripped base and step.  */
	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	      && operand_equal_p (iv->step, use->iv->step, OEP_ASSUME_WRAPV)
	      && operand_equal_p (addr_base, use->addr_base, OEP_ASSUME_WRAPV))
	    break;
	}
      /* No compatible group found; fall through to creating one.  */
      if (i == data->vgroups.length ())
	group = NULL;
    }

  if (!group)
    group = record_group (data, type);

  return record_use (group, use_p, iv, stmt, type, mem_type,
		     addr_base, addr_offset);
}
1614 :
1615 : /* Checks whether the use OP is interesting and if so, records it. */
1616 :
1617 : static struct iv_use *
1618 7203999 : find_interesting_uses_op (struct ivopts_data *data, tree op)
1619 : {
1620 7203999 : struct iv *iv;
1621 7203999 : gimple *stmt;
1622 7203999 : struct iv_use *use;
1623 :
1624 7203999 : if (TREE_CODE (op) != SSA_NAME)
1625 : return NULL;
1626 :
1627 5793330 : iv = get_iv (data, op);
1628 5793330 : if (!iv)
1629 : return NULL;
1630 :
1631 2498215 : if (iv->nonlin_use)
1632 : {
1633 195469 : gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1634 : return iv->nonlin_use;
1635 : }
1636 :
1637 2302746 : if (integer_zerop (iv->step))
1638 : {
1639 1681127 : record_invariant (data, op, true);
1640 1681127 : return NULL;
1641 : }
1642 :
1643 621619 : stmt = SSA_NAME_DEF_STMT (op);
1644 621619 : gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1645 :
1646 621619 : use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1647 621619 : iv->nonlin_use = use;
1648 621619 : return use;
1649 : }
1650 :
/* Indicate how compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  /* The comparison cannot be rewritten in terms of ivs.  */
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1664 :
1665 : /* Given a condition in statement STMT, checks whether it is a compare
1666 : of an induction variable and an invariant. If this is the case,
1667 : CONTROL_VAR is set to location of the iv, BOUND to the location of
1668 : the invariant, IV_VAR and IV_BOUND are set to the corresponding
1669 : induction variable descriptions, and true is returned. If this is not
1670 : the case, CONTROL_VAR and BOUND are set to the arguments of the
1671 : condition and false is returned. */
1672 :
static enum comp_iv_rewrite
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  enum comp_iv_rewrite rewrite_type = COMP_IV_NA;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      /* A comparison computed by an assignment.  */
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  /* Refresh the shared fallback objects on each call.  */
  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* If both sides of comparison are IVs.  We can express ivs on both end.  */
  if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      rewrite_type = COMP_IV_EXPR_2;
      goto end;
    }

  /* If none side of comparison is IV.  */
  if ((!iv0 || integer_zerop (iv0->step))
      && (!iv1 || integer_zerop (iv1->step)))
    goto end;

  /* Control variable may be on the other side.  */
  if (!iv0 || integer_zerop (iv0->step))
    {
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  /* If one side is IV and the other side isn't loop invariant.  */
  if (!iv1)
    rewrite_type = COMP_IV_EXPR;
  /* If one side is IV and the other side is loop invariant.  */
  else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
    rewrite_type = COMP_IV_ELIM;

end:
  /* All output parameters are optional.  */
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return rewrite_type;
}
1742 :
1743 : /* Checks whether the condition in STMT is interesting and if so,
1744 : records it. */
1745 :
1746 : static void
1747 1539823 : find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1748 : {
1749 1539823 : tree *var_p, *bound_p;
1750 1539823 : struct iv *var_iv, *bound_iv;
1751 1539823 : enum comp_iv_rewrite ret;
1752 :
1753 1539823 : ret = extract_cond_operands (data, stmt,
1754 : &var_p, &bound_p, &var_iv, &bound_iv);
1755 1539823 : if (ret == COMP_IV_NA)
1756 : {
1757 944392 : find_interesting_uses_op (data, *var_p);
1758 944392 : find_interesting_uses_op (data, *bound_p);
1759 944392 : return;
1760 : }
1761 :
1762 595431 : record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1763 : /* Record compare type iv_use for iv on the other side of comparison. */
1764 595431 : if (ret == COMP_IV_EXPR_2)
1765 2081 : record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1766 : }
1767 :
1768 : /* Returns the outermost loop EXPR is obviously invariant in
1769 : relative to the loop LOOP, i.e. if all its operands are defined
1770 : outside of the returned loop. Returns NULL if EXPR is not
1771 : even obviously invariant in LOOP. */
1772 :
class loop *
outermost_invariant_loop_for_expr (class loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  /* Constants are invariant even in the whole-function "loop".  */
  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  /* Invariant in every loop strictly inside the one containing
	     the definition.  */
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      /* Names without a defining block (default definitions) are
	 function invariant.  */
      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  /* For a compound expression, the answer is the deepest loop in which
     every operand is still invariant.  */
  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      class loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
1815 :
1816 : /* Returns true if expression EXPR is obviously invariant in LOOP,
1817 : i.e. if all its operands are defined outside of the LOOP. LOOP
1818 : should not be the function body. */
1819 :
1820 : bool
1821 12053691 : expr_invariant_in_loop_p (class loop *loop, tree expr)
1822 : {
1823 12053691 : basic_block def_bb;
1824 12053691 : unsigned i, len;
1825 :
1826 12053691 : gcc_assert (loop_depth (loop) > 0);
1827 :
1828 12053691 : if (is_gimple_min_invariant (expr))
1829 : return true;
1830 :
1831 8371188 : if (TREE_CODE (expr) == SSA_NAME)
1832 : {
1833 7959001 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1834 7959001 : if (def_bb
1835 7959001 : && flow_bb_inside_loop_p (loop, def_bb))
1836 : return false;
1837 :
1838 4093894 : return true;
1839 : }
1840 :
1841 412187 : if (!EXPR_P (expr))
1842 : return false;
1843 :
1844 412184 : len = TREE_OPERAND_LENGTH (expr);
1845 887227 : for (i = 0; i < len; i++)
1846 524072 : if (TREE_OPERAND (expr, i)
1847 524072 : && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1848 : return false;
1849 :
1850 : return true;
1851 : }
1852 :
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.
   Returns NULL if no deriving biv can be found.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  /* For a generic expression, search all operands and return the first
     deriving biv found.  */
  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  /* The name terminates the search if it's not an iv with nonzero step;
     if it is itself a biv, we have found the answer.  */
  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      basic_block phi_bb = gimple_bb (phi);

      /* Skip loop header PHI that doesn't define biv.  */
      if (phi_bb->loop_father == data->current_loop)
	return NULL;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      /* Trace through the arguments of a PHI that is not the current
	 loop's header PHI (e.g. a join point or an inner loop).  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;
      /* Fall through to also trace the first operand.  */
      gcc_fallthrough ();

    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
1949 :
/* Record BIV, its predecessor and successor that they are used in
   address type uses.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  /* Nothing to do for a non-biv, a biv with zero step, a biv already
     marked as having address uses, or one that may wrap.  */
  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  /* BASE_1 is the value of BIV after one step.  */
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      /* Only consider non-wrapping bivs of the same integral type and
	 step that are not yet marked.  */
      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      /* IV is BIV's predecessor or successor if their bases differ by
	 exactly one step.  */
      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
1995 :
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  /* The pass-wide ivopts data.  */
  struct ivopts_data *ivopts_data;
  /* The statement containing the memory reference being analyzed.  */
  gimple *stmt;
  /* Accumulated step of all indices, in bytes (sizetype).  */
  tree step;
};
2006 :
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  class loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  /* A loop-invariant index contributes nothing to the step.  */
  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  /* Accumulate this index's contribution (element size times the iv
     step, in bytes) into the overall step of the reference.  */
  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  /* Note bivs (and their one-step neighbors) that are used in address
     type uses; see record_biv_for_address_use.  */
  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }
  return true;
}
2095 :
2096 : /* Records use in index IDX. Callback for for_each_index. Ivopts data
2097 : object is passed to it in DATA. */
2098 :
2099 : static bool
2100 1809636 : idx_record_use (tree base, tree *idx,
2101 : void *vdata)
2102 : {
2103 1809636 : struct ivopts_data *data = (struct ivopts_data *) vdata;
2104 1809636 : find_interesting_uses_op (data, *idx);
2105 1809636 : if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2106 : {
2107 230244 : if (TREE_OPERAND (base, 2))
2108 5502 : find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2109 230244 : if (TREE_OPERAND (base, 3))
2110 16796 : find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2111 : }
2112 1809636 : return true;
2113 : }
2114 :
2115 : /* If we can prove that TOP = cst * BOT for some constant cst,
2116 : store cst to MUL and return true. Otherwise return false.
2117 : The returned value is always sign-extended, regardless of the
2118 : signedness of TOP and BOT. */
2119 :
2120 : static bool
2121 17182122 : constant_multiple_of (tree top, tree bot, widest_int *mul,
2122 : struct ivopts_data *data)
2123 : {
2124 34364244 : aff_tree aff_top, aff_bot;
2125 17182122 : tree_to_aff_combination_expand (top, TREE_TYPE (top), &aff_top,
2126 : &data->name_expansion_cache);
2127 17182122 : tree_to_aff_combination_expand (bot, TREE_TYPE (bot), &aff_bot,
2128 : &data->name_expansion_cache);
2129 :
2130 17182122 : poly_widest_int poly_mul;
2131 17182122 : if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul)
2132 17182122 : && poly_mul.is_constant (mul))
2133 14248175 : return true;
2134 :
2135 : return false;
2136 17182122 : }
2137 :
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The required alignment is the stricter of the type's alignment and
     its mode's alignment.  */
  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  /* The initial reference must be sufficiently aligned and must start
     on a byte boundary.  */
  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  /* An insufficiently aligned STEP can break the alignment on later
     iterations even when the initial reference is aligned.  */
  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
2167 :
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case VAR_DECL:
      /* Check if it's a register variable.  */
      return DECL_HARD_REGISTER (expr);

    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case MEM_REF:
      /* Likewise for MEM_REFs, modulo the storage order.  */
      return REF_REVERSE_STORAGE_ORDER (expr);

    case BIT_FIELD_REF:
      if (REF_REVERSE_STORAGE_ORDER (expr))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case COMPONENT_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      /* A non-addressable field, or any component of a non-addressable
	 object, is non-addressable.  */
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      /* A conversion result is not a memory reference, so taking its
	 address makes no sense.  */
      return true;

    default:
      break;
    }

  return false;
}
2224 :
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      /* Substitute iv bases into the TMR operands, accumulating the
	 overall step; fail as soon as any SSA operand has no iv.  */
      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      /* The index is scaled by TMR_STEP.  */
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      /* Walk all indices of the reference, substituting iv bases and
	 accumulating the combined step (see idx_find_step).  */
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (data, base, step);
  /* Fail if base object of this memory reference is unknown.  */
  if (civ->base_object == NULL_TREE)
    goto fail;

  record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
  return;

fail:
  /* If the reference cannot be represented as an address iv use, at
     least record plain operand uses of its indices.  */
  for_each_index (op_p, idx_record_use, data);
}
2346 :
2347 : /* Finds and records invariants used in STMT. */
2348 :
2349 : static void
2350 15329881 : find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2351 : {
2352 15329881 : ssa_op_iter iter;
2353 15329881 : use_operand_p use_p;
2354 15329881 : tree op;
2355 :
2356 51057285 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2357 : {
2358 20397523 : op = USE_FROM_PTR (use_p);
2359 20397523 : record_invariant (data, op, false);
2360 : }
2361 15329881 : }
2362 :
/* CALL calls an internal function.  If operand *OP_P will become an
   address when the call is expanded, return the type of the memory
   being addressed, otherwise return null.  */

static tree
get_mem_type_for_internal_fn (gcall *call, tree *op_p)
{
  switch (gimple_call_internal_fn (call))
    {
    case IFN_MASK_LOAD:
    case IFN_MASK_LOAD_LANES:
    case IFN_MASK_LEN_LOAD_LANES:
    case IFN_LEN_LOAD:
    case IFN_MASK_LEN_LOAD:
      /* For loads, the first argument is the address and the memory
	 type is that of the loaded value (the lhs).  */
      if (op_p == gimple_call_arg_ptr (call, 0))
	return TREE_TYPE (gimple_call_lhs (call));
      return NULL_TREE;

    case IFN_MASK_STORE:
    case IFN_MASK_STORE_LANES:
    case IFN_MASK_LEN_STORE_LANES:
    case IFN_LEN_STORE:
    case IFN_MASK_LEN_STORE:
      {
	/* For stores, the first argument is the address and the memory
	   type is that of the stored value, whose argument index
	   depends on the particular internal function.  */
	if (op_p == gimple_call_arg_ptr (call, 0))
	  {
	    internal_fn ifn = gimple_call_internal_fn (call);
	    int index = internal_fn_stored_value_index (ifn);
	    return TREE_TYPE (gimple_call_arg (call, index));
	  }
	return NULL_TREE;
      }

    default:
      return NULL_TREE;
    }
}
2400 :
2401 : /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2402 : Return true if the operand will become an address when STMT
2403 : is expanded and record the associated address use if so. */
2404 :
2405 : static bool
2406 1691600 : find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2407 : struct iv *iv)
2408 : {
2409 : /* Fail if base object of this memory reference is unknown. */
2410 1691600 : if (iv->base_object == NULL_TREE)
2411 : return false;
2412 :
2413 639472 : tree mem_type = NULL_TREE;
2414 639472 : if (gcall *call = dyn_cast <gcall *> (stmt))
2415 122751 : if (gimple_call_internal_p (call))
2416 1769 : mem_type = get_mem_type_for_internal_fn (call, op_p);
2417 1769 : if (mem_type)
2418 : {
2419 796 : iv = alloc_iv (data, iv->base, iv->step);
2420 796 : record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2421 796 : return true;
2422 : }
2423 : return false;
2424 : }
2425 :
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  /* A load, store or copy: record address uses for memory
	     operands and a plain operand use for a register rhs.  */
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      /* A loop header PHI that defines an induction variable needs no
	 further processing.  */
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  /* Otherwise record each iv used by the statement, either as an
     address-like use (internal fn calls) or as a generic operand use.  */
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      if (!find_address_like_use (data, stmt, use_p->use, iv))
	find_interesting_uses_op (data, op);
    }
}
2514 :
2515 : /* Finds interesting uses of induction variables outside of loops
2516 : on loop exit edge EXIT. */
2517 :
2518 : static void
2519 885492 : find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2520 : {
2521 885492 : gphi *phi;
2522 885492 : gphi_iterator psi;
2523 885492 : tree def;
2524 :
2525 1975464 : for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2526 : {
2527 1089972 : phi = psi.phi ();
2528 1089972 : def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2529 2089311 : if (!virtual_operand_p (def))
2530 535607 : find_interesting_uses_op (data, def);
2531 : }
2532 885492 : }
2533 :
/* Return TRUE if OFFSET is within the range of [base + offset] addressing
   mode for memory reference represented by USE.  */

static GTY (()) vec<rtx, va_gc> *addr_list;

static bool
addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
{
  rtx reg, addr;
  unsigned list_index;
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
  machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);

  /* Cache one (reg + offset) RTX per (address space, memory mode) pair
     in ADDR_LIST so repeated queries only update the offset operand.  */
  list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
  if (list_index >= vec_safe_length (addr_list))
    vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);

  addr = (*addr_list)[list_index];
  if (!addr)
    {
      /* Build the skeleton address (reg + <placeholder>) lazily on the
	 first query for this (as, mem_mode) pair.  */
      addr_mode = targetm.addr_space.address_mode (as);
      reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
      addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
      (*addr_list)[list_index] = addr;
    }
  else
    addr_mode = GET_MODE (addr);

  /* Plug OFFSET into the cached address and ask the target whether the
     result is a legitimate memory address.  */
  XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
  return (memory_address_addr_space_p (mem_mode, addr, as));
}
2565 :
2566 : /* Comparison function to sort group in ascending order of addr_offset. */
2567 :
2568 : static int
2569 3210782 : group_compare_offset (const void *a, const void *b)
2570 : {
2571 3210782 : const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2572 3210782 : const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2573 :
2574 3210782 : return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2575 : }
2576 :
/* Check if small groups should be split.  Return true if no group
   contains more than two uses with distinct addr_offsets.  Return
   false otherwise.  We want to split such groups because:

     1) Small groups don't have much benefit and may interfere with
	general candidate selection.
     2) Size for problem with only small groups is usually small and
	general algorithm can handle it well.

   As a side effect, the uses of every address group are sorted in
   ascending order of addr_offset; split_address_groups relies on it.

   TODO -- Above claim may not hold when we want to merge memory
   accesses with consecutive addresses.  */

static bool
split_small_address_groups_p (struct ivopts_data *data)
{
  unsigned int i, j, distinct = 1;
  struct iv_use *pre;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      if (group->vuses.length () == 1)
	continue;

      gcc_assert (address_p (group->type));
      /* Sort the uses of the group by addr_offset; a two-element group
	 only needs a conditional swap.  */
      if (group->vuses.length () == 2)
	{
	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
				      group->vuses[1]->addr_offset) > 0)
	    std::swap (group->vuses[0], group->vuses[1]);
	}
      else
	group->vuses.qsort (group_compare_offset);

      /* Once a group with more than two distinct offsets has been seen,
	 the answer is settled; keep looping only to sort the rest.  */
      if (distinct > 2)
	continue;

      distinct = 1;
      for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
	{
	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
	    {
	      pre = group->vuses[j];
	      distinct++;
	    }

	  if (distinct > 2)
	    break;
	}
    }

  return (distinct <= 2);
}
2631 :
/* For each group of address type uses, this function further groups
   these uses according to the maximum offset supported by target's
   [base + offset] addressing mode.  */

static void
split_address_groups (struct ivopts_data *data)
{
  unsigned int i, j;
  /* Always split group.  */
  bool split_p = split_small_address_groups_p (data);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *new_group = NULL;
      struct iv_group *group = data->vgroups[i];
      struct iv_use *use = group->vuses[0];

      use->id = 0;
      use->group_id = group->id;
      if (group->vuses.length () == 1)
	continue;

      gcc_assert (address_p (use->type));

      /* The uses are already sorted by addr_offset (done by
	 split_small_address_groups_p), so uses moved to NEW_GROUP
	 remain sorted too.  */
      for (j = 1; j < group->vuses.length ();)
	{
	  struct iv_use *next = group->vuses[j];
	  poly_int64 offset = next->addr_offset - use->addr_offset;

	  /* Split group if asked to, or the offset against the first
	     use can't fit in offset part of addressing mode.  IV uses
	     having the same offset are still kept in one group.  */
	  if (maybe_ne (offset, 0)
	      && (split_p || !addr_offset_valid_p (use, offset)))
	    {
	      if (!new_group)
		new_group = record_group (data, group->type);
	      group->vuses.ordered_remove (j);
	      new_group->vuses.safe_push (next);
	      continue;
	    }

	  next->id = j;
	  next->group_id = group->id;
	  j++;
	}
    }
}
2680 :
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data, basic_block *body)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  unsigned i;
  edge e;

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      /* Values live on edges leaving the loop are interesting too.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      /* Scan the PHIs and the real (non-debug) statements of the
	 block.  */
      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  split_address_groups (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n<IV Groups>:\n");
      dump_groups (dump_file, data);
      fprintf (dump_file, "\n");
    }
}
2717 :
2718 : /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2719 : is true, assume we are inside an address. If TOP_COMPREF is true, assume
2720 : we are at the top-level of the processed address. */
2721 :
2722 : static tree
2723 3378821 : strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2724 : poly_int64 *offset)
2725 : {
2726 3378821 : tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2727 3378821 : enum tree_code code;
2728 3378821 : tree type, orig_type = TREE_TYPE (expr);
2729 3378821 : poly_int64 off0, off1;
2730 3378821 : HOST_WIDE_INT st;
2731 3378821 : tree orig_expr = expr;
2732 :
2733 3378821 : STRIP_NOPS (expr);
2734 :
2735 3378821 : type = TREE_TYPE (expr);
2736 3378821 : code = TREE_CODE (expr);
2737 3378821 : *offset = 0;
2738 :
2739 3378821 : switch (code)
2740 : {
2741 619114 : case POINTER_PLUS_EXPR:
2742 619114 : case PLUS_EXPR:
2743 619114 : case MINUS_EXPR:
2744 619114 : op0 = TREE_OPERAND (expr, 0);
2745 619114 : op1 = TREE_OPERAND (expr, 1);
2746 :
2747 619114 : op0 = strip_offset_1 (op0, false, false, &off0);
2748 619114 : op1 = strip_offset_1 (op1, false, false, &off1);
2749 :
2750 619114 : *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2751 619114 : if (op0 == TREE_OPERAND (expr, 0)
2752 619114 : && op1 == TREE_OPERAND (expr, 1))
2753 : return orig_expr;
2754 :
2755 381586 : if (integer_zerop (op1))
2756 : expr = op0;
2757 3175 : else if (integer_zerop (op0))
2758 : {
2759 602 : if (code == MINUS_EXPR)
2760 : {
2761 602 : if (TYPE_OVERFLOW_UNDEFINED (type))
2762 : {
2763 0 : type = unsigned_type_for (type);
2764 0 : op1 = fold_convert (type, op1);
2765 : }
2766 602 : expr = fold_build1 (NEGATE_EXPR, type, op1);
2767 : }
2768 : else
2769 : expr = op1;
2770 : }
2771 : else
2772 : {
2773 2573 : if (TYPE_OVERFLOW_UNDEFINED (type))
2774 : {
2775 0 : type = unsigned_type_for (type);
2776 0 : if (code == POINTER_PLUS_EXPR)
2777 0 : code = PLUS_EXPR;
2778 0 : op0 = fold_convert (type, op0);
2779 0 : op1 = fold_convert (type, op1);
2780 : }
2781 2573 : expr = fold_build2 (code, type, op0, op1);
2782 : }
2783 :
2784 381586 : return fold_convert (orig_type, expr);
2785 :
2786 221755 : case MULT_EXPR:
2787 221755 : op1 = TREE_OPERAND (expr, 1);
2788 221755 : if (!cst_and_fits_in_hwi (op1))
2789 : return orig_expr;
2790 :
2791 182237 : op0 = TREE_OPERAND (expr, 0);
2792 182237 : op0 = strip_offset_1 (op0, false, false, &off0);
2793 182237 : if (op0 == TREE_OPERAND (expr, 0))
2794 : return orig_expr;
2795 :
2796 7237 : *offset = off0 * int_cst_value (op1);
2797 7237 : if (integer_zerop (op0))
2798 : expr = op0;
2799 : else
2800 : {
2801 7237 : if (TYPE_OVERFLOW_UNDEFINED (type))
2802 : {
2803 0 : type = unsigned_type_for (type);
2804 0 : op0 = fold_convert (type, op0);
2805 0 : op1 = fold_convert (type, op1);
2806 : }
2807 7237 : expr = fold_build2 (MULT_EXPR, type, op0, op1);
2808 : }
2809 :
2810 7237 : return fold_convert (orig_type, expr);
2811 :
2812 11 : case ARRAY_REF:
2813 11 : case ARRAY_RANGE_REF:
2814 11 : if (!inside_addr)
2815 : return orig_expr;
2816 :
2817 11 : step = array_ref_element_size (expr);
2818 11 : if (!cst_and_fits_in_hwi (step))
2819 : break;
2820 :
2821 11 : st = int_cst_value (step);
2822 11 : op1 = TREE_OPERAND (expr, 1);
2823 11 : op1 = strip_offset_1 (op1, false, false, &off1);
2824 11 : *offset = off1 * st;
2825 :
2826 11 : if (top_compref
2827 11 : && integer_zerop (op1))
2828 : {
2829 : /* Strip the component reference completely. */
2830 9 : op0 = TREE_OPERAND (expr, 0);
2831 9 : op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2832 9 : *offset += off0;
2833 9 : return op0;
2834 : }
2835 : break;
2836 :
2837 1 : case COMPONENT_REF:
2838 1 : {
2839 1 : tree field;
2840 :
2841 1 : if (!inside_addr)
2842 : return orig_expr;
2843 :
2844 1 : tmp = component_ref_field_offset (expr);
2845 1 : field = TREE_OPERAND (expr, 1);
2846 1 : if (top_compref
2847 1 : && cst_and_fits_in_hwi (tmp)
2848 2 : && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2849 : {
2850 1 : HOST_WIDE_INT boffset, abs_off;
2851 :
2852 : /* Strip the component reference completely. */
2853 1 : op0 = TREE_OPERAND (expr, 0);
2854 1 : op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2855 1 : boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2856 1 : abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2857 1 : if (boffset < 0)
2858 0 : abs_off = -abs_off;
2859 :
2860 1 : *offset = off0 + int_cst_value (tmp) + abs_off;
2861 1 : return op0;
2862 : }
2863 : }
2864 : break;
2865 :
2866 317873 : case ADDR_EXPR:
2867 317873 : op0 = TREE_OPERAND (expr, 0);
2868 317873 : op0 = strip_offset_1 (op0, true, true, &off0);
2869 317873 : *offset += off0;
2870 :
2871 317873 : if (op0 == TREE_OPERAND (expr, 0))
2872 : return orig_expr;
2873 :
2874 10 : expr = build_fold_addr_expr (op0);
2875 10 : return fold_convert (orig_type, expr);
2876 :
2877 : case MEM_REF:
2878 : /* ??? Offset operand? */
2879 : inside_addr = false;
2880 : break;
2881 :
2882 2220065 : default:
2883 2220065 : if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2884 863564 : return build_int_cst (orig_type, 0);
2885 : return orig_expr;
2886 : }
2887 :
2888 : /* Default handling of expressions for that we want to recurse into
2889 : the first operand. */
2890 4 : op0 = TREE_OPERAND (expr, 0);
2891 4 : op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2892 4 : *offset += off0;
2893 :
2894 4 : if (op0 == TREE_OPERAND (expr, 0)
2895 4 : && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2896 : return orig_expr;
2897 :
2898 1 : expr = copy_node (expr);
2899 1 : TREE_OPERAND (expr, 0) = op0;
2900 1 : if (op1)
2901 1 : TREE_OPERAND (expr, 1) = op1;
2902 :
2903 : /* Inside address, we might strip the top level component references,
2904 : thus changing type of the expression. Handling of ADDR_EXPR
2905 : will fix that. */
2906 1 : expr = fold_convert (orig_type, expr);
2907 :
2908 1 : return expr;
2909 : }
2910 :
2911 : /* Strips constant offsets from EXPR and stores them to OFFSET. */
2912 :
2913 : static tree
2914 1640458 : strip_offset (tree expr, poly_uint64 *offset)
2915 : {
2916 1640458 : poly_int64 off;
2917 1640458 : tree core = strip_offset_1 (expr, false, false, &off);
2918 1640458 : *offset = off;
2919 1640458 : return core;
2920 : }
2921 :
2922 : /* Returns variant of TYPE that can be used as base for different uses.
2923 : We return unsigned type with the same precision, which avoids problems
2924 : with overflows. */
2925 :
2926 : static tree
2927 8023453 : generic_type_for (tree type)
2928 : {
2929 8023453 : if (POINTER_TYPE_P (type))
2930 1417627 : return unsigned_type_for (type);
2931 :
2932 6605826 : if (TYPE_UNSIGNED (type))
2933 : return type;
2934 :
2935 3096646 : return unsigned_type_for (type);
2936 : }
2937 :
/* Private data for walk_tree.  */

struct walk_tree_data
{
  /* Pointer to the result bitmap; the callback allocates *inv_vars
     lazily on the first invariant found.  */
  bitmap *inv_vars;
  /* Pass-wide state, used to look up per-SSA-name version info.  */
  struct ivopts_data *idata;
};
2945 :
/* Callback function for walk_tree, it records invariants and symbol
   reference in *EXPR_P.  DATA is the structure storing result info.  */

static tree
find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  tree op = *expr_p;
  struct version_info *info;
  struct walk_tree_data *wdata = (struct walk_tree_data*) data;

  /* Only SSA names can act as loop-invariant variables here.  */
  if (TREE_CODE (op) != SSA_NAME)
    return NULL_TREE;

  info = name_info (wdata->idata, op);
  /* Because we expand simple operations when finding IVs, loop invariant
     variable that isn't referred by the original loop could be used now.
     Record such invariant variables here.  */
  if (!info->iv)
    {
      struct ivopts_data *idata = wdata->idata;
      basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));

      /* A name with no defining block (default def) or defined outside
	 the current loop is invariant in it: give it a degenerate iv
	 with zero step and register it as an invariant.  */
      if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
	{
	  tree steptype = TREE_TYPE (op);
	  if (POINTER_TYPE_P (steptype))
	    steptype = sizetype;
	  set_iv (idata, op, op, build_int_cst (steptype, 0), true);
	  record_invariant (idata, op, false);
	}
    }
  /* Skip names that got no invariant id, or that are already flagged
     as having a nonlinear use.  */
  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  /* Lazily allocate the result bitmap and record the invariant's id.  */
  if (!*wdata->inv_vars)
    *wdata->inv_vars = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*wdata->inv_vars, info->inv_id);

  return NULL_TREE;
}
2986 :
2987 : /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
2988 : store it. */
2989 :
2990 : static inline void
2991 27712616 : find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2992 : {
2993 27712616 : struct walk_tree_data wdata;
2994 :
2995 27712616 : if (!inv_vars)
2996 11794416 : return;
2997 :
2998 15918200 : wdata.idata = data;
2999 15918200 : wdata.inv_vars = inv_vars;
3000 15918200 : walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3001 : }
3002 :
3003 : /* Get entry from invariant expr hash table for INV_EXPR. New entry
3004 : will be recorded if it doesn't exist yet. Given below two exprs:
3005 : inv_expr + cst1, inv_expr + cst2
3006 : It's hard to make decision whether constant part should be stripped
3007 : or not. We choose to not strip based on below facts:
3008 : 1) We need to count ADD cost for constant part if it's stripped,
3009 : which isn't always trivial where this functions is called.
3010 : 2) Stripping constant away may be conflict with following loop
3011 : invariant hoisting pass.
3012 : 3) Not stripping constant away results in more invariant exprs,
3013 : which usually leads to decision preferring lower reg pressure. */
3014 :
3015 : static iv_inv_expr_ent *
3016 2628171 : get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3017 : {
3018 2628171 : STRIP_NOPS (inv_expr);
3019 :
3020 2628171 : if (poly_int_tree_p (inv_expr)
3021 2628171 : || TREE_CODE (inv_expr) == SSA_NAME)
3022 : return NULL;
3023 :
3024 : /* Don't strip constant part away as we used to. */
3025 :
3026 : /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3027 2539486 : struct iv_inv_expr_ent ent;
3028 2539486 : ent.expr = inv_expr;
3029 2539486 : ent.hash = iterative_hash_expr (inv_expr, 0);
3030 2539486 : struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3031 :
3032 2539486 : if (!*slot)
3033 : {
3034 1141955 : *slot = XNEW (struct iv_inv_expr_ent);
3035 1141955 : (*slot)->expr = inv_expr;
3036 1141955 : (*slot)->hash = ent.hash;
3037 1141955 : (*slot)->id = ++data->max_inv_expr_id;
3038 : }
3039 :
3040 2539486 : return *slot;
3041 : }
3042 :
3043 :
3044 : /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3045 : unsuitable as ivopts candidates for potentially involving undefined
3046 : behavior. */
3047 :
3048 : static tree
3049 15304267 : find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3050 : {
3051 15304267 : basic_block bb = (basic_block) bb_;
3052 15304267 : if (TREE_CODE (*tp) == SSA_NAME
3053 2236110 : && ssa_name_maybe_undef_p (*tp)
3054 15312958 : && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3055 3106 : return *tp;
3056 15301161 : if (!EXPR_P (*tp))
3057 10365937 : *walk_subtrees = 0;
3058 : return NULL;
3059 : }
3060 :
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.
   Returns the (possibly pre-existing) candidate, or NULL if none could
   be added.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
		 enum iv_position pos, struct iv_use *use,
		 gimple *incremented_at, struct iv *orig_iv = NULL,
		 bool doloop = false)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  gcc_assert (base && step);

  /* -fkeep-gc-roots-live means that we have to keep a real pointer
     live, but the ivopts code may replace a real pointer with one
     pointing before or after the memory block that is then adjusted
     into the memory block during the loop.  FIXME: It would likely be
     better to actually force the pointer live and still use ivopts;
     for example, it would be enough to write the pointer into memory
     and keep it there until after the loop.  */
  if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL;

  /* If BASE contains undefined SSA names make sure we only record
     the original IV.  */
  bool involves_undefs = false;
  if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
    {
      if (pos != IP_ORIGINAL)
	return NULL;
      important = false;
      involves_undefs = true;
    }

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  /* Search for an existing candidate with the same base, step, precision,
     position and increment statement, so duplicates are shared.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == data->vcands.length ())
    {
      /* No match found; create and register a fresh candidate.  */
      cand = XCNEW (struct iv_cand);
      cand->id = i;
      cand->iv = alloc_iv (data, base, step);
      cand->pos = pos;
      if (pos != IP_ORIGINAL)
	{
	  /* Non-original candidates get a new temporary; the name only
	     affects dumps.  */
	  if (doloop)
	    cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
	  else
	    cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->involves_undefs = involves_undefs;
      cand->incremented_at = incremented_at;
      cand->doloop_p = doloop;
      data->vcands.safe_push (cand);

      if (!poly_int_tree_p (step))
	{
	  /* A symbolic step depends on loop invariants; record them so
	     their register-pressure cost can be accounted.  */
	  find_inv_vars (data, &step, &cand->inv_vars);

	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
	  if (inv_expr != NULL)
	    {
	      cand->inv_exprs = cand->inv_vars;
	      cand->inv_vars = NULL;
	      if (cand->inv_exprs)
		bitmap_clear (cand->inv_exprs);
	      else
		cand->inv_exprs = BITMAP_ALLOC (NULL);

	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
	    }
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      cand->orig_iv = orig_iv;
      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  /* A shared candidate becomes important / doloop if any request for
     it was.  */
  cand->important |= important;
  cand->doloop_p |= doloop;

  /* Relate candidate to the group for which it is added.  */
  if (use)
    bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);

  return cand;
}
3190 :
3191 : /* Returns true if incrementing the induction variable at the end of the LOOP
3192 : is allowed.
3193 :
3194 : The purpose is to avoid splitting latch edge with a biv increment, thus
3195 : creating a jump, possibly confusing other optimization passes and leaving
3196 : less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3197 : available (so we do not have a better alternative), or if the latch edge
3198 : is already nonempty. */
3199 :
3200 : static bool
3201 7907457 : allow_ip_end_pos_p (class loop *loop)
3202 : {
3203 : /* Do not allow IP_END when creating the IV would need to split the
3204 : latch edge as that makes all IP_NORMAL invalid. */
3205 7907457 : auto pos = gsi_last_bb (ip_end_pos (loop));
3206 7907457 : if (!gsi_end_p (pos) && stmt_ends_bb_p (*pos))
3207 : return false;
3208 :
3209 7907457 : if (!ip_normal_pos (loop))
3210 : return true;
3211 :
3212 7810643 : if (!empty_block_p (ip_end_pos (loop)))
3213 : return true;
3214 :
3215 : return false;
3216 : }
3217 :
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  Also require a constant step that fits a HWI.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_can_throw_internal (cfun, use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (use->mem_type);
  /* Pre-increment/decrement addressing: the step must match the access
     size exactly (the USE_*_{PRE,POST}_* checks are target macros).  */
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  /* Pointers cannot use MINUS_EXPR; add the negated step.  */
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      /* The candidate starts one step before BASE and is incremented
	 just before the use.  */
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  /* Post-increment/decrement addressing: the candidate starts at BASE
     and is incremented right after the use.  */
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
3275 :
3276 : /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3277 : position to POS. If USE is not NULL, the candidate is set as related to
3278 : it. The candidate computation is scheduled before exit condition and at
3279 : the end of loop. */
3280 :
3281 : static void
3282 6953970 : add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3283 : struct iv_use *use, struct iv *orig_iv = NULL,
3284 : bool doloop = false)
3285 : {
3286 6953970 : if (ip_normal_pos (data->current_loop))
3287 6871907 : add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3288 : doloop);
3289 : /* Exclude doloop candidate here since it requires decrement then comparison
3290 : and jump, the IP_END position doesn't match. */
3291 6953970 : if (!doloop && ip_end_pos (data->current_loop)
3292 13907940 : && allow_ip_end_pos_p (data->current_loop))
3293 274279 : add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3294 6953970 : }
3295 :
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  /* Candidate {0, +, 1} in the plain integer type.  */
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for the long type if it is wider and still fast enough.  */
  if (TYPE_PRECISION
	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
		   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for the long long type if it is wider and still fast
     enough.  */
  if (TYPE_PRECISION
	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
3317 :
3318 :
/* Adds candidates bases on the old induction variable IV.  */

static void
add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
{
  gimple *phi;
  tree def;
  struct iv_cand *cand;

  /* Check if this biv is used in address type use.  */
  if (iv->no_overflow && iv->have_address_use
      && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
      && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
    {
      tree base = fold_convert (sizetype, iv->base);
      tree step = fold_convert (sizetype, iv->step);

      /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
      add_candidate (data, base, step, true, NULL, iv);
      /* Add iv cand of the original type only if it has nonlinear use.  */
      if (iv->nonlin_use)
	add_candidate (data, iv->base, iv->step, true, NULL);
    }
  else
    add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      /* Don't add candidate if it's from another PHI node because
	 it's an affine iv appearing in the form of PEELED_CHREC.  */
      phi = SSA_NAME_DEF_STMT (def);
      if (gimple_code (phi) != GIMPLE_PHI)
	{
	  cand = add_candidate_1 (data,
				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
				  SSA_NAME_DEF_STMT (def));
	  if (cand)
	    {
	      /* Reuse the biv's own SSA names so the original iv can be
		 kept as-is if this candidate wins.  */
	      cand->var_before = iv->ssa_name;
	      cand->var_after = def;
	    }
	}
      else
	gcc_assert (gimple_bb (phi) == data->current_loop->header);
    }
}
3376 :
3377 : /* Adds candidates based on the old induction variables. */
3378 :
3379 : static void
3380 500070 : add_iv_candidate_for_bivs (struct ivopts_data *data)
3381 : {
3382 500070 : unsigned i;
3383 500070 : struct iv *iv;
3384 500070 : bitmap_iterator bi;
3385 :
3386 5431432 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3387 : {
3388 4931362 : iv = ver_info (data, i)->iv;
3389 4931362 : if (iv && iv->biv_p && !integer_zerop (iv->step))
3390 1734569 : add_iv_candidate_for_biv (data, iv);
3391 : }
3392 500070 : }
3393 :
3394 : /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3395 :
3396 : static void
3397 4144485 : record_common_cand (struct ivopts_data *data, tree base,
3398 : tree step, struct iv_use *use)
3399 : {
3400 4144485 : class iv_common_cand ent;
3401 4144485 : class iv_common_cand **slot;
3402 :
3403 4144485 : ent.base = base;
3404 4144485 : ent.step = step;
3405 4144485 : ent.hash = iterative_hash_expr (base, 0);
3406 4144485 : ent.hash = iterative_hash_expr (step, ent.hash);
3407 :
3408 4144485 : slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3409 4144485 : if (*slot == NULL)
3410 : {
3411 2602548 : *slot = new iv_common_cand ();
3412 2602548 : (*slot)->base = base;
3413 2602548 : (*slot)->step = step;
3414 2602548 : (*slot)->uses.create (8);
3415 2602548 : (*slot)->hash = ent.hash;
3416 2602548 : data->iv_common_cands.safe_push ((*slot));
3417 : }
3418 :
3419 4144485 : gcc_assert (use != NULL);
3420 4144485 : (*slot)->uses.safe_push (use);
3421 4144485 : return;
3422 4144485 : }
3423 :
3424 : /* Comparison function used to sort common candidates. */
3425 :
3426 : static int
3427 19037787 : common_cand_cmp (const void *p1, const void *p2)
3428 : {
3429 19037787 : unsigned n1, n2;
3430 19037787 : const class iv_common_cand *const *const ccand1
3431 : = (const class iv_common_cand *const *)p1;
3432 19037787 : const class iv_common_cand *const *const ccand2
3433 : = (const class iv_common_cand *const *)p2;
3434 :
3435 19037787 : n1 = (*ccand1)->uses.length ();
3436 19037787 : n2 = (*ccand2)->uses.length ();
3437 19037787 : return n2 - n1;
3438 : }
3439 :
/* Adds IV candidates based on common candidates recorded.  */

static void
add_iv_candidate_derived_from_uses (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_cand *cand_1, *cand_2;

  /* Sort so the candidates shared by the most uses come first; the
     loop below stops at the first single-use entry.  */
  data->iv_common_cands.qsort (common_cand_cmp);
  for (i = 0; i < data->iv_common_cands.length (); i++)
    {
      class iv_common_cand *ptr = data->iv_common_cands[i];

      /* Only add IV candidate if it's derived from multiple uses.  */
      if (ptr->uses.length () <= 1)
	break;

      cand_1 = NULL;
      cand_2 = NULL;
      if (ip_normal_pos (data->current_loop))
	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
				  false, IP_NORMAL, NULL, NULL);

      if (ip_end_pos (data->current_loop)
	  && allow_ip_end_pos_p (data->current_loop))
	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
				  false, IP_END, NULL, NULL);

      /* Bind deriving uses and the new candidates.  */
      for (j = 0; j < ptr->uses.length (); j++)
	{
	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
	  if (cand_1)
	    bitmap_set_bit (group->related_cands, cand_1->id);
	  if (cand_2)
	    bitmap_set_bit (group->related_cands, cand_2->id);
	}
    }

  /* Release data since it is useless from this point.  */
  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}
3483 :
/* Adds candidates based on the value of USE's iv.  */

static void
add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
{
  poly_uint64 offset;
  tree base;
  struct iv *iv = use->iv;
  tree basetype = TREE_TYPE (iv->base);

  /* Don't add candidate for iv_use with non integer, pointer or non-mode
     precision types, instead, add candidate for the corresponding scev in
     unsigned type with the same precision.  See PR93674 for more info.  */
  if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
      || !type_has_mode_precision_p (basetype))
    {
      basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
						 TYPE_UNSIGNED (basetype));
      add_candidate (data, fold_convert (basetype, iv->base),
		     fold_convert (basetype, iv->step), false, NULL);
      return;
    }

  /* The candidate matching the use exactly.  */
  add_candidate (data, iv->base, iv->step, false, use);

  /* Record common candidate for use in case it can be shared by others.  */
  record_common_cand (data, iv->base, iv->step, use);

  /* Record common candidate with initial value zero.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);

  /* Compare the cost of an address with an unscaled index with the cost of
     an address with a scaled index and add candidate if useful.  */
  poly_int64 step;
  if (use != NULL
      && poly_int_tree_p (iv->step, &step)
      && address_p (use->type))
    {
      poly_int64 new_step;
      unsigned int fact = preferred_mem_scale_factor
	(use->iv->base,
	 TYPE_MODE (use->mem_type),
	 optimize_loop_for_speed_p (data->current_loop));

      /* If the target prefers a scaled index, add a candidate whose step
	 is the original step divided by that scale factor.  */
      if (fact != 1
	  && multiple_p (step, fact, &new_step))
	add_candidate (data, size_int (0),
		       wide_int_to_tree (sizetype, new_step),
		       true, NULL);
    }

  /* Record common candidate with constant offset stripped in base.
     Like the use itself, we also add candidate directly for it.  */
  base = strip_offset (iv->base, &offset);
  if (maybe_ne (offset, 0U) || base != iv->base)
    {
      record_common_cand (data, base, iv->step, use);
      add_candidate (data, base, iv->step, false, use);
    }

  /* Record common candidate with base_object removed in base.  */
  base = iv->base;
  STRIP_NOPS (base);
  if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
    {
      tree step = iv->step;

      STRIP_NOPS (step);
      /* Keep only the offset part of the POINTER_PLUS_EXPR.  */
      base = TREE_OPERAND (base, 1);
      step = fold_convert (sizetype, step);
      record_common_cand (data, base, step, use);
      /* Also record common candidate with offset stripped.  */
      tree alt_base, alt_offset;
      split_constant_offset (base, &alt_base, &alt_offset);
      if (!integer_zerop (alt_offset))
	record_common_cand (data, alt_base, step, use);
    }

  /* At last, add auto-incremental candidates.  Make such variables
     important since other iv uses with same base object may be based
     on it.  */
  if (use != NULL && address_p (use->type))
    add_autoinc_candidates (data, iv->base, iv->step, true, use);
}
3571 :
3572 : /* Adds candidates based on the uses. */
3573 :
3574 : static void
3575 500070 : add_iv_candidate_for_groups (struct ivopts_data *data)
3576 : {
3577 500070 : unsigned i;
3578 :
3579 : /* Only add candidate for the first use in group. */
3580 2140764 : for (i = 0; i < data->vgroups.length (); i++)
3581 : {
3582 1640694 : struct iv_group *group = data->vgroups[i];
3583 :
3584 1640694 : gcc_assert (group->vuses[0] != NULL);
3585 1640694 : add_iv_candidate_for_use (data, group->vuses[0]);
3586 : }
3587 500070 : add_iv_candidate_derived_from_uses (data);
3588 500070 : }
3589 :
3590 : /* Record important candidates and add them to related_cands bitmaps. */
3591 :
3592 : static void
3593 500070 : record_important_candidates (struct ivopts_data *data)
3594 : {
3595 500070 : unsigned i;
3596 500070 : struct iv_group *group;
3597 :
3598 5099281 : for (i = 0; i < data->vcands.length (); i++)
3599 : {
3600 4599211 : struct iv_cand *cand = data->vcands[i];
3601 :
3602 4599211 : if (cand->important)
3603 3680792 : bitmap_set_bit (data->important_candidates, i);
3604 : }
3605 :
3606 500070 : data->consider_all_candidates = (data->vcands.length ()
3607 500070 : <= CONSIDER_ALL_CANDIDATES_BOUND);
3608 :
3609 : /* Add important candidates to groups' related_cands bitmaps. */
3610 2140764 : for (i = 0; i < data->vgroups.length (); i++)
3611 : {
3612 1640694 : group = data->vgroups[i];
3613 1640694 : bitmap_ior_into (group->related_cands, data->important_candidates);
3614 : }
3615 500070 : }
3616 :
3617 : /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3618 : If consider_all_candidates is true, we use a two-dimensional array, otherwise
3619 : we allocate a simple list to every use. */
3620 :
3621 : static void
3622 500070 : alloc_use_cost_map (struct ivopts_data *data)
3623 : {
3624 500070 : unsigned i, size, s;
3625 :
3626 2140764 : for (i = 0; i < data->vgroups.length (); i++)
3627 : {
3628 1640694 : struct iv_group *group = data->vgroups[i];
3629 :
3630 1640694 : if (data->consider_all_candidates)
3631 1630993 : size = data->vcands.length ();
3632 : else
3633 : {
3634 9701 : s = bitmap_count_bits (group->related_cands);
3635 :
3636 : /* Round up to the power of two, so that moduling by it is fast. */
3637 19402 : size = s ? (1 << ceil_log2 (s)) : 1;
3638 : }
3639 :
3640 1640694 : group->n_map_members = size;
3641 1640694 : group->cost_map = XCNEWVEC (class cost_pair, size);
3642 : }
3643 500070 : }
3644 :
/* Sets cost of (GROUP, CAND) pair to COST and record that it depends
   on invariants INV_VARS and that the value used in expressing it is
   VALUE, and in case of iv elimination the comparison operator is COMP.
   Takes ownership of the INV_VARS and INV_EXPRS bitmaps.  */

static void
set_group_iv_cost (struct ivopts_data *data,
		   struct iv_group *group, struct iv_cand *cand,
		   comp_cost cost, bitmap inv_vars, tree value,
		   enum tree_code comp, bitmap inv_exprs)
{
  unsigned i, s;

  /* An infinite cost means the pair is unusable; record nothing and
     release the bitmaps.  */
  if (cost.infinite_cost_p ())
    {
      BITMAP_FREE (inv_vars);
      BITMAP_FREE (inv_exprs);
      return;
    }

  /* Direct-mapped storage: the candidate id indexes the cost map.  */
  if (data->consider_all_candidates)
    {
      group->cost_map[cand->id].cand = cand;
      group->cost_map[cand->id].cost = cost;
      group->cost_map[cand->id].inv_vars = inv_vars;
      group->cost_map[cand->id].inv_exprs = inv_exprs;
      group->cost_map[cand->id].value = value;
      group->cost_map[cand->id].comp = comp;
      return;
    }

  /* Otherwise the map is a small open-addressed hash table keyed by
     candidate id; probe linearly (with wrap-around) for a free slot.
     n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (group->n_map_members - 1);
  for (i = s; i < group->n_map_members; i++)
    if (!group->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!group->cost_map[i].cand)
      goto found;

  /* The table is sized to hold all related candidates, so a free slot
     must exist.  */
  gcc_unreachable ();

found:
  group->cost_map[i].cand = cand;
  group->cost_map[i].cost = cost;
  group->cost_map[i].inv_vars = inv_vars;
  group->cost_map[i].inv_exprs = inv_exprs;
  group->cost_map[i].value = value;
  group->cost_map[i].comp = comp;
}
3694 :
3695 : /* Gets cost of (GROUP, CAND) pair. */
3696 :
3697 : static class cost_pair *
3698 202647818 : get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3699 : struct iv_cand *cand)
3700 : {
3701 202647818 : unsigned i, s;
3702 202647818 : class cost_pair *ret;
3703 :
3704 202647818 : if (!cand)
3705 : return NULL;
3706 :
3707 196852672 : if (data->consider_all_candidates)
3708 : {
3709 183090572 : ret = group->cost_map + cand->id;
3710 183090572 : if (!ret->cand)
3711 : return NULL;
3712 :
3713 108194531 : return ret;
3714 : }
3715 :
3716 : /* n_map_members is a power of two, so this computes modulo. */
3717 13762100 : s = cand->id & (group->n_map_members - 1);
3718 18681853 : for (i = s; i < group->n_map_members; i++)
3719 18619930 : if (group->cost_map[i].cand == cand)
3720 : return group->cost_map + i;
3721 10364236 : else if (group->cost_map[i].cand == NULL)
3722 : return NULL;
3723 191482 : for (i = 0; i < s; i++)
3724 171249 : if (group->cost_map[i].cand == cand)
3725 : return group->cost_map + i;
3726 169165 : else if (group->cost_map[i].cand == NULL)
3727 : return NULL;
3728 :
3729 : return NULL;
3730 : }
3731 :
3732 : /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      /* Static or external objects are addressed through a SYMBOL_REF
	 bound to the decl.  */
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      /* Other objects are addressed through a fresh fake register;
	 REGNO supplies and advances the next free register number.  */
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
3759 :
3760 : /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3761 : walk_tree. DATA contains the actual fake register number. */
3762 :
static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Strip handled components to reach the base object whose address
	 is actually taken.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* Don't walk into subtrees.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
	return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      /* Don't walk into subtrees.  */
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects must live in memory; anything else can sit in a
	 fake register.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so the fake DECL_RTL can be reset afterwards.  */
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
3820 :
3821 : /* Predict whether the given loop will be transformed in the RTL
3822 : doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3823 : This is only for target independent checks, see targetm.predict_doloop_p
3824 : for the target dependent ones.
3825 :
3826 : Note that according to some initial investigation, some checks like costly
3827 : niter check and invalid stmt scanning don't have much gains among general
3828 : cases, so keep this as simple as possible first.
3829 :
3830 : Some RTL specific checks seems unable to be checked in gimple, if any new
3831 : checks or easy checks _are_ missing here, please add them. */
3832 :
static bool
generic_predict_doloop_p (struct ivopts_data *data)
{
  class loop *loop = data->current_loop;

  /* Call target hook for target dependent checks.  */
  if (!targetm.predict_doloop_p (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predict doloop failure due to"
		 " target specific checks.\n");
      return false;
    }

  /* Similar to doloop_optimize, check iteration description to know it's
     suitable or not.  Keep it as simple as possible, feel free to extend it
     if you find any multiple exits cases matter.  */
  edge exit = single_dom_exit (loop);
  class tree_niter_desc *niter_desc;
  if (!exit || !(niter_desc = niter_for_exit (data, exit)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predict doloop failure due to"
		 " unexpected niters.\n");
      return false;
    }

  /* Similar to doloop_optimize, check whether iteration count too small
     and not profitable.  */
  HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
  /* -1 means no reliable estimate; fall back to the likely maximum.  */
  if (est_niter == -1)
    est_niter = get_likely_max_loop_iterations_int (loop);
  if (est_niter >= 0 && est_niter < 3)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predict doloop failure due to"
		 " too few iterations (%u).\n",
		 (unsigned int) est_niter);
      return false;
    }

  return true;
}
3877 :
3878 : /* Determines cost of the computation of EXPR. */
3879 :
static unsigned
computation_cost (tree expr, bool speed)
{
  rtx_insn *seq;
  rtx rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  /* Temporarily force a normal-frequency profile so the expansion is
     costed purely according to SPEED; both are restored below.  */
  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  /* Give every referenced decl a (fake) DECL_RTL so EXPR can expand.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  /* If the result is not already a register, account for the cost of
     getting at the value as well.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, TYPE_MODE (type), speed);

  return cost;
}
3910 :
3911 : /* Returns variable containing the value of candidate CAND at statement AT. */
3912 :
3913 : static tree
3914 18435608 : var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3915 : {
3916 18435608 : if (stmt_after_increment (loop, cand, stmt))
3917 4728875 : return cand->var_after;
3918 : else
3919 13706733 : return cand->var_before;
3920 : }
3921 :
3922 : /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3923 : same precision that is at least as wide as the precision of TYPE, stores
3924 : BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3925 : type of A and B. */
3926 :
3927 : static tree
3928 14246012 : determine_common_wider_type (tree *a, tree *b)
3929 : {
3930 14246012 : tree wider_type = NULL;
3931 14246012 : tree suba, subb;
3932 14246012 : tree atype = TREE_TYPE (*a);
3933 :
3934 14246012 : if (CONVERT_EXPR_P (*a))
3935 : {
3936 8010957 : suba = TREE_OPERAND (*a, 0);
3937 8010957 : wider_type = TREE_TYPE (suba);
3938 8010957 : if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3939 : return atype;
3940 : }
3941 : else
3942 : return atype;
3943 :
3944 7992640 : if (CONVERT_EXPR_P (*b))
3945 : {
3946 1584128 : subb = TREE_OPERAND (*b, 0);
3947 1584128 : if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3948 : return atype;
3949 : }
3950 : else
3951 : return atype;
3952 :
3953 1502444 : *a = suba;
3954 1502444 : *b = subb;
3955 1502444 : return wider_type;
3956 : }
3957 :
3958 : /* Determines the expression by that USE is expressed from induction variable
3959 : CAND at statement AT in DATA's current loop. The expression is stored in
3960 : two parts in a decomposed form. The invariant part is stored in AFF_INV;
3961 : while variant part in AFF_VAR. Store ratio of CAND.step over USE.step in
3962 : PRAT if it's non-null. Returns false if USE cannot be expressed using
3963 : CAND. */
3964 :
static bool
get_computation_aff_1 (struct ivopts_data *data, gimple *at, struct iv_use *use,
		       struct iv_cand *cand, class aff_tree *aff_inv,
		       class aff_tree *aff_var, widest_int *prat = NULL)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase = cand->iv->base, cstep = cand->iv->step;
  tree common_type, uutype, var, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  aff_tree aff_cbase;
  widest_int rat;

  /* We must have a precision to express the values of use.  */
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    return false;

  var = var_at_stmt (data->current_loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
	{
	  tree inner_base, inner_step, inner_type;
	  inner_base = TREE_OPERAND (cbase, 0);
	  if (CONVERT_EXPR_P (cstep))
	    inner_step = TREE_OPERAND (cstep, 0);
	  else
	    inner_step = cstep;

	  inner_type = TREE_TYPE (inner_base);
	  /* If candidate is added from a biv whose type is smaller than
	     ctype, we know both candidate and the biv won't overflow.
	     In this case, it's safe to skip the convertion in candidate.
	     As an example, (unsigned short)((unsigned long)A) equals to
	     (unsigned short)A, if A has a type no larger than short.  */
	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
	    {
	      cbase = inner_base;
	      cstep = inner_step;
	    }
	}
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);
      var = fold_convert (uutype, var);
    }

  /* Ratio is 1 when computing the value of biv cand by itself.
     We can't rely on constant_multiple_of in this case because the
     use is created after the original biv is selected.  The call
     could fail because of inconsistent fold behavior.  See PR68021
     for more information.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    {
      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (use->iv->ssa_name == cand->var_after);
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
      rat = 1;
    }
  else if (!constant_multiple_of (ustep, cstep, &rat, data))
    return false;

  if (prat)
    *prat = rat;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff_inv);
  tree_to_aff_combination (cbase, common_type, &aff_cbase);
  tree_to_aff_combination (var, uutype, aff_var);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&aff_cbase, &cstep_aff);
    }

  /* Fold the invariant part: aff_inv = ubase - rat * (cbase [+ cstep]).  */
  aff_combination_scale (&aff_cbase, -rat);
  aff_combination_add (aff_inv, &aff_cbase);
  if (common_type != uutype)
    aff_combination_convert (aff_inv, uutype);

  /* And the variant part: aff_var = rat * var.  */
  aff_combination_scale (aff_var, rat);
  return true;
}
4066 :
4067 : /* Determines the expression by that USE is expressed from induction variable
4068 : CAND at statement AT in DATA's current loop. The expression is stored in a
4069 : decomposed form into AFF. Returns false if USE cannot be expressed using
4070 : CAND. */
4071 :
4072 : static bool
4073 1238949 : get_computation_aff (struct ivopts_data *data, gimple *at, struct iv_use *use,
4074 : struct iv_cand *cand, class aff_tree *aff)
4075 : {
4076 1238949 : aff_tree aff_var;
4077 :
4078 1238949 : if (!get_computation_aff_1 (data, at, use, cand, aff, &aff_var))
4079 : return false;
4080 :
4081 1131449 : aff_combination_add (aff, &aff_var);
4082 1131449 : return true;
4083 1238949 : }
4084 :
4085 : /* Return the type of USE. */
4086 :
4087 : static tree
4088 1001675 : get_use_type (struct iv_use *use)
4089 : {
4090 1001675 : tree base_type = TREE_TYPE (use->iv->base);
4091 1001675 : tree type;
4092 :
4093 1001675 : if (use->type == USE_REF_ADDRESS)
4094 : {
4095 : /* The base_type may be a void pointer. Create a pointer type based on
4096 : the mem_ref instead. */
4097 0 : type = build_pointer_type (TREE_TYPE (*use->op_p));
4098 0 : gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4099 : == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4100 : }
4101 : else
4102 : type = base_type;
4103 :
4104 1001675 : return type;
4105 : }
4106 :
4107 : /* Determines the expression by that USE is expressed from induction variable
4108 : CAND at statement AT in DATA's current loop. The computation is
4109 : unshared. */
4110 :
4111 : static tree
4112 380884 : get_computation_at (struct ivopts_data *data, gimple *at,
4113 : struct iv_use *use, struct iv_cand *cand)
4114 : {
4115 380884 : aff_tree aff;
4116 380884 : tree type = get_use_type (use);
4117 :
4118 380884 : if (!get_computation_aff (data, at, use, cand, &aff))
4119 : return NULL_TREE;
4120 273384 : unshare_aff_combination (&aff);
4121 273384 : return fold_convert (type, aff_combination_to_tree (&aff));
4122 380884 : }
4123 :
4124 : /* Like get_computation_at, but try harder, even if the computation
4125 : is more expensive. Intended for debug stmts. */
4126 :
static tree
get_debug_computation_at (struct ivopts_data *data, gimple *at,
			  struct iv_use *use, struct iv_cand *cand)
{
  /* First try the regular, exact expression.  */
  if (tree ret = get_computation_at (data, at, use, cand))
    return ret;

  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase = cand->iv->base, cstep = cand->iv->step;
  tree var;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  widest_int rat;

  /* We must have a precision to express the values of use.  */
  if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
    return NULL_TREE;

  /* Try to handle the case that get_computation_at doesn't,
     try to express
     use = ubase + (var - cbase) / ratio.  */
  if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
			     &rat, data))
    return NULL_TREE;

  bool neg_p = false;
  if (wi::neg_p (rat))
    {
      /* A negative ratio with an unsigned candidate cannot be negated
	 safely; give up.  */
      if (TYPE_UNSIGNED (ctype))
	return NULL_TREE;
      neg_p = true;
      rat = wi::neg (rat);
    }

  /* If both IVs can wrap around and CAND doesn't have a power of two step,
     it is unsafe.  Consider uint16_t CAND with step 9, when wrapping around,
     the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
     uint8_t with step 3, those values divided by 3 cast to uint8_t will be
     ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59.  */
  if (!use->iv->no_overflow
      && !cand->iv->no_overflow
      && !integer_pow2p (cstep))
    return NULL_TREE;

  int bits = wi::exact_log2 (rat);
  if (bits == -1)
    bits = wi::floor_log2 (rat) + 1;
  /* NOTE(review): presumably this rejects cases where a wrapping candidate
     leaves too few spare bits above UTYPE for the division by RAT to stay
     exact — see the wrap-around example above.  */
  if (!cand->iv->no_overflow
      && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
    return NULL_TREE;

  var = var_at_stmt (data->current_loop, cand, at);

  /* Perform the arithmetic in an unsigned equivalent if CAND is a
     pointer.  */
  if (POINTER_TYPE_P (ctype))
    {
      ctype = unsigned_type_for (ctype);
      cbase = fold_convert (ctype, cbase);
      cstep = fold_convert (ctype, cstep);
      var = fold_convert (ctype, var);
    }

  /* Undo the increment if AT executes after it.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
		       unshare_expr (cstep));

  var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
  var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
		     wide_int_to_tree (TREE_TYPE (var), rat));
  if (POINTER_TYPE_P (utype))
    {
      var = fold_convert (sizetype, var);
      if (neg_p)
	var = fold_build1 (NEGATE_EXPR, sizetype, var);
      var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
    }
  else
    {
      var = fold_convert (utype, var);
      var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
			 ubase, var);
    }
  return var;
}
4209 :
4210 : /* Adjust the cost COST for being in loop setup rather than loop body.
4211 : If we're optimizing for space, the loop setup overhead is constant;
4212 : if we're optimizing for speed, amortize it over the per-iteration cost.
4213 : If ROUND_UP_P is true, the result is round up rather than to zero when
4214 : optimizing for speed. */
4215 : static int64_t
4216 10316289 : adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4217 : bool round_up_p = false)
4218 : {
4219 10316289 : if (cost == INFTY)
4220 : return cost;
4221 10316289 : else if (optimize_loop_for_speed_p (data->current_loop))
4222 : {
4223 8679972 : uint64_t niters = avg_loop_niter (data->current_loop);
4224 8679972 : if (niters > (uint64_t) cost)
4225 13319235 : return (round_up_p && cost != 0) ? 1 : 0;
4226 1841197 : return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4227 : }
4228 : else
4229 : return cost;
4230 : }
4231 :
4232 : /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4233 : EXPR operand holding the shift. COST0 and COST1 are the costs for
4234 : calculating the operands of EXPR. Returns true if successful, and returns
4235 : the cost in COST. */
4236 :
static bool
get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int as_cost, sa_cost;
  bool mult_in_op1;

  /* Only multiplications by an exact power of two within the usable
     shift range can be turned into shift-add.  */
  if (!(m >= 0 && m < maxm))
    return false;

  STRIP_NOPS (op1);
  mult_in_op1 = operand_equal_p (op1, mult, 0);

  /* Cost of doing it as an explicit shift followed by an add.  */
  as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);

  /* If the target has a cheap shift-and-add or shift-and-sub instruction,
     use that in preference to a shift insn followed by an add insn.  */
  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
	     ? shiftadd_cost (speed, mode, m)
	     : (mult_in_op1
		? shiftsub1_cost (speed, mode, m)
		: shiftsub0_cost (speed, mode, m)));

  res = comp_cost (MIN (as_cost, sa_cost), 0);
  /* Add the cost of the operand that is not the multiplication.  */
  res += (mult_in_op1 ? cost0 : cost1);

  /* And the cost of materializing the multiplied operand, if needed.  */
  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res += force_expr_to_var_cost (multop, speed);

  *cost = res;
  return true;
}
4276 :
4277 : /* Estimates cost of forcing expression EXPR into a variable. */
4278 :
static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;
  scalar_int_mode int_mode;

  if (!costs_initialized)
    {
      /* Measure once per process the cost of materializing a plain
	 integer constant, a symbol address, and a symbol-plus-offset
	 address, for both the size (0) and speed (1) variants.  */
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);


      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* Variables and SSA names cost nothing to "force".  */
  if (SSA_VAR_P (expr))
    return no_cost;

  /* Invariants take one of the pre-measured constant costs.  */
  if (is_gimple_min_invariant (expr))
    {
      if (poly_int_tree_p (expr))
	return comp_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (VAR_P (obj)
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return comp_cost (symbol_cost [speed], 0);
	}

      return comp_cost (address_cost [speed], 0);
    }

  /* First switch: extract the operands to cost recursively.  */
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case EXACT_DIV_EXPR:
    case TRUNC_DIV_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
    case BIT_NOT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;
    /* See add_iv_candidate_for_doloop, for doloop may_be_zero case, we
       introduce COND_EXPR for IV base, need to support better cost estimation
       for this COND_EXPR and tcc_comparison.  */
    case COND_EXPR:
      op0 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      op1 = TREE_OPERAND (expr, 2);
      STRIP_NOPS (op1);
      break;
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
    case EQ_EXPR:
    case NE_EXPR:
    case UNORDERED_EXPR:
    case ORDERED_EXPR:
    case UNLT_EXPR:
    case UNLE_EXPR:
    case UNGT_EXPR:
    case UNGE_EXPR:
    case UNEQ_EXPR:
    case LTGT_EXPR:
    case MAX_EXPR:
    case MIN_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op1);
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return comp_cost (target_spill_cost[speed], 0);
    }

  /* Cost the operands; SSA names and constants are free at this level.  */
  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  /* Second switch: cost the operation itself.  */
  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = comp_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  /* Prefer a combined shift-and-add form when one operand is a
	     multiplication by a HWI constant.  */
	  if (mult != NULL_TREE
	      && is_a <scalar_int_mode> (mode, &int_mode)
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
				    speed, &sa_cost))
	    return sa_cost;
	}
      break;

    CASE_CONVERT:
      {
	tree inner_mode, outer_mode;
	outer_mode = TREE_TYPE (expr);
	inner_mode = TREE_TYPE (op0);
	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
					TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
					      mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
					      mode, speed), 0);
      else
	/* A variable-by-variable multiply gets the spill-equivalent cost.  */
	return comp_cost (target_spill_cost [speed], 0);
      break;

    case EXACT_DIV_EXPR:
    case TRUNC_DIV_EXPR:
      /* Division by power of two is usually cheap, so we allow it.  Forbid
	 anything else.  */
      if (integer_pow2p (TREE_OPERAND (expr, 1)))
	cost = comp_cost (add_cost (speed, mode), 0);
      else
	cost = comp_cost (target_spill_cost[speed], 0);
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_NOT_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      cost = comp_cost (add_cost (speed, mode), 0);
      break;
    case COND_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
	  || CONSTANT_CLASS_P (op0))
	cost = no_cost;
      else
	cost = force_expr_to_var_cost (op0, speed);
      break;
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
    case EQ_EXPR:
    case NE_EXPR:
    case UNORDERED_EXPR:
    case ORDERED_EXPR:
    case UNLT_EXPR:
    case UNLE_EXPR:
    case UNGT_EXPR:
    case UNGE_EXPR:
    case UNEQ_EXPR:
    case LTGT_EXPR:
    case MAX_EXPR:
    case MIN_EXPR:
      /* Simply use add cost for now, FIXME if there is some more accurate cost
	 evaluation way.  */
      cost = comp_cost (add_cost (speed, mode), 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;
  return cost;
}
4527 :
4528 : /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4529 : invariants the computation depends on. */
4530 :
4531 : static comp_cost
4532 24568558 : force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4533 : {
4534 24568558 : if (!expr)
4535 2030964 : return no_cost;
4536 :
4537 22537594 : find_inv_vars (data, &expr, inv_vars);
4538 22537594 : return force_expr_to_var_cost (expr, data->speed);
4539 : }
4540 :
4541 : /* Returns cost of auto-modifying address expression in shape base + offset.
4542 : AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4543 : address expression. The address expression has ADDR_MODE in addr space
4544 : AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4545 : speed or size. */
4546 :
enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

/* Cached per mode/address-space address costs for each auto-increment
   addressing form; INFTY marks an unsupported form.  */
struct ainc_cost_data
{
  int64_t costs[AINC_NONE];
};
4560 :
4561 : static comp_cost
4562 1785243 : get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4563 : machine_mode addr_mode, machine_mode mem_mode,
4564 : addr_space_t as, bool speed)
4565 : {
 : /* Bail out early if the target supports no auto-modify addressing
 : for this memory mode at all. */
4566 1785243 : if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4567 : && !USE_STORE_PRE_DECREMENT (mem_mode)
4568 : && !USE_LOAD_POST_DECREMENT (mem_mode)
4569 : && !USE_STORE_POST_DECREMENT (mem_mode)
4570 : && !USE_LOAD_PRE_INCREMENT (mem_mode)
4571 : && !USE_STORE_PRE_INCREMENT (mem_mode)
4572 : && !USE_LOAD_POST_INCREMENT (mem_mode)
4573 : && !USE_STORE_POST_INCREMENT (mem_mode))
4574 1785243 : return infinite_cost;
4575 :
 : /* Costs are cached per (address space, memory mode) pair; grow the
 : cache lazily as new pairs are seen. */
4576 : static vec<ainc_cost_data *> ainc_cost_data_list;
4577 : unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4578 : if (idx >= ainc_cost_data_list.length ())
4579 : {
4580 : unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4581 :
4582 : gcc_assert (nsize > idx);
4583 : ainc_cost_data_list.safe_grow_cleared (nsize, true);
4584 : }
4585 :
4586 : ainc_cost_data *data = ainc_cost_data_list[idx];
4587 : if (data == NULL)
4588 : {
 : /* Probe each auto-modify form once with a scratch register and
 : remember its address cost, or INFTY when invalid. */
4589 : rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4590 :
4591 : data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4592 : data->costs[AINC_PRE_DEC] = INFTY;
4593 : data->costs[AINC_POST_DEC] = INFTY;
4594 : data->costs[AINC_PRE_INC] = INFTY;
4595 : data->costs[AINC_POST_INC] = INFTY;
4596 : if (USE_LOAD_PRE_DECREMENT (mem_mode)
4597 : || USE_STORE_PRE_DECREMENT (mem_mode))
4598 : {
4599 : rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4600 :
4601 : if (memory_address_addr_space_p (mem_mode, addr, as))
4602 : data->costs[AINC_PRE_DEC]
4603 : = address_cost (addr, mem_mode, as, speed);
4604 : }
4605 : if (USE_LOAD_POST_DECREMENT (mem_mode)
4606 : || USE_STORE_POST_DECREMENT (mem_mode))
4607 : {
4608 : rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4609 :
4610 : if (memory_address_addr_space_p (mem_mode, addr, as))
4611 : data->costs[AINC_POST_DEC]
4612 : = address_cost (addr, mem_mode, as, speed);
4613 : }
4614 : if (USE_LOAD_PRE_INCREMENT (mem_mode)
4615 : || USE_STORE_PRE_INCREMENT (mem_mode))
4616 : {
4617 : rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4618 :
4619 : if (memory_address_addr_space_p (mem_mode, addr, as))
4620 : data->costs[AINC_PRE_INC]
4621 : = address_cost (addr, mem_mode, as, speed);
4622 : }
4623 : if (USE_LOAD_POST_INCREMENT (mem_mode)
4624 : || USE_STORE_POST_INCREMENT (mem_mode))
4625 : {
4626 : rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4627 :
4628 : if (memory_address_addr_space_p (mem_mode, addr, as))
4629 : data->costs[AINC_POST_INC]
4630 : = address_cost (addr, mem_mode, as, speed);
4631 : }
4632 : ainc_cost_data_list[idx] = data;
4633 : }
4634 :
 : /* Match (AINC_OFFSET, AINC_STEP) against the four auto-modify shapes:
 : post-inc/dec have zero offset, pre-inc/dec have offset == +-msize. */
4635 : poly_int64 msize = GET_MODE_SIZE (mem_mode);
4636 : if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4637 : return comp_cost (data->costs[AINC_POST_INC], 0);
4638 : if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4639 : return comp_cost (data->costs[AINC_POST_DEC], 0);
4640 : if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4641 : return comp_cost (data->costs[AINC_PRE_INC], 0);
4642 : if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4643 : return comp_cost (data->costs[AINC_PRE_DEC], 0);
4644 :
4645 : return infinite_cost;
4646 : }
4647 :
4648 : /* Return cost of computing USE's address expression by using CAND.
4649 : AFF_INV and AFF_VAR represent invariant and variant parts of the
4650 : address expression, respectively. If AFF_INV is simple, store
4651 : the loop invariant variables which are depended by it in INV_VARS;
4652 : if AFF_INV is complicated, handle it as a new invariant expression
4653 : and record it in INV_EXPR. RATIO indicates multiple times between
4654 : steps of USE and CAND. If CAN_AUTOINC is nonNULL, store boolean
4655 : value to it indicating if this is an auto-increment address. */
4656 :
4657 : static comp_cost
4658 5536534 : get_address_cost (struct ivopts_data *data, struct iv_use *use,
4659 : struct iv_cand *cand, aff_tree *aff_inv,
4660 : aff_tree *aff_var, HOST_WIDE_INT ratio,
4661 : bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4662 : bool *can_autoinc, bool speed)
4663 : {
4664 5536534 : rtx addr;
4665 5536534 : bool simple_inv = true;
4666 5536534 : tree comp_inv = NULL_TREE, type = aff_var->type;
4667 5536534 : comp_cost var_cost = no_cost, cost = no_cost;
 : /* PARTS uses integer_one_node as a placeholder register while probing
 : which addressing forms the target accepts. */
4668 5536534 : struct mem_address parts = {NULL_TREE, integer_one_node,
4669 5536534 : NULL_TREE, NULL_TREE, NULL_TREE};
4670 5536534 : machine_mode addr_mode = TYPE_MODE (type);
4671 5536534 : machine_mode mem_mode = TYPE_MODE (use->mem_type);
4672 5536534 : addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4673 : /* Only true if ratio != 1. */
4674 5536534 : bool ok_with_ratio_p = false;
4675 5536534 : bool ok_without_ratio_p = false;
4676 5536534 : code_helper code = ERROR_MARK;
4677 :
 : /* For internal-function pointer uses, validate addresses against the
 : internal function rather than a plain memory reference. */
4678 5536534 : if (use->type == USE_PTR_ADDRESS)
4679 : {
4680 4502 : gcall *call = as_a<gcall *> (use->stmt);
4681 4502 : gcc_assert (gimple_call_internal_p (call));
4682 4502 : code = gimple_call_internal_fn (call);
4683 : }
4684 :
4685 5536534 : if (!aff_combination_const_p (aff_inv))
4686 : {
4687 3682694 : parts.index = integer_one_node;
4688 : /* Addressing mode "base + index". */
4689 3682694 : ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4690 3682694 : if (ratio != 1)
4691 : {
4692 2797231 : parts.step = wide_int_to_tree (type, ratio);
4693 : /* Addressing mode "base + index << scale". */
4694 2797231 : ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4695 2797231 : if (!ok_with_ratio_p)
4696 1710322 : parts.step = NULL_TREE;
4697 : }
4698 2595785 : if (ok_with_ratio_p || ok_without_ratio_p)
4699 : {
4700 3682694 : if (maybe_ne (aff_inv->offset, 0))
4701 : {
4702 2414909 : parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4703 : /* Addressing mode "base + index [<< scale] + offset". */
4704 2414909 : if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4705 478 : parts.offset = NULL_TREE;
4706 : else
4707 2414431 : aff_inv->offset = 0;
4708 : }
4709 :
4710 3682694 : move_fixed_address_to_symbol (&parts, aff_inv);
4711 : /* Base is fixed address and is moved to symbol part. */
4712 3682694 : if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4713 446114 : parts.base = NULL_TREE;
4714 :
4715 : /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4716 3682694 : if (parts.symbol != NULL_TREE
4717 3682694 : && !valid_mem_ref_p (mem_mode, as, &parts, code))
4718 : {
4719 6638 : aff_combination_add_elt (aff_inv, parts.symbol, 1);
4720 6638 : parts.symbol = NULL_TREE;
4721 : /* Reset SIMPLE_INV since symbol address needs to be computed
4722 : outside of address expression in this case. */
4723 6638 : simple_inv = false;
4724 : /* Symbol part is moved back to base part, it can't be NULL. */
4725 6638 : parts.base = integer_one_node;
4726 : }
4727 : }
4728 : else
4729 0 : parts.index = NULL_TREE;
4730 : }
4731 : else
4732 : {
 : /* Constant invariant part: first try to express the access as an
 : auto-increment address, which subsumes the offset entirely. */
4733 1853840 : poly_int64 ainc_step;
4734 1853840 : if (can_autoinc
4735 1853840 : && ratio == 1
4736 3707672 : && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4737 : {
4738 1785243 : poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4739 :
4740 1785243 : if (stmt_after_increment (data->current_loop, cand, use->stmt))
4741 : ainc_offset += ainc_step;
4742 1785243 : cost = get_address_cost_ainc (ainc_step, ainc_offset,
4743 : addr_mode, mem_mode, as, speed);
4744 1785243 : if (!cost.infinite_cost_p ())
4745 : {
4746 0 : *can_autoinc = true;
4747 0 : return cost;
4748 : }
4749 1785243 : cost = no_cost;
4750 : }
4751 1853840 : if (!aff_combination_zero_p (aff_inv))
4752 : {
4753 1039354 : parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4754 : /* Addressing mode "base + offset". */
4755 1039354 : if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4756 38 : parts.offset = NULL_TREE;
4757 : else
4758 1039316 : aff_inv->offset = 0;
4759 : }
4760 : }
4761 :
4762 1860478 : if (simple_inv)
4763 5529896 : simple_inv = (aff_inv == NULL
4764 8772267 : || aff_combination_const_p (aff_inv)
4765 8765629 : || aff_combination_singleton_var_p (aff_inv));
 : /* Whatever is left in AFF_INV must be computed outside the address. */
4766 5536534 : if (!aff_combination_zero_p (aff_inv))
4767 3242439 : comp_inv = aff_combination_to_tree (aff_inv);
4768 3242439 : if (comp_inv != NULL_TREE)
4769 3242439 : cost = force_var_cost (data, comp_inv, inv_vars);
4770 5536534 : if (ratio != 1 && parts.step == NULL_TREE)
4771 1710330 : var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4772 5536534 : if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4773 38 : var_cost += add_cost (speed, addr_mode);
4774 :
4775 5536534 : if (comp_inv && inv_expr && !simple_inv)
4776 : {
4777 744648 : *inv_expr = get_loop_invariant_expr (data, comp_inv);
4778 : /* Clear depends on. */
4779 744648 : if (*inv_expr != NULL && inv_vars && *inv_vars)
4780 426721 : bitmap_clear (*inv_vars);
4781 :
4782 : /* Cost of small invariant expression adjusted against loop niters
4783 : is usually zero, which makes it difficult to be differentiated
4784 : from candidate based on loop invariant variables. Secondly, the
4785 : generated invariant expression may not be hoisted out of loop by
4786 : following pass. We penalize the cost by rounding up in order to
4787 : neutralize such effects. */
4788 744648 : cost.cost = adjust_setup_cost (data, cost.cost, true);
4789 744648 : cost.scratch = cost.cost;
4790 : }
4791 :
4792 5536534 : cost += var_cost;
4793 5536534 : addr = addr_for_mem_ref (&parts, as, false);
4794 5536534 : gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4795 5536534 : cost += address_cost (addr, mem_mode, as, speed);
4796 :
 : /* Complexity counts the number of parts the address is built from;
 : it breaks ties between candidates of equal cost. */
4797 5536534 : if (parts.symbol != NULL_TREE)
4798 500161 : cost.complexity += 1;
4799 : /* Don't increase the complexity of adding a scaled index if it's
4800 : the only kind of index that the target allows. */
4801 5536534 : if (parts.step != NULL_TREE && ok_without_ratio_p)
4802 1086909 : cost.complexity += 1;
4803 5536534 : if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4804 3242401 : cost.complexity += 1;
4805 5536534 : if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4806 3453747 : cost.complexity += 1;
4807 :
4808 : return cost;
4809 : }
4810 :
4811 : /* Scale (multiply) the computed COST (except scratch part that should be
4812 : hoisted out a loop) by header->frequency / AT->frequency, which makes
4813 : expected cost more accurate. */
4814 :
4815 : static comp_cost
4816 12770992 : get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4817 : {
 : /* Only scale when optimizing for speed and the loop header has a
 : meaningful profile count. */
4818 12770992 : if (data->speed
4819 12770992 : && data->current_loop->header->count.to_frequency (cfun) > 0)
4820 : {
4821 11191476 : basic_block bb = gimple_bb (at);
4822 11191476 : gcc_assert (cost.scratch <= cost.cost)
4823 11191476 : int scale_factor = (int)(intptr_t) bb->aux;
4824 11191476 : if (scale_factor == 1)
4825 10637540 : return cost;
4826 :
 : /* The scratch (setup) part is hoisted out of the loop, so only the
 : per-iteration remainder is multiplied by the block's factor. */
4827 553936 : int64_t scaled_cost
4828 553936 : = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4829 :
4830 553936 : if (dump_file && (dump_flags & TDF_DETAILS))
4831 93 : fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4832 : "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4833 : 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4834 :
4835 : cost.cost = scaled_cost;
4836 : }
4837 :
4838 2133452 : return cost;
4839 : }
4840 :
4841 : /* Determines the cost of the computation by that USE is expressed
4842 : from induction variable CAND. If ADDRESS_P is true, we just need
4843 : to create an address from it, otherwise we want to get it into
4844 : register. A set of invariants we depend on is stored in INV_VARS.
4845 : If CAN_AUTOINC is nonnull, use it to record whether autoinc
4846 : addressing is likely. If INV_EXPR is nonnull, record invariant
4847 : expr entry in it. */
4848 :
4849 : static comp_cost
4850 20098845 : get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4851 : struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4852 : bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4853 : {
4854 20098845 : gimple *at = use->stmt;
4855 20098845 : tree ubase = use->iv->base, cbase = cand->iv->base;
4856 20098845 : tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4857 20098845 : tree comp_inv = NULL_TREE;
4858 20098845 : HOST_WIDE_INT ratio, aratio;
4859 20098845 : comp_cost cost;
4860 20098845 : widest_int rat;
4861 40197690 : aff_tree aff_inv, aff_var;
4862 20098845 : bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4863 :
 : /* Reset all optional outputs up front so early returns are safe. */
4864 20098845 : if (inv_vars)
4865 17632562 : *inv_vars = NULL;
4866 20098845 : if (can_autoinc)
4867 8740817 : *can_autoinc = false;
4868 20098845 : if (inv_expr)
4869 19682662 : *inv_expr = NULL;
4870 :
4871 : /* Check if we have enough precision to express the values of use. */
4872 20098845 : if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4873 3050667 : return infinite_cost;
4874 :
4875 17048178 : if (address_p
4876 17048178 : || (use->iv->base_object
4877 2087748 : && cand->iv->base_object
4878 1013458 : && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4879 1002022 : && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4880 : {
4881 : /* Do not try to express address of an object with computation based
4882 : on address of a different object. This may cause problems in rtl
4883 : level alias analysis (that does not expect this to be happening,
4884 : as this is illegal in C), and would be unlikely to be useful
4885 : anyway. */
4886 7905924 : if (use->iv->base_object
4887 7905924 : && cand->iv->base_object
4888 12113368 : && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4889 1451073 : return infinite_cost;
4890 : }
4891 :
 : /* Split USE = RATIO * CAND + invariant; give up when the ratio does
 : not fit a host integer. */
4892 15597105 : if (!get_computation_aff_1 (data, at, use, cand, &aff_inv, &aff_var, &rat)
4893 15597105 : || !wi::fits_shwi_p (rat))
4894 2826113 : return infinite_cost;
4895 :
4896 12770992 : ratio = rat.to_shwi ();
4897 12770992 : if (address_p)
4898 : {
4899 5536534 : cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4900 : inv_vars, inv_expr, can_autoinc, speed);
4901 5536534 : cost = get_scaled_computation_cost_at (data, at, cost);
4902 : /* For doloop IV cand, add on the extra cost. */
4903 5536534 : cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4904 5536534 : return cost;
4905 : }
4906 :
4907 7234458 : bool simple_inv = (aff_combination_const_p (&aff_inv)
4908 1991595 : || aff_combination_singleton_var_p (&aff_inv));
4909 7234458 : tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4910 7234458 : aff_combination_convert (&aff_inv, signed_type);
4911 7234458 : if (!aff_combination_zero_p (&aff_inv))
4912 5203494 : comp_inv = aff_combination_to_tree (&aff_inv);
4913 :
4914 7234458 : cost = force_var_cost (data, comp_inv, inv_vars);
4915 7234458 : if (comp_inv && inv_expr && !simple_inv)
4916 : {
4917 1390511 : *inv_expr = get_loop_invariant_expr (data, comp_inv);
4918 : /* Clear depends on. */
4919 1390511 : if (*inv_expr != NULL && inv_vars && *inv_vars)
4920 863315 : bitmap_clear (*inv_vars);
4921 :
4922 1390511 : cost.cost = adjust_setup_cost (data, cost.cost);
4923 : /* Record setup cost in scratch field. */
4924 1390511 : cost.scratch = cost.cost;
4925 : }
4926 : /* Cost of constant integer can be covered when adding invariant part to
4927 : variant part. */
4928 5843947 : else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4929 3211874 : cost = no_cost;
4930 :
4931 : /* Need type narrowing to represent use with cand. */
4932 7234458 : if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4933 : {
4934 791037 : machine_mode outer_mode = TYPE_MODE (utype);
4935 791037 : machine_mode inner_mode = TYPE_MODE (ctype);
4936 791037 : cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4937 : }
4938 :
4939 : /* Turn a + i * (-c) into a - i * c. */
4940 7234458 : if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4941 1839237 : aratio = -ratio;
4942 : else
4943 : aratio = ratio;
4944 :
4945 7234458 : if (ratio != 1)
4946 2732857 : cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4947 :
4948 : /* TODO: We may also need to check if we can compute a + i * 4 in one
4949 : instruction. */
4950 : /* Need to add up the invariant and variant parts. */
4951 7234458 : if (comp_inv && !integer_zerop (comp_inv))
4952 10400010 : cost += add_cost (speed, TYPE_MODE (utype));
4953 :
4954 7234458 : cost = get_scaled_computation_cost_at (data, at, cost);
4955 :
4956 : /* For doloop IV cand, add on the extra cost. */
4957 7234458 : if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4958 0 : cost += targetm.doloop_cost_for_generic;
4959 :
4960 7234458 : return cost;
4961 20098845 : }
4962 :
4963 : /* Determines cost of computing the use in GROUP with CAND in a generic
4964 : expression. */
4965 :
4966 : static bool
4967 5504461 : determine_group_iv_cost_generic (struct ivopts_data *data,
4968 : struct iv_group *group, struct iv_cand *cand)
4969 : {
4970 5504461 : comp_cost cost;
4971 5504461 : iv_inv_expr_ent *inv_expr = NULL;
4972 5504461 : bitmap inv_vars = NULL, inv_exprs = NULL;
4973 5504461 : struct iv_use *use = group->vuses[0];
4974 :
4975 : /* The simple case first -- if we need to express value of the preserved
4976 : original biv, the cost is 0. This also prevents us from counting the
4977 : cost of increment twice -- once at this use and once in the cost of
4978 : the candidate. */
4979 5504461 : if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4980 56746 : cost = no_cost;
4981 : /* If the IV candidate involves undefined SSA values and is not the
4982 : same IV as on the USE avoid using that candidate here. */
4983 5447715 : else if (cand->involves_undefs
4984 5447715 : && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
4985 218 : return false;
4986 : else
4987 5447497 : cost = get_computation_cost (data, use, cand, false,
4988 : &inv_vars, NULL, &inv_expr);
4989 :
 : /* Record the invariant expression (if any) as a dependence of this
 : (group, candidate) pair. */
4990 5504243 : if (inv_expr)
4991 : {
4992 977013 : inv_exprs = BITMAP_ALLOC (NULL);
4993 977013 : bitmap_set_bit (inv_exprs, inv_expr->id);
4994 : }
4995 5504243 : set_group_iv_cost (data, group, cand, cost, inv_vars,
4996 : NULL_TREE, ERROR_MARK, inv_exprs);
4997 5504243 : return !cost.infinite_cost_p ();
4998 : }
4999 :
5000 : /* Determines cost of computing uses in GROUP with CAND in addresses. */
5001 :
5002 : static bool
5003 6274534 : determine_group_iv_cost_address (struct ivopts_data *data,
5004 : struct iv_group *group, struct iv_cand *cand)
5005 : {
5006 6274534 : unsigned i;
5007 6274534 : bitmap inv_vars = NULL, inv_exprs = NULL;
5008 6274534 : bool can_autoinc;
5009 6274534 : iv_inv_expr_ent *inv_expr = NULL;
5010 6274534 : struct iv_use *use = group->vuses[0];
5011 6274534 : comp_cost sum_cost = no_cost, cost;
5012 :
 : /* Cost the leading use of the group; the remaining uses share its
 : invariant-variable set and setup cost. */
5013 6274534 : cost = get_computation_cost (data, use, cand, true,
5014 : &inv_vars, &can_autoinc, &inv_expr);
5015 :
5016 6274534 : if (inv_expr)
5017 : {
5018 456760 : inv_exprs = BITMAP_ALLOC (NULL);
5019 456760 : bitmap_set_bit (inv_exprs, inv_expr->id);
5020 : }
5021 6274534 : sum_cost = cost;
5022 6274534 : if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5023 : {
5024 0 : if (can_autoinc)
5025 0 : sum_cost -= cand->cost_step;
5026 : /* If we generated the candidate solely for exploiting autoincrement
5027 : opportunities, and it turns out it can't be used, set the cost to
5028 : infinity to make sure we ignore it. */
5029 0 : else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5030 0 : sum_cost = infinite_cost;
5031 : }
5032 :
5033 : /* Compute and add costs for rest uses of this group. */
5034 8324634 : for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5035 : {
5036 2050100 : struct iv_use *next = group->vuses[i];
5037 :
5038 : /* TODO: We could skip computing cost for sub iv_use when it has the
5039 : same cost as the first iv_use, but the cost really depends on the
5040 : offset and where the iv_use is. */
5041 2050100 : cost = get_computation_cost (data, next, cand, true,
5042 : NULL, &can_autoinc, &inv_expr)
5043 2050100 : if (inv_expr)
5044 : {
5045 287659 : if (!inv_exprs)
5046 90 : inv_exprs = BITMAP_ALLOC (NULL);
5047 :
5048 : /* Uses in a group can share setup code,
5049 : so only add setup cost once. */
5050 287659 : if (bitmap_bit_p (inv_exprs, inv_expr->id))
5051 287272 : cost -= cost.scratch;
5052 : else
5053 387 : bitmap_set_bit (inv_exprs, inv_expr->id);
5054 : }
5055 2050100 : sum_cost += cost;
5056 : }
5057 6274534 : set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5058 : NULL_TREE, ERROR_MARK, inv_exprs);
5059 :
5060 6274534 : return !sum_cost.infinite_cost_p ();
5061 : }
5062 :
5063 : /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5064 : and stores it to VAL. */
5065 :
5066 : static void
5067 3796389 : cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5068 : class tree_niter_desc *desc, aff_tree *val)
5069 : {
5070 11389167 : aff_tree step, delta, nit;
5071 3796389 : struct iv *iv = cand->iv;
5072 3796389 : tree type = TREE_TYPE (iv->base);
5073 3796389 : tree niter = desc->niter;
5074 3796389 : bool after_adjust = stmt_after_increment (loop, cand, at);
5075 3796389 : tree steptype;
5076 :
 : /* Perform the arithmetic in an unsigned (wrap-around) type so the
 : computation itself cannot overflow. */
5077 3796389 : if (POINTER_TYPE_P (type))
5078 106503 : steptype = sizetype;
5079 : else
5080 3689886 : steptype = unsigned_type_for (type);
5081 :
5082 : /* If AFTER_ADJUST is required, the code below generates the equivalent
5083 : of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5084 : BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5085 : SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5086 : doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5087 : class for common idioms that we know are safe. */
5088 3796389 : if (after_adjust
5089 3602820 : && desc->control.no_overflow
5090 3595219 : && integer_onep (desc->control.step)
5091 955880 : && (desc->cmp == LT_EXPR
5092 35549 : || desc->cmp == NE_EXPR)
5093 4752269 : && TREE_CODE (desc->bound) == SSA_NAME)
5094 : {
5095 504569 : if (integer_onep (desc->control.base))
5096 : {
5097 374982 : niter = desc->bound;
5098 374982 : after_adjust = false;
5099 : }
5100 129587 : else if (TREE_CODE (niter) == MINUS_EXPR
5101 129587 : && integer_onep (TREE_OPERAND (niter, 1)))
5102 : {
5103 67659 : niter = TREE_OPERAND (niter, 0);
5104 67659 : after_adjust = false;
5105 : }
5106 : }
5107 :
 : /* VAL = BASE + NITER * STEP [+ STEP]. */
5108 3796389 : tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5109 3796389 : aff_combination_convert (&step, steptype);
5110 3796389 : tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5111 3796389 : aff_combination_convert (&nit, steptype);
5112 3796389 : aff_combination_mult (&nit, &step, &delta);
5113 3796389 : if (after_adjust)
5114 3160179 : aff_combination_add (&delta, &step);
5115 :
5116 3796389 : tree_to_aff_combination (iv->base, type, val);
5117 3796389 : if (!POINTER_TYPE_P (type))
5118 3689886 : aff_combination_convert (val, steptype);
5119 3796389 : aff_combination_add (val, &delta);
5120 : }
5121 :
5122 : /* Returns period of induction variable iv. */
5123 :
5124 : static tree
5125 4062583 : iv_period (struct iv *iv)
5126 : {
5127 4062583 : tree step = iv->step, period, type;
5128 4062583 : tree pow2div;
5129 :
 : /* Only constant-step IVs have a computable period here. */
5130 4062583 : gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5131 :
5132 4062583 : type = unsigned_type_for (TREE_TYPE (step));
5133 : /* Period of the iv is lcm (step, type_range)/step -1,
5134 : i.e., N*type_range/step - 1. Since type range is power
5135 : of two, N == (step >> num_of_ending_zeros_binary (step),
5136 : so the final result is
5137 :
5138 : (type_range >> num_of_ending_zeros_binary (step)) - 1
5139 :
5140 : */
5141 4062583 : pow2div = num_ending_zeros (step);
5142 :
5143 12187749 : period = build_low_bits_mask (type,
5144 4062583 : (TYPE_PRECISION (type)
5145 4062583 : - tree_to_uhwi (pow2div)));
5146 :
5147 4062583 : return period;
5148 : }
5149 :
5150 : /* Returns the comparison operator used when eliminating the iv USE. */
5151 :
5152 : static enum tree_code
5153 3796389 : iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5154 : {
5155 3796389 : class loop *loop = data->current_loop;
5156 3796389 : basic_block ex_bb;
5157 3796389 : edge exit;
5158 :
 : /* Find the edge leaving the loop from the block holding USE's
 : condition; it is one of the two successors of that block. */
5159 3796389 : ex_bb = gimple_bb (use->stmt);
5160 3796389 : exit = EDGE_SUCC (ex_bb, 0);
5161 3796389 : if (flow_bb_inside_loop_p (loop, exit->dest))
5162 2847330 : exit = EDGE_SUCC (ex_bb, 1);
5163 :
 : /* Exit on true means the loop continues while the condition is false,
 : so eliminating the IV uses EQ; otherwise NE. */
5164 3796389 : return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5165 : }
5166 :
5167 : /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5168 : we only detect the situation that BASE = SOMETHING + OFFSET, where the
5169 : calculation is performed in non-wrapping type.
5170 :
5171 : TODO: More generally, we could test for the situation that
5172 : BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5173 : This would require knowing the sign of OFFSET. */
5174 :
5175 : static bool
5176 477 : difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5177 : {
5178 477 : enum tree_code code;
5179 477 : tree e1, e2;
5180 1431 : aff_tree aff_e1, aff_e2, aff_offset;
5181 :
 : /* In a wrapping type the subtraction could legitimately wrap, so we
 : can prove nothing. */
5182 477 : if (!nowrap_type_p (TREE_TYPE (base)))
5183 : return false;
5184 :
5185 477 : base = expand_simple_operations (base);
5186 :
 : /* Decompose BASE into a binary E1 <code> E2, either from its defining
 : assignment or from the expression itself. */
5187 477 : if (TREE_CODE (base) == SSA_NAME)
5188 : {
5189 476 : gimple *stmt = SSA_NAME_DEF_STMT (base);
5190 :
5191 476 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
5192 : return false;
5193 :
5194 18 : code = gimple_assign_rhs_code (stmt);
5195 18 : if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5196 : return false;
5197 :
5198 5 : e1 = gimple_assign_rhs1 (stmt);
5199 5 : e2 = gimple_assign_rhs2 (stmt);
5200 : }
5201 : else
5202 : {
5203 1 : code = TREE_CODE (base);
5204 1 : if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5205 : return false;
5206 0 : e1 = TREE_OPERAND (base, 0);
5207 0 : e2 = TREE_OPERAND (base, 1);
5208 : }
5209 :
5210 : /* Use affine expansion as deeper inspection to prove the equality. */
5211 5 : tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5212 : &aff_e2, &data->name_expansion_cache);
5213 5 : tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5214 : &aff_offset, &data->name_expansion_cache);
5215 5 : aff_combination_scale (&aff_offset, -1);
5216 5 : switch (code)
5217 : {
5218 3 : case PLUS_EXPR:
5219 3 : aff_combination_add (&aff_e2, &aff_offset);
5220 3 : if (aff_combination_zero_p (&aff_e2))
5221 : return true;
5222 :
5223 1 : tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5224 : &aff_e1, &data->name_expansion_cache);
5225 1 : aff_combination_add (&aff_e1, &aff_offset);
5226 1 : return aff_combination_zero_p (&aff_e1);
5227 :
5228 2 : case POINTER_PLUS_EXPR:
5229 2 : aff_combination_add (&aff_e2, &aff_offset);
5230 2 : return aff_combination_zero_p (&aff_e2);
5231 :
5232 : default:
5233 : return false;
5234 : }
5235 477 : }
5236 :
5237 : /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5238 : comparison with CAND. NITER describes the number of iterations of
5239 : the loops. If successful, the comparison in COMP_P is altered accordingly.
5240 :
5241 : We aim to handle the following situation:
5242 :
5243 : sometype *base, *p;
5244 : int a, b, i;
5245 :
5246 : i = a;
5247 : p = p_0 = base + a;
5248 :
5249 : do
5250 : {
5251 : bla (*p);
5252 : p++;
5253 : i++;
5254 : }
5255 : while (i < b);
5256 :
5257 : Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5258 : We aim to optimize this to
5259 :
5260 : p = p_0 = base + a;
5261 : do
5262 : {
5263 : bla (*p);
5264 : p++;
5265 : }
5266 : while (p < p_0 - a + b);
5267 :
5268 : This preserves the correctness, since the pointer arithmetics does not
5269 : overflow. More precisely:
5270 :
5271 : 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5272 : overflow in computing it or the values of p.
5273 : 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5274 : overflow. To prove this, we use the fact that p_0 = base + a. */
5275 :
5276 : static bool
5277 205614 : iv_elimination_compare_lt (struct ivopts_data *data,
5278 : struct iv_cand *cand, enum tree_code *comp_p,
5279 : class tree_niter_desc *niter)
5280 : {
5281 205614 : tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5282 616842 : class aff_tree nit, tmpa, tmpb;
5283 205614 : enum tree_code comp;
5284 205614 : HOST_WIDE_INT step;
5285 :
5286 : /* We need to know that the candidate induction variable does not overflow.
5287 : While more complex analysis may be used to prove this, for now just
5288 : check that the variable appears in the original program and that it
5289 : is computed in a type that guarantees no overflows. */
5290 205614 : cand_type = TREE_TYPE (cand->iv->base);
5291 205614 : if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5292 183641 : return false;
5293 :
5294 : /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5295 : the calculation of the BOUND could overflow, making the comparison
5296 : invalid. */
5297 21973 : if (!data->loop_single_exit_p)
5298 : return false;
5299 :
5300 : /* We need to be able to decide whether candidate is increasing or decreasing
5301 : in order to choose the right comparison operator. */
5302 15364 : if (!cst_and_fits_in_hwi (cand->iv->step))
5303 : return false;
5304 15364 : step = int_cst_value (cand->iv->step);
5305 :
5306 : /* Check that the number of iterations matches the expected pattern:
5307 : a + 1 > b ? 0 : b - a - 1. */
5308 15364 : mbz = niter->may_be_zero;
5309 15364 : if (TREE_CODE (mbz) == GT_EXPR)
5310 : {
5311 : /* Handle a + 1 > b. */
5312 1716 : tree op0 = TREE_OPERAND (mbz, 0);
5313 1716 : if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5314 : {
5315 794 : a = TREE_OPERAND (op0, 0);
5316 794 : b = TREE_OPERAND (mbz, 1);
5317 : }
5318 : else
5319 922 : return false;
5320 : }
5321 13648 : else if (TREE_CODE (mbz) == LT_EXPR)
5322 : {
5323 4637 : tree op1 = TREE_OPERAND (mbz, 1);
5324 :
5325 : /* Handle b < a + 1. */
5326 4637 : if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5327 : {
5328 82 : a = TREE_OPERAND (op1, 0);
5329 82 : b = TREE_OPERAND (mbz, 0);
5330 : }
5331 : else
5332 4555 : return false;
5333 : }
5334 : else
5335 : return false;
5336 :
5337 : /* Expected number of iterations is B - A - 1. Check that it matches
5338 : the actual number, i.e., that B - A - NITER = 1. */
5339 876 : tree_to_aff_combination (niter->niter, nit_type, &nit);
5340 876 : tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5341 876 : tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5342 876 : aff_combination_scale (&nit, -1);
5343 876 : aff_combination_scale (&tmpa, -1);
5344 876 : aff_combination_add (&tmpb, &tmpa);
5345 876 : aff_combination_add (&tmpb, &nit);
5346 876 : if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5347 399 : return false;
5348 :
5349 : /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5350 : overflow. */
5351 477 : offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5352 : cand->iv->step,
5353 : fold_convert (TREE_TYPE (cand->iv->step), a));
5354 477 : if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5355 : return false;
5356 :
5357 : /* Determine the new comparison operator. */
 : /* A decreasing candidate exits on GT, an increasing one on LT; flip
 : it when the original exit condition was EQ rather than NE. */
5358 4 : comp = step < 0 ? GT_EXPR : LT_EXPR;
5359 4 : if (*comp_p == NE_EXPR)
5360 4 : *comp_p = comp;
5361 0 : else if (*comp_p == EQ_EXPR)
5362 0 : *comp_p = invert_tree_comparison (comp, false);
5363 : else
5364 0 : gcc_unreachable ();
5365 :
5366 : return true;
5367 205614 : }
5368 :
5369 : /* Check whether it is possible to express the condition in USE by comparison
5370 : of candidate CAND. If so, store the value compared with to BOUND, and the
5371 : comparison operator to COMP. */
5372 :
5373 : static bool
5374 4904011 : may_eliminate_iv (struct ivopts_data *data,
5375 : struct iv_use *use, struct iv_cand *cand, tree *bound,
5376 : enum tree_code *comp)
5377 : {
5378 4904011 : basic_block ex_bb;
5379 4904011 : edge exit;
5380 4904011 : tree period;
5381 4904011 : class loop *loop = data->current_loop;
5382 4904011 : aff_tree bnd;
5383 4904011 : class tree_niter_desc *desc = NULL;
5384 :
5385 : /* If the IV candidate involves undefs do not attempt to use it to
5386 : express a condition. */
5387 4904011 : if (cand->involves_undefs)
5388 : return false;
5389 :
5390 : /* Only constant-step candidates can be used here; iv_period and the
5391 : wrap checks below rely on a compile-time step. */
5392 4903659 : if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5393 : return false;
5394 :
5395 : /* For now works only for exits that dominate the loop latch.
5396 : TODO: extend to other conditions inside loop body. */
5397 4709422 : ex_bb = gimple_bb (use->stmt);
5398 4709422 : if (use->stmt != last_nondebug_stmt (ex_bb)
5399 4602717 : || gimple_code (use->stmt) != GIMPLE_COND
5400 9309974 : || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5401 236704 : return false;
5402 :
5403 : /* Pick the successor edge that leaves the loop; if neither does,
5404 : this condition is not a loop exit. */
5405 4472718 : exit = EDGE_SUCC (ex_bb, 0);
5406 4472718 : if (flow_bb_inside_loop_p (loop, exit->dest))
5407 3367868 : exit = EDGE_SUCC (ex_bb, 1);
5408 4472718 : if (flow_bb_inside_loop_p (loop, exit->dest))
5409 : return false;
5410 :
5411 4355882 : desc = niter_for_exit (data, exit);
5412 4355882 : if (!desc)
5413 : return false;
5414 :
5415 : /* Determine whether we can use the variable to test the exit condition.
5416 : This is the case iff the period of the induction variable is greater
5417 : than the number of iterations for which the exit condition is true. */
5418 4062583 : period = iv_period (cand->iv);
5419 :
5420 : /* If the number of iterations is constant, compare against it directly. */
5421 4062583 : if (TREE_CODE (desc->niter) == INTEGER_CST)
5422 : {
5423 : /* See cand_value_at. */
5424 2658426 : if (stmt_after_increment (loop, cand, use->stmt))
5425 : {
5426 2602847 : if (!tree_int_cst_lt (desc->niter, period))
5427 : return false;
5428 : }
5429 : else
5430 : {
5431 55579 : if (tree_int_cst_lt (period, desc->niter))
5432 : return false;
5433 : }
5434 : }
5435 :
5436 : /* If not, and if this is the only possible exit of the loop, see whether
5437 : we can get a conservative estimate on the number of iterations of the
5438 : entire loop and compare against that instead. */
5439 : else
5440 : {
5441 1404157 : widest_int period_value, max_niter;
5442 :
5443 : /* desc->max is a conservative bound on the iteration count; add 1
5444 : when the use is after the increment, since the candidate then
5445 : takes one extra value before the test. */
5446 1404157 : max_niter = desc->max;
5447 1404157 : if (stmt_after_increment (loop, cand, use->stmt))
5448 1196531 : max_niter += 1;
5449 1404157 : period_value = wi::to_widest (period);
5450 1404157 : if (wi::gtu_p (max_niter, period_value))
5451 : {
5452 : /* See if we can take advantage of inferred loop bound
5453 : information. */
5454 471393 : if (data->loop_single_exit_p)
5455 : {
5456 282146 : if (!max_loop_iterations (loop, &max_niter))
5457 : return false;
5458 : /* The loop bound is already adjusted by adding 1. */
5459 282146 : if (wi::gtu_p (max_niter, period_value))
5460 : return false;
5461 : }
5462 : else
5463 : return false;
5464 : }
5465 1404157 : }
5466 :
5467 : /* For doloop IV cand, the bound would be zero. It's safe whether
5468 : may_be_zero set or not. */
5469 3796389 : if (cand->doloop_p)
5470 : {
5471 0 : *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5472 0 : *comp = iv_elimination_compare (data, use);
5473 0 : return true;
5474 : }
5475 :
5476 : /* Compute the value the candidate has when the loop exits; that value
5477 : becomes the new bound to compare against. */
5478 3796389 : cand_value_at (loop, cand, use->stmt, desc, &bnd);
5479 :
5480 3796389 : *bound = fold_convert (TREE_TYPE (cand->iv->base),
5481 : aff_combination_to_tree (&bnd));
5482 3796389 : *comp = iv_elimination_compare (data, use);
5483 :
5484 : /* It is unlikely that computing the number of iterations using division
5485 : would be more profitable than keeping the original induction variable. */
5486 3796389 : bool cond_overflow_p;
5487 3796389 : if (expression_expensive_p (*bound, &cond_overflow_p))
5488 : return false;
5489 :
5490 : /* Sometimes, it is possible to handle the situation that the number of
5491 : iterations may be zero unless additional assumptions by using <
5492 : instead of != in the exit condition.
5493 :
5494 : TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5495 : base the exit condition on it. However, that is often too
5496 : expensive. */
5497 3787529 : if (!integer_zerop (desc->may_be_zero))
5498 205614 : return iv_elimination_compare_lt (data, cand, comp, desc);
5499 :
5500 : return true;
5501 4904011 : }
5493 :
5494 : /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5495 : be copied, if it is used in the loop body and DATA->body_includes_call. */
5496 :
5497 : static int
5498 8333292 : parm_decl_cost (struct ivopts_data *data, tree bound)
5499 : {
5500 8333292 : tree sbound = bound;
5501 : /* Look through no-op conversions to find the underlying SSA name. */
5502 8333292 : STRIP_NOPS (sbound);
5503 :
5504 : /* A default-definition SSA name of a PARM_DECL lives in an incoming
5505 : argument location; if the loop body contains a call it must be
5506 : copied into a register, costing one instruction. */
5507 8333292 : if (TREE_CODE (sbound) == SSA_NAME
5508 2894619 : && SSA_NAME_IS_DEFAULT_DEF (sbound)
5509 151970 : && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5510 8483220 : && data->body_includes_call)
5511 37495 : return COSTS_N_INSNS (1);
5512 :
5513 : return 0;
5514 : }
5511 :
5512 : /* Determines cost of computing the use in GROUP with CAND in a condition.
5513 : Considers both eliminating the original IV (rewriting the exit test to
5514 : compare CAND against a bound) and merely expressing the condition via
5515 : CAND, and records the cheaper alternative. Returns false if the cost
5516 : is infinite, i.e. the use cannot be represented with CAND. */
5517 :
5518 : static bool
5519 5910531 : determine_group_iv_cost_cond (struct ivopts_data *data,
5520 : struct iv_group *group, struct iv_cand *cand)
5521 : {
5522 5910531 : tree bound = NULL_TREE;
5523 5910531 : struct iv *cmp_iv;
5524 5910531 : bitmap inv_exprs = NULL;
5525 5910531 : bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5526 5910531 : comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5527 5910531 : enum comp_iv_rewrite rewrite_type;
5528 5910531 : iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5529 5910531 : tree *control_var, *bound_cst;
5530 5910531 : enum tree_code comp = ERROR_MARK;
5531 5910531 : struct iv_use *use = group->vuses[0];
5532 :
5533 : /* Extract condition operands. */
5534 5910531 : rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5535 : &bound_cst, NULL, &cmp_iv);
5536 5910531 : gcc_assert (rewrite_type != COMP_IV_NA);
5537 :
5538 : /* Try iv elimination. */
5539 5910531 : if (rewrite_type == COMP_IV_ELIM
5540 5910531 : && may_eliminate_iv (data, use, cand, &bound, &comp))
5541 : {
5542 3581919 : elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5543 3581919 : if (elim_cost.cost == 0)
5544 2444063 : elim_cost.cost = parm_decl_cost (data, bound);
5545 1137856 : else if (TREE_CODE (bound) == INTEGER_CST)
5546 0 : elim_cost.cost = 0;
5547 : /* If we replace a loop condition 'i < n' with 'p < base + n',
5548 : inv_vars_elim will have 'base' and 'n' set, which implies that both
5549 : 'base' and 'n' will be live during the loop. More likely,
5550 : 'base + n' will be loop invariant, resulting in only one live value
5551 : during the loop. So in that case we clear inv_vars_elim and set
5552 : inv_expr_elim instead. */
5553 3581919 : if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5554 : {
5555 310985 : inv_expr_elim = get_loop_invariant_expr (data, bound);
5556 310985 : bitmap_clear (inv_vars_elim);
5557 : }
5558 : /* The bound is a loop invariant, so it will be only computed
5559 : once. */
5560 3581919 : elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5561 : }
5562 :
5563 : /* When the condition is a comparison of the candidate IV against
5564 : zero, prefer this IV.
5565 :
5566 : TODO: The constant that we're subtracting from the cost should
5567 : be target-dependent. This information should be added to the
5568 : target costs for each backend. */
5569 5910531 : if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5570 3581919 : && integer_zerop (*bound_cst)
5571 8502595 : && (operand_equal_p (*control_var, cand->var_after, 0)
5572 2343709 : || operand_equal_p (*control_var, cand->var_before, 0)))
5573 254607 : elim_cost -= 1;
5574 :
5575 5910531 : express_cost = get_computation_cost (data, use, cand, false,
5576 : &inv_vars_express, NULL,
5577 : &inv_expr_express);
5578 5910531 : if (cmp_iv != NULL)
5579 4992995 : find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5580 :
5581 : /* Count the cost of the original bound as well. */
5582 5910531 : bound_cost = force_var_cost (data, *bound_cst, NULL);
5583 5910531 : if (bound_cost.cost == 0)
5584 5889229 : bound_cost.cost = parm_decl_cost (data, *bound_cst);
5585 21302 : else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5586 0 : bound_cost.cost = 0;
5587 5910531 : express_cost += bound_cost;
5588 :
5589 : /* Choose the better approach, preferring the eliminated IV. */
5590 5910531 : if (elim_cost <= express_cost)
5591 : {
5592 4500104 : cost = elim_cost;
5593 4500104 : inv_vars = inv_vars_elim;
5594 4500104 : inv_vars_elim = NULL;
5595 4500104 : inv_expr = inv_expr_elim;
5596 : /* For doloop candidate/use pair, adjust to zero cost. */
5597 4500104 : if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5598 0 : cost = no_cost;
5599 : }
5600 : else
5601 : {
5602 1410427 : cost = express_cost;
5603 1410427 : inv_vars = inv_vars_express;
5604 1410427 : inv_vars_express = NULL;
5605 : /* Expressing won; no elimination bound or comparison applies. */
5606 1410427 : bound = NULL_TREE;
5607 1410427 : comp = ERROR_MARK;
5608 1410427 : inv_expr = inv_expr_express;
5609 : }
5610 :
5611 5910531 : if (inv_expr)
5612 : {
5613 599894 : inv_exprs = BITMAP_ALLOC (NULL);
5614 599894 : bitmap_set_bit (inv_exprs, inv_expr->id);
5615 : }
5616 5910531 : set_group_iv_cost (data, group, cand, cost,
5617 : inv_vars, bound, comp, inv_exprs);
5618 :
5619 : /* Free the bitmap of the alternative that lost (ownership of the
5620 : winner's bitmap passed to set_group_iv_cost above). */
5621 5910531 : if (inv_vars_elim)
5622 24071 : BITMAP_FREE (inv_vars_elim);
5623 5910531 : if (inv_vars_express)
5624 1250352 : BITMAP_FREE (inv_vars_express);
5625 :
5626 5910531 : return !cost.infinite_cost_p ();
5627 : }
5621 :
5622 : /* Determines cost of computing uses in GROUP with CAND. Returns false
5623 : if USE cannot be represented with CAND. Dispatches to the handler
5624 : matching the kind of use in the group. */
5625 :
5626 : static bool
5627 17689526 : determine_group_iv_cost (struct ivopts_data *data,
5628 : struct iv_group *group, struct iv_cand *cand)
5629 : {
5630 17689526 : switch (group->type)
5631 : {
5632 5504461 : case USE_NONLINEAR_EXPR:
5633 5504461 : return determine_group_iv_cost_generic (data, group, cand);
5634 :
5635 6274534 : case USE_REF_ADDRESS:
5636 6274534 : case USE_PTR_ADDRESS:
5637 6274534 : return determine_group_iv_cost_address (data, group, cand);
5638 :
5639 5910531 : case USE_COMPARE:
5640 5910531 : return determine_group_iv_cost_cond (data, group, cand);
5641 :
5642 0 : default:
5643 0 : gcc_unreachable ();
5644 : }
5645 : }
5645 :
5646 : /* Return true if get_computation_cost indicates that autoincrement is
5647 : a possibility for the pair of USE and CAND, false otherwise. */
5648 :
5649 : static bool
5650 1278698 : autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5651 : struct iv_cand *cand)
5652 : {
5653 : /* Only memory-address uses can fold the step into an auto-inc/dec
5654 : addressing mode. */
5655 1278698 : if (!address_p (use->type))
5656 : return false;
5657 :
5658 416183 : bool can_autoinc = false;
5659 416183 : get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5660 416183 : return can_autoinc;
5661 : }
5660 :
5661 : /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5662 : use that allows autoincrement, and set their AINC_USE if possible.
5663 : Statement uids give the order of statements within a basic block, so
5664 : the nearest use before/after the increment can be found by comparing
5665 : uids within the increment's block. */
5666 :
5667 : static void
5668 500070 : set_autoinc_for_original_candidates (struct ivopts_data *data)
5669 : {
5670 500070 : unsigned i, j;
5671 :
5672 5099281 : for (i = 0; i < data->vcands.length (); i++)
5673 : {
5674 4599211 : struct iv_cand *cand = data->vcands[i];
5675 4599211 : struct iv_use *closest_before = NULL;
5676 4599211 : struct iv_use *closest_after = NULL;
5677 4599211 : if (cand->pos != IP_ORIGINAL)
5678 3734318 : continue;
5679 :
5680 : /* Find the uses nearest to the increment, before and after it. */
5681 3814070 : for (j = 0; j < data->vgroups.length (); j++)
5682 : {
5683 2949177 : struct iv_group *group = data->vgroups[j];
5684 2949177 : struct iv_use *use = group->vuses[0];
5685 2949177 : unsigned uid = gimple_uid (use->stmt);
5686 :
5687 2949177 : if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5688 1175964 : continue;
5689 :
5690 1773213 : if (uid < gimple_uid (cand->incremented_at)
5691 1773213 : && (closest_before == NULL
5692 371419 : || uid > gimple_uid (closest_before->stmt)))
5693 : closest_before = use;
5694 :
5695 1773213 : if (uid > gimple_uid (cand->incremented_at)
5696 1773213 : && (closest_after == NULL
5697 66350 : || uid < gimple_uid (closest_after->stmt)))
5698 : closest_after = use;
5699 : }
5700 :
5701 : /* Prefer the use before the increment if both allow autoinc. */
5702 864893 : if (closest_before != NULL
5703 864893 : && autoinc_possible_for_pair (data, closest_before, cand))
5704 0 : cand->ainc_use = closest_before;
5705 864893 : else if (closest_after != NULL
5706 864893 : && autoinc_possible_for_pair (data, closest_after, cand))
5707 0 : cand->ainc_use = closest_after;
5708 : }
5709 500070 : }
5705 :
5706 : /* Relate compare use with all candidates. Marks every candidate as
5707 : related to each USE_COMPARE group, so all of them are considered when
5708 : costing the exit conditions (called only when not all candidates are
5709 : considered for every group; see find_iv_candidates). */
5710 :
5711 : static void
5712 299 : relate_compare_use_with_all_cands (struct ivopts_data *data)
5713 : {
5714 299 : unsigned i, count = data->vcands.length ();
5715 10000 : for (i = 0; i < data->vgroups.length (); i++)
5716 : {
5717 9701 : struct iv_group *group = data->vgroups[i];
5718 :
5719 9701 : if (group->type == USE_COMPARE)
5720 2185 : bitmap_set_range (group->related_cands, 0, count);
5721 : }
5722 299 : }
5720 :
5721 : /* If PREFERRED_MODE is suitable and profitable, use the preferred
5722 : PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1.
5723 : ITERATIONS_MAX is an upper bound on the iteration count, used to prove
5724 : the addition cannot wrap in the chosen type. */
5725 :
5726 : static tree
5727 0 : compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5728 : const widest_int &iterations_max)
5729 : {
5730 0 : tree ntype = TREE_TYPE (niter);
5731 0 : tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5732 : /* No type for the preferred mode: fall back to NITER's own type. */
5733 0 : if (!pref_type)
5734 0 : return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5735 : build_int_cst (ntype, 1));
5736 :
5737 0 : gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5738 :
5739 0 : int prec = TYPE_PRECISION (ntype);
5740 0 : int pref_prec = TYPE_PRECISION (pref_type);
5741 :
5742 0 : tree base;
5743 :
5744 : /* Check if the PREFERRED_MODED is able to present niter. */
5745 0 : if (pref_prec > prec
5746 0 : || wi::ltu_p (iterations_max,
5747 0 : widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5748 : UNSIGNED)))
5749 : {
5750 : /* No wrap, it is safe to use preferred type after niter + 1. */
5751 0 : if (wi::ltu_p (iterations_max,
5752 0 : widest_int::from (wi::max_value (prec, UNSIGNED),
5753 : UNSIGNED)))
5754 : {
5755 : /* This could help to optimize "-1 +1" pair when niter looks
5756 : like "n-1": n is in original mode. "base = (n - 1) + 1"
5757 : in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n. */
5758 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5759 : build_int_cst (ntype, 1));
5760 0 : base = fold_convert (pref_type, base);
5761 : }
5762 :
5763 : /* To avoid wrap, convert niter to preferred type before plus 1. */
5764 : else
5765 : {
5766 0 : niter = fold_convert (pref_type, niter);
5767 0 : base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5768 : build_int_cst (pref_type, 1));
5769 : }
5770 : }
5771 : else
5772 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5773 : build_int_cst (ntype, 1));
5774 : return base;
5775 : }
5773 :
5774 : /* Add one doloop dedicated IV candidate:
5775 : - Base is (may_be_zero ? 1 : (niter + 1)).
5776 : - Step is -1. */
5777 :
5778 : static void
5779 0 : add_iv_candidate_for_doloop (struct ivopts_data *data)
5780 : {
5781 0 : tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5782 0 : gcc_assert (niter_desc && niter_desc->assumptions);
5783 :
5784 0 : tree niter = niter_desc->niter;
5785 0 : tree ntype = TREE_TYPE (niter);
5786 0 : gcc_assert (INTEGRAL_NB_TYPE_P (ntype));
5787 :
5788 : /* Normalize a trivially-false may_be_zero to "absent". */
5789 0 : tree may_be_zero = niter_desc->may_be_zero;
5790 0 : if (may_be_zero && integer_zerop (may_be_zero))
5791 : may_be_zero = NULL_TREE;
5792 0 : if (may_be_zero)
5793 : {
5794 0 : if (COMPARISON_CLASS_P (may_be_zero))
5795 : {
5796 0 : niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5797 : build_int_cst (ntype, 0),
5798 : rewrite_to_non_trapping_overflow (niter));
5799 : }
5800 : /* Don't try to obtain the iteration count expression when may_be_zero is
5801 : integer_nonzerop (actually iteration count is one) or else. */
5802 : else
5803 : return;
5804 : }
5805 :
5806 : /* Let the target pick a mode better suited for the count register. */
5807 0 : machine_mode mode = TYPE_MODE (ntype);
5808 0 : machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5809 :
5810 0 : tree base;
5811 0 : if (mode != pref_mode)
5812 : {
5813 0 : base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5814 0 : ntype = TREE_TYPE (base);
5815 : }
5816 : else
5817 0 : base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5818 : build_int_cst (ntype, 1));
5819 :
5820 : /* For non integer types or non-mode precision types,
5821 : convert directly to an integer type. */
5822 0 : if (TREE_CODE (ntype) != INTEGER_TYPE
5823 0 : || !type_has_mode_precision_p (ntype))
5824 : {
5825 0 : ntype = lang_hooks.types.type_for_mode (TYPE_MODE (ntype),
5826 0 : TYPE_UNSIGNED (ntype));
5827 0 : base = fold_convert (ntype, base);
5828 : }
5829 :
5830 0 : add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5831 : }
5830 :
5831 : /* Finds the candidates for the induction variables. Populates
5832 : DATA->vcands from the standard ivs, doloop ivs, existing bivs and
5833 : uses, then records important candidates and (if not all candidates
5834 : are considered per group) relates compare uses with all of them. */
5835 :
5836 : static void
5837 500070 : find_iv_candidates (struct ivopts_data *data)
5838 : {
5839 : /* Add commonly used ivs. */
5840 500070 : add_standard_iv_candidates (data);
5841 :
5842 : /* Add doloop dedicated ivs. */
5843 500070 : if (data->doloop_use_p)
5844 0 : add_iv_candidate_for_doloop (data);
5845 :
5846 : /* Add old induction variables. */
5847 500070 : add_iv_candidate_for_bivs (data);
5848 :
5849 : /* Add induction variables derived from uses. */
5850 500070 : add_iv_candidate_for_groups (data);
5851 :
5852 500070 : set_autoinc_for_original_candidates (data);
5853 :
5854 : /* Record the important candidates. */
5855 500070 : record_important_candidates (data);
5856 :
5857 : /* Relate compare iv_use with all candidates. */
5858 500070 : if (!data->consider_all_candidates)
5859 299 : relate_compare_use_with_all_cands (data);
5860 :
5861 : /* Dump the important candidates and the group/candidate relations. */
5862 500070 : if (dump_file && (dump_flags & TDF_DETAILS))
5863 : {
5864 67 : unsigned i;
5865 :
5866 67 : fprintf (dump_file, "\n<Important Candidates>:\t");
5867 820 : for (i = 0; i < data->vcands.length (); i++)
5868 686 : if (data->vcands[i]->important)
5869 492 : fprintf (dump_file, " %d,", data->vcands[i]->id);
5870 67 : fprintf (dump_file, "\n");
5871 :
5872 67 : fprintf (dump_file, "\n<Group, Cand> Related:\n");
5873 287 : for (i = 0; i < data->vgroups.length (); i++)
5874 : {
5875 220 : struct iv_group *group = data->vgroups[i];
5876 :
5877 220 : if (group->related_cands)
5878 : {
5879 220 : fprintf (dump_file, " Group %d:\t", group->id);
5880 220 : dump_bitmap (dump_file, group->related_cands);
5881 : }
5882 : }
5883 67 : fprintf (dump_file, "\n");
5884 : }
5885 500070 : }
5882 :
5883 : /* Determines costs of computing use of iv with an iv candidate.
5884 : Fills the per-group cost maps; for the restricted (not
5885 : consider_all_candidates) mode, also prunes infinite-cost candidates
5886 : from each group's related_cands. */
5887 :
5888 : static void
5889 500070 : determine_group_iv_costs (struct ivopts_data *data)
5890 : {
5891 500070 : unsigned i, j;
5892 500070 : struct iv_cand *cand;
5893 500070 : struct iv_group *group;
5894 : /* Scratch bitmap collecting infinite-cost candidates per group;
5895 : reused (cleared) across groups and freed at the end. */
5896 500070 : bitmap to_clear = BITMAP_ALLOC (NULL);
5897 :
5898 500070 : alloc_use_cost_map (data);
5899 :
5900 2140764 : for (i = 0; i < data->vgroups.length (); i++)
5901 : {
5902 1640694 : group = data->vgroups[i];
5903 :
5904 1640694 : if (data->consider_all_candidates)
5905 : {
5906 18975784 : for (j = 0; j < data->vcands.length (); j++)
5907 : {
5908 17335090 : cand = data->vcands[j];
5909 17335090 : determine_group_iv_cost (data, group, cand);
5910 : }
5911 : }
5912 : else
5913 : {
5914 9701 : bitmap_iterator bi;
5915 :
5916 364137 : EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5917 : {
5918 354436 : cand = data->vcands[j];
5919 354436 : if (!determine_group_iv_cost (data, group, cand))
5920 210918 : bitmap_set_bit (to_clear, j);
5921 : }
5922 :
5923 : /* Remove the candidates for that the cost is infinite from
5924 : the list of related candidates. */
5925 9701 : bitmap_and_compl_into (group->related_cands, to_clear);
5926 9701 : bitmap_clear (to_clear);
5927 : }
5928 : }
5929 :
5930 500070 : BITMAP_FREE (to_clear);
5931 :
5932 : /* The remainder of the function only dumps the computed information. */
5933 500070 : if (dump_file && (dump_flags & TDF_DETAILS))
5934 : {
5935 67 : bitmap_iterator bi;
5936 :
5937 : /* Dump invariant variables. */
5938 67 : fprintf (dump_file, "\n<Invariant Vars>:\n");
5939 1041 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5940 : {
5941 974 : struct version_info *info = ver_info (data, i);
5942 974 : if (info->inv_id)
5943 : {
5944 222 : fprintf (dump_file, "Inv %d:\t", info->inv_id);
5945 222 : print_generic_expr (dump_file, info->name, TDF_SLIM);
5946 222 : fprintf (dump_file, "%s\n",
5947 222 : info->has_nonlin_use ? "" : "\t(eliminable)");
5948 : }
5949 : }
5950 :
5951 : /* Dump invariant expressions. */
5952 67 : fprintf (dump_file, "\n<Invariant Expressions>:\n");
5953 67 : auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5954 :
5955 439 : for (hash_table<iv_inv_expr_hasher>::iterator it
5956 506 : = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5957 372 : ++it)
5958 372 : list.safe_push (*it);
5959 :
5960 : /* Sort for a stable dump output. */
5961 67 : list.qsort (sort_iv_inv_expr_ent);
5962 :
5963 439 : for (i = 0; i < list.length (); ++i)
5964 : {
5965 372 : fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5966 372 : print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5967 372 : fprintf (dump_file, "\n");
5968 : }
5969 :
5970 67 : fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5971 :
5972 287 : for (i = 0; i < data->vgroups.length (); i++)
5973 : {
5974 220 : group = data->vgroups[i];
5975 :
5976 220 : fprintf (dump_file, "Group %d:\n", i);
5977 220 : fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5978 2982 : for (j = 0; j < group->n_map_members; j++)
5979 : {
5980 3856 : if (!group->cost_map[j].cand
5981 2762 : || group->cost_map[j].cost.infinite_cost_p ())
5982 1094 : continue;
5983 :
5984 1668 : fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5985 1668 : group->cost_map[j].cand->id,
5986 : group->cost_map[j].cost.cost,
5987 1668 : group->cost_map[j].cost.complexity);
5988 1668 : if (!group->cost_map[j].inv_exprs
5989 1668 : || bitmap_empty_p (group->cost_map[j].inv_exprs))
5990 1168 : fprintf (dump_file, "NIL;\t");
5991 : else
5992 500 : bitmap_print (dump_file,
5993 : group->cost_map[j].inv_exprs, "", ";\t");
5994 1668 : if (!group->cost_map[j].inv_vars
5995 1668 : || bitmap_empty_p (group->cost_map[j].inv_vars))
5996 1347 : fprintf (dump_file, "NIL;\n");
5997 : else
5998 321 : bitmap_print (dump_file,
5999 : group->cost_map[j].inv_vars, "", "\n");
6000 : }
6001 :
6002 220 : fprintf (dump_file, "\n");
6003 : }
6004 67 : fprintf (dump_file, "\n");
6005 67 : }
6006 500070 : }
6000 :
6001 : /* Determines cost of the candidate CAND. Stores the increment cost in
6002 : CAND->cost_step and the combined setup + increment cost in CAND->cost. */
6003 :
6004 : static void
6005 4599211 : determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
6006 : {
6007 4599211 : comp_cost cost_base;
6008 4599211 : int64_t cost, cost_step;
6009 4599211 : tree base;
6010 :
6011 4599211 : gcc_assert (cand->iv != NULL);
6012 :
6013 : /* There are two costs associated with the candidate -- its increment
6014 : and its initialization. The second is almost negligible for any loop
6015 : that rolls enough, so we take it just very little into account. */
6016 :
6017 4599211 : base = cand->iv->base;
6018 4599211 : cost_base = force_var_cost (data, base, NULL);
6019 : /* It will be exceptional that the iv register happens to be initialized with
6020 : the proper value at no cost. In general, there will at least be a regcopy
6021 : or a const set. */
6022 4599211 : if (cost_base.cost == 0)
6023 3650205 : cost_base.cost = COSTS_N_INSNS (1);
6024 : /* Doloop decrement should be considered as zero cost. */
6025 4599211 : if (cand->doloop_p)
6026 : cost_step = 0;
6027 : else
6028 4599211 : cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6029 : /* Setup cost is amortized over the expected loop iterations. */
6030 4599211 : cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6031 :
6032 : /* Prefer the original ivs unless we may gain something by replacing it.
6033 : The reason is to make debugging simpler; so this is not relevant for
6034 : artificial ivs created by other optimization passes. */
6035 4599211 : if ((cand->pos != IP_ORIGINAL
6036 864893 : || !SSA_NAME_VAR (cand->var_before)
6037 432438 : || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6038 : /* Prefer doloop as well. */
6039 5122005 : && !cand->doloop_p)
6040 4257112 : cost++;
6041 :
6042 : /* Prefer not to insert statements into latch unless there are some
6043 : already (so that we do not create unnecessary jumps). */
6044 4599211 : if (cand->pos == IP_END
6045 4599211 : && empty_block_p (ip_end_pos (data->current_loop)))
6046 1951 : cost++;
6047 :
6048 4599211 : cand->cost = cost;
6049 4599211 : cand->cost_step = cost_step;
6050 : }
6049 :
6050 : /* Determines costs of computation of the candidates. Runs
6051 : determine_iv_cost for each candidate and optionally dumps the table. */
6052 :
6053 : static void
6054 500070 : determine_iv_costs (struct ivopts_data *data)
6055 : {
6056 500070 : unsigned i;
6057 :
6058 500070 : if (dump_file && (dump_flags & TDF_DETAILS))
6059 : {
6060 67 : fprintf (dump_file, "<Candidate Costs>:\n");
6061 67 : fprintf (dump_file, " cand\tcost\n");
6062 : }
6063 :
6064 5099281 : for (i = 0; i < data->vcands.length (); i++)
6065 : {
6066 4599211 : struct iv_cand *cand = data->vcands[i];
6067 :
6068 4599211 : determine_iv_cost (data, cand);
6069 :
6070 4599211 : if (dump_file && (dump_flags & TDF_DETAILS))
6071 686 : fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6072 : }
6073 :
6074 500070 : if (dump_file && (dump_flags & TDF_DETAILS))
6075 67 : fprintf (dump_file, "\n");
6076 500070 : }
6076 :
6077 : /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6078 : induction variables. Note N_INVS includes both invariant variables and
6079 : invariant expressions. Returns a cost that grows as the new values
6080 : approach and then exceed the available hard registers. */
6081 :
6082 : static unsigned
6083 418817455 : ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6084 : unsigned n_cands)
6085 : {
6086 418817455 : unsigned cost;
6087 418817455 : unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6088 418817455 : unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6089 418817455 : bool speed = data->speed;
6090 :
6091 : /* If there is a call in the loop body, the call-clobbered registers
6092 : are not available for loop invariants. */
6093 418817455 : if (data->body_includes_call)
6094 93434586 : available_regs = available_regs - target_clobbered_regs;
6095 :
6096 : /* If we have enough registers. */
6097 418817455 : if (regs_needed + target_res_regs < available_regs)
6098 : cost = n_new;
6099 : /* If close to running out of registers, try to preserve them. */
6100 183555800 : else if (regs_needed <= available_regs)
6101 51763018 : cost = target_reg_cost [speed] * regs_needed;
6102 : /* If we run out of available registers but the number of candidates
6103 : does not, we penalize extra registers using target_spill_cost. */
6104 131792782 : else if (n_cands <= available_regs)
6105 117694906 : cost = target_reg_cost [speed] * available_regs
6106 117694906 : + target_spill_cost [speed] * (regs_needed - available_regs);
6107 : /* If the number of candidates runs out available registers, we penalize
6108 : extra candidate registers using target_spill_cost * 2. Because it is
6109 : more expensive to spill induction variable than invariant. */
6110 : else
6111 14097876 : cost = target_reg_cost [speed] * available_regs
6112 14097876 : + target_spill_cost [speed] * (n_cands - available_regs) * 2
6113 14097876 : + target_spill_cost [speed] * (regs_needed - n_cands);
6114 :
6115 : /* Finally, add the number of candidates, so that we prefer eliminating
6116 : induction variables if possible. */
6117 418817455 : return cost + n_cands;
6118 : }
6118 :
6119 : /* For each size of the induction variable set determine the penalty.
6120 : Computes DATA->regs_used: the number of registers used by values live
6121 : across the loop that are not induction variables (non-iv integer or
6122 : pointer PHIs plus invariants with nonlinear uses). */
6123 :
6124 : static void
6125 500070 : determine_set_costs (struct ivopts_data *data)
6126 : {
6127 500070 : unsigned j, n;
6128 500070 : gphi *phi;
6129 500070 : gphi_iterator psi;
6130 500070 : tree op;
6131 500070 : class loop *loop = data->current_loop;
6132 500070 : bitmap_iterator bi;
6133 :
6134 500070 : if (dump_file && (dump_flags & TDF_DETAILS))
6135 : {
6136 67 : fprintf (dump_file, "<Global Costs>:\n");
6137 67 : fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6138 67 : fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6139 67 : fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6140 67 : fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6141 : }
6142 :
6143 : /* Count header PHIs that are loop-carried values but not ivs; each
6144 : occupies a register for the whole loop. */
6145 500070 : n = 0;
6146 1945731 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6147 : {
6148 1445661 : phi = psi.phi ();
6149 1445661 : op = PHI_RESULT (phi);
6150 :
6151 2891322 : if (virtual_operand_p (op))
6152 306096 : continue;
6153 :
6154 1139565 : if (get_iv (data, op))
6155 870083 : continue;
6156 :
6157 498585 : if (!POINTER_TYPE_P (TREE_TYPE (op))
6158 498422 : && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6159 101846 : continue;
6160 :
6161 167636 : n++;
6162 : }
6163 :
6164 : /* Invariants with nonlinear uses stay live as well. */
6165 5515385 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6166 : {
6167 5015315 : struct version_info *info = ver_info (data, j);
6168 :
6169 5015315 : if (info->inv_id && info->has_nonlin_use)
6170 506131 : n++;
6171 : }
6172 :
6173 500070 : data->regs_used = n;
6174 500070 : if (dump_file && (dump_flags & TDF_DETAILS))
6175 67 : fprintf (dump_file, " regs_used %d\n", n);
6176 :
6177 500070 : if (dump_file && (dump_flags & TDF_DETAILS))
6178 : {
6179 67 : fprintf (dump_file, " cost for size:\n");
6180 67 : fprintf (dump_file, " ivs\tcost\n");
6181 2144 : for (j = 0; j <= 2 * target_avail_regs; j++)
6182 2077 : fprintf (dump_file, " %d\t%d\n", j,
6183 : ivopts_estimate_reg_pressure (data, 0, j));
6184 67 : fprintf (dump_file, "\n");
6185 : }
6186 500070 : }
6181 :
6182 : /* Returns true if A is a cheaper cost pair than B. A NULL pair counts
6183 : as infinitely expensive; ties on use cost break on candidate cost. */
6184 :
6185 : static bool
6186 82730267 : cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6187 : {
6188 82730267 : if (!a)
6189 : return false;
6190 :
6191 77436744 : if (!b)
6192 : return true;
6193 :
6194 74245385 : if (a->cost < b->cost)
6195 : return true;
6196 :
6197 54897704 : if (b->cost < a->cost)
6198 : return false;
6199 :
6200 : /* In case the costs are the same, prefer the cheaper candidate. */
6201 31457588 : if (a->cand->cost < b->cand->cost)
6202 : return true;
6203 :
6204 : return false;
6205 : }
6205 :
6206 : /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6207 : for more expensive, equal and cheaper respectively (a three-way
6208 : comparison built on cheaper_cost_pair). */
6209 :
6210 : static int
6211 29168422 : compare_cost_pair (class cost_pair *a, class cost_pair *b)
6212 : {
6213 29168422 : if (cheaper_cost_pair (a, b))
6214 : return -1;
6215 22992381 : if (cheaper_cost_pair (b, a))
6216 14854282 : return 1;
6217 :
6218 : return 0;
6219 : }
6219 :
6220 : /* Returns candidate by that USE is expressed in IVS, i.e. the cost pair
6221 : recorded for GROUP in the candidate assignment IVS (NULL if none). */
6222 :
6223 : static class cost_pair *
6224 279247948 : iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6225 : {
6226 279247948 : return ivs->cand_for_group[group->id];
6227 : }
6227 :
6228 : /* Computes the cost field of IVS structure: use costs plus candidate
6229 : costs plus the estimated register pressure. */
6230 :
6231 : static void
6232 418815130 : iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6233 : {
6234 418815130 : comp_cost cost = ivs->cand_use_cost;
6235 :
6236 418815130 : cost += ivs->cand_cost;
6237 418815130 : cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6238 418815130 : ivs->cost = cost;
6239 418815130 : }
6239 :
6240 : /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6241 : and IVS. IVS->n_invs is decremented for each invariant whose use count
6242 : drops to zero. A NULL INVS is a no-op. */
6243 :
6244 : static void
6245 573707938 : iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6246 : {
6247 573707938 : bitmap_iterator bi;
6248 573707938 : unsigned iid;
6249 :
6250 573707938 : if (!invs)
6251 455812770 : return;
6252 :
6253 573707938 : gcc_assert (n_inv_uses != NULL);
6254 203424873 : EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6255 : {
6256 85529705 : n_inv_uses[iid]--;
6257 85529705 : if (n_inv_uses[iid] == 0)
6258 63212998 : ivs->n_invs--;
6259 : }
6260 : }
6260 :
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
		 struct iv_group *group)
{
  unsigned gid = group->id, cid;
  class cost_pair *cp;

  cp = ivs->cand_for_group[gid];
  /* Nothing to undo if the group is already unrepresented.  */
  if (!cp)
    return;
  cid = cp->cand->id;

  /* The group loses its representation, so it now counts as bad.  */
  ivs->bad_groups++;
  ivs->cand_for_group[gid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      /* This was the candidate's last use: remove it from the set.  */
      bitmap_clear_bit (ivs->cands, cid);
      /* A doloop candidate on a target with a dedicated count-register
	 decrement was not counted towards N_CANDS, so don't uncount it.  */
      if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
	ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;
      /* Release invariants referenced by the candidate itself.  */
      iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
      iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
    }

  /* Release the per-use cost and the invariants this use referenced,
     then refresh the cached total cost.  */
  ivs->cand_use_cost -= cp->cost;
  iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
  iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
  iv_ca_recount_cost (data, ivs);
}
6294 :
6295 : /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6296 : IVS. */
6297 :
6298 : static void
6299 583042816 : iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6300 : {
6301 583042816 : bitmap_iterator bi;
6302 583042816 : unsigned iid;
6303 :
6304 583042816 : if (!invs)
6305 464016019 : return;
6306 :
6307 119026797 : gcc_assert (n_inv_uses != NULL);
6308 205521871 : EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6309 : {
6310 86495074 : n_inv_uses[iid]++;
6311 86495074 : if (n_inv_uses[iid] == 1)
6312 64100800 : ivs->n_invs++;
6313 : }
6314 : }
6315 :
/* Set cost pair for GROUP in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
	      struct iv_group *group, class cost_pair *cp)
{
  unsigned gid = group->id, cid;

  /* No-op if the group already uses exactly this cost pair.  */
  if (ivs->cand_for_group[gid] == cp)
    return;

  /* Detach the previous representation first so all counters are
     consistent before the new one is installed.  */
  if (ivs->cand_for_group[gid])
    iv_ca_set_no_cp (data, ivs, group);

  if (cp)
    {
      cid = cp->cand->id;

      /* The group regains a representation.  */
      ivs->bad_groups--;
      ivs->cand_for_group[gid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  /* First use of this candidate: add it to the set.  */
	  bitmap_set_bit (ivs->cands, cid);
	  /* A doloop candidate on a target with a dedicated count-register
	     decrement does not add register pressure; don't count it.  */
	  if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
	    ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;
	  /* Account invariants referenced by the candidate itself.  */
	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
	}

      /* Account the per-use cost and this use's invariants, then refresh
	 the cached total cost.  */
      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
      iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
      iv_ca_recount_cost (data, ivs);
    }
}
6353 :
6354 : /* Extend set IVS by expressing USE by some of the candidates in it
6355 : if possible. Consider all important candidates if candidates in
6356 : set IVS don't give any result. */
6357 :
6358 : static void
6359 3278666 : iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6360 : struct iv_group *group)
6361 : {
6362 3278666 : class cost_pair *best_cp = NULL, *cp;
6363 3278666 : bitmap_iterator bi;
6364 3278666 : unsigned i;
6365 3278666 : struct iv_cand *cand;
6366 :
6367 3278666 : gcc_assert (ivs->upto >= group->id);
6368 3278666 : ivs->upto++;
6369 3278666 : ivs->bad_groups++;
6370 :
6371 6181898 : EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6372 : {
6373 2903232 : cand = data->vcands[i];
6374 2903232 : cp = get_group_iv_cost (data, group, cand);
6375 2903232 : if (cheaper_cost_pair (cp, best_cp))
6376 2015282 : best_cp = cp;
6377 : }
6378 :
6379 3278666 : if (best_cp == NULL)
6380 : {
6381 11743599 : EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6382 : {
6383 10407303 : cand = data->vcands[i];
6384 10407303 : cp = get_group_iv_cost (data, group, cand);
6385 10407303 : if (cheaper_cost_pair (cp, best_cp))
6386 2382710 : best_cp = cp;
6387 : }
6388 : }
6389 :
6390 3278666 : iv_ca_set_cp (data, ivs, group, best_cp);
6391 3278666 : }
6392 :
6393 : /* Get cost for assignment IVS. */
6394 :
6395 : static comp_cost
6396 82358281 : iv_ca_cost (class iv_ca *ivs)
6397 : {
6398 : /* This was a conditional expression but it triggered a bug in
6399 : Sun C 5.5. */
6400 0 : if (ivs->bad_groups)
6401 87307 : return infinite_cost;
6402 : else
6403 82270974 : return ivs->cost;
6404 : }
6405 :
6406 : /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6407 : than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6408 : respectively. */
6409 :
6410 : static int
6411 38757635 : iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6412 : struct iv_group *group, class cost_pair *old_cp,
6413 : class cost_pair *new_cp)
6414 : {
6415 38757635 : gcc_assert (old_cp && new_cp && old_cp != new_cp);
6416 38757635 : unsigned old_n_invs = ivs->n_invs;
6417 38757635 : iv_ca_set_cp (data, ivs, group, new_cp);
6418 38757635 : unsigned new_n_invs = ivs->n_invs;
6419 38757635 : iv_ca_set_cp (data, ivs, group, old_cp);
6420 :
6421 38757635 : return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6422 : }
6423 :
6424 : /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6425 : it before NEXT. */
6426 :
6427 : static struct iv_ca_delta *
6428 48511905 : iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6429 : class cost_pair *new_cp, struct iv_ca_delta *next)
6430 : {
6431 0 : struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6432 :
6433 48511905 : change->group = group;
6434 48511905 : change->old_cp = old_cp;
6435 48511905 : change->new_cp = new_cp;
6436 48511905 : change->next = next;
6437 :
6438 48511905 : return change;
6439 : }
6440 :
6441 : /* Joins two lists of changes L1 and L2. Destructive -- old lists
6442 : are rewritten. */
6443 :
6444 : static struct iv_ca_delta *
6445 8143953 : iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6446 : {
6447 8143953 : struct iv_ca_delta *last;
6448 :
6449 0 : if (!l2)
6450 : return l1;
6451 :
6452 0 : if (!l1)
6453 : return l2;
6454 :
6455 3494350 : for (last = l1; last->next; last = last->next)
6456 1130034 : continue;
6457 2364316 : last->next = l2;
6458 :
6459 2364316 : return l1;
6460 1130034 : }
6461 :
6462 : /* Reverse the list of changes DELTA, forming the inverse to it. */
6463 :
6464 : static struct iv_ca_delta *
6465 0 : iv_ca_delta_reverse (struct iv_ca_delta *delta)
6466 : {
6467 0 : struct iv_ca_delta *act, *next, *prev = NULL;
6468 :
6469 161219976 : for (act = delta; act; act = next)
6470 : {
6471 91215814 : next = act->next;
6472 91215814 : act->next = prev;
6473 91215814 : prev = act;
6474 :
6475 91215814 : std::swap (act->old_cp, act->new_cp);
6476 : }
6477 :
6478 0 : return prev;
6479 : }
6480 :
6481 : /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6482 : reverted instead. */
6483 :
6484 : static void
6485 73768862 : iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6486 : struct iv_ca_delta *delta, bool forward)
6487 : {
6488 73768862 : class cost_pair *from, *to;
6489 73768862 : struct iv_ca_delta *act;
6490 :
6491 73768862 : if (!forward)
6492 73768862 : delta = iv_ca_delta_reverse (delta);
6493 :
6494 169861148 : for (act = delta; act; act = act->next)
6495 : {
6496 96092286 : from = act->old_cp;
6497 96092286 : to = act->new_cp;
6498 96092286 : gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6499 96092286 : iv_ca_set_cp (data, ivs, act->group, to);
6500 : }
6501 :
6502 73768862 : if (!forward)
6503 73768862 : iv_ca_delta_reverse (delta);
6504 73768862 : }
6505 :
6506 : /* Returns true if CAND is used in IVS. */
6507 :
6508 : static bool
6509 29027837 : iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6510 : {
6511 29027837 : return ivs->n_cand_uses[cand->id] > 0;
6512 : }
6513 :
6514 : /* Returns number of induction variable candidates in the set IVS. */
6515 :
6516 : static unsigned
6517 12645477 : iv_ca_n_cands (class iv_ca *ivs)
6518 : {
6519 12645477 : return ivs->n_cands;
6520 : }
6521 :
6522 : /* Free the list of changes DELTA. */
6523 :
6524 : static void
6525 43529566 : iv_ca_delta_free (struct iv_ca_delta **delta)
6526 : {
6527 43529566 : struct iv_ca_delta *act, *next;
6528 :
6529 92041471 : for (act = *delta; act; act = next)
6530 : {
6531 48511905 : next = act->next;
6532 48511905 : free (act);
6533 : }
6534 :
6535 43529566 : *delta = NULL;
6536 43529566 : }
6537 :
6538 : /* Allocates new iv candidates assignment. */
6539 :
6540 : static class iv_ca *
6541 1000140 : iv_ca_new (struct ivopts_data *data)
6542 : {
6543 1000140 : class iv_ca *nw = XNEW (class iv_ca);
6544 :
6545 1000140 : nw->upto = 0;
6546 1000140 : nw->bad_groups = 0;
6547 2000280 : nw->cand_for_group = XCNEWVEC (class cost_pair *,
6548 : data->vgroups.length ());
6549 2000280 : nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6550 1000140 : nw->cands = BITMAP_ALLOC (NULL);
6551 1000140 : nw->n_cands = 0;
6552 1000140 : nw->n_invs = 0;
6553 1000140 : nw->cand_use_cost = no_cost;
6554 1000140 : nw->cand_cost = 0;
6555 1000140 : nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6556 1000140 : nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6557 1000140 : nw->cost = no_cost;
6558 :
6559 1000140 : return nw;
6560 : }
6561 :
6562 : /* Free memory occupied by the set IVS. */
6563 :
6564 : static void
6565 1000140 : iv_ca_free (class iv_ca **ivs)
6566 : {
6567 1000140 : free ((*ivs)->cand_for_group);
6568 1000140 : free ((*ivs)->n_cand_uses);
6569 1000140 : BITMAP_FREE ((*ivs)->cands);
6570 1000140 : free ((*ivs)->n_inv_var_uses);
6571 1000140 : free ((*ivs)->n_inv_expr_uses);
6572 1000140 : free (*ivs);
6573 1000140 : *ivs = NULL;
6574 1000140 : }
6575 :
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
{
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  /* Overall cost summary: total, register pressure, candidate cost and
     the summed per-group costs.  */
  fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
	   cost.complexity);
  fprintf (file, "  reg_cost: %d\n",
	   ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
  fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
	   "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
	   ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  /* Per-group: which candidate expresses it and at what cost.  */
  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_group *group = data->vgroups[i];
      class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
      if (cp)
	fprintf (file, "   group:%d --> iv_cand:%d, cost=("
		 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
		 cp->cost.cost, cp->cost.complexity);
      else
	fprintf (file, "   group:%d --> ??\n", group->id);
    }

  /* Comma-separated list of invariant variables referenced by the set;
     index 0 is unused, hence the loops start at 1.  */
  const char *pref = "";
  fprintf (file, "  invariant variables: ");
  for (i = 1; i <= data->max_inv_var_id; i++)
    if (ivs->n_inv_var_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }

  /* Likewise for invariant expressions.  */
  pref = "";
  fprintf (file, "\n  invariant expressions: ");
  for (i = 1; i <= data->max_inv_expr_id; i++)
    if (ivs->n_inv_expr_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }

  fprintf (file, "\n\n");
}
6625 :
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with mimimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_group *group;
  class cost_pair *old_cp, *new_cp;

  *delta = NULL;
  /* Collect, for every group already handled, a change to CAND when it is
     profitable (or unconditionally when MIN_NCAND).  */
  for (i = 0; i < ivs->upto; i++)
    {
      group = data->vgroups[i];
      old_cp = iv_ca_cand_for_group (ivs, group);

      /* Already expressed by CAND -- nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      /* CAND cannot express this group at all.  */
      new_cp = get_group_iv_cost (data, group, cand);
      if (!new_cp)
	continue;

      if (!min_ncand)
	{
	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
	  /* Skip if new_cp depends on more invariants.  */
	  if (cmp_invs > 0)
	    continue;

	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
	  /* Skip if new_cp is not cheaper.  */
	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
	    continue;
	}

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  /* Apply the accumulated changes just long enough to measure the
     resulting cost (and candidate count), then revert them; the caller
     decides whether to commit DELTA for real.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
6679 :
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
	      struct iv_cand *cand, struct iv_cand *start,
	      struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_group *group;
  class cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  /* For every group currently expressed by CAND, find the best
     replacement among the other candidates in the set.  */
  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      old_cp = iv_ca_cand_for_group (ivs, group);
      if (old_cp->cand != cand)
	continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_group_iv_cost (data, group, start);

      if (data->consider_all_candidates)
	{
	  /* Probe every candidate in the set except CAND (being removed)
	     and START (already taken as the baseline).  */
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = data->vcands[ci];

	      cp = get_group_iv_cost (data, group, cnd);
	      if (!cp)
		continue;

	      /* Tentatively install CP and measure the set cost.  */
	      iv_ca_set_cp (data, ivs, group, cp);
	      acost = iv_ca_cost (ivs);

	      if (acost < best_cost)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      else
	{
	  /* Same search, but restricted to candidates related to the
	     group that are also members of the set.  */
	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = data->vcands[ci];

	      cp = get_group_iv_cost (data, group, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, group, cp);
	      acost = iv_ca_cost (ivs);

	      if (acost < best_cost)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, group, old_cp);

      /* No replacement exists for this group, so CAND cannot be removed.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the narrowed set, then revert; the caller
     decides whether to commit DELTA.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
6773 :
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Try removing each candidate (except EXCEPT_CAND) in turn and keep
     the single removal that improves the cost the most.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No single removal helped; the set is already minimal.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  /* DELTA becomes this removal followed by the recursive removals.  */
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
6823 :
6824 : /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6825 : cheaper local cost for GROUP than BEST_CP. Return pointer to
6826 : the corresponding cost_pair, otherwise just return BEST_CP. */
6827 :
6828 : static class cost_pair*
6829 29134441 : cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6830 : unsigned int cand_idx, struct iv_cand *old_cand,
6831 : class cost_pair *best_cp)
6832 : {
6833 29134441 : struct iv_cand *cand;
6834 29134441 : class cost_pair *cp;
6835 :
6836 29134441 : gcc_assert (old_cand != NULL && best_cp != NULL);
6837 29134441 : if (cand_idx == old_cand->id)
6838 : return best_cp;
6839 :
6840 26322902 : cand = data->vcands[cand_idx];
6841 26322902 : cp = get_group_iv_cost (data, group, cand);
6842 26322902 : if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6843 : return cp;
6844 :
6845 : return best_cp;
6846 : }
6847 :
/* Try breaking local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv uses.  For each of those candidates,
   this function tries to represent iv uses under that candidate using
   other ones with lower local cost, then tries to prune the new set.
   If the new set has lower cost, It returns the new cost after recording
   candidate replacement in list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  class cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      /* Only consider candidates with 2..ALWAYS_PRUNE_CAND_SET_BOUND uses:
	 single-use candidates have nothing to redistribute, and very
	 heavily used ones are too expensive to retry.  */
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = data->vcands[i];

      act_delta = NULL;
      /* Represent uses under current candidate using other ones with
	 lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  struct iv_group *group = data->vgroups[j];
	  old_cp = iv_ca_cand_for_group (ivs, group);

	  if (old_cp->cand != cand)
	    continue;

	  /* Search for a locally cheaper candidate for this group.  */
	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < data->vcands.length (); k++)
	      best_cp = cheaper_cost_with_cand (data, group, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, group, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
	}
      /* No need for further prune.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      /* Accept the first replacement that improves on the original cost;
	 later candidates are not examined.  */
      if (acost < orig_cost)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
6924 :
/* Tries to extend the sets IVS in the best possible way in order to
   express the GROUP.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
		  struct iv_group *group, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  class cost_pair *cp;

  /* Let iv_ca_add_group pick a baseline representation, remember it as
     the initial best delta, then detach it again so alternatives can be
     evaluated from the same starting state.  */
  iv_ca_add_group (data, ivs, group);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_group (ivs, group);
  if (cp)
    {
      best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, group);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
    {
      cand = data->vcands[i];

      /* Filter by preference: original-position ivs when ORIGINALP,
	 non-memory-based ivs otherwise.  */
      if (originalp && cand->pos !=IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_group_iv_cost (data, group, cand);
      if (!cp)
	continue;

      /* Tentatively adopt CAND for this group, extend the rest of the
	 set onto it, and keep the delta if the total cost improves.  */
      iv_ca_set_cp (data, ivs, group, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, group);
      act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* Last resort: if nothing above produced a finite cost, try every
     cost pair recorded for this group.  */
  if (best_cost.infinite_cost_p ())
    {
      for (i = 0; i < group->n_map_members; i++)
	{
	  cp = group->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, group, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, group);
	  act_delta = iv_ca_delta_add (group,
				       iv_ca_cand_for_group (ivs, group),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  /* Commit the overall winner; failure means the group cannot be
     expressed by any candidate at finite cost.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !best_cost.infinite_cost_p ();
}
7040 :
7041 : /* Finds an initial assignment of candidates to uses. */
7042 :
7043 : static class iv_ca *
7044 1000140 : get_initial_solution (struct ivopts_data *data, bool originalp)
7045 : {
7046 1000140 : unsigned i;
7047 1000140 : class iv_ca *ivs = iv_ca_new (data);
7048 :
7049 4277564 : for (i = 0; i < data->vgroups.length (); i++)
7050 3278666 : if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7051 : {
7052 1242 : iv_ca_free (&ivs);
7053 1242 : return NULL;
7054 : }
7055 :
7056 : return ivs;
7057 : }
7058 :
/* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable, this function tries to break local
   optimal fixed-point by replacing candidates in IVS if it's true.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    class iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      /* Keep only the cheapest extension found so far.  */
      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Replacement is a one-shot escape hatch: once used, clear the
	 flag so subsequent iterations don't retry it.  */
      if (!best_delta && *try_replace_p)
	{
	  *try_replace_p = false;
	  /* So far candidate selecting algorithm tends to choose fewer IVs
	     so that it can handle cases in which loops have many variables
	     but the best choice is often to use only one general biv.  One
	     weakness is it can't handle opposite cases, in which different
	     candidates should be chosen with respect to each use.  To solve
	     the problem, we replace candidates in a manner described by the
	     comments of iv_ca_replace, thus give general algorithm a chance
	     to break local optimal fixed-point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}

      /* Nothing improved the set; tell the caller to stop iterating.  */
      if (!best_delta)
	return false;
    }

  /* Commit the winning change.  Returning whether the recomputed cost
     matches the predicted one guards against cost-model drift.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);
  return best_cost == iv_ca_cost (ivs);
}
7131 :
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static class iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  class iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Iterate greedy improvement until it reaches a fixed point.  */
  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  /* If the set has infinite_cost, it can't be optimal.  */
  if (iv_ca_cost (set).infinite_cost_p ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Overflow to infinite cost in try_improve_iv_set.\n");
      iv_ca_free (&set);
    }
  /* NOTE: after iv_ca_free, SET is NULL, which is the value returned.  */
  return set;
}
7176 :
/* Attempts to find the optimal set of induction variables, trying both
   the original-iv-preferring and the generic-candidate-preferring
   strategies and keeping the cheaper result.  Also records the selected
   candidate for every group.  Returns NULL on failure.  */

static class iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  comp_cost cost, origcost;
  class iv_ca *set, *origset;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  /* A strategy that failed entirely counts as infinitely expensive.  */
  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (origcost <= cost)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  /* Record in each group the candidate chosen to express it.  */
  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      group->selected = iv_ca_cand_for_group (set, group)->cand;
    }

  return set;
}
7222 :
7223 : /* Creates a new induction variable corresponding to CAND. */
     : /* For IP_ORIGINAL candidates no new code is emitted; the existing biv is
     :    marked preserved instead.  For the other positions the increment is
     :    materialized at the position encoded in CAND->pos.  */
7224 :
7225 : static void
7226 674594 : create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7227 : {
7228 674594 : gimple_stmt_iterator incr_pos;
7229 674594 : tree base;
7230 674594 : struct iv_use *use;
7231 674594 : struct iv_group *group;
7232 674594 : bool after = false;
7233 :
7234 674594 : gcc_assert (cand->iv != NULL);
7235 :
7236 674594 : switch (cand->pos)
7237 : {
7238 465641 : case IP_NORMAL:
7239 465641 : incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7240 465641 : break;
7241 :
7242 10252 : case IP_END:
7243 10252 : incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7244 10252 : after = true;
     : /* Inserting after the last statement would be wrong if that statement
     :    ends the basic block.  */
7245 10252 : gcc_assert (gsi_end_p (incr_pos) || !stmt_ends_bb_p (*incr_pos));
7246 : break;
7247 :
7248 0 : case IP_AFTER_USE:
7249 0 : after = true;
7250 : /* fall through */
7251 0 : case IP_BEFORE_USE:
7252 0 : incr_pos = gsi_for_stmt (cand->incremented_at);
7253 0 : break;
7254 :
7255 198701 : case IP_ORIGINAL:
7256 : /* Mark that the iv is preserved. */
7257 198701 : name_info (data, cand->var_before)->preserve_biv = true;
7258 198701 : name_info (data, cand->var_after)->preserve_biv = true;
7259 :
7260 : /* Rewrite the increment so that it uses var_before directly. */
7261 198701 : use = find_interesting_uses_op (data, cand->var_after);
7262 198701 : group = data->vgroups[use->group_id];
7263 198701 : group->selected = cand;
7264 198701 : return;
7265 : }
7266 :
7267 475893 : gimple_add_tmp_var (cand->var_before);
7268 :
7269 475893 : base = unshare_expr (cand->iv->base);
7270 :
7271 : /* The step computation could invoke UB when the loop does not iterate.
7272 : Avoid inserting it on the preheader in its native form but rewrite
7273 : it to a well-defined form. This also helps masking SCEV issues
7274 : which freely re-associates the IV computations when building up
7275 : CHRECs without much regard for signed overflow invoking UB. */
7276 475893 : gimple_seq stmts = NULL;
7277 475893 : tree step = force_gimple_operand (unshare_expr (cand->iv->step), &stmts,
7278 : true, NULL_TREE);
7279 475893 : if (stmts)
7280 : {
7281 143431 : for (auto gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
7282 95462 : if (gimple_needing_rewrite_undefined (gsi_stmt (gsi)))
7283 10821 : rewrite_to_defined_unconditional (&gsi);
7284 47969 : gsi_insert_seq_on_edge_immediate
7285 47969 : (loop_preheader_edge (data->current_loop), stmts);
7286 : }
7287 :
     : /* Finally materialize the IV: the increment goes at INCR_POS, before or
     :    after it according to AFTER.  */
7288 475893 : create_iv (base, PLUS_EXPR, step,
7289 : cand->var_before, data->current_loop,
7290 : &incr_pos, after, &cand->var_before, &cand->var_after);
7291 : }
7292 :
7293 : /* Creates new induction variables described in SET. */
7294 :
7295 : static void
7296 499449 : create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7297 : {
7298 499449 : unsigned i;
7299 499449 : struct iv_cand *cand;
7300 499449 : bitmap_iterator bi;
7301 :
     : /* Instantiate every candidate selected in SET.  */
7302 1174043 : EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7303 : {
7304 674594 : cand = data->vcands[i];
7305 674594 : create_new_iv (data, cand);
7306 : }
7307 :
     : /* Dump the chosen IV set, with loop location if known.  */
7308 499449 : if (dump_file && (dump_flags & TDF_DETAILS))
7309 : {
7310 67 : fprintf (dump_file, "Selected IV set for loop %d",
7311 67 : data->current_loop->num);
7312 67 : if (data->loop_loc != UNKNOWN_LOCATION)
7313 65 : fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7314 130 : LOCATION_LINE (data->loop_loc));
7315 67 : fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " avg niters",
7316 : avg_loop_niter (data->current_loop));
7317 67 : fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7318 178 : EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7319 : {
7320 111 : cand = data->vcands[i];
7321 111 : dump_cand (dump_file, cand);
7322 : }
7323 67 : fprintf (dump_file, "\n");
7324 : }
7325 499449 : }
7326 :
7327 : /* Rewrites USE (definition of iv used in a nonlinear expression)
7328 : using candidate CAND. */
7329 :
7330 : static void
7331 619995 : rewrite_use_nonlinear_expr (struct ivopts_data *data,
7332 : struct iv_use *use, struct iv_cand *cand)
7333 : {
7334 619995 : gassign *ass;
7335 619995 : gimple_stmt_iterator bsi;
7336 619995 : tree comp, type = get_use_type (use), tgt;
7337 :
7338 : /* An important special case -- if we are asked to express value of
7339 : the original iv by itself, just exit; there is no need to
7340 : introduce a new computation (that might also need casting the
7341 : variable to unsigned and back). */
7342 619995 : if (cand->pos == IP_ORIGINAL
7343 329201 : && cand->incremented_at == use->stmt)
7344 : {
7345 198701 : tree op = NULL_TREE;
7346 198701 : enum tree_code stmt_code;
7347 :
7348 198701 : gcc_assert (is_gimple_assign (use->stmt));
7349 198701 : gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7350 :
7351 : /* Check whether we may leave the computation unchanged.
7352 : This is the case only if it does not rely on other
7353 : computations in the loop -- otherwise, the computation
7354 : we rely upon may be removed in remove_unused_ivs,
7355 : thus leading to ICE. */
7356 198701 : stmt_code = gimple_assign_rhs_code (use->stmt);
7357 198701 : if (stmt_code == PLUS_EXPR
7358 198701 : || stmt_code == MINUS_EXPR
7359 198701 : || stmt_code == POINTER_PLUS_EXPR)
7360 : {
     : /* OP becomes the operand that is not the biv itself.  */
7361 194721 : if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7362 192857 : op = gimple_assign_rhs2 (use->stmt);
7363 1864 : else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7364 : op = gimple_assign_rhs1 (use->stmt);
7365 : }
7366 :
7367 193357 : if (op != NULL_TREE)
7368 : {
7369 193357 : if (expr_invariant_in_loop_p (data->current_loop, op))
7370 276424 : return;
7371 181 : if (TREE_CODE (op) == SSA_NAME)
7372 : {
7373 181 : struct iv *iv = get_iv (data, op);
7374 181 : if (iv != NULL && integer_zerop (iv->step))
7375 : return;
7376 : }
7377 : }
7378 : }
7379 :
7380 426638 : switch (gimple_code (use->stmt))
7381 : {
7382 125548 : case GIMPLE_PHI:
7383 125548 : tgt = PHI_RESULT (use->stmt);
7384 :
7385 : /* If we should keep the biv, do not replace it. */
7386 125548 : if (name_info (data, tgt)->preserve_biv)
7387 : return;
7388 :
     : /* Replacement code for a PHI is inserted at the start of its block.  */
7389 42481 : bsi = gsi_after_labels (gimple_bb (use->stmt));
7390 42481 : break;
7391 :
7392 301090 : case GIMPLE_ASSIGN:
7393 301090 : tgt = gimple_assign_lhs (use->stmt);
7394 301090 : bsi = gsi_for_stmt (use->stmt);
7395 301090 : break;
7396 :
7397 0 : default:
7398 0 : gcc_unreachable ();
7399 : }
7400 :
7401 1030713 : aff_tree aff_inv, aff_var;
7402 343571 : if (!get_computation_aff_1 (data, use->stmt, use, cand, &aff_inv, &aff_var))
7403 0 : gcc_unreachable ();
7404 :
7405 343571 : unshare_aff_combination (&aff_inv);
7406 343571 : unshare_aff_combination (&aff_var);
7407 : /* Prefer CSE opportunity than loop invariant by adding offset at last
7408 : so that iv_uses have different offsets can be CSEed. */
7409 687142 : poly_widest_int offset = aff_inv.offset;
7410 343571 : aff_inv.offset = 0;
7411 :
7412 343571 : gimple_seq stmt_list = NULL, seq = NULL;
7413 343571 : tree comp_op1 = aff_combination_to_tree (&aff_inv);
7414 343571 : tree comp_op2 = aff_combination_to_tree (&aff_var);
7415 343571 : gcc_assert (comp_op1 && comp_op2);
7416 :
7417 343571 : comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7418 343571 : gimple_seq_add_seq (&stmt_list, seq);
7419 343571 : comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7420 343571 : gimple_seq_add_seq (&stmt_list, seq);
7421 :
     : /* Canonicalize so that a pointer operand, if any, is first; pointer
     :    arithmetic must use POINTER_PLUS_EXPR below.  */
7422 343571 : if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7423 : std::swap (comp_op1, comp_op2);
7424 :
7425 343571 : if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7426 : {
7427 0 : comp = fold_build_pointer_plus (comp_op1,
7428 : fold_convert (sizetype, comp_op2));
7429 0 : comp = fold_build_pointer_plus (comp,
7430 : wide_int_to_tree (sizetype, offset));
7431 : }
7432 : else
7433 : {
7434 343571 : comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7435 : fold_convert (TREE_TYPE (comp_op1), comp_op2));
7436 343571 : comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7437 : wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7438 : }
7439 :
7440 343571 : comp = fold_convert (type, comp);
7441 343571 : comp = force_gimple_operand (comp, &seq, false, NULL);
7442 343571 : gimple_seq_add_seq (&stmt_list, seq);
7443 343571 : if (gimple_code (use->stmt) != GIMPLE_PHI
7444 : /* We can't allow re-allocating the stmt as it might be pointed
7445 : to still. */
7446 343571 : && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7447 301090 : >= gimple_num_ops (gsi_stmt (bsi))))
7448 : {
7449 8371 : comp = force_gimple_operand (comp, &seq, true, NULL);
7450 8371 : gimple_seq_add_seq (&stmt_list, seq);
7451 8371 : if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7452 : {
7453 0 : duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7454 : /* As this isn't a plain copy we have to reset alignment
7455 : information. */
7456 0 : if (SSA_NAME_PTR_INFO (comp))
7457 0 : mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7458 : }
7459 : }
7460 :
     : /* Emit the computation and replace the use: a PHI is converted into a
     :    plain assignment and removed, an assignment gets a new RHS.  */
7461 343571 : gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7462 343571 : if (gimple_code (use->stmt) == GIMPLE_PHI)
7463 : {
7464 42481 : ass = gimple_build_assign (tgt, comp);
7465 42481 : gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7466 :
7467 42481 : bsi = gsi_for_stmt (use->stmt);
7468 42481 : remove_phi_node (&bsi, false);
7469 : }
7470 : else
7471 : {
7472 301090 : gimple_assign_set_rhs_from_tree (&bsi, comp);
7473 301090 : use->stmt = gsi_stmt (bsi);
7474 : }
7475 : }
7476 :
7477 : /* Performs a peephole optimization to reorder the iv update statement with
7478 : a mem ref to enable instruction combining in later phases. The mem ref uses
7479 : the iv value before the update, so the reordering transformation requires
7480 : adjustment of the offset. CAND is the selected IV_CAND.
7481 :
7482 : Example:
7483 :
7484 : t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7485 : iv2 = iv1 + 1;
7486 :
7487 : if (t < val) (1)
7488 : goto L;
7489 : goto Head;
7490 :
7491 :
7492 : directly propagating t over to (1) will introduce overlapping live range
7493 : thus increase register pressure. This peephole transform it into:
7494 :
7495 :
7496 : iv2 = iv1 + 1;
7497 : t = MEM_REF (base, iv2, 8, 8);
7498 : if (t < val)
7499 : goto L;
7500 : goto Head;
7501 : */
7502 :
7503 : static void
7504 858065 : adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7505 : {
7506 858065 : tree var_after;
7507 858065 : gimple *iv_update, *stmt;
7508 858065 : basic_block bb;
7509 858065 : gimple_stmt_iterator gsi, gsi_iv;
7510 :
     : /* Only candidates incremented at the normal position are handled.  */
7511 858065 : if (cand->pos != IP_NORMAL)
7512 855872 : return;
7513 :
7514 660011 : var_after = cand->var_after;
7515 660011 : iv_update = SSA_NAME_DEF_STMT (var_after);
7516 :
7517 660011 : bb = gimple_bb (iv_update);
7518 660011 : gsi = gsi_last_nondebug_bb (bb);
7519 660011 : stmt = gsi_stmt (gsi);
7520 :
7521 : /* Only handle conditional statement for now. */
7522 660011 : if (gimple_code (stmt) != GIMPLE_COND)
7523 : return;
7524 :
     : /* Require the block to end exactly with: USE; IV_UPDATE; COND.  */
7525 660011 : gsi_prev_nondebug (&gsi);
7526 660011 : stmt = gsi_stmt (gsi);
7527 660011 : if (stmt != iv_update)
7528 : return;
7529 :
7530 532116 : gsi_prev_nondebug (&gsi);
7531 532116 : if (gsi_end_p (gsi))
7532 : return;
7533 :
7534 529016 : stmt = gsi_stmt (gsi);
7535 529016 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
7536 : return;
7537 :
7538 528855 : if (stmt != use->stmt)
7539 : return;
7540 :
7541 4984 : if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7542 : return;
7543 :
7544 2193 : if (dump_file && (dump_flags & TDF_DETAILS))
7545 : {
7546 0 : fprintf (dump_file, "Reordering \n");
7547 0 : print_gimple_stmt (dump_file, iv_update, 0);
7548 0 : print_gimple_stmt (dump_file, use->stmt, 0);
7549 0 : fprintf (dump_file, "\n");
7550 : }
7551 :
7552 2193 : gsi = gsi_for_stmt (use->stmt);
7553 2193 : gsi_iv = gsi_for_stmt (iv_update);
7554 2193 : gsi_move_before (&gsi_iv, &gsi);
7555 :
     : /* Record that CAND is now incremented right before USE, so later code
     :    generation uses the post-increment value.  */
7556 2193 : cand->pos = IP_BEFORE_USE;
7557 2193 : cand->incremented_at = use->stmt;
7558 : }
7559 :
7560 : /* Return the alias pointer type that should be used for a MEM_REF
7561 : associated with USE, which has type USE_PTR_ADDRESS. */
7562 :
7563 : static tree
7564 796 : get_alias_ptr_type_for_ptr_address (iv_use *use)
7565 : {
7566 796 : gcall *call = as_a <gcall *> (use->stmt);
7567 796 : switch (gimple_call_internal_fn (call))
7568 : {
7569 796 : case IFN_MASK_LOAD:
7570 796 : case IFN_MASK_STORE:
7571 796 : case IFN_MASK_LOAD_LANES:
7572 796 : case IFN_MASK_STORE_LANES:
7573 796 : case IFN_MASK_LEN_LOAD_LANES:
7574 796 : case IFN_MASK_LEN_STORE_LANES:
7575 796 : case IFN_LEN_LOAD:
7576 796 : case IFN_LEN_STORE:
7577 796 : case IFN_MASK_LEN_LOAD:
7578 796 : case IFN_MASK_LEN_STORE:
7579 : /* The second argument contains the correct alias type. */
     : /* USE must be the address (first) argument of the call.  */
7580 796 : gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7581 796 : return TREE_TYPE (gimple_call_arg (call, 1));
7582 :
7583 0 : default:
7584 0 : gcc_unreachable ();
7585 : }
7586 : }
7587 :
7588 :
7589 : /* Rewrites USE (address that is an iv) using candidate CAND. */
7590 :
7591 : static void
7592 858065 : rewrite_use_address (struct ivopts_data *data,
7593 : struct iv_use *use, struct iv_cand *cand)
7594 : {
7595 858065 : aff_tree aff;
7596 858065 : bool ok;
7597 :
     : /* First try moving the IV update before USE to help later combining.  */
7598 858065 : adjust_iv_update_pos (cand, use);
7599 858065 : ok = get_computation_aff (data, use->stmt, use, cand, &aff);
7600 858065 : gcc_assert (ok);
7601 858065 : unshare_aff_combination (&aff);
7602 :
7603 : /* To avoid undefined overflow problems, all IV candidates use unsigned
7604 : integer types. The drawback is that this makes it impossible for
7605 : create_mem_ref to distinguish an IV that is based on a memory object
7606 : from one that represents simply an offset.
7607 :
7608 : To work around this problem, we pass a hint to create_mem_ref that
7609 : indicates which variable (if any) in aff is an IV based on a memory
7610 : object. Note that we only consider the candidate. If this is not
7611 : based on an object, the base of the reference is in some subexpression
7612 : of the use -- but these will use pointer types, so they are recognized
7613 : by the create_mem_ref heuristics anyway. */
7614 858065 : tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7615 858065 : tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7616 858065 : gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7617 858065 : tree type = use->mem_type;
7618 858065 : tree alias_ptr_type;
7619 858065 : if (use->type == USE_PTR_ADDRESS)
7620 796 : alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7621 : else
7622 : {
7623 857269 : gcc_assert (type == TREE_TYPE (*use->op_p));
7624 857269 : unsigned int align = get_object_alignment (*use->op_p);
7625 857269 : if (align != TYPE_ALIGN (type))
7626 34111 : type = build_aligned_type (type, align);
7627 857269 : alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7628 : }
7629 1716130 : tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7630 858065 : iv, base_hint, data->speed);
7631 :
     : /* For a pointer argument of an internal call, rewrite to the address of
     :    the new reference rather than the reference itself.  */
7632 858065 : if (use->type == USE_PTR_ADDRESS)
7633 : {
7634 796 : ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7635 796 : ref = fold_convert (get_use_type (use), ref);
7636 796 : ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7637 : true, GSI_SAME_STMT);
7638 : }
7639 : else
7640 : {
7641 : /* When we end up confused enough and have no suitable base but
7642 : stuffed everything to index2 use a LEA for the address and
7643 : create a plain MEM_REF to avoid basing a memory reference
7644 : on address zero which create_mem_ref_raw does as fallback. */
7645 857269 : if (TREE_CODE (ref) == TARGET_MEM_REF
7646 857269 : && TMR_INDEX2 (ref) != NULL_TREE
7647 867826 : && integer_zerop (TREE_OPERAND (ref, 0)))
7648 : {
7649 20 : ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7650 20 : ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7651 : true, GSI_SAME_STMT);
7652 20 : ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7653 : }
7654 857269 : copy_ref_info (ref, *use->op_p);
7655 : }
7656 :
7657 858065 : *use->op_p = ref;
7658 858065 : }
7659 :
7660 : /* Rewrites USE (the condition such that one of the arguments is an iv) using
7661 : candidate CAND. */
7662 :
7663 : static void
7664 596691 : rewrite_use_compare (struct ivopts_data *data,
7665 : struct iv_use *use, struct iv_cand *cand)
7666 : {
7667 596691 : tree comp, op, bound;
7668 596691 : gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7669 596691 : enum tree_code compare;
7670 596691 : struct iv_group *group = data->vgroups[use->group_id];
7671 596691 : class cost_pair *cp = get_group_iv_cost (data, group, cand);
7672 :
     : /* A non-NULL bound in the cost pair means IV elimination succeeded: the
     :    exit test can be rewritten as VAR <compare> BOUND.  */
7673 596691 : bound = cp->value;
7674 596691 : if (bound)
7675 : {
7676 392009 : tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7677 392009 : tree var_type = TREE_TYPE (var);
7678 392009 : gimple_seq stmts;
7679 :
7680 392009 : if (dump_file && (dump_flags & TDF_DETAILS))
7681 : {
7682 58 : fprintf (dump_file, "Replacing exit test: ");
7683 58 : print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7684 : }
7685 392009 : compare = cp->comp;
7686 392009 : bound = unshare_expr (fold_convert (var_type, bound));
7687 392009 : op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
     : /* The bound is loop invariant; compute it on the preheader edge.  */
7688 392009 : if (stmts)
7689 180298 : gsi_insert_seq_on_edge_immediate (
7690 180298 : loop_preheader_edge (data->current_loop),
7691 : stmts);
7692 :
7693 392009 : gcond *cond_stmt = as_a <gcond *> (use->stmt);
7694 392009 : gimple_cond_set_lhs (cond_stmt, var);
7695 392009 : gimple_cond_set_code (cond_stmt, compare);
7696 392009 : gimple_cond_set_rhs (cond_stmt, op);
7697 392009 : return;
7698 : }
7699 :
7700 : /* The induction variable elimination failed; just express the original
7701 : giv. */
7702 204682 : comp = get_computation_at (data, use->stmt, use, cand);
7703 204682 : gcc_assert (comp != NULL_TREE);
7704 204682 : gcc_assert (use->op_p != NULL);
7705 204682 : *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7706 204682 : SSA_NAME_VAR (*use->op_p),
7707 : true, GSI_SAME_STMT);
7708 : }
7709 :
7710 : /* Rewrite the groups using the selected induction variables. */
7711 :
7712 : static void
7713 499449 : rewrite_groups (struct ivopts_data *data)
7714 : {
7715 499449 : unsigned i, j;
7716 :
     : /* Every use of a group is rewritten against the group's selected
     :    candidate; the rewrite routine is picked by the group type.  */
7717 2296373 : for (i = 0; i < data->vgroups.length (); i++)
7718 : {
7719 1796924 : struct iv_group *group = data->vgroups[i];
7720 1796924 : struct iv_cand *cand = group->selected;
7721 :
7722 1796924 : gcc_assert (cand);
7723 :
7724 1796924 : if (group->type == USE_NONLINEAR_EXPR)
7725 : {
7726 1239990 : for (j = 0; j < group->vuses.length (); j++)
7727 : {
7728 619995 : rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7729 619995 : update_stmt (group->vuses[j]->stmt);
7730 : }
7731 : }
7732 1176929 : else if (address_p (group->type))
7733 : {
7734 1438303 : for (j = 0; j < group->vuses.length (); j++)
7735 : {
7736 858065 : rewrite_use_address (data, group->vuses[j], cand);
7737 858065 : update_stmt (group->vuses[j]->stmt);
7738 : }
7739 : }
7740 : else
7741 : {
7742 596691 : gcc_assert (group->type == USE_COMPARE);
7743 :
7744 2393615 : for (j = 0; j < group->vuses.length (); j++)
7745 : {
7746 596691 : rewrite_use_compare (data, group->vuses[j], cand);
7747 596691 : update_stmt (group->vuses[j]->stmt);
7748 : }
7749 : }
7750 : }
7751 499449 : }
7752 :
7753 : /* Removes the ivs that are not used after rewriting. */
     : /* SSA versions to be released are collected into TOREMOVE; before that,
     :    debug bind statements still referring to a dying IV are retargeted to
     :    an equivalent computation based on a surviving candidate.  */
7754 :
7755 : static void
7756 499449 : remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7757 : {
7758 499449 : unsigned j;
7759 499449 : bitmap_iterator bi;
7760 :
7761 : /* Figure out an order in which to release SSA DEFs so that we don't
7762 : release something that we'd have to propagate into a debug stmt
7763 : afterwards. */
7764 5498834 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7765 : {
7766 4999385 : struct version_info *info;
7767 :
7768 4999385 : info = ver_info (data, j);
7769 4999385 : if (info->iv
7770 4856772 : && !integer_zerop (info->iv->step)
7771 3199616 : && !info->inv_id
7772 3199616 : && !info->iv->nonlin_use
7773 7579006 : && !info->preserve_biv)
7774 : {
7775 2463987 : bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7776 :
7777 2463987 : tree def = info->iv->ssa_name;
7778 :
7779 3198273 : if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7780 : {
7781 734286 : imm_use_iterator imm_iter;
7782 734286 : use_operand_p use_p;
7783 734286 : gimple *stmt;
7784 734286 : int count = 0;
7785 :
7786 2176005 : FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7787 : {
7788 734770 : if (!gimple_debug_bind_p (stmt))
7789 620357 : continue;
7790 :
7791 : /* We just want to determine whether to do nothing
7792 : (count == 0), to substitute the computed
7793 : expression into a single use of the SSA DEF by
7794 : itself (count == 1), or to use a debug temp
7795 : because the SSA DEF is used multiple times or as
7796 : part of a larger expression (count > 1). */
7797 114413 : count++;
7798 114413 : if (gimple_debug_bind_get_value (stmt) != def)
7799 7542 : count++;
7800 :
7801 114413 : if (count > 1)
7802 : break;
7803 734286 : }
7804 :
7805 734286 : if (!count)
7806 661258 : continue;
7807 :
7808 94397 : struct iv_use dummy_use;
7809 94397 : struct iv_cand *best_cand = NULL, *cand;
7810 94397 : unsigned i, best_pref = 0, cand_pref;
7811 94397 : tree comp = NULL_TREE;
7812 :
7813 94397 : memset (&dummy_use, 0, sizeof (dummy_use));
7814 94397 : dummy_use.iv = info->iv;
     : /* Examine at most the first 64 groups' selected candidates, scoring
     :    each: same step (+4), same mode of base (+2), constant base (+1).  */
7815 485595 : for (i = 0; i < data->vgroups.length () && i < 64; i++)
7816 : {
7817 391198 : cand = data->vgroups[i]->selected;
7818 391198 : if (cand == best_cand)
7819 161179 : continue;
7820 152736 : cand_pref = operand_equal_p (cand->iv->step,
7821 230019 : info->iv->step, 0)
7822 230019 : ? 4 : 0;
7823 230019 : cand_pref
7824 230019 : += TYPE_MODE (TREE_TYPE (cand->iv->base))
7825 230019 : == TYPE_MODE (TREE_TYPE (info->iv->base))
7826 230019 : ? 2 : 0;
7827 230019 : cand_pref
7828 460038 : += TREE_CODE (cand->iv->base) == INTEGER_CST
7829 230019 : ? 1 : 0;
7830 230019 : if (best_cand == NULL || best_pref < cand_pref)
7831 : {
7832 176202 : tree this_comp
7833 352404 : = get_debug_computation_at (data,
7834 176202 : SSA_NAME_DEF_STMT (def),
7835 : &dummy_use, cand);
7836 176202 : if (this_comp)
7837 : {
7838 391198 : best_cand = cand;
7839 391198 : best_pref = cand_pref;
7840 391198 : comp = this_comp;
7841 : }
7842 : }
7843 : }
7844 :
7845 94397 : if (!best_cand)
7846 21369 : continue;
7847 :
7848 73028 : comp = unshare_expr (comp);
7849 73028 : if (count > 1)
7850 : {
7851 23131 : tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7852 : /* FIXME: Is setting the mode really necessary? */
7853 23131 : if (SSA_NAME_VAR (def))
7854 13263 : SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7855 : else
7856 9868 : SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7857 23131 : gdebug *def_temp
7858 23131 : = gimple_build_debug_bind (vexpr, comp, NULL);
7859 23131 : gimple_stmt_iterator gsi;
7860 :
7861 23131 : if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7862 13461 : gsi = gsi_after_labels (gimple_bb
7863 13461 : (SSA_NAME_DEF_STMT (def)));
7864 : else
7865 9670 : gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7866 :
7867 23131 : gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7868 23131 : comp = vexpr;
7869 : }
7870 :
     : /* Point every debug bind of DEF at the replacement expression.  */
7871 351711 : FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7872 : {
7873 205655 : if (!gimple_debug_bind_p (stmt))
7874 80284 : continue;
7875 :
7876 376185 : FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7877 125407 : SET_USE (use_p, comp);
7878 :
7879 125371 : update_stmt (stmt);
7880 73028 : }
7881 : }
7882 : }
7883 : }
7884 499449 : }
7885 :
7886 : /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7887 : for hash_map::traverse. */
7888 :
7889 : bool
7890 482496 : free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7891 : {
7892 482496 : if (value)
7893 : {
7894 442796 : value->~tree_niter_desc ();
7895 442796 : free (value);
7896 : }
7897 482496 : return true;
7898 : }
7899 :
7900 : /* Frees data allocated by the optimization of a single loop. */
7901 :
7902 : static void
7903 866062 : free_loop_data (struct ivopts_data *data)
7904 : {
7905 866062 : unsigned i, j;
7906 866062 : bitmap_iterator bi;
7907 866062 : tree obj;
7908 :
7909 866062 : if (data->niters)
7910 : {
7911 952838 : data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7912 940684 : delete data->niters;
7913 470342 : data->niters = NULL;
7914 : }
7915 :
     : /* Clear per-SSA-name version info recorded for this loop.  */
7916 5881387 : EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7917 : {
7918 5015325 : struct version_info *info;
7919 :
7920 5015325 : info = ver_info (data, i);
7921 5015325 : info->iv = NULL;
7922 5015325 : info->has_nonlin_use = false;
7923 5015325 : info->preserve_biv = false;
7924 5015325 : info->inv_id = 0;
7925 : }
7926 866062 : bitmap_clear (data->relevant);
7927 866062 : bitmap_clear (data->important_candidates);
7928 :
     : /* Release the use groups together with their cost maps.  */
7929 2665614 : for (i = 0; i < data->vgroups.length (); i++)
7930 : {
7931 1799552 : struct iv_group *group = data->vgroups[i];
7932 :
7933 3876970 : for (j = 0; j < group->vuses.length (); j++)
7934 2077418 : free (group->vuses[j]);
7935 1799552 : group->vuses.release ();
7936 :
7937 1799552 : BITMAP_FREE (group->related_cands);
7938 19609218 : for (j = 0; j < group->n_map_members; j++)
7939 : {
7940 17809666 : if (group->cost_map[j].inv_vars)
7941 3719247 : BITMAP_FREE (group->cost_map[j].inv_vars);
7942 17809666 : if (group->cost_map[j].inv_exprs)
7943 2033757 : BITMAP_FREE (group->cost_map[j].inv_exprs);
7944 : }
7945 :
7946 1799552 : free (group->cost_map);
7947 1799552 : free (group);
7948 : }
7949 866062 : data->vgroups.truncate (0);
7950 :
7951 5465273 : for (i = 0; i < data->vcands.length (); i++)
7952 : {
7953 4599211 : struct iv_cand *cand = data->vcands[i];
7954 :
7955 4599211 : if (cand->inv_vars)
7956 74347 : BITMAP_FREE (cand->inv_vars);
7957 4599211 : if (cand->inv_exprs)
7958 100484 : BITMAP_FREE (cand->inv_exprs);
7959 4599211 : free (cand);
7960 : }
7961 866062 : data->vcands.truncate (0);
7962 :
     : /* Grow the version info array if new SSA names appeared meanwhile.  */
7963 866062 : if (data->version_info_size < num_ssa_names)
7964 : {
7965 167 : data->version_info_size = 2 * num_ssa_names;
7966 167 : free (data->version_info);
7967 167 : data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7968 : }
7969 :
7970 866062 : data->max_inv_var_id = 0;
7971 866062 : data->max_inv_expr_id = 0;
7972 :
7973 866062 : FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7974 0 : SET_DECL_RTL (obj, NULL_RTX);
7975 :
7976 866062 : decl_rtl_to_reset.truncate (0);
7977 :
7978 866062 : data->inv_expr_tab->empty ();
7979 :
7980 866062 : data->iv_common_cand_tab->empty ();
7981 866062 : data->iv_common_cands.truncate (0);
7982 : }
7983 :
7984 : /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7985 : loop tree. */
7986 :
7987 : static void
7988 239985 : tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7989 : {
7990 239985 : free_loop_data (data);
7991 239985 : free (data->version_info);
7992 239985 : BITMAP_FREE (data->relevant);
7993 239985 : BITMAP_FREE (data->important_candidates);
7994 :
7995 239985 : decl_rtl_to_reset.release ();
7996 239985 : data->vgroups.release ();
7997 239985 : data->vcands.release ();
7998 239985 : delete data->inv_expr_tab;
7999 239985 : data->inv_expr_tab = NULL;
8000 239985 : free_affine_expand_cache (&data->name_expansion_cache);
     : /* base_object_map may never have been allocated, hence the check.  */
8001 239985 : if (data->base_object_map)
8002 162090 : delete data->base_object_map;
8003 239985 : delete data->iv_common_cand_tab;
8004 239985 : data->iv_common_cand_tab = NULL;
8005 239985 : data->iv_common_cands.release ();
8006 239985 : obstack_free (&data->iv_obstack, NULL);
8007 : }
8008 :
8009 : /* Returns true if the loop body BODY includes any function calls. */
8010 :
8011 : static bool
8012 626077 : loop_body_includes_call (basic_block *body, unsigned num_nodes)
8013 : {
8014 626077 : gimple_stmt_iterator gsi;
8015 626077 : unsigned i;
8016 :
8017 2819607 : for (i = 0; i < num_nodes; i++)
8018 23454448 : for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
8019 : {
8020 18858733 : gimple *stmt = gsi_stmt (gsi);
8021 18858733 : if (is_gimple_call (stmt)
8022 278828 : && !gimple_call_internal_p (stmt)
8023 19073529 : && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
8024 : return true;
8025 : }
8026 : return false;
8027 : }
8028 :
8029 : /* Determine cost scaling factor for basic blocks in loop. */
     : /* The factor is stored in each block's AUX field (defaulting to 1) and is
     :    proportional to the block's frequency relative to the loop header,
     :    capped so the scaled costs cannot overflow.  */
8030 : #define COST_SCALING_FACTOR_BOUND (20)
8031 :
8032 : static void
8033 500070 : determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8034 : {
8035 500070 : int lfreq = data->current_loop->header->count.to_frequency (cfun);
     : /* Only scale when optimizing for speed with usable profile data.  */
8036 500070 : if (!data->speed || lfreq <= 0)
8037 : return;
8038 :
8039 : int max_freq = lfreq;
8040 2852924 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8041 : {
8042 2439935 : body[i]->aux = (void *)(intptr_t) 1;
8043 2439935 : if (max_freq < body[i]->count.to_frequency (cfun))
8044 102485 : max_freq = body[i]->count.to_frequency (cfun);
8045 : }
8046 412989 : if (max_freq > lfreq)
8047 : {
8048 65411 : int divisor, factor;
8049 : /* Check if scaling factor itself needs to be scaled by the bound. This
8050 : is to avoid overflow when scaling cost according to profile info. */
8051 65411 : if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8052 : {
8053 : divisor = max_freq;
8054 : factor = COST_SCALING_FACTOR_BOUND;
8055 : }
8056 : else
8057 : {
8058 49564 : divisor = lfreq;
8059 49564 : factor = 1;
8060 : }
8061 994256 : for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8062 : {
8063 928845 : int bfreq = body[i]->count.to_frequency (cfun);
8064 928845 : if (bfreq <= lfreq)
8065 512599 : continue;
8066 :
8067 416246 : body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8068 : }
8069 : }
8070 : }
8071 :
8072 : /* Find doloop comparison use and set its doloop_p on if found. */
     : /* Returns true if such a use was found and marked.  */
8073 :
8074 : static bool
8075 0 : find_doloop_use (struct ivopts_data *data)
8076 : {
8077 0 : struct loop *loop = data->current_loop;
8078 :
8079 0 : for (unsigned i = 0; i < data->vgroups.length (); i++)
8080 : {
8081 0 : struct iv_group *group = data->vgroups[i];
8082 0 : if (group->type == USE_COMPARE)
8083 : {
8084 0 : gcc_assert (group->vuses.length () == 1);
8085 0 : struct iv_use *use = group->vuses[0];
8086 0 : gimple *stmt = use->stmt;
     : /* Only a GIMPLE_COND whose taken edge reaches an empty latch
     :    qualifies as the doloop exit test.  */
8087 0 : if (gimple_code (stmt) == GIMPLE_COND)
8088 : {
8089 0 : basic_block bb = gimple_bb (stmt);
8090 0 : edge true_edge, false_edge;
8091 0 : extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8092 : /* This comparison is used for loop latch. Require latch is empty
8093 : for now. */
8094 0 : if ((loop->latch == true_edge->dest
8095 0 : || loop->latch == false_edge->dest)
8096 0 : && empty_block_p (loop->latch))
8097 : {
8098 0 : group->doloop_p = true;
8099 0 : if (dump_file && (dump_flags & TDF_DETAILS))
8100 : {
8101 0 : fprintf (dump_file, "Doloop cmp iv use: ");
8102 0 : print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8103 : }
8104 0 : return true;
8105 : }
8106 : }
8107 : }
8108 : }
8109 :
8110 : return false;
8111 : }
8112 :
8113 : /* For the targets which support doloop, to predict whether later RTL doloop
8114 : transformation will perform on this loop, further detect the doloop use and
8115 : mark the flag doloop_use_p if predicted. */
8116 :
8117 : void
8118 500070 : analyze_and_mark_doloop_use (struct ivopts_data *data)
8119 : {
8120 500070 : data->doloop_use_p = false;
8121 :
8122 500070 : if (!flag_branch_on_count_reg)
8123 : return;
8124 :
8125 500070 : if (data->current_loop->unroll == USHRT_MAX)
8126 : return;
8127 :
8128 500070 : if (!generic_predict_doloop_p (data))
8129 : return;
8130 :
8131 0 : if (find_doloop_use (data))
8132 : {
8133 0 : data->doloop_use_p = true;
8134 0 : if (dump_file && (dump_flags & TDF_DETAILS))
8135 : {
8136 0 : struct loop *loop = data->current_loop;
8137 0 : fprintf (dump_file,
8138 : "Predict loop %d can perform"
8139 : " doloop optimization later.\n",
8140 : loop->num);
8141 0 : flow_loop_dump (loop, dump_file, NULL, 1);
8142 : }
8143 : }
8144 : }
8145 :
/* Optimizes the LOOP.  Returns true if anything changed.

   This is the per-loop driver: it records LOOP in DATA, runs the
   analysis phases (find IVs, find/group uses, find candidates, cost
   the alternatives), picks the optimal candidate set, and rewrites the
   loop accordingly.  SSA names of IVs made dead by the rewrite are
   accumulated in TOREMOVE for the caller to release in one batch.
   Any phase may bail out via the `finish' label, which frees the loop
   body array and per-loop data on all paths.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
			   bitmap toremove)
{
  bool changed = false;
  class iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  /* Per-loop niter info must have been cleared by the previous
     iteration's free_loop_data.  */
  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop).get_location_t ();
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, " single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
			     0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  /* Give statements fresh uids so dominance-order comparisons within
     the pass are cheap.  */
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);

  data->loop_single_exit_p
    = exit != NULL && loop_only_exit_p (loop, body, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data, body))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data, body);
  /* Give up on loops with too many use groups; the candidate-selection
     search below would be too expensive.  */
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Determine cost scaling factor for basic blocks in loop; stored in
     each block's aux field and cleared again after the IV set is
     chosen.  */
  determine_scaling_factor (data, body);

  /* Analyze doloop possibility and mark the doloop use if predicted.  */
  analyze_and_mark_doloop_use (data);

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  /* Cleanup basic block aux field.  This must happen even when no set
     was found, hence before the bailout below.  */
  for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
    body[i]->aux = NULL;
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data, toremove);

finish:
  free (body);
  free_loop_data (data);

  return changed;
}
8238 :
/* Main entry point.  Optimizes induction variables in loops.

   Walks every loop of the current function from the innermost outward,
   running tree_ssa_iv_optimize_loop on each, then performs the
   function-wide cleanup: releasing the defs of eliminated IVs and
   invalidating cached scalar-evolution and iteration-count data that
   may refer to them.  */

void
tree_ssa_iv_optimize (void)
{
  struct ivopts_data data;
  /* Collects SSA names of IVs eliminated in any loop; released in one
     batch after all loops are processed.  */
  auto_bitmap toremove;

  tree_ssa_iv_optimize_init (&data);
  mark_ssa_maybe_undefs ();

  /* Optimize the loops starting with the innermost ones.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      /* Debug counter allows bisecting which loop's transformation
	 introduced a miscompile.  */
      if (!dbg_cnt (ivopts_loop))
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop, toremove);
    }

  /* Remove eliminated IV defs.  */
  release_defs_bitset (toremove);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset_htab ();
  /* Likewise niter and control-IV information.  */
  free_numbers_of_iterations_estimates (cfun);

  tree_ssa_iv_optimize_finalize (&data);
}
8274 :
8275 : #include "gt-tree-ssa-loop-ivopts.h"
|