Branch data Line data Source code
1 : : /* Scalar evolution detector.
2 : : Copyright (C) 2003-2025 Free Software Foundation, Inc.
3 : : Contributed by Sebastian Pop <s.pop@laposte.net>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : /*
22 : : Description:
23 : :
24 : : This pass analyzes the evolution of scalar variables in loop
25 : : structures. The algorithm is based on the SSA representation,
26 : : and on the loop hierarchy tree. This algorithm is not based on
27 : : the notion of versions of a variable, as it was the case for the
28 : : previous implementations of the scalar evolution algorithm, but
29 : : it assumes that each defined name is unique.
30 : :
31 : : The notation used in this file is called "chains of recurrences",
32 : : and has been proposed by Eugene Zima, Robert Van Engelen, and
33 : : others for describing induction variables in programs. For example
34 : : "b -> {0, +, 2}_1" means that the scalar variable "b" is equal to 0
35 : : when entering in the loop_1 and has a step 2 in this loop, in other
36 : : words "for (b = 0; b < N; b+=2);". Note that the coefficients of
37 : : this chain of recurrence (or chrec [shrek]) can contain the name of
38 : : other variables, in which case they are called parametric chrecs.
39 : : For example, "b -> {a, +, 2}_1" means that the initial value of "b"
40 : : is the value of "a". In most of the cases these parametric chrecs
41 : : are fully instantiated before their use because symbolic names can
42 : : hide some difficult cases such as self-references described later
43 : : (see the Fibonacci example).
44 : :
45 : : A short sketch of the algorithm is:
46 : :
47 : : Given a scalar variable to be analyzed, follow the SSA edge to
48 : : its definition:
49 : :
50 : : - When the definition is a GIMPLE_ASSIGN: if the right hand side
51 : : (RHS) of the definition cannot be statically analyzed, the answer
52 : : of the analyzer is: "don't know".
53 : : Otherwise, for all the variables that are not yet analyzed in the
54 : : RHS, try to determine their evolution, and finally try to
55 : : evaluate the operation of the RHS that gives the evolution
56 : : function of the analyzed variable.
57 : :
58 : : - When the definition is a condition-phi-node: determine the
59 : : evolution function for all the branches of the phi node, and
60 : : finally merge these evolutions (see chrec_merge).
61 : :
62 : : - When the definition is a loop-phi-node: determine its initial
63 : : condition, that is the SSA edge defined in an outer loop, and
64 : : keep it symbolic. Then determine the SSA edges that are defined
65 : : in the body of the loop. Follow the inner edges until ending on
66 : : another loop-phi-node of the same analyzed loop. If the reached
67 : : loop-phi-node is not the starting loop-phi-node, then we keep
68 : : this definition under a symbolic form. If the reached
69 : : loop-phi-node is the same as the starting one, then we compute a
70 : : symbolic stride on the return path. The result is then the
71 : : symbolic chrec {initial_condition, +, symbolic_stride}_loop.
72 : :
73 : : Examples:
74 : :
75 : : Example 1: Illustration of the basic algorithm.
76 : :
77 : : | a = 3
78 : : | loop_1
79 : : | b = phi (a, c)
80 : : | c = b + 1
81 : : | if (c > 10) exit_loop
82 : : | endloop
83 : :
84 : : Suppose that we want to know the number of iterations of the
85 : : loop_1. The exit_loop is controlled by a COND_EXPR (c > 10). We
86 : : ask the scalar evolution analyzer two questions: what's the
87 : : scalar evolution (scev) of "c", and what's the scev of "10". For
88 : : "10" the answer is "10" since it is a scalar constant. For the
89 : : scalar variable "c", it follows the SSA edge to its definition,
90 : : "c = b + 1", and then asks again what's the scev of "b".
91 : : Following the SSA edge, we end on a loop-phi-node "b = phi (a,
92 : : c)", where the initial condition is "a", and the inner loop edge
93 : : is "c". The initial condition is kept under a symbolic form (it
94 : : may be the case that the copy constant propagation has done its
95 : : work and we end with the constant "3" as one of the edges of the
96 : : loop-phi-node). The update edge is followed to the end of the
97 : : loop, and until reaching again the starting loop-phi-node: b -> c
98 : : -> b. At this point we have drawn a path from "b" to "b" from
99 : : which we compute the stride in the loop: in this example it is
100 : : "+1". The resulting scev for "b" is "b -> {a, +, 1}_1". Now
101 : : that the scev for "b" is known, it is possible to compute the
102 : : scev for "c", that is "c -> {a + 1, +, 1}_1". In order to
103 : : determine the number of iterations in the loop_1, we have to
104 : : instantiate_parameters (loop_1, {a + 1, +, 1}_1), that gives after some
105 : : more analysis the scev {4, +, 1}_1, or in other words, this is
106 : : the function "f (x) = x + 4", where x is the iteration count of
107 : : the loop_1. Now we have to solve the inequality "x + 4 > 10",
108 : : and take the smallest iteration number for which the loop is
109 : : exited: x = 7. This loop runs from x = 0 to x = 7, and in total
110 : : there are 8 iterations. In terms of loop normalization, we have
111 : : created a variable that is implicitly defined, "x" or just "_1",
112 : : and all the other analyzed scalars of the loop are defined in
113 : : function of this variable:
114 : :
115 : : a -> 3
116 : : b -> {3, +, 1}_1
117 : : c -> {4, +, 1}_1
118 : :
119 : : or in terms of a C program:
120 : :
121 : : | a = 3
122 : : | for (x = 0; x <= 7; x++)
123 : : | {
124 : : | b = x + 3
125 : : | c = x + 4
126 : : | }
127 : :
128 : : Example 2a: Illustration of the algorithm on nested loops.
129 : :
130 : : | loop_1
131 : : | a = phi (1, b)
132 : : | c = a + 2
133 : : | loop_2 10 times
134 : : | b = phi (c, d)
135 : : | d = b + 3
136 : : | endloop
137 : : | endloop
138 : :
139 : : For analyzing the scalar evolution of "a", the algorithm follows
140 : : the SSA edge into the loop's body: "a -> b". "b" is an inner
141 : : loop-phi-node, and its analysis as in Example 1, gives:
142 : :
143 : : b -> {c, +, 3}_2
144 : : d -> {c + 3, +, 3}_2
145 : :
146 : : Following the SSA edge for the initial condition, we end on "c = a
147 : : + 2", and then on the starting loop-phi-node "a". From this point,
148 : : the loop stride is computed: back on "c = a + 2" we get a "+2" in
149 : : the loop_1, then on the loop-phi-node "b" we compute the overall
150 : : effect of the inner loop that is "b = c + 30", and we get a "+30"
151 : : in the loop_1. That means that the overall stride in loop_1 is
152 : : equal to "+32", and the result is:
153 : :
154 : : a -> {1, +, 32}_1
155 : : c -> {3, +, 32}_1
156 : :
157 : : Example 2b: Multivariate chains of recurrences.
158 : :
159 : : | loop_1
160 : : | k = phi (0, k + 1)
161 : : | loop_2 4 times
162 : : | j = phi (0, j + 1)
163 : : | loop_3 4 times
164 : : | i = phi (0, i + 1)
165 : : | A[j + k] = ...
166 : : | endloop
167 : : | endloop
168 : : | endloop
169 : :
170 : : Analyzing the access function of array A with
171 : : instantiate_parameters (loop_1, "j + k"), we obtain the
172 : : instantiation and the analysis of the scalar variables "j" and "k"
173 : : in loop_1. This leads to the scalar evolution {4, +, 1}_1: the end
174 : : value of loop_2 for "j" is 4, and the evolution of "k" in loop_1 is
175 : : {0, +, 1}_1. To obtain the evolution function in loop_3 and
176 : : instantiate the scalar variables up to loop_1, one has to use:
177 : : instantiate_scev (block_before_loop (loop_1), loop_3, "j + k").
178 : : The result of this call is {{0, +, 1}_1, +, 1}_2.
179 : :
180 : : Example 3: Higher degree polynomials.
181 : :
182 : : | loop_1
183 : : | a = phi (2, b)
184 : : | c = phi (5, d)
185 : : | b = a + 1
186 : : | d = c + a
187 : : | endloop
188 : :
189 : : a -> {2, +, 1}_1
190 : : b -> {3, +, 1}_1
191 : : c -> {5, +, a}_1
192 : : d -> {5 + a, +, a}_1
193 : :
194 : : instantiate_parameters (loop_1, {5, +, a}_1) -> {5, +, 2, +, 1}_1
195 : : instantiate_parameters (loop_1, {5 + a, +, a}_1) -> {7, +, 3, +, 1}_1
196 : :
197 : : Example 4: Lucas, Fibonacci, or mixers in general.
198 : :
199 : : | loop_1
200 : : | a = phi (1, b)
201 : : | c = phi (3, d)
202 : : | b = c
203 : : | d = c + a
204 : : | endloop
205 : :
206 : : a -> (1, c)_1
207 : : c -> {3, +, a}_1
208 : :
209 : : The syntax "(1, c)_1" stands for a PEELED_CHREC that has the
210 : : following semantics: during the first iteration of the loop_1, the
211 : : variable contains the value 1, and then it contains the value "c".
212 : : Note that this syntax is close to the syntax of the loop-phi-node:
213 : : "a -> (1, c)_1" vs. "a = phi (1, c)".
214 : :
215 : : The symbolic chrec representation contains all the semantics of the
216 : : original code. What is more difficult is to use this information.
217 : :
218 : : Example 5: Flip-flops, or exchangers.
219 : :
220 : : | loop_1
221 : : | a = phi (1, b)
222 : : | c = phi (3, d)
223 : : | b = c
224 : : | d = a
225 : : | endloop
226 : :
227 : : a -> (1, c)_1
228 : : c -> (3, a)_1
229 : :
230 : : Based on these symbolic chrecs, it is possible to refine this
231 : : information into the more precise PERIODIC_CHRECs:
232 : :
233 : : a -> |1, 3|_1
234 : : c -> |3, 1|_1
235 : :
236 : : This transformation is not yet implemented.
237 : :
238 : : Further readings:
239 : :
240 : : You can find a more detailed description of the algorithm in:
241 : : http://icps.u-strasbg.fr/~pop/DEA_03_Pop.pdf
242 : : http://icps.u-strasbg.fr/~pop/DEA_03_Pop.ps.gz. But note that
243 : : this is a preliminary report and some of the details of the
244 : : algorithm have changed. I'm working on a research report that
245 : : updates the description of the algorithms to reflect the design
246 : : choices used in this implementation.
247 : :
248 : : A set of slides show a high level overview of the algorithm and run
249 : : an example through the scalar evolution analyzer:
250 : : http://cri.ensmp.fr/~pop/gcc/mar04/slides.pdf
251 : :
252 : : The slides that I have presented at the GCC Summit'04 are available
253 : : at: http://cri.ensmp.fr/~pop/gcc/20040604/gccsummit-lno-spop.pdf
254 : : */
255 : :
256 : : #include "config.h"
257 : : #include "system.h"
258 : : #include "coretypes.h"
259 : : #include "backend.h"
260 : : #include "target.h"
261 : : #include "rtl.h"
262 : : #include "optabs-query.h"
263 : : #include "tree.h"
264 : : #include "gimple.h"
265 : : #include "ssa.h"
266 : : #include "gimple-pretty-print.h"
267 : : #include "fold-const.h"
268 : : #include "gimplify.h"
269 : : #include "gimple-iterator.h"
270 : : #include "gimplify-me.h"
271 : : #include "tree-cfg.h"
272 : : #include "tree-ssa-loop-ivopts.h"
273 : : #include "tree-ssa-loop-manip.h"
274 : : #include "tree-ssa-loop-niter.h"
275 : : #include "tree-ssa-loop.h"
276 : : #include "tree-ssa.h"
277 : : #include "cfgloop.h"
278 : : #include "tree-chrec.h"
279 : : #include "tree-affine.h"
280 : : #include "tree-scalar-evolution.h"
281 : : #include "dumpfile.h"
282 : : #include "tree-ssa-propagate.h"
283 : : #include "gimple-fold.h"
284 : : #include "tree-into-ssa.h"
285 : : #include "builtins.h"
286 : : #include "case-cfn-macros.h"
287 : : #include "tree-eh.h"
288 : :
289 : : static tree analyze_scalar_evolution_1 (class loop *, tree);
290 : : static tree analyze_scalar_evolution_for_address_of (class loop *loop,
291 : : tree var);
292 : :
293 : : /* The cached information about an SSA name with version NAME_VERSION,
294 : : claiming that below basic block with index INSTANTIATED_BELOW, the
295 : : value of the SSA name can be expressed as CHREC. */
296 : :
297 : : struct GTY((for_user)) scev_info_str {
298 : : unsigned int name_version;
299 : : int instantiated_below;
300 : : tree chrec;
301 : : };
302 : :
303 : : /* Counters for the scev database. */
304 : : static unsigned nb_set_scev = 0;
305 : : static unsigned nb_get_scev = 0;
306 : :
307 : : struct scev_info_hasher : ggc_ptr_hash<scev_info_str>
308 : : {
309 : : static hashval_t hash (scev_info_str *i);
310 : : static bool equal (const scev_info_str *a, const scev_info_str *b);
311 : : };
312 : :
313 : : static GTY (()) hash_table<scev_info_hasher> *scalar_evolution_info;
314 : :
315 : :
316 : : /* Constructs a new SCEV_INFO_STR structure for VAR and INSTANTIATED_BELOW. */
317 : :
318 : : static inline struct scev_info_str *
319 : 51241426 : new_scev_info_str (basic_block instantiated_below, tree var)
320 : : {
321 : 51241426 : struct scev_info_str *res;
322 : :
323 : 51241426 : res = ggc_alloc<scev_info_str> ();
324 : 51241426 : res->name_version = SSA_NAME_VERSION (var);
325 : 51241426 : res->chrec = chrec_not_analyzed_yet;
326 : 51241426 : res->instantiated_below = instantiated_below->index;
327 : :
328 : 51241426 : return res;
329 : : }
330 : :
331 : : /* Computes a hash function for database element ELT. */
332 : :
333 : : hashval_t
334 : 1039566432 : scev_info_hasher::hash (scev_info_str *elt)
335 : : {
336 : 1039566432 : return elt->name_version ^ elt->instantiated_below;
337 : : }
338 : :
339 : : /* Compares database elements E1 and E2. */
340 : :
341 : : bool
342 : 1030957049 : scev_info_hasher::equal (const scev_info_str *elt1, const scev_info_str *elt2)
343 : : {
344 : 1030957049 : return (elt1->name_version == elt2->name_version
345 : 1030957049 : && elt1->instantiated_below == elt2->instantiated_below);
346 : : }
347 : :
348 : : /* Get the scalar evolution of VAR for INSTANTIATED_BELOW basic block.
349 : : A first query on VAR returns chrec_not_analyzed_yet. */
350 : :
351 : : static tree *
352 : 201467518 : find_var_scev_info (basic_block instantiated_below, tree var)
353 : : {
354 : 201467518 : struct scev_info_str *res;
355 : 201467518 : struct scev_info_str tmp;
356 : :
357 : 201467518 : tmp.name_version = SSA_NAME_VERSION (var);
358 : 201467518 : tmp.instantiated_below = instantiated_below->index;
359 : 201467518 : scev_info_str **slot = scalar_evolution_info->find_slot (&tmp, INSERT);
360 : :
361 : 201467518 : if (!*slot)
362 : 51241426 : *slot = new_scev_info_str (instantiated_below, var);
363 : 201467518 : res = *slot;
364 : :
365 : 201467518 : return &res->chrec;
366 : : }
367 : :
368 : :
369 : : /* Hashtable helpers for a temporary hash-table used when
370 : : analyzing a scalar evolution, instantiating a CHREC or
371 : : resolving mixers. */
372 : :
373 : : class instantiate_cache_type
374 : : {
375 : : public:
376 : : htab_t map;
377 : : vec<scev_info_str> entries;
378 : :
379 : 127020530 : instantiate_cache_type () : map (NULL), entries (vNULL) {}
380 : : ~instantiate_cache_type ();
381 : 124312466 : tree get (unsigned slot) { return entries[slot].chrec; }
382 : 96505155 : void set (unsigned slot, tree chrec) { entries[slot].chrec = chrec; }
383 : : };
384 : :
385 : 127020530 : instantiate_cache_type::~instantiate_cache_type ()
386 : : {
387 : 127020530 : if (map != NULL)
388 : : {
389 : 28752845 : htab_delete (map);
390 : 28752845 : entries.release ();
391 : : }
392 : 127020530 : }
393 : :
394 : : /* Cache to avoid infinite recursion when instantiating an SSA name.
395 : : Live during the outermost analyze_scalar_evolution, instantiate_scev
396 : : or resolve_mixers call. */
397 : : static instantiate_cache_type *global_cache;
398 : :
399 : :
400 : : /* Return true when PHI is a loop-phi-node. */
401 : :
402 : : static bool
403 : 25642579 : loop_phi_node_p (gimple *phi)
404 : : {
405 : : /* The implementation of this function is based on the following
406 : : property: "all the loop-phi-nodes of a loop are contained in the
407 : : loop's header basic block". */
408 : :
409 : 0 : return loop_containing_stmt (phi)->header == gimple_bb (phi);
410 : : }
411 : :
412 : : /* Compute the scalar evolution for EVOLUTION_FN after crossing LOOP.
413 : : In general, in the case of multivariate evolutions we want to get
414 : : the evolution in different loops. LOOP specifies the level for
415 : : which to get the evolution.
416 : :
417 : : Example:
418 : :
419 : : | for (j = 0; j < 100; j++)
420 : : | {
421 : : | for (k = 0; k < 100; k++)
422 : : | {
423 : : | i = k + j; - Here the value of i is a function of j, k.
424 : : | }
425 : : | ... = i - Here the value of i is a function of j.
426 : : | }
427 : : | ... = i - Here the value of i is a scalar.
428 : :
429 : : Example:
430 : :
431 : : | i_0 = ...
432 : : | loop_1 10 times
433 : : | i_1 = phi (i_0, i_2)
434 : : | i_2 = i_1 + 2
435 : : | endloop
436 : :
437 : : This loop has the same effect as:
438 : : LOOP_1 has the same effect as:
439 : :
440 : : | i_1 = i_0 + 20
441 : :
442 : : The overall effect of the loop, "i_0 + 20" in the previous example,
443 : : is obtained by passing in the parameters: LOOP = 1,
444 : : EVOLUTION_FN = {i_0, +, 2}_1.
445 : : */
446 : :
447 : : tree
448 : 5867478 : compute_overall_effect_of_inner_loop (class loop *loop, tree evolution_fn)
449 : : {
450 : 6702338 : bool val = false;
451 : :
452 : 6702338 : if (evolution_fn == chrec_dont_know)
453 : : return chrec_dont_know;
454 : :
455 : 6590125 : else if (TREE_CODE (evolution_fn) == POLYNOMIAL_CHREC)
456 : : {
457 : 2346843 : class loop *inner_loop = get_chrec_loop (evolution_fn);
458 : :
459 : 2346843 : if (inner_loop == loop
460 : 2346843 : || flow_loop_nested_p (loop, inner_loop))
461 : : {
462 : 2346843 : tree nb_iter = number_of_latch_executions (inner_loop);
463 : :
464 : 2346843 : if (nb_iter == chrec_dont_know)
465 : : return chrec_dont_know;
466 : : else
467 : : {
468 : 834860 : tree res;
469 : :
470 : : /* evolution_fn is the evolution function in LOOP. Get
471 : : its value in the nb_iter-th iteration. */
472 : 834860 : res = chrec_apply (inner_loop->num, evolution_fn, nb_iter);
473 : :
474 : 834860 : if (chrec_contains_symbols_defined_in_loop (res, loop->num))
475 : 56596 : res = instantiate_parameters (loop, res);
476 : :
477 : : /* Continue the computation until ending on a parent of LOOP. */
478 : 834860 : return compute_overall_effect_of_inner_loop (loop, res);
479 : : }
480 : : }
481 : : else
482 : : return evolution_fn;
483 : : }
484 : :
485 : : /* If the evolution function is an invariant, there is nothing to do. */
486 : 4243282 : else if (no_evolution_in_loop_p (evolution_fn, loop->num, &val) && val)
487 : : return evolution_fn;
488 : :
489 : : else
490 : 3505592 : return chrec_dont_know;
491 : : }
492 : :
493 : : /* Associate CHREC to SCALAR. */
494 : :
495 : : static void
496 : 48874322 : set_scalar_evolution (basic_block instantiated_below, tree scalar, tree chrec)
497 : : {
498 : 48874322 : tree *scalar_info;
499 : :
500 : 48874322 : if (TREE_CODE (scalar) != SSA_NAME)
501 : : return;
502 : :
503 : 48874322 : scalar_info = find_var_scev_info (instantiated_below, scalar);
504 : :
505 : 48874322 : if (dump_file)
506 : : {
507 : 71390 : if (dump_flags & TDF_SCEV)
508 : : {
509 : 8 : fprintf (dump_file, "(set_scalar_evolution \n");
510 : 8 : fprintf (dump_file, " instantiated_below = %d \n",
511 : : instantiated_below->index);
512 : 8 : fprintf (dump_file, " (scalar = ");
513 : 8 : print_generic_expr (dump_file, scalar);
514 : 8 : fprintf (dump_file, ")\n (scalar_evolution = ");
515 : 8 : print_generic_expr (dump_file, chrec);
516 : 8 : fprintf (dump_file, "))\n");
517 : : }
518 : 71390 : if (dump_flags & TDF_STATS)
519 : 7243 : nb_set_scev++;
520 : : }
521 : :
522 : 48874322 : *scalar_info = chrec;
523 : : }
524 : :
525 : : /* Retrieve the chrec associated to SCALAR instantiated below
526 : : INSTANTIATED_BELOW block. */
527 : :
528 : : static tree
529 : 189358174 : get_scalar_evolution (basic_block instantiated_below, tree scalar)
530 : : {
531 : 189358174 : tree res;
532 : :
533 : 189358174 : if (dump_file)
534 : : {
535 : 596083 : if (dump_flags & TDF_SCEV)
536 : : {
537 : 30 : fprintf (dump_file, "(get_scalar_evolution \n");
538 : 30 : fprintf (dump_file, " (scalar = ");
539 : 30 : print_generic_expr (dump_file, scalar);
540 : 30 : fprintf (dump_file, ")\n");
541 : : }
542 : 596083 : if (dump_flags & TDF_STATS)
543 : 51116 : nb_get_scev++;
544 : : }
545 : :
546 : 189358174 : if (VECTOR_TYPE_P (TREE_TYPE (scalar))
547 : 189358174 : || TREE_CODE (TREE_TYPE (scalar)) == COMPLEX_TYPE)
548 : : /* For chrec_dont_know we keep the symbolic form. */
549 : : res = scalar;
550 : : else
551 : 189069830 : switch (TREE_CODE (scalar))
552 : : {
553 : 154619974 : case SSA_NAME:
554 : 154619974 : if (SSA_NAME_IS_DEFAULT_DEF (scalar))
555 : : res = scalar;
556 : : else
557 : 152593196 : res = *find_var_scev_info (instantiated_below, scalar);
558 : : break;
559 : :
560 : : case REAL_CST:
561 : : case FIXED_CST:
562 : : case INTEGER_CST:
563 : : res = scalar;
564 : : break;
565 : :
566 : : default:
567 : 189358174 : res = chrec_not_analyzed_yet;
568 : : break;
569 : : }
570 : :
571 : 189358174 : if (dump_file && (dump_flags & TDF_SCEV))
572 : : {
573 : 30 : fprintf (dump_file, " (scalar_evolution = ");
574 : 30 : print_generic_expr (dump_file, res);
575 : 30 : fprintf (dump_file, "))\n");
576 : : }
577 : :
578 : 189358174 : return res;
579 : : }
580 : :
581 : :
582 : : /* Depth first search algorithm. */
583 : :
584 : : enum t_bool {
585 : : t_false,
586 : : t_true,
587 : : t_dont_know
588 : : };
589 : :
590 : : class scev_dfs
591 : : {
592 : : public:
593 : 10560564 : scev_dfs (class loop *loop_, gphi *phi_, tree init_cond_)
594 : 10560564 : : loop (loop_), loop_phi_node (phi_), init_cond (init_cond_) {}
595 : : t_bool get_ev (tree *, tree);
596 : :
597 : : private:
598 : : t_bool follow_ssa_edge_expr (gimple *, tree, tree *, int);
599 : : t_bool follow_ssa_edge_binary (gimple *at_stmt,
600 : : tree type, tree rhs0, enum tree_code code,
601 : : tree rhs1, tree *evolution_of_loop, int limit);
602 : : t_bool follow_ssa_edge_in_condition_phi_branch (int i,
603 : : gphi *condition_phi,
604 : : tree *evolution_of_branch,
605 : : tree init_cond, int limit);
606 : : t_bool follow_ssa_edge_in_condition_phi (gphi *condition_phi,
607 : : tree *evolution_of_loop, int limit);
608 : : t_bool follow_ssa_edge_inner_loop_phi (gphi *loop_phi_node,
609 : : tree *evolution_of_loop, int limit);
610 : : tree add_to_evolution (tree chrec_before, enum tree_code code,
611 : : tree to_add, gimple *at_stmt);
612 : : tree add_to_evolution_1 (tree chrec_before, tree to_add, gimple *at_stmt);
613 : :
614 : : class loop *loop;
615 : : gphi *loop_phi_node;
616 : : tree init_cond;
617 : : };
618 : :
619 : : t_bool
620 : 10560564 : scev_dfs::get_ev (tree *ev_fn, tree arg)
621 : : {
622 : 10560564 : *ev_fn = chrec_dont_know;
623 : 10560564 : return follow_ssa_edge_expr (loop_phi_node, arg, ev_fn, 0);
624 : : }
625 : :
626 : : /* Helper function for add_to_evolution. Returns the evolution
627 : : function for an assignment of the form "a = b + c", where "a" and
628 : : "b" are on the strongly connected component. CHREC_BEFORE is the
629 : : information that we already have collected up to this point.
630 : : TO_ADD is the evolution of "c".
631 : :
632 : : When CHREC_BEFORE has an evolution part in LOOP_NB, add to this
633 : : evolution the expression TO_ADD, otherwise construct an evolution
634 : : part for this loop. */
635 : :
636 : : tree
637 : 8636578 : scev_dfs::add_to_evolution_1 (tree chrec_before, tree to_add, gimple *at_stmt)
638 : : {
639 : 8636578 : tree type, left, right;
640 : 8636578 : unsigned loop_nb = loop->num;
641 : 8636578 : class loop *chloop;
642 : :
643 : 8636578 : switch (TREE_CODE (chrec_before))
644 : : {
645 : 131823 : case POLYNOMIAL_CHREC:
646 : 131823 : chloop = get_chrec_loop (chrec_before);
647 : 131823 : if (chloop == loop
648 : 131823 : || flow_loop_nested_p (chloop, loop))
649 : : {
650 : 131823 : unsigned var;
651 : :
652 : 131823 : type = chrec_type (chrec_before);
653 : :
654 : : /* When there is no evolution part in this loop, build it. */
655 : 131823 : if (chloop != loop)
656 : : {
657 : 0 : var = loop_nb;
658 : 0 : left = chrec_before;
659 : 0 : right = SCALAR_FLOAT_TYPE_P (type)
660 : 0 : ? build_real (type, dconst0)
661 : 0 : : build_int_cst (type, 0);
662 : : }
663 : : else
664 : : {
665 : 131823 : var = CHREC_VARIABLE (chrec_before);
666 : 131823 : left = CHREC_LEFT (chrec_before);
667 : 131823 : right = CHREC_RIGHT (chrec_before);
668 : : }
669 : :
670 : 131823 : to_add = chrec_convert (type, to_add, at_stmt);
671 : 131823 : right = chrec_convert_rhs (type, right, at_stmt);
672 : 131823 : right = chrec_fold_plus (chrec_type (right), right, to_add);
673 : : /* When we have an evolution in a non-wrapping type and
674 : : in the process of accumulating CHREC_RIGHT there was
675 : : overflow this indicates in the association that happened
676 : : in building the CHREC clearly involved UB. Avoid this.
677 : : In building a CHREC we basically turn (a + INCR1) + INCR2
678 : : into a + (INCR1 + INCR2) which is not always valid.
679 : : Note this check only catches few invalid cases. */
680 : 60528 : if ((INTEGRAL_TYPE_P (type) && ! TYPE_OVERFLOW_WRAPS (type))
681 : 21892 : && TREE_CODE (right) == INTEGER_CST
682 : 140085 : && TREE_OVERFLOW (right))
683 : 5 : return chrec_dont_know;
684 : 131818 : return build_polynomial_chrec (var, left, right);
685 : : }
686 : : else
687 : : {
688 : 0 : gcc_assert (flow_loop_nested_p (loop, chloop));
689 : :
690 : : /* Search the evolution in LOOP_NB. */
691 : 0 : left = add_to_evolution_1 (CHREC_LEFT (chrec_before),
692 : : to_add, at_stmt);
693 : 0 : right = CHREC_RIGHT (chrec_before);
694 : 0 : right = chrec_convert_rhs (chrec_type (left), right, at_stmt);
695 : 0 : return build_polynomial_chrec (CHREC_VARIABLE (chrec_before),
696 : 0 : left, right);
697 : : }
698 : :
699 : 8504755 : default:
700 : : /* These nodes do not depend on a loop. */
701 : 8504755 : if (chrec_before == chrec_dont_know)
702 : : return chrec_dont_know;
703 : :
704 : 8475740 : left = chrec_before;
705 : 8475740 : right = chrec_convert_rhs (chrec_type (left), to_add, at_stmt);
706 : : /* When we add the first evolution we need to replace the symbolic
707 : : evolution we've put in when the DFS reached the loop PHI node
708 : : with the initial value. There's only a limited cases of
709 : : extra operations ontop of that symbol allowed, namely
710 : : sign-conversions we can look through. For other cases we leave
711 : : the symbolic initial condition which causes build_polynomial_chrec
712 : : to return chrec_dont_know. See PR42512, PR66375 and PR107176 for
713 : : cases we mishandled before. */
714 : 8475740 : STRIP_NOPS (chrec_before);
715 : 8475740 : if (chrec_before == gimple_phi_result (loop_phi_node))
716 : 8474881 : left = fold_convert (TREE_TYPE (left), init_cond);
717 : 8475740 : return build_polynomial_chrec (loop_nb, left, right);
718 : : }
719 : : }
720 : :
721 : : /* Add TO_ADD to the evolution part of CHREC_BEFORE in the dimension
722 : : of LOOP_NB.
723 : :
724 : : Description (provided for completeness, for those who read code in
725 : : a plane, and for my poor 62 bytes brain that would have forgotten
726 : : all this in the next two or three months):
727 : :
728 : : The algorithm of translation of programs from the SSA representation
729 : : into the chrecs syntax is based on a pattern matching. After having
730 : : reconstructed the overall tree expression for a loop, there are only
731 : : two cases that can arise:
732 : :
733 : : 1. a = loop-phi (init, a + expr)
734 : : 2. a = loop-phi (init, expr)
735 : :
736 : : where EXPR is either a scalar constant with respect to the analyzed
737 : : loop (this is a degree 0 polynomial), or an expression containing
738 : : other loop-phi definitions (these are higher degree polynomials).
739 : :
740 : : Examples:
741 : :
742 : : 1.
743 : : | init = ...
744 : : | loop_1
745 : : | a = phi (init, a + 5)
746 : : | endloop
747 : :
748 : : 2.
749 : : | inita = ...
750 : : | initb = ...
751 : : | loop_1
752 : : | a = phi (inita, 2 * b + 3)
753 : : | b = phi (initb, b + 1)
754 : : | endloop
755 : :
756 : : For the first case, the semantics of the SSA representation is:
757 : :
758 : : | a (x) = init + \sum_{j = 0}^{x - 1} expr (j)
759 : :
760 : : that is, there is a loop index "x" that determines the scalar value
761 : : of the variable during the loop execution. During the first
762 : : iteration, the value is that of the initial condition INIT, while
763 : : during the subsequent iterations, it is the sum of the initial
764 : : condition with the sum of all the values of EXPR from the initial
765 : : iteration to the before last considered iteration.
766 : :
767 : : For the second case, the semantics of the SSA program is:
768 : :
769 : : | a (x) = init, if x = 0;
770 : : | expr (x - 1), otherwise.
771 : :
772 : : The second case corresponds to the PEELED_CHREC, whose syntax is
773 : : close to the syntax of a loop-phi-node:
774 : :
775 : : | phi (init, expr) vs. (init, expr)_x
776 : :
777 : : The proof of the translation algorithm for the first case is a
778 : : proof by structural induction based on the degree of EXPR.
779 : :
780 : : Degree 0:
781 : : When EXPR is a constant with respect to the analyzed loop, or in
782 : : other words when EXPR is a polynomial of degree 0, the evolution of
783 : : the variable A in the loop is an affine function with an initial
784 : : condition INIT, and a step EXPR. In order to show this, we start
785 : : from the semantics of the SSA representation:
786 : :
787 : : f (x) = init + \sum_{j = 0}^{x - 1} expr (j)
788 : :
789 : : and since "expr (j)" is a constant with respect to "j",
790 : :
791 : : f (x) = init + x * expr
792 : :
793 : : Finally, based on the semantics of the pure sum chrecs, by
794 : : identification we get the corresponding chrecs syntax:
795 : :
796 : : f (x) = init * \binom{x}{0} + expr * \binom{x}{1}
797 : : f (x) -> {init, +, expr}_x
798 : :
799 : : Higher degree:
800 : : Suppose that EXPR is a polynomial of degree N with respect to the
801 : : analyzed loop_x for which we have already determined that it is
802 : : written under the chrecs syntax:
803 : :
804 : : | expr (x) -> {b_0, +, b_1, +, ..., +, b_{n-1}} (x)
805 : :
806 : : We start from the semantics of the SSA program:
807 : :
808 : : | f (x) = init + \sum_{j = 0}^{x - 1} expr (j)
809 : : |
810 : : | f (x) = init + \sum_{j = 0}^{x - 1}
811 : : | (b_0 * \binom{j}{0} + ... + b_{n-1} * \binom{j}{n-1})
812 : : |
813 : : | f (x) = init + \sum_{j = 0}^{x - 1}
814 : : | \sum_{k = 0}^{n - 1} (b_k * \binom{j}{k})
815 : : |
816 : : | f (x) = init + \sum_{k = 0}^{n - 1}
817 : : | (b_k * \sum_{j = 0}^{x - 1} \binom{j}{k})
818 : : |
819 : : | f (x) = init + \sum_{k = 0}^{n - 1}
820 : : | (b_k * \binom{x}{k + 1})
821 : : |
822 : : | f (x) = init + b_0 * \binom{x}{1} + ...
823 : : | + b_{n-1} * \binom{x}{n}
824 : : |
825 : : | f (x) = init * \binom{x}{0} + b_0 * \binom{x}{1} + ...
826 : : | + b_{n-1} * \binom{x}{n}
827 : : |
828 : :
829 : : And finally from the definition of the chrecs syntax, we identify:
830 : : | f (x) -> {init, +, b_0, +, ..., +, b_{n-1}}_x
831 : :
832 : : This shows the mechanism that stands behind the add_to_evolution
833 : : function. An important point is that the use of symbolic
834 : : parameters avoids the need of an analysis schedule.
835 : :
836 : : Example:
837 : :
838 : : | inita = ...
839 : : | initb = ...
840 : : | loop_1
841 : : | a = phi (inita, a + 2 + b)
842 : : | b = phi (initb, b + 1)
843 : : | endloop
844 : :
845 : : When analyzing "a", the algorithm keeps "b" symbolically:
846 : :
847 : : | a -> {inita, +, 2 + b}_1
848 : :
849 : : Then, after instantiation, the analyzer ends on the evolution:
850 : :
851 : : | a -> {inita, +, 2 + initb, +, 1}_1
852 : :
853 : : */
854 : :
855 : : tree
856 : 8636578 : scev_dfs::add_to_evolution (tree chrec_before, enum tree_code code,
857 : : tree to_add, gimple *at_stmt)
858 : : {
859 : 8636578 : tree type = chrec_type (to_add);
860 : 8636578 : tree res = NULL_TREE;
861 : :
862 : 8636578 : if (to_add == NULL_TREE)
863 : : return chrec_before;
864 : :
865 : : /* TO_ADD is either a scalar, or a parameter. TO_ADD is not
866 : : instantiated at this point. */
867 : 8636578 : if (TREE_CODE (to_add) == POLYNOMIAL_CHREC)
868 : : /* This should not happen. */
869 : 0 : return chrec_dont_know;
870 : :
871 : 8636578 : if (dump_file && (dump_flags & TDF_SCEV))
872 : : {
873 : 0 : fprintf (dump_file, "(add_to_evolution \n");
874 : 0 : fprintf (dump_file, " (loop_nb = %d)\n", loop->num);
875 : 0 : fprintf (dump_file, " (chrec_before = ");
876 : 0 : print_generic_expr (dump_file, chrec_before);
877 : 0 : fprintf (dump_file, ")\n (to_add = ");
878 : 0 : print_generic_expr (dump_file, to_add);
879 : 0 : fprintf (dump_file, ")\n");
880 : : }
881 : :
882 : 8636578 : if (code == MINUS_EXPR)
883 : 1560408 : to_add = chrec_fold_multiply (type, to_add, SCALAR_FLOAT_TYPE_P (type)
884 : 4633 : ? build_real (type, dconstm1)
885 : 1555775 : : build_int_cst_type (type, -1));
886 : :
887 : 8636578 : res = add_to_evolution_1 (chrec_before, to_add, at_stmt);
888 : :
889 : 8636578 : if (dump_file && (dump_flags & TDF_SCEV))
890 : : {
891 : 0 : fprintf (dump_file, " (res = ");
892 : 0 : print_generic_expr (dump_file, res);
893 : 0 : fprintf (dump_file, "))\n");
894 : : }
895 : :
896 : : return res;
897 : : }
898 : :
899 : :
900 : : /* Follow the ssa edge into the binary expression RHS0 CODE RHS1.
901 : : Return true if the strongly connected component has been found. */
902 : :
903 : : t_bool
904 : 1021728 : scev_dfs::follow_ssa_edge_binary (gimple *at_stmt, tree type, tree rhs0,
905 : : enum tree_code code, tree rhs1,
906 : : tree *evolution_of_loop, int limit)
907 : : {
908 : 1021728 : t_bool res = t_false;
909 : 1021728 : tree evol;
910 : :
911 : 1021728 : switch (code)
912 : : {
913 : 1017375 : case POINTER_PLUS_EXPR:
914 : 1017375 : case PLUS_EXPR:
915 : 1017375 : if (TREE_CODE (rhs0) == SSA_NAME)
916 : : {
917 : 1002784 : if (TREE_CODE (rhs1) == SSA_NAME)
918 : : {
919 : : /* Match an assignment under the form:
920 : : "a = b + c". */
921 : :
922 : : /* We want only assignments of form "name + name" contribute to
923 : : LIMIT, as the other cases do not necessarily contribute to
924 : : the complexity of the expression. */
925 : 1002784 : limit++;
926 : :
927 : 1002784 : evol = *evolution_of_loop;
928 : 1002784 : res = follow_ssa_edge_expr (at_stmt, rhs0, &evol, limit);
929 : 1002784 : if (res == t_true)
930 : 429627 : *evolution_of_loop = add_to_evolution
931 : 429627 : (chrec_convert (type, evol, at_stmt), code, rhs1, at_stmt);
932 : 573157 : else if (res == t_false)
933 : : {
934 : 556274 : res = follow_ssa_edge_expr
935 : 556274 : (at_stmt, rhs1, evolution_of_loop, limit);
936 : 556274 : if (res == t_true)
937 : 412051 : *evolution_of_loop = add_to_evolution
938 : 412051 : (chrec_convert (type, *evolution_of_loop, at_stmt),
939 : : code, rhs0, at_stmt);
940 : : }
941 : : }
942 : :
943 : : else
944 : 0 : gcc_unreachable (); /* Handled in caller. */
945 : : }
946 : :
947 : 14591 : else if (TREE_CODE (rhs1) == SSA_NAME)
948 : : {
949 : : /* Match an assignment under the form:
950 : : "a = ... + c". */
951 : 5810 : res = follow_ssa_edge_expr (at_stmt, rhs1, evolution_of_loop, limit);
952 : 5810 : if (res == t_true)
953 : 5139 : *evolution_of_loop = add_to_evolution
954 : 5139 : (chrec_convert (type, *evolution_of_loop, at_stmt),
955 : : code, rhs0, at_stmt);
956 : : }
957 : :
958 : : else
959 : : /* Otherwise, match an assignment under the form:
960 : : "a = ... + ...". */
961 : : /* And there is nothing to do. */
962 : : res = t_false;
963 : : break;
964 : :
965 : 4353 : case MINUS_EXPR:
966 : : /* This case is under the form "opnd0 = rhs0 - rhs1". */
967 : 4353 : if (TREE_CODE (rhs0) == SSA_NAME)
968 : 0 : gcc_unreachable (); /* Handled in caller. */
969 : : else
970 : : /* Otherwise, match an assignment under the form:
971 : : "a = ... - ...". */
972 : : /* And there is nothing to do. */
973 : : res = t_false;
974 : : break;
975 : :
976 : : default:
977 : : res = t_false;
978 : : }
979 : :
980 : 1021728 : return res;
981 : : }
982 : :
983 : : /* Checks whether the I-th argument of a PHI comes from a backedge. */
984 : :
985 : : static bool
986 : 8386222 : backedge_phi_arg_p (gphi *phi, int i)
987 : : {
988 : 8386222 : const_edge e = gimple_phi_arg_edge (phi, i);
989 : :
990 : : /* We would in fact like to test EDGE_DFS_BACK here, but we do not care
991 : : about updating it anywhere, and this should work as well most of the
992 : : time. */
993 : 8386222 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
994 : 51107 : return true;
995 : :
996 : : return false;
997 : : }
998 : :
999 : : /* Helper function for one branch of the condition-phi-node. Return
1000 : : true if the strongly connected component has been found following
1001 : : this path. */
1002 : :
1003 : : t_bool
1004 : 3392975 : scev_dfs::follow_ssa_edge_in_condition_phi_branch (int i,
1005 : : gphi *condition_phi,
1006 : : tree *evolution_of_branch,
1007 : : tree init_cond, int limit)
1008 : : {
1009 : 3392975 : tree branch = PHI_ARG_DEF (condition_phi, i);
1010 : 3392975 : *evolution_of_branch = chrec_dont_know;
1011 : :
1012 : : /* Do not follow back edges (they must belong to an irreducible loop, which
1013 : : we really do not want to worry about). */
1014 : 3392975 : if (backedge_phi_arg_p (condition_phi, i))
1015 : : return t_false;
1016 : :
1017 : 3387229 : if (TREE_CODE (branch) == SSA_NAME)
1018 : : {
1019 : 3162213 : *evolution_of_branch = init_cond;
1020 : 3162213 : return follow_ssa_edge_expr (condition_phi, branch,
1021 : 3162213 : evolution_of_branch, limit);
1022 : : }
1023 : :
1024 : : /* This case occurs when one of the condition branches sets
1025 : : the variable to a constant: i.e. a phi-node like
1026 : : "a_2 = PHI <a_7(5), 2(6)>;".
1027 : :
1028 : : FIXME: This case have to be refined correctly:
1029 : : in some cases it is possible to say something better than
1030 : : chrec_dont_know, for example using a wrap-around notation. */
1031 : : return t_false;
1032 : : }
1033 : :
1034 : : /* This function merges the branches of a condition-phi-node in a
1035 : : loop. */
1036 : :
1037 : : t_bool
1038 : 2226846 : scev_dfs::follow_ssa_edge_in_condition_phi (gphi *condition_phi,
1039 : : tree *evolution_of_loop, int limit)
1040 : : {
1041 : 2226846 : int i, n;
1042 : 2226846 : tree init = *evolution_of_loop;
1043 : 2226846 : tree evolution_of_branch;
1044 : 2226846 : t_bool res = follow_ssa_edge_in_condition_phi_branch (0, condition_phi,
1045 : : &evolution_of_branch,
1046 : : init, limit);
1047 : 2226846 : if (res == t_false || res == t_dont_know)
1048 : : return res;
1049 : :
1050 : 1254159 : *evolution_of_loop = evolution_of_branch;
1051 : :
1052 : 1254159 : n = gimple_phi_num_args (condition_phi);
1053 : 1776794 : for (i = 1; i < n; i++)
1054 : : {
1055 : : /* Quickly give up when the evolution of one of the branches is
1056 : : not known. */
1057 : 1284564 : if (*evolution_of_loop == chrec_dont_know)
1058 : : return t_true;
1059 : :
1060 : : /* Increase the limit by the PHI argument number to avoid exponential
1061 : : time and memory complexity. */
1062 : 1166129 : res = follow_ssa_edge_in_condition_phi_branch (i, condition_phi,
1063 : : &evolution_of_branch,
1064 : : init, limit + i);
1065 : 1166129 : if (res == t_false || res == t_dont_know)
1066 : : return res;
1067 : :
1068 : 522635 : *evolution_of_loop = chrec_merge (*evolution_of_loop,
1069 : : evolution_of_branch);
1070 : : }
1071 : :
1072 : : return t_true;
1073 : : }
1074 : :
1075 : : /* Follow an SSA edge in an inner loop. It computes the overall
1076 : : effect of the loop, and following the symbolic initial conditions,
1077 : : it follows the edges in the parent loop. The inner loop is
1078 : : considered as a single statement. */
1079 : :
1080 : : t_bool
1081 : 259264 : scev_dfs::follow_ssa_edge_inner_loop_phi (gphi *loop_phi_node,
1082 : : tree *evolution_of_loop, int limit)
1083 : : {
1084 : 259264 : class loop *loop = loop_containing_stmt (loop_phi_node);
1085 : 259264 : tree ev = analyze_scalar_evolution (loop, PHI_RESULT (loop_phi_node));
1086 : :
1087 : : /* Sometimes, the inner loop is too difficult to analyze, and the
1088 : : result of the analysis is a symbolic parameter. */
1089 : 259264 : if (ev == PHI_RESULT (loop_phi_node))
1090 : : {
1091 : 112919 : t_bool res = t_false;
1092 : 112919 : int i, n = gimple_phi_num_args (loop_phi_node);
1093 : :
1094 : 159521 : for (i = 0; i < n; i++)
1095 : : {
1096 : 151056 : tree arg = PHI_ARG_DEF (loop_phi_node, i);
1097 : 151056 : basic_block bb;
1098 : :
1099 : : /* Follow the edges that exit the inner loop. */
1100 : 151056 : bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1101 : 151056 : if (!flow_bb_inside_loop_p (loop, bb))
1102 : 112919 : res = follow_ssa_edge_expr (loop_phi_node,
1103 : : arg, evolution_of_loop, limit);
1104 : 151056 : if (res == t_true)
1105 : : break;
1106 : : }
1107 : :
1108 : : /* If the path crosses this loop-phi, give up. */
1109 : 112919 : if (res == t_true)
1110 : 104454 : *evolution_of_loop = chrec_dont_know;
1111 : :
1112 : 112919 : return res;
1113 : : }
1114 : :
1115 : : /* Otherwise, compute the overall effect of the inner loop. */
1116 : 146345 : ev = compute_overall_effect_of_inner_loop (loop, ev);
1117 : 146345 : return follow_ssa_edge_expr (loop_phi_node, ev, evolution_of_loop, limit);
1118 : : }
1119 : :
1120 : : /* Follow the ssa edge into the expression EXPR.
1121 : : Return true if the strongly connected component has been found. */
1122 : :
1123 : : t_bool
1124 : 23800138 : scev_dfs::follow_ssa_edge_expr (gimple *at_stmt, tree expr,
1125 : : tree *evolution_of_loop, int limit)
1126 : : {
1127 : 23800138 : gphi *halting_phi = loop_phi_node;
1128 : 23800138 : enum tree_code code;
1129 : 23800138 : tree type, rhs0, rhs1 = NULL_TREE;
1130 : :
1131 : : /* The EXPR is one of the following cases:
1132 : : - an SSA_NAME,
1133 : : - an INTEGER_CST,
1134 : : - a PLUS_EXPR,
1135 : : - a POINTER_PLUS_EXPR,
1136 : : - a MINUS_EXPR,
1137 : : - other cases are not yet handled. */
1138 : :
1139 : : /* For SSA_NAME look at the definition statement, handling
1140 : : PHI nodes and otherwise expand appropriately for the expression
1141 : : handling below. */
1142 : 23800138 : if (TREE_CODE (expr) == SSA_NAME)
1143 : : {
1144 : 23641888 : gimple *def = SSA_NAME_DEF_STMT (expr);
1145 : :
1146 : 23641888 : if (gimple_nop_p (def))
1147 : : return t_false;
1148 : :
1149 : : /* Give up if the path is longer than the MAX that we allow. */
1150 : 23626672 : if (limit > param_scev_max_expr_complexity)
1151 : : {
1152 : 7182 : *evolution_of_loop = chrec_dont_know;
1153 : 7182 : return t_dont_know;
1154 : : }
1155 : :
1156 : 23619490 : if (gphi *phi = dyn_cast <gphi *>(def))
1157 : : {
1158 : 24595590 : if (!loop_phi_node_p (phi))
1159 : : /* DEF is a condition-phi-node. Follow the branches, and
1160 : : record their evolutions. Finally, merge the collected
1161 : : information and set the approximation to the main
1162 : : variable. */
1163 : 2226846 : return follow_ssa_edge_in_condition_phi (phi, evolution_of_loop,
1164 : 2226846 : limit);
1165 : :
1166 : : /* When the analyzed phi is the halting_phi, the
1167 : : depth-first search is over: we have found a path from
1168 : : the halting_phi to itself in the loop. */
1169 : 10070949 : if (phi == halting_phi)
1170 : : {
1171 : 9596017 : *evolution_of_loop = expr;
1172 : 9596017 : return t_true;
1173 : : }
1174 : :
1175 : : /* Otherwise, the evolution of the HALTING_PHI depends
1176 : : on the evolution of another loop-phi-node, i.e. the
1177 : : evolution function is a higher degree polynomial. */
1178 : 474932 : class loop *def_loop = loop_containing_stmt (def);
1179 : 474932 : if (def_loop == loop)
1180 : : return t_false;
1181 : :
1182 : : /* Inner loop. */
1183 : 283609 : if (flow_loop_nested_p (loop, def_loop))
1184 : 259264 : return follow_ssa_edge_inner_loop_phi (phi, evolution_of_loop,
1185 : 259264 : limit + 1);
1186 : :
1187 : : /* Outer loop. */
1188 : : return t_false;
1189 : : }
1190 : :
1191 : : /* At this level of abstraction, the program is just a set
1192 : : of GIMPLE_ASSIGNs and PHI_NODEs. In principle there is no
1193 : : other def to be handled. */
1194 : 11321695 : if (!is_gimple_assign (def))
1195 : : return t_false;
1196 : :
1197 : 11217717 : code = gimple_assign_rhs_code (def);
1198 : 11217717 : switch (get_gimple_rhs_class (code))
1199 : : {
1200 : 9760523 : case GIMPLE_BINARY_RHS:
1201 : 9760523 : rhs0 = gimple_assign_rhs1 (def);
1202 : 9760523 : rhs1 = gimple_assign_rhs2 (def);
1203 : 9760523 : break;
1204 : 1444317 : case GIMPLE_UNARY_RHS:
1205 : 1444317 : case GIMPLE_SINGLE_RHS:
1206 : 1444317 : rhs0 = gimple_assign_rhs1 (def);
1207 : 1444317 : break;
1208 : : default:
1209 : : return t_false;
1210 : : }
1211 : 11204840 : type = TREE_TYPE (gimple_assign_lhs (def));
1212 : 11204840 : at_stmt = def;
1213 : : }
1214 : : else
1215 : : {
1216 : 158250 : code = TREE_CODE (expr);
1217 : 158250 : type = TREE_TYPE (expr);
1218 : : /* Via follow_ssa_edge_inner_loop_phi we arrive here with the
1219 : : GENERIC scalar evolution of the inner loop. */
1220 : 158250 : switch (code)
1221 : : {
1222 : 10167 : CASE_CONVERT:
1223 : 10167 : rhs0 = TREE_OPERAND (expr, 0);
1224 : 10167 : break;
1225 : 19532 : case POINTER_PLUS_EXPR:
1226 : 19532 : case PLUS_EXPR:
1227 : 19532 : case MINUS_EXPR:
1228 : 19532 : rhs0 = TREE_OPERAND (expr, 0);
1229 : 19532 : rhs1 = TREE_OPERAND (expr, 1);
1230 : 19532 : STRIP_USELESS_TYPE_CONVERSION (rhs0);
1231 : 19532 : STRIP_USELESS_TYPE_CONVERSION (rhs1);
1232 : 19532 : break;
1233 : : default:
1234 : : rhs0 = expr;
1235 : : }
1236 : : }
1237 : :
1238 : 11363090 : switch (code)
1239 : : {
1240 : 338294 : CASE_CONVERT:
1241 : 338294 : {
1242 : : /* This assignment is under the form "a_1 = (cast) rhs. We cannot
1243 : : validate any precision altering conversion during the SCC
1244 : : analysis, so don't even try. */
1245 : 338294 : if (!tree_nop_conversion_p (type, TREE_TYPE (rhs0)))
1246 : : return t_false;
1247 : 222386 : t_bool res = follow_ssa_edge_expr (at_stmt, rhs0,
1248 : : evolution_of_loop, limit);
1249 : 222386 : if (res == t_true)
1250 : 106756 : *evolution_of_loop = chrec_convert (type, *evolution_of_loop,
1251 : : at_stmt);
1252 : : return res;
1253 : : }
1254 : :
1255 : : case INTEGER_CST:
1256 : : /* This assignment is under the form "a_1 = 7". */
1257 : : return t_false;
1258 : :
1259 : 2262 : case ADDR_EXPR:
1260 : 2262 : {
1261 : : /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
1262 : 2262 : if (TREE_CODE (TREE_OPERAND (rhs0, 0)) != MEM_REF)
1263 : : return t_false;
1264 : 0 : tree mem = TREE_OPERAND (rhs0, 0);
1265 : 0 : rhs0 = TREE_OPERAND (mem, 0);
1266 : 0 : rhs1 = TREE_OPERAND (mem, 1);
1267 : 0 : code = POINTER_PLUS_EXPR;
1268 : : }
1269 : : /* Fallthru. */
1270 : 9052571 : case POINTER_PLUS_EXPR:
1271 : 9052571 : case PLUS_EXPR:
1272 : 9052571 : case MINUS_EXPR:
1273 : : /* This case is under the form "rhs0 +- rhs1". */
1274 : 9052571 : if (TREE_CODE (rhs0) == SSA_NAME
1275 : 9033627 : && (TREE_CODE (rhs1) != SSA_NAME || code == MINUS_EXPR))
1276 : : {
1277 : : /* Match an assignment under the form:
1278 : : "a = b +- ...". */
1279 : 8030843 : t_bool res = follow_ssa_edge_expr (at_stmt, rhs0,
1280 : : evolution_of_loop, limit);
1281 : 8030843 : if (res == t_true)
1282 : 7789761 : *evolution_of_loop = add_to_evolution
1283 : 7789761 : (chrec_convert (type, *evolution_of_loop, at_stmt),
1284 : : code, rhs1, at_stmt);
1285 : 8030843 : return res;
1286 : : }
1287 : : /* Else search for the SCC in both rhs0 and rhs1. */
1288 : 1021728 : return follow_ssa_edge_binary (at_stmt, type, rhs0, code, rhs1,
1289 : 1021728 : evolution_of_loop, limit);
1290 : :
1291 : : default:
1292 : : return t_false;
1293 : : }
1294 : : }
1295 : :
1296 : :
1297 : : /* This section selects the loops that will be good candidates for the
1298 : : scalar evolution analysis. For the moment, greedily select all the
1299 : : loop nests we could analyze. */
1300 : :
1301 : : /* For a loop with a single exit edge, return the COND_EXPR that
1302 : : guards the exit edge. If the expression is too difficult to
1303 : : analyze, then give up. */
1304 : :
1305 : : gcond *
1306 : 236 : get_loop_exit_condition (const class loop *loop)
1307 : : {
1308 : 236 : return get_loop_exit_condition (single_exit (loop));
1309 : : }
1310 : :
1311 : : /* If the statement just before the EXIT_EDGE contains a condition then
1312 : : return the condition, otherwise NULL. */
1313 : :
1314 : : gcond *
1315 : 5250911 : get_loop_exit_condition (const_edge exit_edge)
1316 : : {
1317 : 5250911 : gcond *res = NULL;
1318 : :
1319 : 5250911 : if (dump_file && (dump_flags & TDF_SCEV))
1320 : 2 : fprintf (dump_file, "(get_loop_exit_condition \n ");
1321 : :
1322 : 5250911 : if (exit_edge)
1323 : 15752733 : res = safe_dyn_cast <gcond *> (*gsi_last_bb (exit_edge->src));
1324 : :
1325 : 5250911 : if (dump_file && (dump_flags & TDF_SCEV))
1326 : : {
1327 : 2 : print_gimple_stmt (dump_file, res, 0);
1328 : 2 : fprintf (dump_file, ")\n");
1329 : : }
1330 : :
1331 : 5250911 : return res;
1332 : : }
1333 : :
1334 : :
1335 : : /* Simplify PEELED_CHREC represented by (init_cond, arg) in LOOP.
1336 : : Handle below case and return the corresponding POLYNOMIAL_CHREC:
1337 : :
1338 : : # i_17 = PHI <i_13(5), 0(3)>
1339 : : # _20 = PHI <_5(5), start_4(D)(3)>
1340 : : ...
1341 : : i_13 = i_17 + 1;
1342 : : _5 = start_4(D) + i_13;
1343 : :
1344 : : Though variable _20 appears as a PEELED_CHREC in the form of
1345 : : (start_4, _5)_LOOP, it's a POLYNOMIAL_CHREC like {start_4, 1}_LOOP.
1346 : :
1347 : : See PR41488. */
1348 : :
1349 : : static tree
1350 : 1564409 : simplify_peeled_chrec (class loop *loop, tree arg, tree init_cond)
1351 : : {
1352 : 3128818 : aff_tree aff1, aff2;
1353 : 1564409 : tree ev, left, right, type, step_val;
1354 : 1564409 : hash_map<tree, name_expansion *> *peeled_chrec_map = NULL;
1355 : :
1356 : 1564409 : ev = instantiate_parameters (loop, analyze_scalar_evolution (loop, arg));
1357 : 1564409 : if (ev == NULL_TREE)
1358 : 0 : return chrec_dont_know;
1359 : :
1360 : : /* Support the case where we can derive the original CHREC from the
1361 : : peeled one if that's a converted other IV. This can be done
1362 : : when the original unpeeled converted IV does not overflow and
1363 : : has the same initial value. */
1364 : 1554525 : if (CONVERT_EXPR_P (ev)
1365 : 9884 : && TREE_CODE (init_cond) == INTEGER_CST
1366 : 3511 : && TREE_CODE (TREE_OPERAND (ev, 0)) == POLYNOMIAL_CHREC
1367 : 3247 : && (TYPE_PRECISION (TREE_TYPE (ev))
1368 : 3247 : > TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (ev, 0))))
1369 : 1567478 : && (!TYPE_UNSIGNED (TREE_TYPE (ev))
1370 : 3069 : || TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (ev, 0)))))
1371 : : {
1372 : 3069 : left = CHREC_LEFT (TREE_OPERAND (ev, 0));
1373 : 3069 : right = CHREC_RIGHT (TREE_OPERAND (ev, 0));
1374 : 3069 : tree left_before = chrec_fold_minus (TREE_TYPE (TREE_OPERAND (ev, 0)),
1375 : : left, right);
1376 : 3069 : if (TREE_CODE (left_before) == INTEGER_CST
1377 : 3069 : && wi::to_widest (init_cond) == wi::to_widest (left_before)
1378 : 6138 : && !scev_probably_wraps_p (NULL_TREE, left_before, right, NULL,
1379 : : loop, false))
1380 : 152 : return build_polynomial_chrec (loop->num, init_cond,
1381 : 152 : chrec_convert (TREE_TYPE (ev),
1382 : : right, NULL,
1383 : 152 : false, NULL_TREE));
1384 : 2917 : return chrec_dont_know;
1385 : : }
1386 : :
1387 : 1561340 : if (TREE_CODE (ev) != POLYNOMIAL_CHREC)
1388 : 1524422 : return chrec_dont_know;
1389 : :
1390 : 36918 : left = CHREC_LEFT (ev);
1391 : 36918 : right = CHREC_RIGHT (ev);
1392 : 36918 : type = TREE_TYPE (left);
1393 : 36918 : step_val = chrec_fold_plus (type, init_cond, right);
1394 : :
1395 : : /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP
1396 : : if "left" equals to "init + right". */
1397 : 36918 : if (operand_equal_p (left, step_val, 0))
1398 : : {
1399 : 16770 : if (dump_file && (dump_flags & TDF_SCEV))
1400 : 1 : fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n");
1401 : :
1402 : 16770 : return build_polynomial_chrec (loop->num, init_cond, right);
1403 : : }
1404 : :
1405 : : /* The affine code only deals with pointer and integer types. */
1406 : 20148 : if (!POINTER_TYPE_P (type)
1407 : 15983 : && !INTEGRAL_TYPE_P (type))
1408 : 15 : return chrec_dont_know;
1409 : :
1410 : : /* Try harder to check if they are equal. */
1411 : 20133 : tree_to_aff_combination_expand (left, type, &aff1, &peeled_chrec_map);
1412 : 20133 : tree_to_aff_combination_expand (step_val, type, &aff2, &peeled_chrec_map);
1413 : 20133 : free_affine_expand_cache (&peeled_chrec_map);
1414 : 20133 : aff_combination_scale (&aff2, -1);
1415 : 20133 : aff_combination_add (&aff1, &aff2);
1416 : :
1417 : : /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP
1418 : : if "left" equals to "init + right". */
1419 : 20133 : if (aff_combination_zero_p (&aff1))
1420 : : {
1421 : 11459 : if (dump_file && (dump_flags & TDF_SCEV))
1422 : 1 : fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n");
1423 : :
1424 : 11459 : return build_polynomial_chrec (loop->num, init_cond, right);
1425 : : }
1426 : 8674 : return chrec_dont_know;
1427 : 1564409 : }
1428 : :
1429 : : /* Given a LOOP_PHI_NODE, this function determines the evolution
1430 : : function from LOOP_PHI_NODE to LOOP_PHI_NODE in the loop. */
1431 : :
1432 : : static tree
1433 : 10680434 : analyze_evolution_in_loop (gphi *loop_phi_node,
1434 : : tree init_cond)
1435 : : {
1436 : 10680434 : int i, n = gimple_phi_num_args (loop_phi_node);
1437 : 10680434 : tree evolution_function = chrec_not_analyzed_yet;
1438 : 10680434 : class loop *loop = loop_containing_stmt (loop_phi_node);
1439 : 10680434 : basic_block bb;
1440 : 10680434 : static bool simplify_peeled_chrec_p = true;
1441 : :
1442 : 10680434 : if (dump_file && (dump_flags & TDF_SCEV))
1443 : : {
1444 : 2 : fprintf (dump_file, "(analyze_evolution_in_loop \n");
1445 : 2 : fprintf (dump_file, " (loop_phi_node = ");
1446 : 2 : print_gimple_stmt (dump_file, loop_phi_node, 0);
1447 : 2 : fprintf (dump_file, ")\n");
1448 : : }
1449 : :
1450 : 28266476 : for (i = 0; i < n; i++)
1451 : : {
1452 : 20136555 : tree arg = PHI_ARG_DEF (loop_phi_node, i);
1453 : 20136555 : tree ev_fn = chrec_dont_know;
1454 : 20136555 : t_bool res;
1455 : :
1456 : : /* Select the edges that enter the loop body. */
1457 : 20136555 : bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1458 : 20136555 : if (!flow_bb_inside_loop_p (loop, bb))
1459 : 9456121 : continue;
1460 : :
1461 : 10680434 : if (TREE_CODE (arg) == SSA_NAME)
1462 : : {
1463 : 10560564 : bool val = false;
1464 : :
1465 : : /* Pass in the initial condition to the follow edge function. */
1466 : 10560564 : scev_dfs dfs (loop, loop_phi_node, init_cond);
1467 : 10560564 : res = dfs.get_ev (&ev_fn, arg);
1468 : :
1469 : : /* If ev_fn has no evolution in the inner loop, and the
1470 : : init_cond is not equal to ev_fn, then we have an
1471 : : ambiguity between two possible values, as we cannot know
1472 : : the number of iterations at this point. */
1473 : 10560564 : if (TREE_CODE (ev_fn) != POLYNOMIAL_CHREC
1474 : 2519628 : && no_evolution_in_loop_p (ev_fn, loop->num, &val) && val
1475 : 10560570 : && !operand_equal_p (init_cond, ev_fn, 0))
1476 : 0 : ev_fn = chrec_dont_know;
1477 : : }
1478 : : else
1479 : : res = t_false;
1480 : :
1481 : : /* When it is impossible to go back on the same
1482 : : loop_phi_node by following the ssa edges, the
1483 : : evolution is represented by a peeled chrec, i.e. the
1484 : : first iteration, EV_FN has the value INIT_COND, then
1485 : : all the other iterations it has the value of ARG.
1486 : : For the moment, PEELED_CHREC nodes are not built. */
1487 : 10560564 : if (res != t_true)
1488 : : {
1489 : 2250546 : ev_fn = chrec_dont_know;
1490 : : /* Try to recognize POLYNOMIAL_CHREC which appears in
1491 : : the form of PEELED_CHREC, but guard the process with
1492 : : a bool variable to keep the analyzer from infinite
1493 : : recurrence for real PEELED_RECs. */
1494 : 2250546 : if (simplify_peeled_chrec_p && TREE_CODE (arg) == SSA_NAME)
1495 : : {
1496 : 1564409 : simplify_peeled_chrec_p = false;
1497 : 1564409 : ev_fn = simplify_peeled_chrec (loop, arg, init_cond);
1498 : 1564409 : simplify_peeled_chrec_p = true;
1499 : : }
1500 : : }
1501 : :
1502 : : /* When there are multiple back edges of the loop (which in fact never
1503 : : happens currently, but nevertheless), merge their evolutions. */
1504 : 10680434 : evolution_function = chrec_merge (evolution_function, ev_fn);
1505 : :
1506 : 10680434 : if (evolution_function == chrec_dont_know)
1507 : : break;
1508 : : }
1509 : :
1510 : 10680434 : if (dump_file && (dump_flags & TDF_SCEV))
1511 : : {
1512 : 2 : fprintf (dump_file, " (evolution_function = ");
1513 : 2 : print_generic_expr (dump_file, evolution_function);
1514 : 2 : fprintf (dump_file, "))\n");
1515 : : }
1516 : :
1517 : 10680434 : return evolution_function;
1518 : : }
1519 : :
1520 : : /* Looks to see if VAR is a copy of a constant (via straightforward assignments
1521 : : or degenerate phi's). If so, returns the constant; else, returns VAR. */
1522 : :
1523 : : static tree
1524 : 21921672 : follow_copies_to_constant (tree var)
1525 : : {
1526 : 21921672 : tree res = var;
1527 : 21921672 : while (TREE_CODE (res) == SSA_NAME
1528 : : /* We face not updated SSA form in multiple places and this walk
1529 : : may end up in sibling loops so we have to guard it. */
1530 : 26559899 : && !name_registered_for_update_p (res))
1531 : : {
1532 : 15786787 : gimple *def = SSA_NAME_DEF_STMT (res);
1533 : 15786787 : if (gphi *phi = dyn_cast <gphi *> (def))
1534 : : {
1535 : 4358880 : if (tree rhs = degenerate_phi_result (phi))
1536 : : res = rhs;
1537 : : else
1538 : : break;
1539 : : }
1540 : 11427907 : else if (gimple_assign_single_p (def))
1541 : : /* Will exit loop if not an SSA_NAME. */
1542 : 4206949 : res = gimple_assign_rhs1 (def);
1543 : : else
1544 : : break;
1545 : : }
1546 : 21921672 : if (CONSTANT_CLASS_P (res))
1547 : 6421343 : return res;
1548 : : return var;
1549 : : }
1550 : :
1551 : : /* Given a loop-phi-node, return the initial conditions of the
1552 : : variable on entry of the loop. When the CCP has propagated
1553 : : constants into the loop-phi-node, the initial condition is
1554 : : instantiated, otherwise the initial condition is kept symbolic.
1555 : : This analyzer does not analyze the evolution outside the current
1556 : : loop, and leaves this task to the on-demand tree reconstructor. */
1557 : :
1558 : : static tree
1559 : 10680434 : analyze_initial_condition (gphi *loop_phi_node)
1560 : : {
1561 : 10680434 : int i, n;
1562 : 10680434 : tree init_cond = chrec_not_analyzed_yet;
1563 : 10680434 : class loop *loop = loop_containing_stmt (loop_phi_node);
1564 : :
1565 : 10680434 : if (dump_file && (dump_flags & TDF_SCEV))
1566 : : {
1567 : 2 : fprintf (dump_file, "(analyze_initial_condition \n");
1568 : 2 : fprintf (dump_file, " (loop_phi_node = \n");
1569 : 2 : print_gimple_stmt (dump_file, loop_phi_node, 0);
1570 : 2 : fprintf (dump_file, ")\n");
1571 : : }
1572 : :
1573 : 10680434 : n = gimple_phi_num_args (loop_phi_node);
1574 : 32041302 : for (i = 0; i < n; i++)
1575 : : {
1576 : 21360868 : tree branch = PHI_ARG_DEF (loop_phi_node, i);
1577 : 21360868 : basic_block bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1578 : :
1579 : : /* When the branch is oriented to the loop's body, it does
1580 : : not contribute to the initial condition. */
1581 : 21360868 : if (flow_bb_inside_loop_p (loop, bb))
1582 : 10680434 : continue;
1583 : :
1584 : 10680434 : if (init_cond == chrec_not_analyzed_yet)
1585 : : {
1586 : 10680434 : init_cond = branch;
1587 : 10680434 : continue;
1588 : : }
1589 : :
1590 : 0 : if (TREE_CODE (branch) == SSA_NAME)
1591 : : {
1592 : 0 : init_cond = chrec_dont_know;
1593 : 0 : break;
1594 : : }
1595 : :
1596 : 0 : init_cond = chrec_merge (init_cond, branch);
1597 : : }
1598 : :
1599 : : /* Ooops -- a loop without an entry??? */
1600 : 10680434 : if (init_cond == chrec_not_analyzed_yet)
1601 : 0 : init_cond = chrec_dont_know;
1602 : :
1603 : : /* We may not have fully constant propagated IL. Handle degenerate PHIs here
1604 : : to not miss important early loop unrollings. */
1605 : 10680434 : init_cond = follow_copies_to_constant (init_cond);
1606 : :
1607 : 10680434 : if (dump_file && (dump_flags & TDF_SCEV))
1608 : : {
1609 : 2 : fprintf (dump_file, " (init_cond = ");
1610 : 2 : print_generic_expr (dump_file, init_cond);
1611 : 2 : fprintf (dump_file, "))\n");
1612 : : }
1613 : :
1614 : 10680434 : return init_cond;
1615 : : }
1616 : :
1617 : : /* Analyze the scalar evolution for LOOP_PHI_NODE. */
1618 : :
1619 : : static tree
1620 : 10680434 : interpret_loop_phi (class loop *loop, gphi *loop_phi_node)
1621 : : {
1622 : 10680434 : class loop *phi_loop = loop_containing_stmt (loop_phi_node);
1623 : 10680434 : tree init_cond;
1624 : :
1625 : 10680434 : gcc_assert (phi_loop == loop);
1626 : :
1627 : : /* Otherwise really interpret the loop phi. */
1628 : 10680434 : init_cond = analyze_initial_condition (loop_phi_node);
1629 : 10680434 : return analyze_evolution_in_loop (loop_phi_node, init_cond);
1630 : : }
1631 : :
1632 : : /* This function merges the branches of a condition-phi-node,
1633 : : contained in the outermost loop, and whose arguments are already
1634 : : analyzed. */
1635 : :
1636 : : static tree
1637 : 2664350 : interpret_condition_phi (class loop *loop, gphi *condition_phi)
1638 : : {
1639 : 2664350 : int i, n = gimple_phi_num_args (condition_phi);
1640 : 2664350 : tree res = chrec_not_analyzed_yet;
1641 : :
1642 : 5638896 : for (i = 0; i < n; i++)
1643 : : {
1644 : 4993247 : tree branch_chrec;
1645 : :
1646 : 4993247 : if (backedge_phi_arg_p (condition_phi, i))
1647 : : {
1648 : 45361 : res = chrec_dont_know;
1649 : 45361 : break;
1650 : : }
1651 : :
1652 : 4947886 : branch_chrec = analyze_scalar_evolution
1653 : 4947886 : (loop, PHI_ARG_DEF (condition_phi, i));
1654 : :
1655 : 4947886 : res = chrec_merge (res, branch_chrec);
1656 : 4947886 : if (res == chrec_dont_know)
1657 : : break;
1658 : : }
1659 : :
1660 : 2664350 : return res;
1661 : : }
1662 : :
1663 : : /* Interpret the operation RHS1 OP RHS2. If we didn't
1664 : : analyze this node before, follow the definitions until ending
1665 : : either on an analyzed GIMPLE_ASSIGN, or on a loop-phi-node. On the
1666 : : return path, this function propagates evolutions (ala constant copy
1667 : : propagation). OPND1 is not a GIMPLE expression because we could
1668 : : analyze the effect of an inner loop: see interpret_loop_phi. */
1669 : :
1670 : : static tree
1671 : 43603879 : interpret_rhs_expr (class loop *loop, gimple *at_stmt,
1672 : : tree type, tree rhs1, enum tree_code code, tree rhs2)
1673 : : {
1674 : 43603879 : tree res, chrec1, chrec2, ctype;
1675 : 43603879 : gimple *def;
1676 : :
1677 : 43603879 : if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1678 : : {
1679 : 10639972 : if (is_gimple_min_invariant (rhs1))
1680 : 2415671 : return chrec_convert (type, rhs1, at_stmt);
1681 : :
1682 : 8224301 : if (code == SSA_NAME)
1683 : 126489 : return chrec_convert (type, analyze_scalar_evolution (loop, rhs1),
1684 : 126489 : at_stmt);
1685 : : }
1686 : :
1687 : 41061719 : switch (code)
1688 : : {
1689 : 346525 : case ADDR_EXPR:
1690 : 346525 : if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
1691 : 346525 : || handled_component_p (TREE_OPERAND (rhs1, 0)))
1692 : : {
1693 : 346517 : machine_mode mode;
1694 : 346517 : poly_int64 bitsize, bitpos;
1695 : 346517 : int unsignedp, reversep;
1696 : 346517 : int volatilep = 0;
1697 : 346517 : tree base, offset;
1698 : 346517 : tree chrec3;
1699 : 346517 : tree unitpos;
1700 : :
1701 : 346517 : base = get_inner_reference (TREE_OPERAND (rhs1, 0),
1702 : : &bitsize, &bitpos, &offset, &mode,
1703 : : &unsignedp, &reversep, &volatilep);
1704 : :
1705 : 346517 : if (TREE_CODE (base) == MEM_REF)
1706 : : {
1707 : 267888 : rhs2 = TREE_OPERAND (base, 1);
1708 : 267888 : rhs1 = TREE_OPERAND (base, 0);
1709 : :
1710 : 267888 : chrec1 = analyze_scalar_evolution (loop, rhs1);
1711 : 267888 : chrec2 = analyze_scalar_evolution (loop, rhs2);
1712 : 267888 : chrec1 = chrec_convert (type, chrec1, at_stmt);
1713 : 267888 : chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt);
1714 : 267888 : chrec1 = instantiate_parameters (loop, chrec1);
1715 : 267888 : chrec2 = instantiate_parameters (loop, chrec2);
1716 : 267888 : res = chrec_fold_plus (type, chrec1, chrec2);
1717 : : }
1718 : : else
1719 : : {
1720 : 78629 : chrec1 = analyze_scalar_evolution_for_address_of (loop, base);
1721 : 78629 : chrec1 = chrec_convert (type, chrec1, at_stmt);
1722 : 78629 : res = chrec1;
1723 : : }
1724 : :
1725 : 346517 : if (offset != NULL_TREE)
1726 : : {
1727 : 147120 : chrec2 = analyze_scalar_evolution (loop, offset);
1728 : 147120 : chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
1729 : 147120 : chrec2 = instantiate_parameters (loop, chrec2);
1730 : 147120 : res = chrec_fold_plus (type, res, chrec2);
1731 : : }
1732 : :
1733 : 346517 : if (maybe_ne (bitpos, 0))
1734 : : {
1735 : 137262 : unitpos = size_int (exact_div (bitpos, BITS_PER_UNIT));
1736 : 137262 : chrec3 = analyze_scalar_evolution (loop, unitpos);
1737 : 137262 : chrec3 = chrec_convert (TREE_TYPE (unitpos), chrec3, at_stmt);
1738 : 137262 : chrec3 = instantiate_parameters (loop, chrec3);
1739 : 137262 : res = chrec_fold_plus (type, res, chrec3);
1740 : : }
1741 : : }
1742 : : else
1743 : 8 : res = chrec_dont_know;
1744 : : break;
1745 : :
1746 : 3338030 : case POINTER_PLUS_EXPR:
1747 : 3338030 : chrec1 = analyze_scalar_evolution (loop, rhs1);
1748 : 3338030 : chrec2 = analyze_scalar_evolution (loop, rhs2);
1749 : 3338030 : chrec1 = chrec_convert (type, chrec1, at_stmt);
1750 : 3338030 : chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt);
1751 : 3338030 : chrec1 = instantiate_parameters (loop, chrec1);
1752 : 3338030 : chrec2 = instantiate_parameters (loop, chrec2);
1753 : 3338030 : res = chrec_fold_plus (type, chrec1, chrec2);
1754 : 3338030 : break;
1755 : :
1756 : 11451086 : case PLUS_EXPR:
1757 : 11451086 : chrec1 = analyze_scalar_evolution (loop, rhs1);
1758 : 11451086 : chrec2 = analyze_scalar_evolution (loop, rhs2);
1759 : 11451086 : ctype = type;
1760 : : /* When the stmt is conditionally executed re-write the CHREC
1761 : : into a form that has well-defined behavior on overflow. */
1762 : 11451086 : if (at_stmt
1763 : 10357523 : && INTEGRAL_TYPE_P (type)
1764 : 10255098 : && ! TYPE_OVERFLOW_WRAPS (type)
1765 : 19246696 : && ! dominated_by_p (CDI_DOMINATORS, loop->latch,
1766 : 7795610 : gimple_bb (at_stmt)))
1767 : 644559 : ctype = unsigned_type_for (type);
1768 : 11451086 : chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1769 : 11451086 : chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1770 : 11451086 : chrec1 = instantiate_parameters (loop, chrec1);
1771 : 11451086 : chrec2 = instantiate_parameters (loop, chrec2);
1772 : 11451086 : res = chrec_fold_plus (ctype, chrec1, chrec2);
1773 : 11451086 : if (type != ctype)
1774 : 644559 : res = chrec_convert (type, res, at_stmt);
1775 : : break;
1776 : :
1777 : 1413781 : case MINUS_EXPR:
1778 : 1413781 : chrec1 = analyze_scalar_evolution (loop, rhs1);
1779 : 1413781 : chrec2 = analyze_scalar_evolution (loop, rhs2);
1780 : 1413781 : ctype = type;
1781 : : /* When the stmt is conditionally executed re-write the CHREC
1782 : : into a form that has well-defined behavior on overflow. */
1783 : 1413781 : if (at_stmt
1784 : 1356139 : && INTEGRAL_TYPE_P (type)
1785 : 1317856 : && ! TYPE_OVERFLOW_WRAPS (type)
1786 : 1902741 : && ! dominated_by_p (CDI_DOMINATORS,
1787 : 488960 : loop->latch, gimple_bb (at_stmt)))
1788 : 90879 : ctype = unsigned_type_for (type);
1789 : 1413781 : chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1790 : 1413781 : chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1791 : 1413781 : chrec1 = instantiate_parameters (loop, chrec1);
1792 : 1413781 : chrec2 = instantiate_parameters (loop, chrec2);
1793 : 1413781 : res = chrec_fold_minus (ctype, chrec1, chrec2);
1794 : 1413781 : if (type != ctype)
1795 : 90879 : res = chrec_convert (type, res, at_stmt);
1796 : : break;
1797 : :
1798 : 77624 : case NEGATE_EXPR:
1799 : 77624 : chrec1 = analyze_scalar_evolution (loop, rhs1);
1800 : 77624 : ctype = type;
1801 : : /* When the stmt is conditionally executed re-write the CHREC
1802 : : into a form that has well-defined behavior on overflow. */
1803 : 77624 : if (at_stmt
1804 : 67052 : && INTEGRAL_TYPE_P (type)
1805 : 65286 : && ! TYPE_OVERFLOW_WRAPS (type)
1806 : 113712 : && ! dominated_by_p (CDI_DOMINATORS,
1807 : 36088 : loop->latch, gimple_bb (at_stmt)))
1808 : 8047 : ctype = unsigned_type_for (type);
1809 : 77624 : chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1810 : : /* TYPE may be integer, real or complex, so use fold_convert. */
1811 : 77624 : chrec1 = instantiate_parameters (loop, chrec1);
1812 : 77624 : res = chrec_fold_multiply (ctype, chrec1,
1813 : : fold_convert (ctype, integer_minus_one_node));
1814 : 77624 : if (type != ctype)
1815 : 8047 : res = chrec_convert (type, res, at_stmt);
1816 : : break;
1817 : :
1818 : 33982 : case BIT_NOT_EXPR:
1819 : : /* Handle ~X as -1 - X. */
1820 : 33982 : chrec1 = analyze_scalar_evolution (loop, rhs1);
1821 : 33982 : chrec1 = chrec_convert (type, chrec1, at_stmt);
1822 : 33982 : chrec1 = instantiate_parameters (loop, chrec1);
1823 : 33982 : res = chrec_fold_minus (type,
1824 : : fold_convert (type, integer_minus_one_node),
1825 : : chrec1);
1826 : 33982 : break;
1827 : :
1828 : 5878247 : case MULT_EXPR:
1829 : 5878247 : chrec1 = analyze_scalar_evolution (loop, rhs1);
1830 : 5878247 : chrec2 = analyze_scalar_evolution (loop, rhs2);
1831 : 5878247 : ctype = type;
1832 : : /* When the stmt is conditionally executed re-write the CHREC
1833 : : into a form that has well-defined behavior on overflow. */
1834 : 5878247 : if (at_stmt
1835 : 4021457 : && INTEGRAL_TYPE_P (type)
1836 : 3903123 : && ! TYPE_OVERFLOW_WRAPS (type)
1837 : 7617686 : && ! dominated_by_p (CDI_DOMINATORS,
1838 : 1739439 : loop->latch, gimple_bb (at_stmt)))
1839 : 153489 : ctype = unsigned_type_for (type);
1840 : 5878247 : chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1841 : 5878247 : chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1842 : 5878247 : chrec1 = instantiate_parameters (loop, chrec1);
1843 : 5878247 : chrec2 = instantiate_parameters (loop, chrec2);
1844 : 5878247 : res = chrec_fold_multiply (ctype, chrec1, chrec2);
1845 : 5878247 : if (type != ctype)
1846 : 153489 : res = chrec_convert (type, res, at_stmt);
1847 : : break;
1848 : :
1849 : 148358 : case LSHIFT_EXPR:
1850 : 148358 : {
1851 : : /* Handle A<<B as A * (1<<B). */
1852 : 148358 : tree uns = unsigned_type_for (type);
1853 : 148358 : chrec1 = analyze_scalar_evolution (loop, rhs1);
1854 : 148358 : chrec2 = analyze_scalar_evolution (loop, rhs2);
1855 : 148358 : chrec1 = chrec_convert (uns, chrec1, at_stmt);
1856 : 148358 : chrec1 = instantiate_parameters (loop, chrec1);
1857 : 148358 : chrec2 = instantiate_parameters (loop, chrec2);
1858 : :
1859 : 148358 : tree one = build_int_cst (uns, 1);
1860 : 148358 : chrec2 = fold_build2 (LSHIFT_EXPR, uns, one, chrec2);
1861 : 148358 : res = chrec_fold_multiply (uns, chrec1, chrec2);
1862 : 148358 : res = chrec_convert (type, res, at_stmt);
1863 : : }
1864 : 148358 : break;
1865 : :
1866 : 8283470 : CASE_CONVERT:
1867 : : /* In case we have a truncation of a widened operation that in
1868 : : the truncated type has undefined overflow behavior analyze
1869 : : the operation done in an unsigned type of the same precision
1870 : : as the final truncation. We cannot derive a scalar evolution
1871 : : for the widened operation but for the truncated result. */
1872 : 8283470 : if (TREE_CODE (type) == INTEGER_TYPE
1873 : 7991643 : && TREE_CODE (TREE_TYPE (rhs1)) == INTEGER_TYPE
1874 : 7469862 : && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (rhs1))
1875 : 466083 : && TYPE_OVERFLOW_UNDEFINED (type)
1876 : 265157 : && TREE_CODE (rhs1) == SSA_NAME
1877 : 264989 : && (def = SSA_NAME_DEF_STMT (rhs1))
1878 : 264989 : && is_gimple_assign (def)
1879 : 160001 : && TREE_CODE_CLASS (gimple_assign_rhs_code (def)) == tcc_binary
1880 : 8396098 : && TREE_CODE (gimple_assign_rhs2 (def)) == INTEGER_CST)
1881 : : {
1882 : 80310 : tree utype = unsigned_type_for (type);
1883 : 80310 : chrec1 = interpret_rhs_expr (loop, at_stmt, utype,
1884 : : gimple_assign_rhs1 (def),
1885 : : gimple_assign_rhs_code (def),
1886 : : gimple_assign_rhs2 (def));
1887 : : }
1888 : : else
1889 : 8203160 : chrec1 = analyze_scalar_evolution (loop, rhs1);
1890 : 8283470 : res = chrec_convert (type, chrec1, at_stmt, true, rhs1);
1891 : 8283470 : break;
1892 : :
1893 : 395331 : case BIT_AND_EXPR:
1894 : : /* Given int variable A, handle A&0xffff as (int)(unsigned short)A.
1895 : : If A is SCEV and its value is in the range of representable set
1896 : : of type unsigned short, the result expression is a (no-overflow)
1897 : : SCEV. */
1898 : 395331 : res = chrec_dont_know;
1899 : 395331 : if (tree_fits_uhwi_p (rhs2))
1900 : : {
1901 : 268520 : int precision;
1902 : 268520 : unsigned HOST_WIDE_INT val = tree_to_uhwi (rhs2);
1903 : :
1904 : 268520 : val ++;
1905 : : /* Skip if value of rhs2 wraps in unsigned HOST_WIDE_INT or
1906 : : it's not the maximum value of a smaller type than rhs1. */
1907 : 268520 : if (val != 0
1908 : 213202 : && (precision = exact_log2 (val)) > 0
1909 : 481722 : && (unsigned) precision < TYPE_PRECISION (TREE_TYPE (rhs1)))
1910 : : {
1911 : 213202 : tree utype = build_nonstandard_integer_type (precision, 1);
1912 : :
1913 : 213202 : if (TYPE_PRECISION (utype) < TYPE_PRECISION (TREE_TYPE (rhs1)))
1914 : : {
1915 : 213202 : chrec1 = analyze_scalar_evolution (loop, rhs1);
1916 : 213202 : chrec1 = chrec_convert (utype, chrec1, at_stmt);
1917 : 213202 : res = chrec_convert (TREE_TYPE (rhs1), chrec1, at_stmt);
1918 : : }
1919 : : }
1920 : : }
1921 : : break;
1922 : :
1923 : 9695285 : default:
1924 : 9695285 : res = chrec_dont_know;
1925 : 9695285 : break;
1926 : : }
1927 : :
1928 : : return res;
1929 : : }
1930 : :
1931 : : /* Interpret the expression EXPR. */
1932 : :
1933 : : static tree
1934 : 8625465 : interpret_expr (class loop *loop, gimple *at_stmt, tree expr)
1935 : : {
1936 : 8625465 : enum tree_code code;
1937 : 8625465 : tree type = TREE_TYPE (expr), op0, op1;
1938 : :
1939 : 8625465 : if (automatically_generated_chrec_p (expr))
1940 : : return expr;
1941 : :
1942 : 8620547 : if (TREE_CODE (expr) == POLYNOMIAL_CHREC
1943 : 8620159 : || TREE_CODE (expr) == CALL_EXPR
1944 : 17240661 : || get_gimple_rhs_class (TREE_CODE (expr)) == GIMPLE_TERNARY_RHS)
1945 : : return chrec_dont_know;
1946 : :
1947 : 8548924 : extract_ops_from_tree (expr, &code, &op0, &op1);
1948 : :
1949 : 8548924 : return interpret_rhs_expr (loop, at_stmt, type,
1950 : 8548924 : op0, code, op1);
1951 : : }
1952 : :
1953 : : /* Interpret the rhs of the assignment STMT. */
1954 : :
1955 : : static tree
1956 : 34974645 : interpret_gimple_assign (class loop *loop, gimple *stmt)
1957 : : {
1958 : 34974645 : tree type = TREE_TYPE (gimple_assign_lhs (stmt));
1959 : 34974645 : enum tree_code code = gimple_assign_rhs_code (stmt);
1960 : :
1961 : 34974645 : return interpret_rhs_expr (loop, stmt, type,
1962 : : gimple_assign_rhs1 (stmt), code,
1963 : 34974645 : gimple_assign_rhs2 (stmt));
1964 : : }
1965 : :
1966 : :
1967 : :
1968 : : /* This section contains all the entry points:
1969 : : - number_of_iterations_in_loop,
1970 : : - analyze_scalar_evolution,
1971 : : - instantiate_parameters.
1972 : : */
1973 : :
1974 : : /* Helper recursive function. */
1975 : :
1976 : : static tree
1977 : 72741228 : analyze_scalar_evolution_1 (class loop *loop, tree var)
1978 : : {
1979 : 72741228 : gimple *def;
1980 : 72741228 : basic_block bb;
1981 : 72741228 : class loop *def_loop;
1982 : 72741228 : tree res;
1983 : :
1984 : 72741228 : if (TREE_CODE (var) != SSA_NAME)
1985 : 8625465 : return interpret_expr (loop, NULL, var);
1986 : :
1987 : 64115763 : def = SSA_NAME_DEF_STMT (var);
1988 : 64115763 : bb = gimple_bb (def);
1989 : 64115763 : def_loop = bb->loop_father;
1990 : :
1991 : 64115763 : if (!flow_bb_inside_loop_p (loop, bb))
1992 : : {
1993 : : /* Keep symbolic form, but look through obvious copies for constants. */
1994 : 11241238 : res = follow_copies_to_constant (var);
1995 : 11241238 : goto set_and_end;
1996 : : }
1997 : :
1998 : 52874525 : if (loop != def_loop)
1999 : : {
2000 : 4000203 : res = analyze_scalar_evolution_1 (def_loop, var);
2001 : 4000203 : class loop *loop_to_skip = superloop_at_depth (def_loop,
2002 : 4000203 : loop_depth (loop) + 1);
2003 : 4000203 : res = compute_overall_effect_of_inner_loop (loop_to_skip, res);
2004 : 4000203 : if (chrec_contains_symbols_defined_in_loop (res, loop->num))
2005 : 280604 : res = analyze_scalar_evolution_1 (loop, res);
2006 : 4000203 : goto set_and_end;
2007 : : }
2008 : :
2009 : 48874322 : switch (gimple_code (def))
2010 : : {
2011 : 34974645 : case GIMPLE_ASSIGN:
2012 : 34974645 : res = interpret_gimple_assign (loop, def);
2013 : 34974645 : break;
2014 : :
2015 : 13344784 : case GIMPLE_PHI:
2016 : 26689568 : if (loop_phi_node_p (def))
2017 : 10680434 : res = interpret_loop_phi (loop, as_a <gphi *> (def));
2018 : : else
2019 : 2664350 : res = interpret_condition_phi (loop, as_a <gphi *> (def));
2020 : : break;
2021 : :
2022 : 554893 : default:
2023 : 554893 : res = chrec_dont_know;
2024 : 554893 : break;
2025 : : }
2026 : :
2027 : 64115763 : set_and_end:
2028 : :
2029 : : /* Keep the symbolic form. */
2030 : 64115763 : if (res == chrec_dont_know)
2031 : 23620540 : res = var;
2032 : :
2033 : 64115763 : if (loop == def_loop)
2034 : 48874322 : set_scalar_evolution (block_before_loop (loop), var, res);
2035 : :
2036 : : return res;
2037 : : }
2038 : :
2039 : : /* Analyzes and returns the scalar evolution of the ssa_name VAR in
2040 : : LOOP. LOOP is the loop in which the variable is used.
2041 : :
2042 : : Example of use: having a pointer VAR to a SSA_NAME node, STMT a
2043 : : pointer to the statement that uses this variable, in order to
2044 : : determine the evolution function of the variable, use the following
2045 : : calls:
2046 : :
2047 : : loop_p loop = loop_containing_stmt (stmt);
2048 : : tree chrec_with_symbols = analyze_scalar_evolution (loop, var);
2049 : : tree chrec_instantiated = instantiate_parameters (loop, chrec_with_symbols);
2050 : : */
2051 : :
2052 : : tree
2053 : 189513071 : analyze_scalar_evolution (class loop *loop, tree var)
2054 : : {
2055 : 189513071 : tree res;
2056 : :
2057 : : /* ??? Fix callers. */
2058 : 189513071 : if (! loop)
2059 : : return var;
2060 : :
2061 : 189358174 : if (dump_file && (dump_flags & TDF_SCEV))
2062 : : {
2063 : 30 : fprintf (dump_file, "(analyze_scalar_evolution \n");
2064 : 30 : fprintf (dump_file, " (loop_nb = %d)\n", loop->num);
2065 : 30 : fprintf (dump_file, " (scalar = ");
2066 : 30 : print_generic_expr (dump_file, var);
2067 : 30 : fprintf (dump_file, ")\n");
2068 : : }
2069 : :
2070 : 189358174 : res = get_scalar_evolution (block_before_loop (loop), var);
2071 : 189358174 : if (res == chrec_not_analyzed_yet)
2072 : : {
2073 : : /* We'll recurse into instantiate_scev, avoid tearing down the
2074 : : instantiate cache repeatedly and keep it live from here. */
2075 : 68460421 : bool destr = false;
2076 : 68460421 : if (!global_cache)
2077 : : {
2078 : 41111086 : global_cache = new instantiate_cache_type;
2079 : 41111086 : destr = true;
2080 : : }
2081 : 68460421 : res = analyze_scalar_evolution_1 (loop, var);
2082 : 68460421 : if (destr)
2083 : : {
2084 : 41111086 : delete global_cache;
2085 : 41111086 : global_cache = NULL;
2086 : : }
2087 : : }
2088 : :
2089 : 189358174 : if (dump_file && (dump_flags & TDF_SCEV))
2090 : 30 : fprintf (dump_file, ")\n");
2091 : :
2092 : : return res;
2093 : : }
2094 : :
2095 : : /* If CHREC doesn't overflow, set the nonwrapping flag. */
2096 : :
2097 : 10357281 : void record_nonwrapping_chrec (tree chrec)
2098 : : {
2099 : 10357281 : CHREC_NOWRAP(chrec) = 1;
2100 : :
2101 : 10357281 : if (dump_file && (dump_flags & TDF_SCEV))
2102 : : {
2103 : 6 : fprintf (dump_file, "(record_nonwrapping_chrec: ");
2104 : 6 : print_generic_expr (dump_file, chrec);
2105 : 6 : fprintf (dump_file, ")\n");
2106 : : }
2107 : 10357281 : }
2108 : :
2109 : : /* Return true if CHREC's nonwrapping flag is set. */
2110 : :
2111 : 238781 : bool nonwrapping_chrec_p (tree chrec)
2112 : : {
2113 : 238781 : if (!chrec || TREE_CODE(chrec) != POLYNOMIAL_CHREC)
2114 : : return false;
2115 : :
2116 : 238781 : return CHREC_NOWRAP(chrec);
2117 : : }
2118 : :
2119 : : /* Analyzes and returns the scalar evolution of VAR address in LOOP. */
2120 : :
2121 : : static tree
2122 : 78629 : analyze_scalar_evolution_for_address_of (class loop *loop, tree var)
2123 : : {
2124 : 78629 : return analyze_scalar_evolution (loop, build_fold_addr_expr (var));
2125 : : }
2126 : :
2127 : : /* Analyze scalar evolution of use of VERSION in USE_LOOP with respect to
2128 : : WRTO_LOOP (which should be a superloop of USE_LOOP)
2129 : :
2130 : : FOLDED_CASTS is set to true if resolve_mixers used
2131 : : chrec_convert_aggressive (TODO -- not really, we are way too conservative
2132 : : at the moment in order to keep things simple).
2133 : :
2134 : : To illustrate the meaning of USE_LOOP and WRTO_LOOP, consider the following
2135 : : example:
2136 : :
2137 : : for (i = 0; i < 100; i++) -- loop 1
2138 : : {
2139 : : for (j = 0; j < 100; j++) -- loop 2
2140 : : {
2141 : : k1 = i;
2142 : : k2 = j;
2143 : :
2144 : : use2 (k1, k2);
2145 : :
2146 : : for (t = 0; t < 100; t++) -- loop 3
2147 : : use3 (k1, k2);
2148 : :
2149 : : }
2150 : : use1 (k1, k2);
2151 : : }
2152 : :
2153 : : Both k1 and k2 are invariants in loop3, thus
2154 : : analyze_scalar_evolution_in_loop (loop3, loop3, k1) = k1
2155 : : analyze_scalar_evolution_in_loop (loop3, loop3, k2) = k2
2156 : :
2157 : : As they are invariant, it does not matter whether we consider their
2158 : : usage in loop 3 or loop 2, hence
2159 : : analyze_scalar_evolution_in_loop (loop2, loop3, k1) =
2160 : : analyze_scalar_evolution_in_loop (loop2, loop2, k1) = i
2161 : : analyze_scalar_evolution_in_loop (loop2, loop3, k2) =
2162 : : analyze_scalar_evolution_in_loop (loop2, loop2, k2) = [0,+,1]_2
2163 : :
2164 : : Similarly for their evolutions with respect to loop 1. The values of K2
2165 : : in the use in loop 2 vary independently on loop 1, thus we cannot express
2166 : : the evolution with respect to loop 1:
2167 : : analyze_scalar_evolution_in_loop (loop1, loop3, k1) =
2168 : : analyze_scalar_evolution_in_loop (loop1, loop2, k1) = [0,+,1]_1
2169 : : analyze_scalar_evolution_in_loop (loop1, loop3, k2) =
2170 : : analyze_scalar_evolution_in_loop (loop1, loop2, k2) = dont_know
2171 : :
2172 : : The value of k2 in the use in loop 1 is known, though:
2173 : : analyze_scalar_evolution_in_loop (loop1, loop1, k1) = [0,+,1]_1
2174 : : analyze_scalar_evolution_in_loop (loop1, loop1, k2) = 100
2175 : : */
2176 : :
2177 : : static tree
2178 : 48832963 : analyze_scalar_evolution_in_loop (class loop *wrto_loop, class loop *use_loop,
2179 : : tree version, bool *folded_casts)
2180 : : {
2181 : 48832963 : bool val = false;
2182 : 48832963 : tree ev = version, tmp;
2183 : :
2184 : : /* We cannot just do
2185 : :
2186 : : tmp = analyze_scalar_evolution (use_loop, version);
2187 : : ev = resolve_mixers (wrto_loop, tmp, folded_casts);
2188 : :
2189 : : as resolve_mixers would query the scalar evolution with respect to
2190 : : wrto_loop. For example, in the situation described in the function
2191 : : comment, suppose that wrto_loop = loop1, use_loop = loop3 and
2192 : : version = k2. Then
2193 : :
2194 : : analyze_scalar_evolution (use_loop, version) = k2
2195 : :
2196 : : and resolve_mixers (loop1, k2, folded_casts) finds that the value of
2197 : : k2 in loop 1 is 100, which is a wrong result, since we are interested
2198 : : in the value in loop 3.
2199 : :
2200 : : Instead, we need to proceed from use_loop to wrto_loop loop by loop,
2201 : : each time checking that there is no evolution in the inner loop. */
2202 : :
2203 : 48832963 : if (folded_casts)
2204 : 48832963 : *folded_casts = false;
2205 : 51140475 : while (1)
2206 : : {
2207 : 49986719 : tmp = analyze_scalar_evolution (use_loop, ev);
2208 : 49986719 : ev = resolve_mixers (use_loop, tmp, folded_casts);
2209 : :
2210 : 49986719 : if (use_loop == wrto_loop)
2211 : : return ev;
2212 : :
2213 : : /* If the value of the use changes in the inner loop, we cannot express
2214 : : its value in the outer loop (we might try to return interval chrec,
2215 : : but we do not have a user for it anyway) */
2216 : 4350616 : if (!no_evolution_in_loop_p (ev, use_loop->num, &val)
2217 : 4350616 : || !val)
2218 : 3196860 : return chrec_dont_know;
2219 : :
2220 : 1153756 : use_loop = loop_outer (use_loop);
2221 : : }
2222 : : }
2223 : :
2224 : :
2225 : : /* Computes a hash function for database element ELT. */
2226 : :
2227 : : static inline hashval_t
2228 : 315133 : hash_idx_scev_info (const void *elt_)
2229 : : {
2230 : 315133 : unsigned idx = ((size_t) elt_) - 2;
2231 : 315133 : return scev_info_hasher::hash (&global_cache->entries[idx]);
2232 : : }
2233 : :
2234 : : /* Compares database elements E1 and E2. */
2235 : :
2236 : : static inline int
2237 : 31067350 : eq_idx_scev_info (const void *e1, const void *e2)
2238 : : {
2239 : 31067350 : unsigned idx1 = ((size_t) e1) - 2;
2240 : 31067350 : return scev_info_hasher::equal (&global_cache->entries[idx1],
2241 : 31067350 : (const scev_info_str *) e2);
2242 : : }
2243 : :
2244 : : /* Returns from CACHE the slot number of the cached chrec for NAME. */
2245 : :
2246 : : static unsigned
2247 : 62156233 : get_instantiated_value_entry (instantiate_cache_type &cache,
2248 : : tree name, edge instantiate_below)
2249 : : {
2250 : 62156233 : if (!cache.map)
2251 : : {
2252 : 28752845 : cache.map = htab_create (10, hash_idx_scev_info, eq_idx_scev_info, NULL);
2253 : 28752845 : cache.entries.create (10);
2254 : : }
2255 : :
2256 : 62156233 : scev_info_str e;
2257 : 62156233 : e.name_version = SSA_NAME_VERSION (name);
2258 : 62156233 : e.instantiated_below = instantiate_below->dest->index;
2259 : 62156233 : void **slot = htab_find_slot_with_hash (cache.map, &e,
2260 : : scev_info_hasher::hash (&e), INSERT);
2261 : 62156233 : if (!*slot)
2262 : : {
2263 : 32205653 : e.chrec = chrec_not_analyzed_yet;
2264 : 32205653 : *slot = (void *)(size_t)(cache.entries.length () + 2);
2265 : 32205653 : cache.entries.safe_push (e);
2266 : : }
2267 : :
2268 : 62156233 : return ((size_t)*slot) - 2;
2269 : : }
2270 : :
2271 : :
2272 : : /* Return the closed_loop_phi node for VAR. If there is none, return
2273 : : NULL_TREE. */
2274 : :
2275 : : static tree
2276 : 1936720 : loop_closed_phi_def (tree var)
2277 : : {
2278 : 1936720 : class loop *loop;
2279 : 1936720 : edge exit;
2280 : 1936720 : gphi *phi;
2281 : 1936720 : gphi_iterator psi;
2282 : :
2283 : 1936720 : if (var == NULL_TREE
2284 : 1936720 : || TREE_CODE (var) != SSA_NAME)
2285 : : return NULL_TREE;
2286 : :
2287 : 1936720 : loop = loop_containing_stmt (SSA_NAME_DEF_STMT (var));
2288 : 1936720 : exit = single_exit (loop);
2289 : 1936720 : if (!exit)
2290 : : return NULL_TREE;
2291 : :
2292 : 1511878 : for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2293 : : {
2294 : 923092 : phi = psi.phi ();
2295 : 923092 : if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == var)
2296 : 248953 : return PHI_RESULT (phi);
2297 : : }
2298 : :
2299 : : return NULL_TREE;
2300 : : }
2301 : :
2302 : : static tree instantiate_scev_r (edge, class loop *, class loop *,
2303 : : tree, bool *, int);
2304 : :
2305 : : /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2306 : : and EVOLUTION_LOOP, that were left under a symbolic form.
2307 : :
2308 : : CHREC is an SSA_NAME to be instantiated.
2309 : :
2310 : : CACHE is the cache of already instantiated values.
2311 : :
2312 : : Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2313 : : conversions that may wrap in signed/pointer type are folded, as long
2314 : : as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2315 : : then we don't do such fold.
2316 : :
2317 : : SIZE_EXPR is used for computing the size of the expression to be
2318 : : instantiated, and to stop if it exceeds some limit. */
2319 : :
2320 : : static tree
2321 : 108371472 : instantiate_scev_name (edge instantiate_below,
2322 : : class loop *evolution_loop, class loop *inner_loop,
2323 : : tree chrec,
2324 : : bool *fold_conversions,
2325 : : int size_expr)
2326 : : {
2327 : 108371472 : tree res;
2328 : 108371472 : class loop *def_loop;
2329 : 108371472 : basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (chrec));
2330 : :
2331 : : /* A parameter, nothing to do. */
2332 : 108371472 : if (!def_bb
2333 : 108371472 : || !dominated_by_p (CDI_DOMINATORS, def_bb, instantiate_below->dest))
2334 : 46215239 : return chrec;
2335 : :
2336 : : /* We cache the value of instantiated variable to avoid exponential
2337 : : time complexity due to reevaluations. We also store the convenient
2338 : : value in the cache in order to prevent infinite recursion -- we do
2339 : : not want to instantiate the SSA_NAME if it is in a mixer
2340 : : structure. This is used for avoiding the instantiation of
2341 : : recursively defined functions, such as:
2342 : :
2343 : : | a_2 -> {0, +, 1, +, a_2}_1 */
2344 : :
2345 : 62156233 : unsigned si = get_instantiated_value_entry (*global_cache,
2346 : : chrec, instantiate_below);
2347 : 62156233 : if (global_cache->get (si) != chrec_not_analyzed_yet)
2348 : : return global_cache->get (si);
2349 : :
2350 : : /* On recursion return chrec_dont_know. */
2351 : 32205653 : global_cache->set (si, chrec_dont_know);
2352 : :
2353 : 32205653 : def_loop = find_common_loop (evolution_loop, def_bb->loop_father);
2354 : :
2355 : 32205653 : if (! dominated_by_p (CDI_DOMINATORS,
2356 : 32205653 : def_loop->header, instantiate_below->dest))
2357 : : {
2358 : 185776 : gimple *def = SSA_NAME_DEF_STMT (chrec);
2359 : 185776 : if (gassign *ass = dyn_cast <gassign *> (def))
2360 : : {
2361 : 135116 : switch (gimple_assign_rhs_class (ass))
2362 : : {
2363 : 5438 : case GIMPLE_UNARY_RHS:
2364 : 5438 : {
2365 : 5438 : tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2366 : : inner_loop, gimple_assign_rhs1 (ass),
2367 : : fold_conversions, size_expr);
2368 : 5438 : if (op0 == chrec_dont_know)
2369 : : return chrec_dont_know;
2370 : 1248 : res = fold_build1 (gimple_assign_rhs_code (ass),
2371 : : TREE_TYPE (chrec), op0);
2372 : 1248 : break;
2373 : : }
2374 : 53986 : case GIMPLE_BINARY_RHS:
2375 : 53986 : {
2376 : 53986 : tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2377 : : inner_loop, gimple_assign_rhs1 (ass),
2378 : : fold_conversions, size_expr);
2379 : 53986 : if (op0 == chrec_dont_know)
2380 : : return chrec_dont_know;
2381 : 11846 : tree op1 = instantiate_scev_r (instantiate_below, evolution_loop,
2382 : : inner_loop, gimple_assign_rhs2 (ass),
2383 : : fold_conversions, size_expr);
2384 : 5923 : if (op1 == chrec_dont_know)
2385 : : return chrec_dont_know;
2386 : 2274 : res = fold_build2 (gimple_assign_rhs_code (ass),
2387 : : TREE_TYPE (chrec), op0, op1);
2388 : 2274 : break;
2389 : : }
2390 : 75692 : default:
2391 : 75692 : res = chrec_dont_know;
2392 : : }
2393 : : }
2394 : : else
2395 : 50660 : res = chrec_dont_know;
2396 : 129874 : global_cache->set (si, res);
2397 : 129874 : return res;
2398 : : }
2399 : :
2400 : : /* If the analysis yields a parametric chrec, instantiate the
2401 : : result again. */
2402 : 32019877 : res = analyze_scalar_evolution (def_loop, chrec);
2403 : :
2404 : : /* Don't instantiate default definitions. */
2405 : 32019877 : if (TREE_CODE (res) == SSA_NAME
2406 : 32019877 : && SSA_NAME_IS_DEFAULT_DEF (res))
2407 : : ;
2408 : :
2409 : : /* Don't instantiate loop-closed-ssa phi nodes. */
2410 : 32003973 : else if (TREE_CODE (res) == SSA_NAME
2411 : 94362646 : && loop_depth (loop_containing_stmt (SSA_NAME_DEF_STMT (res)))
2412 : 31179748 : > loop_depth (def_loop))
2413 : : {
2414 : 1958202 : if (res == chrec)
2415 : 1936720 : res = loop_closed_phi_def (chrec);
2416 : : else
2417 : : res = chrec;
2418 : :
2419 : : /* When there is no loop_closed_phi_def, it means that the
2420 : : variable is not used after the loop: try to still compute the
2421 : : value of the variable when exiting the loop. */
2422 : 1958202 : if (res == NULL_TREE)
2423 : : {
2424 : 1687767 : loop_p loop = loop_containing_stmt (SSA_NAME_DEF_STMT (chrec));
2425 : 1687767 : res = analyze_scalar_evolution (loop, chrec);
2426 : 1687767 : res = compute_overall_effect_of_inner_loop (loop, res);
2427 : 1687767 : res = instantiate_scev_r (instantiate_below, evolution_loop,
2428 : : inner_loop, res,
2429 : : fold_conversions, size_expr);
2430 : : }
2431 : 270435 : else if (dominated_by_p (CDI_DOMINATORS,
2432 : 270435 : gimple_bb (SSA_NAME_DEF_STMT (res)),
2433 : 270435 : instantiate_below->dest))
2434 : 270435 : res = chrec_dont_know;
2435 : : }
2436 : :
2437 : 30045771 : else if (res != chrec_dont_know)
2438 : : {
2439 : 30045771 : if (inner_loop
2440 : 1213391 : && def_bb->loop_father != inner_loop
2441 : 30629384 : && !flow_loop_nested_p (def_bb->loop_father, inner_loop))
2442 : : /* ??? We could try to compute the overall effect of the loop here. */
2443 : 320 : res = chrec_dont_know;
2444 : : else
2445 : 30045451 : res = instantiate_scev_r (instantiate_below, evolution_loop,
2446 : : inner_loop, res,
2447 : : fold_conversions, size_expr);
2448 : : }
2449 : :
2450 : : /* Store the correct value to the cache. */
2451 : 32019877 : global_cache->set (si, res);
2452 : 32019877 : return res;
2453 : : }
2454 : :
2455 : : /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2456 : : and EVOLUTION_LOOP, that were left under a symbolic form.
2457 : :
2458 : : CHREC is a polynomial chain of recurrence to be instantiated.
2459 : :
2460 : : CACHE is the cache of already instantiated values.
2461 : :
2462 : : Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2463 : : conversions that may wrap in signed/pointer type are folded, as long
2464 : : as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2465 : : then we don't do such fold.
2466 : :
2467 : : SIZE_EXPR is used for computing the size of the expression to be
2468 : : instantiated, and to stop if it exceeds some limit. */
2469 : :
2470 : : static tree
2471 : 58949161 : instantiate_scev_poly (edge instantiate_below,
2472 : : class loop *evolution_loop, class loop *,
2473 : : tree chrec, bool *fold_conversions, int size_expr)
2474 : : {
2475 : 58949161 : tree op1;
2476 : 117898322 : tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2477 : : get_chrec_loop (chrec),
2478 : 58949161 : CHREC_LEFT (chrec), fold_conversions,
2479 : : size_expr);
2480 : 58949161 : if (op0 == chrec_dont_know)
2481 : : return chrec_dont_know;
2482 : :
2483 : 117491224 : op1 = instantiate_scev_r (instantiate_below, evolution_loop,
2484 : : get_chrec_loop (chrec),
2485 : 58745612 : CHREC_RIGHT (chrec), fold_conversions,
2486 : : size_expr);
2487 : 58745612 : if (op1 == chrec_dont_know)
2488 : : return chrec_dont_know;
2489 : :
2490 : 57997361 : if (CHREC_LEFT (chrec) != op0
2491 : 57997361 : || CHREC_RIGHT (chrec) != op1)
2492 : : {
2493 : 7694016 : op1 = chrec_convert_rhs (chrec_type (op0), op1, NULL);
2494 : 7694016 : chrec = build_polynomial_chrec (CHREC_VARIABLE (chrec), op0, op1);
2495 : : }
2496 : :
2497 : : return chrec;
2498 : : }
2499 : :
2500 : : /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2501 : : and EVOLUTION_LOOP, that were left under a symbolic form.
2502 : :
2503 : : "C0 CODE C1" is a binary expression of type TYPE to be instantiated.
2504 : :
2505 : : CACHE is the cache of already instantiated values.
2506 : :
2507 : : Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2508 : : conversions that may wrap in signed/pointer type are folded, as long
2509 : : as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2510 : : then we don't do such fold.
2511 : :
2512 : : SIZE_EXPR is used for computing the size of the expression to be
2513 : : instantiated, and to stop if it exceeds some limit. */
2514 : :
2515 : : static tree
2516 : 23663375 : instantiate_scev_binary (edge instantiate_below,
2517 : : class loop *evolution_loop, class loop *inner_loop,
2518 : : tree chrec, enum tree_code code,
2519 : : tree type, tree c0, tree c1,
2520 : : bool *fold_conversions, int size_expr)
2521 : : {
2522 : 23663375 : tree op1;
2523 : 23663375 : tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop,
2524 : : c0, fold_conversions, size_expr);
2525 : 23663375 : if (op0 == chrec_dont_know)
2526 : : return chrec_dont_know;
2527 : :
2528 : : /* While we eventually compute the same op1 if c0 == c1 the process
2529 : : of doing this is expensive so the following short-cut prevents
2530 : : exponential compile-time behavior. */
2531 : 23322048 : if (c0 != c1)
2532 : : {
2533 : 23300995 : op1 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop,
2534 : : c1, fold_conversions, size_expr);
2535 : 23300995 : if (op1 == chrec_dont_know)
2536 : : return chrec_dont_know;
2537 : : }
2538 : : else
2539 : : op1 = op0;
2540 : :
2541 : 23262691 : if (c0 != op0
2542 : 23262691 : || c1 != op1)
2543 : : {
2544 : 13391617 : op0 = chrec_convert (type, op0, NULL);
2545 : 13391617 : op1 = chrec_convert_rhs (type, op1, NULL);
2546 : :
2547 : 13391617 : switch (code)
2548 : : {
2549 : 7829108 : case POINTER_PLUS_EXPR:
2550 : 7829108 : case PLUS_EXPR:
2551 : 7829108 : return chrec_fold_plus (type, op0, op1);
2552 : :
2553 : 827590 : case MINUS_EXPR:
2554 : 827590 : return chrec_fold_minus (type, op0, op1);
2555 : :
2556 : 4734919 : case MULT_EXPR:
2557 : 4734919 : return chrec_fold_multiply (type, op0, op1);
2558 : :
2559 : 0 : default:
2560 : 0 : gcc_unreachable ();
2561 : : }
2562 : : }
2563 : :
2564 : 9871074 : return chrec ? chrec : fold_build2 (code, type, c0, c1);
2565 : : }
2566 : :
2567 : : /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2568 : : and EVOLUTION_LOOP, that were left under a symbolic form.
2569 : :
2570 : : "CHREC" that stands for a convert expression "(TYPE) OP" is to be
2571 : : instantiated.
2572 : :
2573 : : CACHE is the cache of already instantiated values.
2574 : :
2575 : : Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2576 : : conversions that may wrap in signed/pointer type are folded, as long
2577 : : as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2578 : : then we don't do such fold.
2579 : :
2580 : : SIZE_EXPR is used for computing the size of the expression to be
2581 : : instantiated, and to stop if it exceeds some limit. */
2582 : :
2583 : : static tree
2584 : 23265943 : instantiate_scev_convert (edge instantiate_below,
2585 : : class loop *evolution_loop, class loop *inner_loop,
2586 : : tree chrec, tree type, tree op,
2587 : : bool *fold_conversions, int size_expr)
2588 : : {
2589 : 23265943 : tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2590 : : inner_loop, op,
2591 : : fold_conversions, size_expr);
2592 : :
2593 : 23265943 : if (op0 == chrec_dont_know)
2594 : : return chrec_dont_know;
2595 : :
2596 : 18626885 : if (fold_conversions)
2597 : : {
2598 : 7430820 : tree tmp = chrec_convert_aggressive (type, op0, fold_conversions);
2599 : 7430820 : if (tmp)
2600 : : return tmp;
2601 : :
2602 : : /* If we used chrec_convert_aggressive, we can no longer assume that
2603 : : signed chrecs do not overflow, as chrec_convert does, so avoid
2604 : : calling it in that case. */
2605 : 6941471 : if (*fold_conversions)
2606 : : {
2607 : 7280 : if (chrec && op0 == op)
2608 : : return chrec;
2609 : :
2610 : 7280 : return fold_convert (type, op0);
2611 : : }
2612 : : }
2613 : :
2614 : 18130256 : return chrec_convert (type, op0, NULL);
2615 : : }
2616 : :
2617 : : /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2618 : : and EVOLUTION_LOOP, that were left under a symbolic form.
2619 : :
2620 : : CHREC is a BIT_NOT_EXPR or a NEGATE_EXPR expression to be instantiated.
2621 : : Handle ~X as -1 - X.
2622 : : Handle -X as -1 * X.
2623 : :
2624 : : CACHE is the cache of already instantiated values.
2625 : :
2626 : : Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2627 : : conversions that may wrap in signed/pointer type are folded, as long
2628 : : as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2629 : : then we don't do such fold.
2630 : :
2631 : : SIZE_EXPR is used for computing the size of the expression to be
2632 : : instantiated, and to stop if it exceeds some limit. */
2633 : :
2634 : : static tree
2635 : 335474 : instantiate_scev_not (edge instantiate_below,
2636 : : class loop *evolution_loop, class loop *inner_loop,
2637 : : tree chrec,
2638 : : enum tree_code code, tree type, tree op,
2639 : : bool *fold_conversions, int size_expr)
2640 : : {
2641 : 335474 : tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2642 : : inner_loop, op,
2643 : : fold_conversions, size_expr);
2644 : :
2645 : 335474 : if (op0 == chrec_dont_know)
2646 : : return chrec_dont_know;
2647 : :
2648 : 267827 : if (op != op0)
2649 : : {
2650 : 35115 : op0 = chrec_convert (type, op0, NULL);
2651 : :
2652 : 35115 : switch (code)
2653 : : {
2654 : 1851 : case BIT_NOT_EXPR:
2655 : 1851 : return chrec_fold_minus
2656 : 1851 : (type, fold_convert (type, integer_minus_one_node), op0);
2657 : :
2658 : 33264 : case NEGATE_EXPR:
2659 : 33264 : return chrec_fold_multiply
2660 : 33264 : (type, fold_convert (type, integer_minus_one_node), op0);
2661 : :
2662 : 0 : default:
2663 : 0 : gcc_unreachable ();
2664 : : }
2665 : : }
2666 : :
2667 : 232712 : return chrec ? chrec : fold_build1 (code, type, op0);
2668 : : }
2669 : :
2670 : : /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2671 : : and EVOLUTION_LOOP, that were left under a symbolic form.
2672 : :
2673 : : CHREC is the scalar evolution to instantiate.
2674 : :
2675 : : CACHE is the cache of already instantiated values.
2676 : :
2677 : : Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2678 : : conversions that may wrap in signed/pointer type are folded, as long
2679 : : as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2680 : : then we don't do such fold.
2681 : :
2682 : : SIZE_EXPR is used for computing the size of the expression to be
2683 : : instantiated, and to stop if it exceeds some limit. */
2684 : :
2685 : : static tree
2686 : 354537305 : instantiate_scev_r (edge instantiate_below,
2687 : : class loop *evolution_loop, class loop *inner_loop,
2688 : : tree chrec,
2689 : : bool *fold_conversions, int size_expr)
2690 : : {
2691 : : /* Give up if the expression is larger than the MAX that we allow. */
2692 : 354537305 : if (size_expr++ > param_scev_max_expr_size)
2693 : 11 : return chrec_dont_know;
2694 : :
2695 : 354537294 : if (chrec == NULL_TREE
2696 : 491755012 : || automatically_generated_chrec_p (chrec)
2697 : 707028193 : || is_gimple_min_invariant (chrec))
2698 : 139264113 : return chrec;
2699 : :
2700 : 215273181 : switch (TREE_CODE (chrec))
2701 : : {
2702 : 108371472 : case SSA_NAME:
2703 : 108371472 : return instantiate_scev_name (instantiate_below, evolution_loop,
2704 : : inner_loop, chrec,
2705 : 108371472 : fold_conversions, size_expr);
2706 : :
2707 : 58949161 : case POLYNOMIAL_CHREC:
2708 : 58949161 : return instantiate_scev_poly (instantiate_below, evolution_loop,
2709 : : inner_loop, chrec,
2710 : 58949161 : fold_conversions, size_expr);
2711 : :
2712 : 23663375 : case POINTER_PLUS_EXPR:
2713 : 23663375 : case PLUS_EXPR:
2714 : 23663375 : case MINUS_EXPR:
2715 : 23663375 : case MULT_EXPR:
2716 : 23663375 : return instantiate_scev_binary (instantiate_below, evolution_loop,
2717 : : inner_loop, chrec,
2718 : : TREE_CODE (chrec), chrec_type (chrec),
2719 : 23663375 : TREE_OPERAND (chrec, 0),
2720 : 23663375 : TREE_OPERAND (chrec, 1),
2721 : 23663375 : fold_conversions, size_expr);
2722 : :
2723 : 23265943 : CASE_CONVERT:
2724 : 23265943 : return instantiate_scev_convert (instantiate_below, evolution_loop,
2725 : : inner_loop, chrec,
2726 : 23265943 : TREE_TYPE (chrec), TREE_OPERAND (chrec, 0),
2727 : 23265943 : fold_conversions, size_expr);
2728 : :
2729 : 335474 : case NEGATE_EXPR:
2730 : 335474 : case BIT_NOT_EXPR:
2731 : 335474 : return instantiate_scev_not (instantiate_below, evolution_loop,
2732 : : inner_loop, chrec,
2733 : 335474 : TREE_CODE (chrec), TREE_TYPE (chrec),
2734 : 335474 : TREE_OPERAND (chrec, 0),
2735 : 335474 : fold_conversions, size_expr);
2736 : :
2737 : 0 : case ADDR_EXPR:
2738 : 0 : if (is_gimple_min_invariant (chrec))
2739 : : return chrec;
2740 : : /* Fallthru. */
2741 : 0 : case SCEV_NOT_KNOWN:
2742 : 0 : return chrec_dont_know;
2743 : :
2744 : 0 : case SCEV_KNOWN:
2745 : 0 : return chrec_known;
2746 : :
2747 : 687756 : default:
2748 : 687756 : if (CONSTANT_CLASS_P (chrec))
2749 : : return chrec;
2750 : 687756 : return chrec_dont_know;
2751 : : }
2752 : : }
2753 : :
2754 : : /* Analyze all the parameters of the chrec that were left under a
2755 : : symbolic form. INSTANTIATE_BELOW is the basic block that stops the
2756 : : recursive instantiation of parameters: a parameter is a variable
2757 : : that is defined in a basic block that dominates INSTANTIATE_BELOW or
2758 : : a function parameter. */
2759 : :
2760 : : tree
2761 : 84491461 : instantiate_scev (edge instantiate_below, class loop *evolution_loop,
2762 : : tree chrec)
2763 : : {
2764 : 84491461 : tree res;
2765 : :
2766 : 84491461 : if (dump_file && (dump_flags & TDF_SCEV))
2767 : : {
2768 : 16 : fprintf (dump_file, "(instantiate_scev \n");
2769 : 16 : fprintf (dump_file, " (instantiate_below = %d -> %d)\n",
2770 : 16 : instantiate_below->src->index, instantiate_below->dest->index);
2771 : 16 : if (evolution_loop)
2772 : 16 : fprintf (dump_file, " (evolution_loop = %d)\n", evolution_loop->num);
2773 : 16 : fprintf (dump_file, " (chrec = ");
2774 : 16 : print_generic_expr (dump_file, chrec);
2775 : 16 : fprintf (dump_file, ")\n");
2776 : : }
2777 : :
2778 : 84491461 : bool destr = false;
2779 : 84491461 : if (!global_cache)
2780 : : {
2781 : 36626228 : global_cache = new instantiate_cache_type;
2782 : 36626228 : destr = true;
2783 : : }
2784 : :
2785 : 84491461 : res = instantiate_scev_r (instantiate_below, evolution_loop,
2786 : : NULL, chrec, NULL, 0);
2787 : :
2788 : 84491461 : if (destr)
2789 : : {
2790 : 36626228 : delete global_cache;
2791 : 36626228 : global_cache = NULL;
2792 : : }
2793 : :
2794 : 84491461 : if (dump_file && (dump_flags & TDF_SCEV))
2795 : : {
2796 : 16 : fprintf (dump_file, " (res = ");
2797 : 16 : print_generic_expr (dump_file, res);
2798 : 16 : fprintf (dump_file, "))\n");
2799 : : }
2800 : :
2801 : 84491461 : return res;
2802 : : }
2803 : :
2804 : : /* Similar to instantiate_parameters, but does not introduce the
2805 : : evolutions in outer loops for LOOP invariants in CHREC, and does not
2806 : : care about causing overflows, as long as they do not affect value
2807 : : of an expression. */
2808 : :
2809 : : tree
2810 : 49986719 : resolve_mixers (class loop *loop, tree chrec, bool *folded_casts)
2811 : : {
2812 : 49986719 : bool destr = false;
2813 : 49986719 : bool fold_conversions = false;
2814 : 49986719 : if (!global_cache)
2815 : : {
2816 : 49283216 : global_cache = new instantiate_cache_type;
2817 : 49283216 : destr = true;
2818 : : }
2819 : :
2820 : 49986719 : tree ret = instantiate_scev_r (loop_preheader_edge (loop), loop, NULL,
2821 : : chrec, &fold_conversions, 0);
2822 : :
2823 : 49986719 : if (folded_casts && !*folded_casts)
2824 : 49986719 : *folded_casts = fold_conversions;
2825 : :
2826 : 49986719 : if (destr)
2827 : : {
2828 : 49283216 : delete global_cache;
2829 : 49283216 : global_cache = NULL;
2830 : : }
2831 : :
2832 : 49986719 : return ret;
2833 : : }
2834 : :
2835 : : /* Entry point for the analysis of the number of iterations pass.
2836 : : This function tries to safely approximate the number of iterations
2837 : : the loop will run. When this property is not decidable at compile
2838 : : time, the result is chrec_dont_know. Otherwise the result is a
2839 : : scalar or a symbolic parameter. When the number of iterations may
2840 : : be equal to zero and the property cannot be determined at compile
2841 : : time, the result is a COND_EXPR that represents in a symbolic form
2842 : : the conditions under which the number of iterations is not zero.
2843 : :
2844 : : Example of analysis: suppose that the loop has an exit condition:
2845 : :
2846 : : "if (b > 49) goto end_loop;"
2847 : :
2848 : : and that in a previous analysis we have determined that the
2849 : : variable 'b' has an evolution function:
2850 : :
2851 : : "EF = {23, +, 5}_2".
2852 : :
2853 : : When we evaluate the function at the point 5, i.e. the value of the
2854 : : variable 'b' after 5 iterations in the loop, we have EF (5) = 48,
2855 : : and EF (6) = 53. In this case the value of 'b' on exit is '53' and
2856 : : the loop body has been executed 6 times. */
2857 : :
2858 : : tree
2859 : 10379902 : number_of_latch_executions (class loop *loop)
2860 : : {
2861 : 10379902 : edge exit;
2862 : 10379902 : class tree_niter_desc niter_desc;
2863 : 10379902 : tree may_be_zero;
2864 : 10379902 : tree res;
2865 : :
2866 : : /* Determine whether the number of iterations in loop has already
2867 : : been computed. */
2868 : 10379902 : res = loop->nb_iterations;
2869 : 10379902 : if (res)
2870 : : return res;
2871 : :
2872 : 6848700 : may_be_zero = NULL_TREE;
2873 : :
2874 : 6848700 : if (dump_file && (dump_flags & TDF_SCEV))
2875 : 0 : fprintf (dump_file, "(number_of_iterations_in_loop = \n");
2876 : :
2877 : 6848700 : res = chrec_dont_know;
2878 : 6848700 : exit = single_exit (loop);
2879 : :
2880 : 6848700 : if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
2881 : : {
2882 : 3362830 : may_be_zero = niter_desc.may_be_zero;
2883 : 3362830 : res = niter_desc.niter;
2884 : : }
2885 : :
2886 : 6848700 : if (res == chrec_dont_know
2887 : 3362830 : || !may_be_zero
2888 : 10211530 : || integer_zerop (may_be_zero))
2889 : : ;
2890 : 538586 : else if (integer_nonzerop (may_be_zero))
2891 : 33 : res = build_int_cst (TREE_TYPE (res), 0);
2892 : :
2893 : 538553 : else if (COMPARISON_CLASS_P (may_be_zero))
2894 : 538553 : res = fold_build3 (COND_EXPR, TREE_TYPE (res), may_be_zero,
2895 : : build_int_cst (TREE_TYPE (res), 0), res);
2896 : : else
2897 : 0 : res = chrec_dont_know;
2898 : :
2899 : 6848700 : if (dump_file && (dump_flags & TDF_SCEV))
2900 : : {
2901 : 0 : fprintf (dump_file, " (set_nb_iterations_in_loop = ");
2902 : 0 : print_generic_expr (dump_file, res);
2903 : 0 : fprintf (dump_file, "))\n");
2904 : : }
2905 : :
2906 : 6848700 : loop->nb_iterations = res;
2907 : 6848700 : return res;
2908 : 10379902 : }
2909 : :
2910 : :
2911 : : /* Counters for the stats. */
2912 : :
2913 : : struct chrec_stats
2914 : : {
2915 : : unsigned nb_chrecs;
2916 : : unsigned nb_affine;
2917 : : unsigned nb_affine_multivar;
2918 : : unsigned nb_higher_poly;
2919 : : unsigned nb_chrec_dont_know;
2920 : : unsigned nb_undetermined;
2921 : : };
2922 : :
2923 : : /* Reset the counters. */
2924 : :
2925 : : static inline void
2926 : 0 : reset_chrecs_counters (struct chrec_stats *stats)
2927 : : {
2928 : 0 : stats->nb_chrecs = 0;
2929 : 0 : stats->nb_affine = 0;
2930 : 0 : stats->nb_affine_multivar = 0;
2931 : 0 : stats->nb_higher_poly = 0;
2932 : 0 : stats->nb_chrec_dont_know = 0;
2933 : 0 : stats->nb_undetermined = 0;
2934 : : }
2935 : :
2936 : : /* Dump the contents of a CHREC_STATS structure. */
2937 : :
2938 : : static void
2939 : 0 : dump_chrecs_stats (FILE *file, struct chrec_stats *stats)
2940 : : {
2941 : 0 : fprintf (file, "\n(\n");
2942 : 0 : fprintf (file, "-----------------------------------------\n");
2943 : 0 : fprintf (file, "%d\taffine univariate chrecs\n", stats->nb_affine);
2944 : 0 : fprintf (file, "%d\taffine multivariate chrecs\n", stats->nb_affine_multivar);
2945 : 0 : fprintf (file, "%d\tdegree greater than 2 polynomials\n",
2946 : : stats->nb_higher_poly);
2947 : 0 : fprintf (file, "%d\tchrec_dont_know chrecs\n", stats->nb_chrec_dont_know);
2948 : 0 : fprintf (file, "-----------------------------------------\n");
2949 : 0 : fprintf (file, "%d\ttotal chrecs\n", stats->nb_chrecs);
2950 : 0 : fprintf (file, "%d\twith undetermined coefficients\n",
2951 : : stats->nb_undetermined);
2952 : 0 : fprintf (file, "-----------------------------------------\n");
2953 : 0 : fprintf (file, "%d\tchrecs in the scev database\n",
2954 : 0 : (int) scalar_evolution_info->elements ());
2955 : 0 : fprintf (file, "%d\tsets in the scev database\n", nb_set_scev);
2956 : 0 : fprintf (file, "%d\tgets in the scev database\n", nb_get_scev);
2957 : 0 : fprintf (file, "-----------------------------------------\n");
2958 : 0 : fprintf (file, ")\n\n");
2959 : 0 : }
2960 : :
2961 : : /* Gather statistics about CHREC. */
2962 : :
2963 : : static void
2964 : 0 : gather_chrec_stats (tree chrec, struct chrec_stats *stats)
2965 : : {
2966 : 0 : if (dump_file && (dump_flags & TDF_STATS))
2967 : : {
2968 : 0 : fprintf (dump_file, "(classify_chrec ");
2969 : 0 : print_generic_expr (dump_file, chrec);
2970 : 0 : fprintf (dump_file, "\n");
2971 : : }
2972 : :
2973 : 0 : stats->nb_chrecs++;
2974 : :
2975 : 0 : if (chrec == NULL_TREE)
2976 : : {
2977 : 0 : stats->nb_undetermined++;
2978 : 0 : return;
2979 : : }
2980 : :
2981 : 0 : switch (TREE_CODE (chrec))
2982 : : {
2983 : 0 : case POLYNOMIAL_CHREC:
2984 : 0 : if (evolution_function_is_affine_p (chrec))
2985 : : {
2986 : 0 : if (dump_file && (dump_flags & TDF_STATS))
2987 : 0 : fprintf (dump_file, " affine_univariate\n");
2988 : 0 : stats->nb_affine++;
2989 : : }
2990 : 0 : else if (evolution_function_is_affine_multivariate_p (chrec, 0))
2991 : : {
2992 : 0 : if (dump_file && (dump_flags & TDF_STATS))
2993 : 0 : fprintf (dump_file, " affine_multivariate\n");
2994 : 0 : stats->nb_affine_multivar++;
2995 : : }
2996 : : else
2997 : : {
2998 : 0 : if (dump_file && (dump_flags & TDF_STATS))
2999 : 0 : fprintf (dump_file, " higher_degree_polynomial\n");
3000 : 0 : stats->nb_higher_poly++;
3001 : : }
3002 : :
3003 : : break;
3004 : :
3005 : : default:
3006 : : break;
3007 : : }
3008 : :
3009 : 0 : if (chrec_contains_undetermined (chrec))
3010 : : {
3011 : 0 : if (dump_file && (dump_flags & TDF_STATS))
3012 : 0 : fprintf (dump_file, " undetermined\n");
3013 : 0 : stats->nb_undetermined++;
3014 : : }
3015 : :
3016 : 0 : if (dump_file && (dump_flags & TDF_STATS))
3017 : 0 : fprintf (dump_file, ")\n");
3018 : : }
3019 : :
3020 : : /* Classify the chrecs of the whole database. */
3021 : :
3022 : : void
3023 : 0 : gather_stats_on_scev_database (void)
3024 : : {
3025 : 0 : struct chrec_stats stats;
3026 : :
3027 : 0 : if (!dump_file)
3028 : 0 : return;
3029 : :
3030 : 0 : reset_chrecs_counters (&stats);
3031 : :
3032 : 0 : hash_table<scev_info_hasher>::iterator iter;
3033 : 0 : scev_info_str *elt;
3034 : 0 : FOR_EACH_HASH_TABLE_ELEMENT (*scalar_evolution_info, elt, scev_info_str *,
3035 : : iter)
3036 : 0 : gather_chrec_stats (elt->chrec, &stats);
3037 : :
3038 : 0 : dump_chrecs_stats (dump_file, &stats);
3039 : : }
3040 : :
3041 : :
3042 : : /* Initialize the analysis of scalar evolutions for LOOPS. */
3043 : :
3044 : : void
3045 : 15138000 : scev_initialize (void)
3046 : : {
3047 : 15138000 : gcc_assert (! scev_initialized_p ()
3048 : : && loops_state_satisfies_p (cfun, LOOPS_NORMAL));
3049 : :
3050 : 15138000 : scalar_evolution_info = hash_table<scev_info_hasher>::create_ggc (100);
3051 : :
3052 : 54758989 : for (auto loop : loops_list (cfun, 0))
3053 : 9344989 : loop->nb_iterations = NULL_TREE;
3054 : 15138000 : }
3055 : :
3056 : : /* Return true if SCEV is initialized. */
3057 : :
3058 : : bool
3059 : 100486865 : scev_initialized_p (void)
3060 : : {
3061 : 100486865 : return scalar_evolution_info != NULL;
3062 : : }
3063 : :
3064 : : /* Cleans up the information cached by the scalar evolutions analysis
3065 : : in the hash table. */
3066 : :
3067 : : void
3068 : 24282047 : scev_reset_htab (void)
3069 : : {
3070 : 24282047 : if (!scalar_evolution_info)
3071 : : return;
3072 : :
3073 : 6110159 : scalar_evolution_info->empty ();
3074 : : }
3075 : :
3076 : : /* Cleans up the information cached by the scalar evolutions analysis
3077 : : in the hash table and in the loop->nb_iterations. */
3078 : :
3079 : : void
3080 : 13028020 : scev_reset (void)
3081 : : {
3082 : 13028020 : scev_reset_htab ();
3083 : :
3084 : 63755672 : for (auto loop : loops_list (cfun, 0))
3085 : 24671612 : loop->nb_iterations = NULL_TREE;
3086 : 13028020 : }
3087 : :
3088 : : /* Return true if the IV calculation in TYPE can overflow based on the knowledge
3089 : : of the upper bound on the number of iterations of LOOP, the BASE and STEP
3090 : : of IV.
3091 : :
3092 : : We do not use information whether TYPE can overflow so it is safe to
3093 : : use this test even for derived IVs not computed every iteration or
3094 : : hypotetical IVs to be inserted into code. */
3095 : :
3096 : : bool
3097 : 14885116 : iv_can_overflow_p (class loop *loop, tree type, tree base, tree step)
3098 : : {
3099 : 14885116 : widest_int nit;
3100 : 14885116 : wide_int base_min, base_max, step_min, step_max, type_min, type_max;
3101 : 14885116 : signop sgn = TYPE_SIGN (type);
3102 : 14885116 : int_range_max r;
3103 : :
3104 : 14885116 : if (integer_zerop (step))
3105 : : return false;
3106 : :
3107 : 29769632 : if (!INTEGRAL_TYPE_P (TREE_TYPE (base))
3108 : 27652204 : || !get_range_query (cfun)->range_of_expr (r, base)
3109 : 13826102 : || r.varying_p ()
3110 : 27103140 : || r.undefined_p ())
3111 : 2669443 : return true;
3112 : :
3113 : 12215669 : base_min = r.lower_bound ();
3114 : 12215669 : base_max = r.upper_bound ();
3115 : :
3116 : 24430773 : if (!INTEGRAL_TYPE_P (TREE_TYPE (step))
3117 : 24431338 : || !get_range_query (cfun)->range_of_expr (r, step)
3118 : 12215669 : || r.varying_p ()
3119 : 24354033 : || r.undefined_p ())
3120 : 77305 : return true;
3121 : :
3122 : 12138364 : step_min = r.lower_bound ();
3123 : 12138364 : step_max = r.upper_bound ();
3124 : :
3125 : 12138364 : if (!get_max_loop_iterations (loop, &nit))
3126 : : return true;
3127 : :
3128 : 11432060 : type_min = wi::min_value (type);
3129 : 11432060 : type_max = wi::max_value (type);
3130 : :
3131 : : /* Just sanity check that we don't see values out of the range of the type.
3132 : : In this case the arithmetics below would overflow. */
3133 : 11432060 : gcc_checking_assert (wi::ge_p (base_min, type_min, sgn)
3134 : : && wi::le_p (base_max, type_max, sgn));
3135 : :
3136 : : /* Account the possible increment in the last ieration. */
3137 : 11432060 : wi::overflow_type overflow = wi::OVF_NONE;
3138 : 11432060 : nit = wi::add (nit, 1, SIGNED, &overflow);
3139 : 11432060 : if (overflow)
3140 : : return true;
3141 : :
3142 : : /* NIT is typeless and can exceed the precision of the type. In this case
3143 : : overflow is always possible, because we know STEP is non-zero. */
3144 : 11432060 : if (wi::min_precision (nit, UNSIGNED) > TYPE_PRECISION (type))
3145 : : return true;
3146 : 11190572 : wide_int nit2 = wide_int::from (nit, TYPE_PRECISION (type), UNSIGNED);
3147 : :
3148 : : /* If step can be positive, check that nit*step <= type_max-base.
3149 : : This can be done by unsigned arithmetic and we only need to watch overflow
3150 : : in the multiplication. The right hand side can always be represented in
3151 : : the type. */
3152 : 11190572 : if (sgn == UNSIGNED || !wi::neg_p (step_max))
3153 : : {
3154 : 11148063 : wi::overflow_type overflow = wi::OVF_NONE;
3155 : 11148063 : if (wi::gtu_p (wi::mul (step_max, nit2, UNSIGNED, &overflow),
3156 : 22296126 : type_max - base_max)
3157 : 22296126 : || overflow)
3158 : 5645608 : return true;
3159 : : }
3160 : : /* If step can be negative, check that nit*(-step) <= base_min-type_min. */
3161 : 5544964 : if (sgn == SIGNED && wi::neg_p (step_min))
3162 : : {
3163 : 42835 : wi::overflow_type overflow, overflow2;
3164 : 42835 : overflow = overflow2 = wi::OVF_NONE;
3165 : 85670 : if (wi::gtu_p (wi::mul (wi::neg (step_min, &overflow2),
3166 : : nit2, UNSIGNED, &overflow),
3167 : 85670 : base_min - type_min)
3168 : 85670 : || overflow || overflow2)
3169 : 16212 : return true;
3170 : : }
3171 : :
3172 : : return false;
3173 : 14885116 : }
3174 : :
3175 : : /* Given EV with form of "(type) {inner_base, inner_step}_loop", this
3176 : : function tries to derive condition under which it can be simplified
3177 : : into "{(type)inner_base, (type)inner_step}_loop". The condition is
3178 : : the maximum number that inner iv can iterate. */
3179 : :
3180 : : static tree
3181 : 38799 : derive_simple_iv_with_niters (tree ev, tree *niters)
3182 : : {
3183 : 38799 : if (!CONVERT_EXPR_P (ev))
3184 : : return ev;
3185 : :
3186 : 38799 : tree inner_ev = TREE_OPERAND (ev, 0);
3187 : 38799 : if (TREE_CODE (inner_ev) != POLYNOMIAL_CHREC)
3188 : : return ev;
3189 : :
3190 : 38799 : tree init = CHREC_LEFT (inner_ev);
3191 : 38799 : tree step = CHREC_RIGHT (inner_ev);
3192 : 38799 : if (TREE_CODE (init) != INTEGER_CST
3193 : 38799 : || TREE_CODE (step) != INTEGER_CST || integer_zerop (step))
3194 : 7237 : return ev;
3195 : :
3196 : 31562 : tree type = TREE_TYPE (ev);
3197 : 31562 : tree inner_type = TREE_TYPE (inner_ev);
3198 : 31562 : if (TYPE_PRECISION (inner_type) >= TYPE_PRECISION (type))
3199 : : return ev;
3200 : :
3201 : : /* Type conversion in "(type) {inner_base, inner_step}_loop" can be
3202 : : folded only if inner iv won't overflow. We compute the maximum
3203 : : number the inner iv can iterate before overflowing and return the
3204 : : simplified affine iv. */
3205 : 31562 : tree delta;
3206 : 31562 : init = fold_convert (type, init);
3207 : 31562 : step = fold_convert (type, step);
3208 : 31562 : ev = build_polynomial_chrec (CHREC_VARIABLE (inner_ev), init, step);
3209 : 31562 : if (tree_int_cst_sign_bit (step))
3210 : : {
3211 : 0 : tree bound = lower_bound_in_type (inner_type, inner_type);
3212 : 0 : delta = fold_build2 (MINUS_EXPR, type, init, fold_convert (type, bound));
3213 : 0 : step = fold_build1 (NEGATE_EXPR, type, step);
3214 : : }
3215 : : else
3216 : : {
3217 : 31562 : tree bound = upper_bound_in_type (inner_type, inner_type);
3218 : 31562 : delta = fold_build2 (MINUS_EXPR, type, fold_convert (type, bound), init);
3219 : : }
3220 : 31562 : *niters = fold_build2 (FLOOR_DIV_EXPR, type, delta, step);
3221 : 31562 : return ev;
3222 : : }
3223 : :
3224 : : /* Checks whether use of OP in USE_LOOP behaves as a simple affine iv with
3225 : : respect to WRTO_LOOP and returns its base and step in IV if possible
3226 : : (see analyze_scalar_evolution_in_loop for more details on USE_LOOP
3227 : : and WRTO_LOOP). If ALLOW_NONCONSTANT_STEP is true, we want step to be
3228 : : invariant in LOOP. Otherwise we require it to be an integer constant.
3229 : :
3230 : : IV->no_overflow is set to true if we are sure the iv cannot overflow (e.g.
3231 : : because it is computed in signed arithmetics). Consequently, adding an
3232 : : induction variable
3233 : :
3234 : : for (i = IV->base; ; i += IV->step)
3235 : :
3236 : : is only safe if IV->no_overflow is false, or TYPE_OVERFLOW_UNDEFINED is
3237 : : false for the type of the induction variable, or you can prove that i does
3238 : : not wrap by some other argument. Otherwise, this might introduce undefined
3239 : : behavior, and
3240 : :
3241 : : i = iv->base;
3242 : : for (; ; i = (type) ((unsigned type) i + (unsigned type) iv->step))
3243 : :
3244 : : must be used instead.
3245 : :
3246 : : When IV_NITERS is not NULL, this function also checks case in which OP
3247 : : is a conversion of an inner simple iv of below form:
3248 : :
3249 : : (outer_type){inner_base, inner_step}_loop.
3250 : :
3251 : : If type of inner iv has smaller precision than outer_type, it can't be
3252 : : folded into {(outer_type)inner_base, (outer_type)inner_step}_loop because
3253 : : the inner iv could overflow/wrap. In this case, we derive a condition
3254 : : under which the inner iv won't overflow/wrap and do the simplification.
3255 : : The derived condition normally is the maximum number the inner iv can
3256 : : iterate, and will be stored in IV_NITERS. This is useful in loop niter
3257 : : analysis, to derive break conditions when a loop must terminate, when is
3258 : : infinite. */
3259 : :
3260 : : bool
3261 : 50800386 : simple_iv_with_niters (class loop *wrto_loop, class loop *use_loop,
3262 : : tree op, affine_iv *iv, tree *iv_niters,
3263 : : bool allow_nonconstant_step)
3264 : : {
3265 : 50800386 : enum tree_code code;
3266 : 50800386 : tree type, ev, base, e;
3267 : 50800386 : wide_int extreme;
3268 : 50800386 : bool folded_casts;
3269 : :
3270 : 50800386 : iv->base = NULL_TREE;
3271 : 50800386 : iv->step = NULL_TREE;
3272 : 50800386 : iv->no_overflow = false;
3273 : :
3274 : 50800386 : type = TREE_TYPE (op);
3275 : 50800386 : if (!POINTER_TYPE_P (type)
3276 : 41636400 : && !INTEGRAL_TYPE_P (type))
3277 : : return false;
3278 : :
3279 : 48727127 : ev = analyze_scalar_evolution_in_loop (wrto_loop, use_loop, op,
3280 : : &folded_casts);
3281 : 48727127 : if (chrec_contains_undetermined (ev)
3282 : 48727127 : || chrec_contains_symbols_defined_in_loop (ev, wrto_loop->num))
3283 : 13001737 : return false;
3284 : :
3285 : 35725390 : if (tree_does_not_contain_chrecs (ev))
3286 : : {
3287 : 15785953 : iv->base = ev;
3288 : 15785953 : tree ev_type = TREE_TYPE (ev);
3289 : 15785953 : if (POINTER_TYPE_P (ev_type))
3290 : 2672872 : ev_type = sizetype;
3291 : :
3292 : 15785953 : iv->step = build_int_cst (ev_type, 0);
3293 : 15785953 : iv->no_overflow = true;
3294 : 15785953 : return true;
3295 : : }
3296 : :
3297 : : /* If we can derive valid scalar evolution with assumptions. */
3298 : 19939437 : if (iv_niters && TREE_CODE (ev) != POLYNOMIAL_CHREC)
3299 : 38799 : ev = derive_simple_iv_with_niters (ev, iv_niters);
3300 : :
3301 : 19939437 : if (TREE_CODE (ev) != POLYNOMIAL_CHREC)
3302 : : return false;
3303 : :
3304 : 19890916 : if (CHREC_VARIABLE (ev) != (unsigned) wrto_loop->num)
3305 : : return false;
3306 : :
3307 : 19890902 : iv->step = CHREC_RIGHT (ev);
3308 : 12636522 : if ((!allow_nonconstant_step && TREE_CODE (iv->step) != INTEGER_CST)
3309 : 32276986 : || tree_contains_chrecs (iv->step, NULL))
3310 : 261719 : return false;
3311 : :
3312 : 19629183 : iv->base = CHREC_LEFT (ev);
3313 : 19629183 : if (tree_contains_chrecs (iv->base, NULL))
3314 : : return false;
3315 : :
3316 : 19629183 : iv->no_overflow = !folded_casts && nowrap_type_p (type);
3317 : :
3318 : 19629183 : if (!iv->no_overflow
3319 : 19629183 : && !iv_can_overflow_p (wrto_loop, type, iv->base, iv->step))
3320 : 3689608 : iv->no_overflow = true;
3321 : :
3322 : : /* Try to simplify iv base:
3323 : :
3324 : : (signed T) ((unsigned T)base + step) ;; TREE_TYPE (base) == signed T
3325 : : == (signed T)(unsigned T)base + step
3326 : : == base + step
3327 : :
3328 : : If we can prove operation (base + step) doesn't overflow or underflow.
3329 : : Specifically, we try to prove below conditions are satisfied:
3330 : :
3331 : : base <= UPPER_BOUND (type) - step ;;step > 0
3332 : : base >= LOWER_BOUND (type) - step ;;step < 0
3333 : :
3334 : : This is done by proving the reverse conditions are false using loop's
3335 : : initial conditions.
3336 : :
3337 : : The is necessary to make loop niter, or iv overflow analysis easier
3338 : : for below example:
3339 : :
3340 : : int foo (int *a, signed char s, signed char l)
3341 : : {
3342 : : signed char i;
3343 : : for (i = s; i < l; i++)
3344 : : a[i] = 0;
3345 : : return 0;
3346 : : }
3347 : :
3348 : : Note variable I is firstly converted to type unsigned char, incremented,
3349 : : then converted back to type signed char. */
3350 : :
3351 : 19629183 : if (wrto_loop->num != use_loop->num)
3352 : : return true;
3353 : :
3354 : 19459988 : if (!CONVERT_EXPR_P (iv->base) || TREE_CODE (iv->step) != INTEGER_CST)
3355 : : return true;
3356 : :
3357 : 220989 : type = TREE_TYPE (iv->base);
3358 : 220989 : e = TREE_OPERAND (iv->base, 0);
3359 : 220989 : if (!tree_nop_conversion_p (type, TREE_TYPE (e))
3360 : 190445 : || TREE_CODE (e) != PLUS_EXPR
3361 : 106020 : || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST
3362 : 300225 : || !tree_int_cst_equal (iv->step,
3363 : 79236 : fold_convert (type, TREE_OPERAND (e, 1))))
3364 : 160090 : return true;
3365 : 60899 : e = TREE_OPERAND (e, 0);
3366 : 60899 : if (!CONVERT_EXPR_P (e))
3367 : : return true;
3368 : 35822 : base = TREE_OPERAND (e, 0);
3369 : 35822 : if (!useless_type_conversion_p (type, TREE_TYPE (base)))
3370 : : return true;
3371 : :
3372 : 27812 : if (tree_int_cst_sign_bit (iv->step))
3373 : : {
3374 : 11909 : code = LT_EXPR;
3375 : 11909 : extreme = wi::min_value (type);
3376 : : }
3377 : : else
3378 : : {
3379 : 15903 : code = GT_EXPR;
3380 : 15903 : extreme = wi::max_value (type);
3381 : : }
3382 : 27812 : wi::overflow_type overflow = wi::OVF_NONE;
3383 : 27812 : extreme = wi::sub (extreme, wi::to_wide (iv->step),
3384 : 55624 : TYPE_SIGN (type), &overflow);
3385 : 27812 : if (overflow)
3386 : : return true;
3387 : 27788 : e = fold_build2 (code, boolean_type_node, base,
3388 : : wide_int_to_tree (type, extreme));
3389 : 27788 : e = simplify_using_initial_conditions (use_loop, e);
3390 : 27788 : if (!integer_zerop (e))
3391 : : return true;
3392 : :
3393 : 12044 : if (POINTER_TYPE_P (TREE_TYPE (base)))
3394 : : code = POINTER_PLUS_EXPR;
3395 : : else
3396 : : code = PLUS_EXPR;
3397 : :
3398 : 12044 : iv->base = fold_build2 (code, TREE_TYPE (base), base, iv->step);
3399 : 12044 : return true;
3400 : 50800386 : }
3401 : :
3402 : : /* Like simple_iv_with_niters, but return TRUE when OP behaves as a simple
3403 : : affine iv unconditionally. */
3404 : :
3405 : : bool
3406 : 20970720 : simple_iv (class loop *wrto_loop, class loop *use_loop, tree op,
3407 : : affine_iv *iv, bool allow_nonconstant_step)
3408 : : {
3409 : 20970720 : return simple_iv_with_niters (wrto_loop, use_loop, op, iv,
3410 : 20970720 : NULL, allow_nonconstant_step);
3411 : : }
3412 : :
3413 : : /* Finalize the scalar evolution analysis. */
3414 : :
3415 : : void
3416 : 15138001 : scev_finalize (void)
3417 : : {
3418 : 15138001 : if (!scalar_evolution_info)
3419 : : return;
3420 : 15138000 : scalar_evolution_info->empty ();
3421 : 15138000 : scalar_evolution_info = NULL;
3422 : 15138000 : free_numbers_of_iterations_estimates (cfun);
3423 : : }
3424 : :
3425 : : /* Returns true if the expression EXPR is considered to be too expensive
3426 : : for scev_const_prop. Sets *COND_OVERFLOW_P to true when the
3427 : : expression might contain a sub-expression that is subject to undefined
3428 : : overflow behavior and conditionally evaluated. */
3429 : :
3430 : : static bool
3431 : 10321716 : expression_expensive_p (tree expr, bool *cond_overflow_p,
3432 : : hash_map<tree, uint64_t> &cache, uint64_t &cost)
3433 : : {
3434 : 10321716 : enum tree_code code;
3435 : :
3436 : 10321716 : if (is_gimple_val (expr))
3437 : : return false;
3438 : :
3439 : 4319701 : code = TREE_CODE (expr);
3440 : 4319701 : if (code == TRUNC_DIV_EXPR
3441 : : || code == CEIL_DIV_EXPR
3442 : : || code == FLOOR_DIV_EXPR
3443 : : || code == ROUND_DIV_EXPR
3444 : : || code == TRUNC_MOD_EXPR
3445 : : || code == CEIL_MOD_EXPR
3446 : : || code == FLOOR_MOD_EXPR
3447 : 4319701 : || code == ROUND_MOD_EXPR
3448 : 4319701 : || code == EXACT_DIV_EXPR)
3449 : : {
3450 : : /* Division by power of two is usually cheap, so we allow it.
3451 : : Forbid anything else. */
3452 : 55497 : if (!integer_pow2p (TREE_OPERAND (expr, 1)))
3453 : : return true;
3454 : : }
3455 : :
3456 : 4309711 : bool visited_p;
3457 : 4309711 : uint64_t &local_cost = cache.get_or_insert (expr, &visited_p);
3458 : 4309711 : if (visited_p)
3459 : : {
3460 : 311 : uint64_t tem = cost + local_cost;
3461 : 311 : if (tem < cost)
3462 : : return true;
3463 : 311 : cost = tem;
3464 : 311 : return false;
3465 : : }
3466 : 4309400 : local_cost = 1;
3467 : :
3468 : 4309400 : uint64_t op_cost = 0;
3469 : 4309400 : if (code == CALL_EXPR)
3470 : : {
3471 : 127 : tree arg;
3472 : 127 : call_expr_arg_iterator iter;
3473 : : /* Even though is_inexpensive_builtin might say true, we will get a
3474 : : library call for popcount when backend does not have an instruction
3475 : : to do so. We consider this to be expensive and generate
3476 : : __builtin_popcount only when backend defines it. */
3477 : 127 : optab optab;
3478 : 127 : combined_fn cfn = get_call_combined_fn (expr);
3479 : 127 : switch (cfn)
3480 : : {
3481 : 31 : CASE_CFN_POPCOUNT:
3482 : 31 : optab = popcount_optab;
3483 : 31 : goto bitcount_call;
3484 : 83 : CASE_CFN_CLZ:
3485 : 83 : optab = clz_optab;
3486 : 83 : goto bitcount_call;
3487 : : CASE_CFN_CTZ:
3488 : : optab = ctz_optab;
3489 : 127 : bitcount_call:
3490 : : /* Check if opcode for popcount is available in the mode required. */
3491 : 127 : if (optab_handler (optab,
3492 : 127 : TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0))))
3493 : : == CODE_FOR_nothing)
3494 : : {
3495 : 27 : machine_mode mode;
3496 : 27 : mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0)));
3497 : 27 : scalar_int_mode int_mode;
3498 : :
3499 : : /* If the mode is of 2 * UNITS_PER_WORD size, we can handle
3500 : : double-word popcount by emitting two single-word popcount
3501 : : instructions. */
3502 : 27 : if (is_a <scalar_int_mode> (mode, &int_mode)
3503 : 29 : && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
3504 : 2 : && (optab_handler (optab, word_mode)
3505 : : != CODE_FOR_nothing))
3506 : : break;
3507 : : /* If popcount is available for a wider mode, we emulate the
3508 : : operation for a narrow mode by first zero-extending the value
3509 : : and then computing popcount in the wider mode. Analogue for
3510 : : ctz. For clz we do the same except that we additionally have
3511 : : to subtract the difference of the mode precisions from the
3512 : : result. */
3513 : 25 : if (is_a <scalar_int_mode> (mode, &int_mode))
3514 : : {
3515 : 25 : machine_mode wider_mode_iter;
3516 : 124 : FOR_EACH_WIDER_MODE (wider_mode_iter, mode)
3517 : 99 : if (optab_handler (optab, wider_mode_iter)
3518 : : != CODE_FOR_nothing)
3519 : 0 : goto check_call_args;
3520 : : /* Operation ctz may be emulated via clz in expand_ctz. */
3521 : 25 : if (optab == ctz_optab)
3522 : : {
3523 : 0 : FOR_EACH_WIDER_MODE_FROM (wider_mode_iter, mode)
3524 : 0 : if (optab_handler (clz_optab, wider_mode_iter)
3525 : : != CODE_FOR_nothing)
3526 : 0 : goto check_call_args;
3527 : : }
3528 : : }
3529 : 25 : return true;
3530 : : }
3531 : : break;
3532 : :
3533 : 0 : default:
3534 : 0 : if (cfn == CFN_LAST
3535 : 0 : || !is_inexpensive_builtin (get_callee_fndecl (expr)))
3536 : 0 : return true;
3537 : : break;
3538 : : }
3539 : :
3540 : 102 : check_call_args:
3541 : 306 : FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
3542 : 102 : if (expression_expensive_p (arg, cond_overflow_p, cache, op_cost))
3543 : : return true;
3544 : 102 : *cache.get (expr) += op_cost;
3545 : 102 : cost += op_cost + 1;
3546 : 102 : return false;
3547 : : }
3548 : :
3549 : 4309273 : if (code == COND_EXPR)
3550 : : {
3551 : 1966 : if (expression_expensive_p (TREE_OPERAND (expr, 0), cond_overflow_p,
3552 : : cache, op_cost)
3553 : 1966 : || (EXPR_P (TREE_OPERAND (expr, 1))
3554 : 1964 : && EXPR_P (TREE_OPERAND (expr, 2)))
3555 : : /* If either branch has side effects or could trap. */
3556 : 1964 : || TREE_SIDE_EFFECTS (TREE_OPERAND (expr, 1))
3557 : 1964 : || generic_expr_could_trap_p (TREE_OPERAND (expr, 1))
3558 : 1963 : || TREE_SIDE_EFFECTS (TREE_OPERAND (expr, 0))
3559 : 1963 : || generic_expr_could_trap_p (TREE_OPERAND (expr, 0))
3560 : 1963 : || expression_expensive_p (TREE_OPERAND (expr, 1), cond_overflow_p,
3561 : : cache, op_cost)
3562 : 3824 : || expression_expensive_p (TREE_OPERAND (expr, 2), cond_overflow_p,
3563 : : cache, op_cost))
3564 : 108 : return true;
3565 : : /* Conservatively assume there's overflow for now. */
3566 : 1858 : *cond_overflow_p = true;
3567 : 1858 : *cache.get (expr) += op_cost;
3568 : 1858 : cost += op_cost + 1;
3569 : 1858 : return false;
3570 : : }
3571 : :
3572 : 4307307 : switch (TREE_CODE_CLASS (code))
3573 : : {
3574 : 2230973 : case tcc_binary:
3575 : 2230973 : case tcc_comparison:
3576 : 2230973 : if (expression_expensive_p (TREE_OPERAND (expr, 1), cond_overflow_p,
3577 : : cache, op_cost))
3578 : : return true;
3579 : :
3580 : : /* Fallthru. */
3581 : 4304551 : case tcc_unary:
3582 : 4304551 : if (expression_expensive_p (TREE_OPERAND (expr, 0), cond_overflow_p,
3583 : : cache, op_cost))
3584 : : return true;
3585 : 4279977 : *cache.get (expr) += op_cost;
3586 : 4279977 : cost += op_cost + 1;
3587 : 4279977 : return false;
3588 : :
3589 : : default:
3590 : : return true;
3591 : : }
3592 : : }
3593 : :
3594 : : bool
3595 : 3780303 : expression_expensive_p (tree expr, bool *cond_overflow_p)
3596 : : {
3597 : 3780303 : hash_map<tree, uint64_t> cache;
3598 : 3780303 : uint64_t expanded_size = 0;
3599 : 3780303 : *cond_overflow_p = false;
3600 : 3780303 : return (expression_expensive_p (expr, cond_overflow_p, cache, expanded_size)
3601 : : /* ??? Both the explicit unsharing and gimplification of expr will
3602 : : expand shared trees to multiple copies.
3603 : : Guard against exponential growth by counting the visits and
3604 : : comparing againt the number of original nodes. Allow a tiny
3605 : : bit of duplication to catch some additional optimizations. */
3606 : 3790445 : || expanded_size > (cache.elements () + 1));
3607 : 3780303 : }
3608 : :
3609 : : /* Match.pd function to match bitwise inductive expression.
3610 : : .i.e.
3611 : : _2 = 1 << _1;
3612 : : _3 = ~_2;
3613 : : tmp_9 = _3 & tmp_12; */
3614 : : extern bool gimple_bitwise_induction_p (tree, tree *, tree (*)(tree));
3615 : :
3616 : : /* Return the inductive expression of bitwise operation if possible,
3617 : : otherwise returns DEF. */
3618 : : static tree
3619 : 20002 : analyze_and_compute_bitwise_induction_effect (class loop* loop,
3620 : : tree phidef,
3621 : : unsigned HOST_WIDE_INT niter)
3622 : : {
3623 : 20002 : tree match_op[3],inv, bitwise_scev;
3624 : 20002 : tree type = TREE_TYPE (phidef);
3625 : 20002 : gphi* header_phi = NULL;
3626 : :
3627 : : /* Match things like op2(MATCH_OP[2]), op1(MATCH_OP[1]), phidef(PHIDEF)
3628 : :
3629 : : op2 = PHI <phidef, inv>
3630 : : _1 = (int) bit_17;
3631 : : _3 = 1 << _1;
3632 : : op1 = ~_3;
3633 : : phidef = op1 & op2; */
3634 : 20002 : if (!gimple_bitwise_induction_p (phidef, &match_op[0], NULL)
3635 : 99 : || TREE_CODE (match_op[2]) != SSA_NAME
3636 : 20002 : || !(header_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (match_op[2])))
3637 : 99 : || gimple_bb (header_phi) != loop->header
3638 : 20099 : || gimple_phi_num_args (header_phi) != 2)
3639 : : return NULL_TREE;
3640 : :
3641 : 97 : if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef)
3642 : : return NULL_TREE;
3643 : :
3644 : 97 : bitwise_scev = analyze_scalar_evolution (loop, match_op[1]);
3645 : 97 : bitwise_scev = instantiate_parameters (loop, bitwise_scev);
3646 : :
3647 : : /* Make sure bits is in range of type precision. */
3648 : 97 : if (TREE_CODE (bitwise_scev) != POLYNOMIAL_CHREC
3649 : 97 : || !INTEGRAL_TYPE_P (TREE_TYPE (bitwise_scev))
3650 : 97 : || !tree_fits_uhwi_p (CHREC_LEFT (bitwise_scev))
3651 : 97 : || tree_to_uhwi (CHREC_LEFT (bitwise_scev)) >= TYPE_PRECISION (type)
3652 : 194 : || !tree_fits_shwi_p (CHREC_RIGHT (bitwise_scev)))
3653 : : return NULL_TREE;
3654 : :
3655 : 97 : enum bit_op_kind
3656 : : {
3657 : : INDUCTION_BIT_CLEAR,
3658 : : INDUCTION_BIT_IOR,
3659 : : INDUCTION_BIT_XOR,
3660 : : INDUCTION_BIT_RESET,
3661 : : INDUCTION_ZERO,
3662 : : INDUCTION_ALL
3663 : : };
3664 : :
3665 : 97 : enum bit_op_kind induction_kind;
3666 : 97 : enum tree_code code1
3667 : 97 : = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (phidef));
3668 : 97 : enum tree_code code2
3669 : 97 : = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (match_op[0]));
3670 : :
3671 : : /* BIT_CLEAR: A &= ~(1 << bit)
3672 : : BIT_RESET: A ^= (1 << bit).
3673 : : BIT_IOR: A |= (1 << bit)
3674 : : BIT_ZERO: A &= (1 << bit)
3675 : : BIT_ALL: A |= ~(1 << bit)
3676 : : BIT_XOR: A ^= ~(1 << bit).
3677 : : bit is induction variable. */
3678 : 97 : switch (code1)
3679 : : {
3680 : 27 : case BIT_AND_EXPR:
3681 : 27 : induction_kind = code2 == BIT_NOT_EXPR
3682 : 27 : ? INDUCTION_BIT_CLEAR
3683 : : : INDUCTION_ZERO;
3684 : : break;
3685 : 46 : case BIT_IOR_EXPR:
3686 : 46 : induction_kind = code2 == BIT_NOT_EXPR
3687 : 46 : ? INDUCTION_ALL
3688 : : : INDUCTION_BIT_IOR;
3689 : : break;
3690 : 12 : case BIT_XOR_EXPR:
3691 : 12 : induction_kind = code2 == BIT_NOT_EXPR
3692 : 12 : ? INDUCTION_BIT_XOR
3693 : : : INDUCTION_BIT_RESET;
3694 : : break;
3695 : : /* A ^ ~(1 << bit) is equal to ~(A ^ (1 << bit)). */
3696 : 12 : case BIT_NOT_EXPR:
3697 : 12 : gcc_assert (code2 == BIT_XOR_EXPR);
3698 : : induction_kind = INDUCTION_BIT_XOR;
3699 : : break;
3700 : 0 : default:
3701 : 0 : gcc_unreachable ();
3702 : : }
3703 : :
3704 : 34 : if (induction_kind == INDUCTION_ZERO)
3705 : 12 : return build_zero_cst (type);
3706 : 85 : if (induction_kind == INDUCTION_ALL)
3707 : 12 : return build_all_ones_cst (type);
3708 : :
3709 : 146 : wide_int bits = wi::zero (TYPE_PRECISION (type));
3710 : 73 : HOST_WIDE_INT bit_start = tree_to_shwi (CHREC_LEFT (bitwise_scev));
3711 : 73 : HOST_WIDE_INT step = tree_to_shwi (CHREC_RIGHT (bitwise_scev));
3712 : 73 : HOST_WIDE_INT bit_final = bit_start + step * niter;
3713 : :
3714 : : /* bit_start, bit_final in range of [0,TYPE_PRECISION)
3715 : : implies all bits are set in range. */
3716 : 73 : if (bit_final >= TYPE_PRECISION (type)
3717 : 73 : || bit_final < 0)
3718 : : return NULL_TREE;
3719 : :
3720 : : /* Loop tripcount should be niter + 1. */
3721 : 1248 : for (unsigned i = 0; i != niter + 1; i++)
3722 : : {
3723 : 1175 : bits = wi::set_bit (bits, bit_start);
3724 : 1175 : bit_start += step;
3725 : : }
3726 : :
3727 : 73 : bool inverted = false;
3728 : 73 : switch (induction_kind)
3729 : : {
3730 : : case INDUCTION_BIT_CLEAR:
3731 : : code1 = BIT_AND_EXPR;
3732 : : inverted = true;
3733 : : break;
3734 : : case INDUCTION_BIT_IOR:
3735 : : code1 = BIT_IOR_EXPR;
3736 : : break;
3737 : : case INDUCTION_BIT_RESET:
3738 : : code1 = BIT_XOR_EXPR;
3739 : : break;
3740 : : /* A ^= ~(1 << bit) is special, when loop tripcount is even,
3741 : : it's equal to A ^= bits, else A ^= ~bits. */
3742 : 12 : case INDUCTION_BIT_XOR:
3743 : 12 : code1 = BIT_XOR_EXPR;
3744 : 12 : if (niter % 2 == 0)
3745 : : inverted = true;
3746 : : break;
3747 : : default:
3748 : : gcc_unreachable ();
3749 : : }
3750 : :
3751 : : if (inverted)
3752 : 19 : bits = wi::bit_not (bits);
3753 : :
3754 : 73 : inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop));
3755 : 73 : return fold_build2 (code1, type, inv, wide_int_to_tree (type, bits));
3756 : : }
3757 : :
3758 : : /* Match.pd function to match bitop with invariant expression
3759 : : .i.e.
3760 : : tmp_7 = _0 & _1; */
3761 : : extern bool gimple_bitop_with_inv_p (tree, tree *, tree (*)(tree));
3762 : :
3763 : : /* Return the inductive expression of bitop with invariant if possible,
3764 : : otherwise returns DEF. */
3765 : : static tree
3766 : 72673 : analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef,
3767 : : tree niter)
3768 : : {
3769 : 72673 : tree match_op[2],inv;
3770 : 72673 : tree type = TREE_TYPE (phidef);
3771 : 72673 : gphi* header_phi = NULL;
3772 : 72673 : enum tree_code code;
3773 : : /* match thing like op0 (match[0]), op1 (match[1]), phidef (PHIDEF)
3774 : :
3775 : : op1 = PHI <phidef, inv>
3776 : : phidef = op0 & op1
3777 : : if op0 is an invariant, it could change to
3778 : : phidef = op0 & inv. */
3779 : 72673 : gimple *def;
3780 : 72673 : def = SSA_NAME_DEF_STMT (phidef);
3781 : 72673 : if (!(is_gimple_assign (def)
3782 : 26787 : && ((code = gimple_assign_rhs_code (def)), true)
3783 : 26787 : && (code == BIT_AND_EXPR || code == BIT_IOR_EXPR
3784 : 21264 : || code == BIT_XOR_EXPR)))
3785 : : return NULL_TREE;
3786 : :
3787 : 6179 : match_op[0] = gimple_assign_rhs1 (def);
3788 : 6179 : match_op[1] = gimple_assign_rhs2 (def);
3789 : :
3790 : 6179 : if (expr_invariant_in_loop_p (loop, match_op[1]))
3791 : 213 : std::swap (match_op[0], match_op[1]);
3792 : :
3793 : 6179 : if (TREE_CODE (match_op[1]) != SSA_NAME
3794 : 6179 : || !expr_invariant_in_loop_p (loop, match_op[0])
3795 : 306 : || !(header_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (match_op[1])))
3796 : 216 : || gimple_bb (header_phi) != loop->header
3797 : 6380 : || gimple_phi_num_args (header_phi) != 2)
3798 : 5978 : return NULL_TREE;
3799 : :
3800 : 201 : if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef)
3801 : : return NULL_TREE;
3802 : :
3803 : 196 : enum tree_code code1
3804 : 196 : = gimple_assign_rhs_code (def);
3805 : :
3806 : 196 : if (code1 == BIT_XOR_EXPR)
3807 : : {
3808 : 51 : if (!tree_fits_uhwi_p (niter))
3809 : : return NULL_TREE;
3810 : 19 : unsigned HOST_WIDE_INT niter_num;
3811 : 19 : niter_num = tree_to_uhwi (niter);
3812 : 19 : if (niter_num % 2 != 0)
3813 : 10 : match_op[0] = build_zero_cst (type);
3814 : : }
3815 : :
3816 : 164 : inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop));
3817 : 164 : return fold_build2 (code1, type, inv, match_op[0]);
3818 : : }
3819 : :
3820 : : /* Do final value replacement for LOOP, return true if we did anything. */
3821 : :
3822 : : bool
3823 : 692960 : final_value_replacement_loop (class loop *loop)
3824 : : {
3825 : : /* If we do not know exact number of iterations of the loop, we cannot
3826 : : replace the final value. */
3827 : 692960 : edge exit = single_exit (loop);
3828 : 692960 : if (!exit)
3829 : : return false;
3830 : :
3831 : 455730 : tree niter = number_of_latch_executions (loop);
3832 : 455730 : if (niter == chrec_dont_know)
3833 : : return false;
3834 : :
3835 : : /* Ensure that it is possible to insert new statements somewhere. */
3836 : 335628 : if (!single_pred_p (exit->dest))
3837 : 38773 : split_loop_exit_edge (exit);
3838 : :
3839 : : /* Set stmt insertion pointer. All stmts are inserted before this point. */
3840 : :
3841 : 335628 : class loop *ex_loop
3842 : 671256 : = superloop_at_depth (loop,
3843 : 409817 : loop_depth (exit->dest->loop_father) + 1);
3844 : :
3845 : 335628 : bool any = false;
3846 : 335628 : gphi_iterator psi;
3847 : 749222 : for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
3848 : : {
3849 : 413594 : gphi *phi = psi.phi ();
3850 : 413594 : tree rslt = PHI_RESULT (phi);
3851 : 413594 : tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit);
3852 : 413594 : tree def = phidef;
3853 : 826518 : if (virtual_operand_p (def))
3854 : : {
3855 : 234025 : gsi_next (&psi);
3856 : 615917 : continue;
3857 : : }
3858 : :
3859 : 341389 : if (!POINTER_TYPE_P (TREE_TYPE (def))
3860 : 341247 : && !INTEGRAL_TYPE_P (TREE_TYPE (def)))
3861 : : {
3862 : 73733 : gsi_next (&psi);
3863 : 73733 : continue;
3864 : : }
3865 : :
3866 : 105836 : bool folded_casts;
3867 : 105836 : def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
3868 : : &folded_casts);
3869 : :
3870 : 105836 : tree bitinv_def, bit_def;
3871 : 105836 : unsigned HOST_WIDE_INT niter_num;
3872 : :
3873 : 105836 : if (def != chrec_dont_know)
3874 : 33163 : def = compute_overall_effect_of_inner_loop (ex_loop, def);
3875 : :
3876 : : /* Handle bitop with invariant induction expression.
3877 : :
3878 : : .i.e
3879 : : for (int i =0 ;i < 32; i++)
3880 : : tmp &= bit2;
3881 : : if bit2 is an invariant in loop which could simple to
3882 : : tmp &= bit2. */
3883 : 145346 : else if ((bitinv_def
3884 : 72673 : = analyze_and_compute_bitop_with_inv_effect (loop,
3885 : : phidef, niter)))
3886 : : def = bitinv_def;
3887 : :
3888 : : /* Handle bitwise induction expression.
3889 : :
3890 : : .i.e.
3891 : : for (int i = 0; i != 64; i+=3)
3892 : : res &= ~(1UL << i);
3893 : :
3894 : : RES can't be analyzed out by SCEV because it is not polynomially
3895 : : expressible, but in fact final value of RES can be replaced by
3896 : : RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
3897 : : being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR. */
3898 : 72509 : else if (tree_fits_uhwi_p (niter)
3899 : 30440 : && (niter_num = tree_to_uhwi (niter)) != 0
3900 : 30427 : && niter_num < TYPE_PRECISION (TREE_TYPE (phidef))
3901 : 72509 : && (bit_def
3902 : 20002 : = analyze_and_compute_bitwise_induction_effect (loop,
3903 : : phidef,
3904 : : niter_num)))
3905 : : def = bit_def;
3906 : :
3907 : 105836 : bool cond_overflow_p;
3908 : 105836 : if (!tree_does_not_contain_chrecs (def)
3909 : 32840 : || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
3910 : : /* Moving the computation from the loop may prolong life range
3911 : : of some ssa names, which may cause problems if they appear
3912 : : on abnormal edges. */
3913 : 32840 : || contains_abnormal_ssa_name_p (def)
3914 : : /* Do not emit expensive expressions. The rationale is that
3915 : : when someone writes a code like
3916 : :
3917 : : while (n > 45) n -= 45;
3918 : :
3919 : : he probably knows that n is not large, and does not want it
3920 : : to be turned into n %= 45. */
3921 : 138676 : || expression_expensive_p (def, &cond_overflow_p))
3922 : : {
3923 : 74134 : if (dump_file && (dump_flags & TDF_DETAILS))
3924 : : {
3925 : 56 : fprintf (dump_file, "not replacing:\n ");
3926 : 56 : print_gimple_stmt (dump_file, phi, 0);
3927 : 56 : fprintf (dump_file, "\n");
3928 : : }
3929 : 74134 : gsi_next (&psi);
3930 : 74134 : continue;
3931 : : }
3932 : :
3933 : : /* Eliminate the PHI node and replace it by a computation outside
3934 : : the loop. */
3935 : 31702 : if (dump_file)
3936 : : {
3937 : 130 : fprintf (dump_file, "\nfinal value replacement:\n ");
3938 : 130 : print_gimple_stmt (dump_file, phi, 0);
3939 : 130 : fprintf (dump_file, " with expr: ");
3940 : 130 : print_generic_expr (dump_file, def);
3941 : 130 : fprintf (dump_file, "\n");
3942 : : }
3943 : 31702 : any = true;
3944 : : /* ??? Here we'd like to have a unshare_expr that would assign
3945 : : shared sub-trees to new temporary variables either gimplified
3946 : : to a GIMPLE sequence or to a statement list (keeping this a
3947 : : GENERIC interface). */
3948 : 31702 : def = unshare_expr (def);
3949 : 31702 : auto loc = gimple_phi_arg_location (phi, exit->dest_idx);
3950 : 31702 : remove_phi_node (&psi, false);
3951 : :
3952 : : /* Propagate constants immediately, but leave an unused initialization
3953 : : around to avoid invalidating the SCEV cache. */
3954 : 38323 : if (CONSTANT_CLASS_P (def) && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
3955 : 6620 : replace_uses_by (rslt, def);
3956 : :
3957 : : /* Create the replacement statements. */
3958 : 31702 : gimple_seq stmts;
3959 : 31702 : def = force_gimple_operand (def, &stmts, false, NULL_TREE);
3960 : 31702 : gassign *ass = gimple_build_assign (rslt, def);
3961 : 31702 : gimple_set_location (ass, loc);
3962 : 31702 : gimple_seq_add_stmt (&stmts, ass);
3963 : :
3964 : : /* If def's type has undefined overflow and there were folded
3965 : : casts, rewrite all stmts added for def into arithmetics
3966 : : with defined overflow behavior. */
3967 : 31702 : if ((folded_casts
3968 : 401 : && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
3969 : 682 : && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
3970 : 32103 : || cond_overflow_p)
3971 : : {
3972 : 2110 : gimple_stmt_iterator gsi2;
3973 : 2110 : gsi2 = gsi_start (stmts);
3974 : 19274 : while (!gsi_end_p (gsi2))
3975 : : {
3976 : 17164 : if (gimple_needing_rewrite_undefined (gsi_stmt (gsi2)))
3977 : 423 : rewrite_to_defined_unconditional (&gsi2);
3978 : 17164 : gsi_next (&gsi2);
3979 : : }
3980 : : }
3981 : 31702 : gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
3982 : 31702 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
3983 : 31702 : if (dump_file)
3984 : : {
3985 : 130 : fprintf (dump_file, " final stmt:\n ");
3986 : 130 : print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (rslt), 0);
3987 : 130 : fprintf (dump_file, "\n");
3988 : : }
3989 : :
3990 : : /* Re-fold immediate uses of the replaced def, but avoid
3991 : : CFG manipulations from this function. For now only do
3992 : : a single-level re-folding, not re-folding uses of
3993 : : folded uses. */
3994 : 31702 : if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
3995 : : {
3996 : 31701 : gimple *use_stmt;
3997 : 31701 : imm_use_iterator imm_iter;
3998 : 62776 : FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, rslt)
3999 : : {
4000 : 31075 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4001 : 31075 : if (!stmt_can_throw_internal (cfun, use_stmt)
4002 : 31075 : && fold_stmt (&gsi, follow_all_ssa_edges))
4003 : 1820 : update_stmt (gsi_stmt (gsi));
4004 : 31701 : }
4005 : : }
4006 : : }
4007 : :
4008 : : return any;
4009 : : }
4010 : :
4011 : : #include "gt-tree-scalar-evolution.h"
|