Branch data Line data Source code
1 : : /* Functions to determine/estimate number of iterations of a loop.
2 : : Copyright (C) 2004-2024 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it
7 : : under the terms of the GNU General Public License as published by the
8 : : Free Software Foundation; either version 3, or (at your option) any
9 : : later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "rtl.h"
25 : : #include "tree.h"
26 : : #include "gimple.h"
27 : : #include "tree-pass.h"
28 : : #include "ssa.h"
29 : : #include "gimple-pretty-print.h"
30 : : #include "diagnostic-core.h"
31 : : #include "stor-layout.h"
32 : : #include "fold-const.h"
33 : : #include "calls.h"
34 : : #include "intl.h"
35 : : #include "gimplify.h"
36 : : #include "gimple-iterator.h"
37 : : #include "tree-cfg.h"
38 : : #include "tree-ssa-loop-ivopts.h"
39 : : #include "tree-ssa-loop-niter.h"
40 : : #include "tree-ssa-loop.h"
41 : : #include "cfgloop.h"
42 : : #include "tree-chrec.h"
43 : : #include "tree-scalar-evolution.h"
44 : : #include "tree-dfa.h"
45 : : #include "internal-fn.h"
46 : : #include "gimple-range.h"
47 : : #include "sreal.h"
48 : :
49 : :
50 : : /* The maximum number of dominator BBs we search for conditions
51 : : of loop header copies we use for simplifying a conditional
52 : : expression. */
53 : : #define MAX_DOMINATORS_TO_WALK 8
54 : :
55 : : /*
56 : :
57 : : Analysis of number of iterations of an affine exit test.
58 : :
59 : : */
60 : :
61 : : /* Bounds on some value, BELOW <= X <= UP. */
62 : :
63 : : struct bounds
64 : : {
65 : : mpz_t below, up;
66 : : };
67 : :
68 : : /* Splits expression EXPR to a variable part VAR and constant OFFSET. */
69 : :
70 : : static void
71 : 43695122 : split_to_var_and_offset (tree expr, tree *var, mpz_t offset)
72 : : {
73 : 43695122 : tree type = TREE_TYPE (expr);
74 : 43695122 : tree op0, op1;
75 : 43695122 : bool negate = false;
76 : :
77 : 43695122 : *var = expr;
78 : 43695122 : mpz_set_ui (offset, 0);
79 : :
80 : 43695122 : switch (TREE_CODE (expr))
81 : : {
82 : 7714 : case MINUS_EXPR:
83 : 7714 : negate = true;
84 : : /* Fallthru. */
85 : :
86 : 4280022 : case PLUS_EXPR:
87 : 4280022 : case POINTER_PLUS_EXPR:
88 : 4280022 : op0 = TREE_OPERAND (expr, 0);
89 : 4280022 : op1 = TREE_OPERAND (expr, 1);
90 : :
91 : 4280022 : if (TREE_CODE (op1) != INTEGER_CST)
92 : : break;
93 : :
94 : 4269286 : *var = op0;
95 : : /* Always sign extend the offset. */
96 : 4269286 : wi::to_mpz (wi::to_wide (op1), offset, SIGNED);
97 : 4269286 : if (negate)
98 : 29 : mpz_neg (offset, offset);
99 : : break;
100 : :
101 : 19797478 : case INTEGER_CST:
102 : 19797478 : *var = build_int_cst_type (type, 0);
103 : 19797478 : wi::to_mpz (wi::to_wide (expr), offset, TYPE_SIGN (type));
104 : 19797478 : break;
105 : :
106 : : default:
107 : : break;
108 : : }
109 : 43695122 : }
110 : :
111 : : /* From condition C0 CMP C1 derives information regarding the value range
112 : : of VAR, which is of TYPE. Results are stored in to BELOW and UP. */
113 : :
114 : : static void
115 : 13270577 : refine_value_range_using_guard (tree type, tree var,
116 : : tree c0, enum tree_code cmp, tree c1,
117 : : mpz_t below, mpz_t up)
118 : : {
119 : 13270577 : tree varc0, varc1, ctype;
120 : 13270577 : mpz_t offc0, offc1;
121 : 13270577 : mpz_t mint, maxt, minc1, maxc1;
122 : 13270577 : bool no_wrap = nowrap_type_p (type);
123 : 13270577 : bool c0_ok, c1_ok;
124 : 13270577 : signop sgn = TYPE_SIGN (type);
125 : :
126 : 13270577 : switch (cmp)
127 : : {
128 : 6420469 : case LT_EXPR:
129 : 6420469 : case LE_EXPR:
130 : 6420469 : case GT_EXPR:
131 : 6420469 : case GE_EXPR:
132 : 6420469 : STRIP_SIGN_NOPS (c0);
133 : 6420469 : STRIP_SIGN_NOPS (c1);
134 : 6420469 : ctype = TREE_TYPE (c0);
135 : 6420469 : if (!useless_type_conversion_p (ctype, type))
136 : 11481456 : return;
137 : :
138 : : break;
139 : :
140 : : case EQ_EXPR:
141 : : /* We could derive quite precise information from EQ_EXPR, however,
142 : : such a guard is unlikely to appear, so we do not bother with
143 : : handling it. */
144 : : return;
145 : :
146 : 3175074 : case NE_EXPR:
147 : : /* NE_EXPR comparisons do not contain much of useful information,
148 : : except for cases of comparing with bounds. */
149 : 3175074 : if (TREE_CODE (c1) != INTEGER_CST
150 : 2935432 : || !INTEGRAL_TYPE_P (type))
151 : : return;
152 : :
153 : : /* Ensure that the condition speaks about an expression in the same
154 : : type as X and Y. */
155 : 2935432 : ctype = TREE_TYPE (c0);
156 : 2935432 : if (TYPE_PRECISION (ctype) != TYPE_PRECISION (type))
157 : : return;
158 : 1673552 : c0 = fold_convert (type, c0);
159 : 1673552 : c1 = fold_convert (type, c1);
160 : :
161 : 1673552 : if (operand_equal_p (var, c0, 0))
162 : : {
163 : : /* Case of comparing VAR with its below/up bounds. */
164 : 239359 : auto_mpz valc1;
165 : 239359 : wi::to_mpz (wi::to_wide (c1), valc1, TYPE_SIGN (type));
166 : 239359 : if (mpz_cmp (valc1, below) == 0)
167 : 100330 : cmp = GT_EXPR;
168 : 239359 : if (mpz_cmp (valc1, up) == 0)
169 : 25955 : cmp = LT_EXPR;
170 : 239359 : }
171 : : else
172 : : {
173 : : /* Case of comparing with the bounds of the type. */
174 : 1434193 : wide_int min = wi::min_value (type);
175 : 1434193 : wide_int max = wi::max_value (type);
176 : :
177 : 1434193 : if (wi::to_wide (c1) == min)
178 : 312598 : cmp = GT_EXPR;
179 : 1434193 : if (wi::to_wide (c1) == max)
180 : 15852 : cmp = LT_EXPR;
181 : 1434193 : }
182 : :
183 : : /* Quick return if no useful information. */
184 : 1673552 : if (cmp == NE_EXPR)
185 : : return;
186 : :
187 : : break;
188 : :
189 : : default:
190 : : return;
191 : : }
192 : :
193 : 5925553 : mpz_init (offc0);
194 : 5925553 : mpz_init (offc1);
195 : 5925553 : split_to_var_and_offset (expand_simple_operations (c0), &varc0, offc0);
196 : 5925553 : split_to_var_and_offset (expand_simple_operations (c1), &varc1, offc1);
197 : :
198 : : /* We are only interested in comparisons of expressions based on VAR. */
199 : 5925553 : if (operand_equal_p (var, varc1, 0))
200 : : {
201 : 498933 : std::swap (varc0, varc1);
202 : 498933 : mpz_swap (offc0, offc1);
203 : 498933 : cmp = swap_tree_comparison (cmp);
204 : : }
205 : 5426620 : else if (!operand_equal_p (var, varc0, 0))
206 : : {
207 : 4136432 : mpz_clear (offc0);
208 : 4136432 : mpz_clear (offc1);
209 : 4136432 : return;
210 : : }
211 : :
212 : 1789121 : mpz_init (mint);
213 : 1789121 : mpz_init (maxt);
214 : 1789121 : get_type_static_bounds (type, mint, maxt);
215 : 1789121 : mpz_init (minc1);
216 : 1789121 : mpz_init (maxc1);
217 : 3578242 : Value_Range r (TREE_TYPE (varc1));
218 : : /* Setup range information for varc1. */
219 : 1789121 : if (integer_zerop (varc1))
220 : : {
221 : 914930 : wi::to_mpz (0, minc1, TYPE_SIGN (type));
222 : 914930 : wi::to_mpz (0, maxc1, TYPE_SIGN (type));
223 : : }
224 : 874191 : else if (TREE_CODE (varc1) == SSA_NAME
225 : 824138 : && INTEGRAL_TYPE_P (type)
226 : 1648276 : && get_range_query (cfun)->range_of_expr (r, varc1)
227 : 824138 : && !r.undefined_p ()
228 : 1696402 : && !r.varying_p ())
229 : : {
230 : 589992 : gcc_assert (wi::le_p (r.lower_bound (), r.upper_bound (), sgn));
231 : 294996 : wi::to_mpz (r.lower_bound (), minc1, sgn);
232 : 294996 : wi::to_mpz (r.upper_bound (), maxc1, sgn);
233 : : }
234 : : else
235 : : {
236 : 579195 : mpz_set (minc1, mint);
237 : 579195 : mpz_set (maxc1, maxt);
238 : : }
239 : :
240 : : /* Compute valid range information for varc1 + offc1. Note nothing
241 : : useful can be derived if it overflows or underflows. Overflow or
242 : : underflow could happen when:
243 : :
244 : : offc1 > 0 && varc1 + offc1 > MAX_VAL (type)
245 : : offc1 < 0 && varc1 + offc1 < MIN_VAL (type). */
246 : 1789121 : mpz_add (minc1, minc1, offc1);
247 : 1789121 : mpz_add (maxc1, maxc1, offc1);
248 : 3578242 : c1_ok = (no_wrap
249 : 653911 : || mpz_sgn (offc1) == 0
250 : 100586 : || (mpz_sgn (offc1) < 0 && mpz_cmp (minc1, mint) >= 0)
251 : 1887958 : || (mpz_sgn (offc1) > 0 && mpz_cmp (maxc1, maxt) <= 0));
252 : 17083 : if (!c1_ok)
253 : 17083 : goto end;
254 : :
255 : 1772038 : if (mpz_cmp (minc1, mint) < 0)
256 : 38081 : mpz_set (minc1, mint);
257 : 1772038 : if (mpz_cmp (maxc1, maxt) > 0)
258 : 10593 : mpz_set (maxc1, maxt);
259 : :
260 : 1772038 : if (cmp == LT_EXPR)
261 : : {
262 : 258297 : cmp = LE_EXPR;
263 : 258297 : mpz_sub_ui (maxc1, maxc1, 1);
264 : : }
265 : 1772038 : if (cmp == GT_EXPR)
266 : : {
267 : 958633 : cmp = GE_EXPR;
268 : 958633 : mpz_add_ui (minc1, minc1, 1);
269 : : }
270 : :
271 : : /* Compute range information for varc0. If there is no overflow,
272 : : the condition implied that
273 : :
274 : : (varc0) cmp (varc1 + offc1 - offc0)
275 : :
276 : : We can possibly improve the upper bound of varc0 if cmp is LE_EXPR,
277 : : or the below bound if cmp is GE_EXPR.
278 : :
279 : : To prove there is no overflow/underflow, we need to check below
280 : : four cases:
281 : : 1) cmp == LE_EXPR && offc0 > 0
282 : :
283 : : (varc0 + offc0) doesn't overflow
284 : : && (varc1 + offc1 - offc0) doesn't underflow
285 : :
286 : : 2) cmp == LE_EXPR && offc0 < 0
287 : :
288 : : (varc0 + offc0) doesn't underflow
289 : : && (varc1 + offc1 - offc0) doesn't overfloe
290 : :
291 : : In this case, (varc0 + offc0) will never underflow if we can
292 : : prove (varc1 + offc1 - offc0) doesn't overflow.
293 : :
294 : : 3) cmp == GE_EXPR && offc0 < 0
295 : :
296 : : (varc0 + offc0) doesn't underflow
297 : : && (varc1 + offc1 - offc0) doesn't overflow
298 : :
299 : : 4) cmp == GE_EXPR && offc0 > 0
300 : :
301 : : (varc0 + offc0) doesn't overflow
302 : : && (varc1 + offc1 - offc0) doesn't underflow
303 : :
304 : : In this case, (varc0 + offc0) will never overflow if we can
305 : : prove (varc1 + offc1 - offc0) doesn't underflow.
306 : :
307 : : Note we only handle case 2 and 4 in below code. */
308 : :
309 : 1772038 : mpz_sub (minc1, minc1, offc0);
310 : 1772038 : mpz_sub (maxc1, maxc1, offc0);
311 : 3544076 : c0_ok = (no_wrap
312 : 636828 : || mpz_sgn (offc0) == 0
313 : 35685 : || (cmp == LE_EXPR
314 : 20341 : && mpz_sgn (offc0) < 0 && mpz_cmp (maxc1, maxt) <= 0)
315 : 1806770 : || (cmp == GE_EXPR
316 : 15344 : && mpz_sgn (offc0) > 0 && mpz_cmp (minc1, mint) >= 0));
317 : 32711 : if (!c0_ok)
318 : 32711 : goto end;
319 : :
320 : 1739327 : if (cmp == LE_EXPR)
321 : : {
322 : 576538 : if (mpz_cmp (up, maxc1) > 0)
323 : 193971 : mpz_set (up, maxc1);
324 : : }
325 : : else
326 : : {
327 : 1162789 : if (mpz_cmp (below, minc1) < 0)
328 : 965078 : mpz_set (below, minc1);
329 : : }
330 : :
331 : 197711 : end:
332 : 1789121 : mpz_clear (mint);
333 : 1789121 : mpz_clear (maxt);
334 : 1789121 : mpz_clear (minc1);
335 : 1789121 : mpz_clear (maxc1);
336 : 1789121 : mpz_clear (offc0);
337 : 1789121 : mpz_clear (offc1);
338 : : }
339 : :
340 : : /* Stores estimate on the minimum/maximum value of the expression VAR + OFF
341 : : in TYPE to MIN and MAX. */
342 : :
343 : : static void
344 : 20376396 : determine_value_range (class loop *loop, tree type, tree var, mpz_t off,
345 : : mpz_t min, mpz_t max)
346 : : {
347 : 20376396 : int cnt = 0;
348 : 20376396 : mpz_t minm, maxm;
349 : 20376396 : basic_block bb;
350 : 20376396 : wide_int minv, maxv;
351 : 20376396 : enum value_range_kind rtype = VR_VARYING;
352 : :
353 : : /* If the expression is a constant, we know its value exactly. */
354 : 20376396 : if (integer_zerop (var))
355 : : {
356 : 14479669 : mpz_set (min, off);
357 : 14479669 : mpz_set (max, off);
358 : 14479669 : return;
359 : : }
360 : :
361 : 5896727 : get_type_static_bounds (type, min, max);
362 : :
363 : : /* See if we have some range info from VRP. */
364 : 5896727 : if (TREE_CODE (var) == SSA_NAME && INTEGRAL_TYPE_P (type))
365 : : {
366 : 3789924 : edge e = loop_preheader_edge (loop);
367 : 3789924 : signop sgn = TYPE_SIGN (type);
368 : 3789924 : gphi_iterator gsi;
369 : :
370 : : /* Either for VAR itself... */
371 : 3789924 : Value_Range var_range (TREE_TYPE (var));
372 : 7579848 : get_range_query (cfun)->range_of_expr (var_range, var);
373 : 3789924 : if (var_range.varying_p () || var_range.undefined_p ())
374 : : rtype = VR_VARYING;
375 : : else
376 : : rtype = VR_RANGE;
377 : 3789924 : if (!var_range.undefined_p ())
378 : : {
379 : 3776248 : minv = var_range.lower_bound ();
380 : 3776248 : maxv = var_range.upper_bound ();
381 : : }
382 : :
383 : : /* Or for PHI results in loop->header where VAR is used as
384 : : PHI argument from the loop preheader edge. */
385 : 3789924 : Value_Range phi_range (TREE_TYPE (var));
386 : 14173152 : for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
387 : : {
388 : 10383228 : gphi *phi = gsi.phi ();
389 : 10383228 : if (PHI_ARG_DEF_FROM_EDGE (phi, e) == var
390 : 1898442 : && get_range_query (cfun)->range_of_expr (phi_range,
391 : : gimple_phi_result (phi))
392 : 949221 : && !phi_range.varying_p ()
393 : 10865586 : && !phi_range.undefined_p ())
394 : : {
395 : 482358 : if (rtype != VR_RANGE)
396 : : {
397 : 208529 : rtype = VR_RANGE;
398 : 208529 : minv = phi_range.lower_bound ();
399 : 208529 : maxv = phi_range.upper_bound ();
400 : : }
401 : : else
402 : : {
403 : 273829 : minv = wi::max (minv, phi_range.lower_bound (), sgn);
404 : 273829 : maxv = wi::min (maxv, phi_range.upper_bound (), sgn);
405 : : /* If the PHI result range are inconsistent with
406 : : the VAR range, give up on looking at the PHI
407 : : results. This can happen if VR_UNDEFINED is
408 : : involved. */
409 : 273829 : if (wi::gt_p (minv, maxv, sgn))
410 : : {
411 : 0 : Value_Range vr (TREE_TYPE (var));
412 : 0 : get_range_query (cfun)->range_of_expr (vr, var);
413 : 0 : if (vr.varying_p () || vr.undefined_p ())
414 : : rtype = VR_VARYING;
415 : : else
416 : : rtype = VR_RANGE;
417 : 0 : if (!vr.undefined_p ())
418 : : {
419 : 0 : minv = vr.lower_bound ();
420 : 0 : maxv = vr.upper_bound ();
421 : : }
422 : 0 : break;
423 : 0 : }
424 : : }
425 : : }
426 : : }
427 : 3789924 : mpz_init (minm);
428 : 3789924 : mpz_init (maxm);
429 : 3789924 : if (rtype != VR_RANGE)
430 : : {
431 : 2641517 : mpz_set (minm, min);
432 : 2641517 : mpz_set (maxm, max);
433 : : }
434 : : else
435 : : {
436 : 1148407 : gcc_assert (wi::le_p (minv, maxv, sgn));
437 : 1148407 : wi::to_mpz (minv, minm, sgn);
438 : 1148407 : wi::to_mpz (maxv, maxm, sgn);
439 : : }
440 : : /* Now walk the dominators of the loop header and use the entry
441 : : guards to refine the estimates. */
442 : 3789924 : for (bb = loop->header;
443 : 39323354 : bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
444 : 35533430 : bb = get_immediate_dominator (CDI_DOMINATORS, bb))
445 : : {
446 : 35533430 : edge e;
447 : 35533430 : tree c0, c1;
448 : 35533430 : enum tree_code cmp;
449 : :
450 : 71066860 : if (!single_pred_p (bb))
451 : 17756153 : continue;
452 : 17777277 : e = single_pred_edge (bb);
453 : :
454 : 17777277 : if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
455 : 4506700 : continue;
456 : :
457 : 26541154 : gcond *cond = as_a <gcond *> (*gsi_last_bb (e->src));
458 : 13270577 : c0 = gimple_cond_lhs (cond);
459 : 13270577 : cmp = gimple_cond_code (cond);
460 : 13270577 : c1 = gimple_cond_rhs (cond);
461 : :
462 : 13270577 : if (e->flags & EDGE_FALSE_VALUE)
463 : 6760588 : cmp = invert_tree_comparison (cmp, false);
464 : :
465 : 13270577 : refine_value_range_using_guard (type, var, c0, cmp, c1, minm, maxm);
466 : 13270577 : ++cnt;
467 : : }
468 : :
469 : 3789924 : mpz_add (minm, minm, off);
470 : 3789924 : mpz_add (maxm, maxm, off);
471 : : /* If the computation may not wrap or off is zero, then this
472 : : is always fine. If off is negative and minv + off isn't
473 : : smaller than type's minimum, or off is positive and
474 : : maxv + off isn't bigger than type's maximum, use the more
475 : : precise range too. */
476 : 3789924 : if (nowrap_type_p (type)
477 : 1604462 : || mpz_sgn (off) == 0
478 : 343776 : || (mpz_sgn (off) < 0 && mpz_cmp (minm, min) >= 0)
479 : 4075849 : || (mpz_sgn (off) > 0 && mpz_cmp (maxm, max) <= 0))
480 : : {
481 : 3672885 : mpz_set (min, minm);
482 : 3672885 : mpz_set (max, maxm);
483 : 3672885 : mpz_clear (minm);
484 : 3672885 : mpz_clear (maxm);
485 : 3672885 : return;
486 : : }
487 : 117039 : mpz_clear (minm);
488 : 117039 : mpz_clear (maxm);
489 : 3789924 : }
490 : :
491 : : /* If the computation may wrap, we know nothing about the value, except for
492 : : the range of the type. */
493 : 2223842 : if (!nowrap_type_p (type))
494 : : return;
495 : :
496 : : /* Since the addition of OFF does not wrap, if OFF is positive, then we may
497 : : add it to MIN, otherwise to MAX. */
498 : 1804764 : if (mpz_sgn (off) < 0)
499 : 144148 : mpz_add (max, max, off);
500 : : else
501 : 1660616 : mpz_add (min, min, off);
502 : 20376396 : }
503 : :
504 : : /* Stores the bounds on the difference of the values of the expressions
505 : : (var + X) and (var + Y), computed in TYPE, to BNDS. */
506 : :
507 : : static void
508 : 128327 : bound_difference_of_offsetted_base (tree type, mpz_t x, mpz_t y,
509 : : bounds *bnds)
510 : : {
511 : 128327 : int rel = mpz_cmp (x, y);
512 : 128327 : bool may_wrap = !nowrap_type_p (type);
513 : :
514 : : /* If X == Y, then the expressions are always equal.
515 : : If X > Y, there are the following possibilities:
516 : : a) neither of var + X and var + Y overflow or underflow, or both of
517 : : them do. Then their difference is X - Y.
518 : : b) var + X overflows, and var + Y does not. Then the values of the
519 : : expressions are var + X - M and var + Y, where M is the range of
520 : : the type, and their difference is X - Y - M.
521 : : c) var + Y underflows and var + X does not. Their difference again
522 : : is M - X + Y.
523 : : Therefore, if the arithmetics in type does not overflow, then the
524 : : bounds are (X - Y, X - Y), otherwise they are (X - Y - M, X - Y)
525 : : Similarly, if X < Y, the bounds are either (X - Y, X - Y) or
526 : : (X - Y, X - Y + M). */
527 : :
528 : 128327 : if (rel == 0)
529 : : {
530 : 735 : mpz_set_ui (bnds->below, 0);
531 : 735 : mpz_set_ui (bnds->up, 0);
532 : 735 : return;
533 : : }
534 : :
535 : 127592 : auto_mpz m;
536 : 127592 : wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), m, UNSIGNED);
537 : 127592 : mpz_add_ui (m, m, 1);
538 : 127592 : mpz_sub (bnds->up, x, y);
539 : 127592 : mpz_set (bnds->below, bnds->up);
540 : :
541 : 127592 : if (may_wrap)
542 : : {
543 : 114093 : if (rel > 0)
544 : 113646 : mpz_sub (bnds->below, bnds->below, m);
545 : : else
546 : 447 : mpz_add (bnds->up, bnds->up, m);
547 : : }
548 : 127592 : }
549 : :
550 : : /* From condition C0 CMP C1 derives information regarding the
551 : : difference of values of VARX + OFFX and VARY + OFFY, computed in TYPE,
552 : : and stores it to BNDS. */
553 : :
554 : : static void
555 : 14956898 : refine_bounds_using_guard (tree type, tree varx, mpz_t offx,
556 : : tree vary, mpz_t offy,
557 : : tree c0, enum tree_code cmp, tree c1,
558 : : bounds *bnds)
559 : : {
560 : 14956898 : tree varc0, varc1, ctype;
561 : 14956898 : mpz_t offc0, offc1, loffx, loffy, bnd;
562 : 14956898 : bool lbound = false;
563 : 14956898 : bool no_wrap = nowrap_type_p (type);
564 : 14956898 : bool x_ok, y_ok;
565 : :
566 : 14956898 : switch (cmp)
567 : : {
568 : 6364973 : case LT_EXPR:
569 : 6364973 : case LE_EXPR:
570 : 6364973 : case GT_EXPR:
571 : 6364973 : case GE_EXPR:
572 : 6364973 : STRIP_SIGN_NOPS (c0);
573 : 6364973 : STRIP_SIGN_NOPS (c1);
574 : 6364973 : ctype = TREE_TYPE (c0);
575 : 6364973 : if (!useless_type_conversion_p (ctype, type))
576 : 9351415 : return;
577 : :
578 : : break;
579 : :
580 : : case EQ_EXPR:
581 : : /* We could derive quite precise information from EQ_EXPR, however, such
582 : : a guard is unlikely to appear, so we do not bother with handling
583 : : it. */
584 : : return;
585 : :
586 : 4197165 : case NE_EXPR:
587 : : /* NE_EXPR comparisons do not contain much of useful information, except for
588 : : special case of comparing with the bounds of the type. */
589 : 4197165 : if (TREE_CODE (c1) != INTEGER_CST
590 : 3648429 : || !INTEGRAL_TYPE_P (type))
591 : : return;
592 : :
593 : : /* Ensure that the condition speaks about an expression in the same type
594 : : as X and Y. */
595 : 3320707 : ctype = TREE_TYPE (c0);
596 : 3320707 : if (TYPE_PRECISION (ctype) != TYPE_PRECISION (type))
597 : : return;
598 : 1836199 : c0 = fold_convert (type, c0);
599 : 1836199 : c1 = fold_convert (type, c1);
600 : :
601 : 1836199 : if (TYPE_MIN_VALUE (type)
602 : 1836199 : && operand_equal_p (c1, TYPE_MIN_VALUE (type), 0))
603 : : {
604 : : cmp = GT_EXPR;
605 : : break;
606 : : }
607 : 1371457 : if (TYPE_MAX_VALUE (type)
608 : 1371457 : && operand_equal_p (c1, TYPE_MAX_VALUE (type), 0))
609 : : {
610 : : cmp = LT_EXPR;
611 : : break;
612 : : }
613 : :
614 : : return;
615 : : default:
616 : : return;
617 : : }
618 : :
619 : 5605483 : mpz_init (offc0);
620 : 5605483 : mpz_init (offc1);
621 : 5605483 : split_to_var_and_offset (expand_simple_operations (c0), &varc0, offc0);
622 : 5605483 : split_to_var_and_offset (expand_simple_operations (c1), &varc1, offc1);
623 : :
624 : : /* We are only interested in comparisons of expressions based on VARX and
625 : : VARY. TODO -- we might also be able to derive some bounds from
626 : : expressions containing just one of the variables. */
627 : :
628 : 5605483 : if (operand_equal_p (varx, varc1, 0))
629 : : {
630 : 765670 : std::swap (varc0, varc1);
631 : 765670 : mpz_swap (offc0, offc1);
632 : 765670 : cmp = swap_tree_comparison (cmp);
633 : : }
634 : :
635 : 5605483 : if (!operand_equal_p (varx, varc0, 0)
636 : 5605483 : || !operand_equal_p (vary, varc1, 0))
637 : 4141824 : goto end;
638 : :
639 : 1463659 : mpz_init_set (loffx, offx);
640 : 1463659 : mpz_init_set (loffy, offy);
641 : :
642 : 1463659 : if (cmp == GT_EXPR || cmp == GE_EXPR)
643 : : {
644 : 1403307 : std::swap (varx, vary);
645 : 1403307 : mpz_swap (offc0, offc1);
646 : 1403307 : mpz_swap (loffx, loffy);
647 : 1403307 : cmp = swap_tree_comparison (cmp);
648 : 1403307 : lbound = true;
649 : : }
650 : :
651 : : /* If there is no overflow, the condition implies that
652 : :
653 : : (VARX + OFFX) cmp (VARY + OFFY) + (OFFX - OFFY + OFFC1 - OFFC0).
654 : :
655 : : The overflows and underflows may complicate things a bit; each
656 : : overflow decreases the appropriate offset by M, and underflow
657 : : increases it by M. The above inequality would not necessarily be
658 : : true if
659 : :
660 : : -- VARX + OFFX underflows and VARX + OFFC0 does not, or
661 : : VARX + OFFC0 overflows, but VARX + OFFX does not.
662 : : This may only happen if OFFX < OFFC0.
663 : : -- VARY + OFFY overflows and VARY + OFFC1 does not, or
664 : : VARY + OFFC1 underflows and VARY + OFFY does not.
665 : : This may only happen if OFFY > OFFC1. */
666 : :
667 : 1463659 : if (no_wrap)
668 : : {
669 : : x_ok = true;
670 : : y_ok = true;
671 : : }
672 : : else
673 : : {
674 : 524839 : x_ok = (integer_zerop (varx)
675 : 524839 : || mpz_cmp (loffx, offc0) >= 0);
676 : 524839 : y_ok = (integer_zerop (vary)
677 : 524839 : || mpz_cmp (loffy, offc1) <= 0);
678 : : }
679 : :
680 : 1463659 : if (x_ok && y_ok)
681 : : {
682 : 1458088 : mpz_init (bnd);
683 : 1458088 : mpz_sub (bnd, loffx, loffy);
684 : 1458088 : mpz_add (bnd, bnd, offc1);
685 : 1458088 : mpz_sub (bnd, bnd, offc0);
686 : :
687 : 1458088 : if (cmp == LT_EXPR)
688 : 1214436 : mpz_sub_ui (bnd, bnd, 1);
689 : :
690 : 1458088 : if (lbound)
691 : : {
692 : 1398646 : mpz_neg (bnd, bnd);
693 : 1398646 : if (mpz_cmp (bnds->below, bnd) < 0)
694 : 596673 : mpz_set (bnds->below, bnd);
695 : : }
696 : : else
697 : : {
698 : 59442 : if (mpz_cmp (bnd, bnds->up) < 0)
699 : 9926 : mpz_set (bnds->up, bnd);
700 : : }
701 : 1458088 : mpz_clear (bnd);
702 : : }
703 : :
704 : 1463659 : mpz_clear (loffx);
705 : 1463659 : mpz_clear (loffy);
706 : 5605483 : end:
707 : 5605483 : mpz_clear (offc0);
708 : 5605483 : mpz_clear (offc1);
709 : : }
710 : :
711 : : /* Stores the bounds on the value of the expression X - Y in LOOP to BNDS.
712 : : The subtraction is considered to be performed in arbitrary precision,
713 : : without overflows.
714 : :
715 : : We do not attempt to be too clever regarding the value ranges of X and
716 : : Y; most of the time, they are just integers or ssa names offsetted by
717 : : integer. However, we try to use the information contained in the
718 : : comparisons before the loop (usually created by loop header copying). */
719 : :
720 : : static void
721 : 10316525 : bound_difference (class loop *loop, tree x, tree y, bounds *bnds)
722 : : {
723 : 10316525 : tree type = TREE_TYPE (x);
724 : 10316525 : tree varx, vary;
725 : 10316525 : mpz_t offx, offy;
726 : 10316525 : int cnt = 0;
727 : 10316525 : edge e;
728 : 10316525 : basic_block bb;
729 : 10316525 : tree c0, c1;
730 : 10316525 : enum tree_code cmp;
731 : :
732 : : /* Get rid of unnecessary casts, but preserve the value of
733 : : the expressions. */
734 : 10316525 : STRIP_SIGN_NOPS (x);
735 : 10316525 : STRIP_SIGN_NOPS (y);
736 : :
737 : 10316525 : mpz_init (bnds->below);
738 : 10316525 : mpz_init (bnds->up);
739 : 10316525 : mpz_init (offx);
740 : 10316525 : mpz_init (offy);
741 : 10316525 : split_to_var_and_offset (x, &varx, offx);
742 : 10316525 : split_to_var_and_offset (y, &vary, offy);
743 : :
744 : 10316525 : if (!integer_zerop (varx)
745 : 10316525 : && operand_equal_p (varx, vary, 0))
746 : : {
747 : : /* Special case VARX == VARY -- we just need to compare the
748 : : offsets. The matters are a bit more complicated in the
749 : : case addition of offsets may wrap. */
750 : 128327 : bound_difference_of_offsetted_base (type, offx, offy, bnds);
751 : : }
752 : : else
753 : : {
754 : : /* Otherwise, use the value ranges to determine the initial
755 : : estimates on below and up. */
756 : 10188198 : auto_mpz minx, maxx, miny, maxy;
757 : 10188198 : determine_value_range (loop, type, varx, offx, minx, maxx);
758 : 10188198 : determine_value_range (loop, type, vary, offy, miny, maxy);
759 : :
760 : 10188198 : mpz_sub (bnds->below, minx, maxy);
761 : 10188198 : mpz_sub (bnds->up, maxx, miny);
762 : 10188198 : }
763 : :
764 : : /* If both X and Y are constants, we cannot get any more precise. */
765 : 10316525 : if (integer_zerop (varx) && integer_zerop (vary))
766 : 5738000 : goto end;
767 : :
768 : : /* Now walk the dominators of the loop header and use the entry
769 : : guards to refine the estimates. */
770 : 4578525 : for (bb = loop->header;
771 : 45229475 : bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
772 : 40650950 : bb = get_immediate_dominator (CDI_DOMINATORS, bb))
773 : : {
774 : 81301900 : if (!single_pred_p (bb))
775 : 19629555 : continue;
776 : 21021395 : e = single_pred_edge (bb);
777 : :
778 : 21021395 : if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
779 : 6064497 : continue;
780 : :
781 : 29913796 : gcond *cond = as_a <gcond *> (*gsi_last_bb (e->src));
782 : 14956898 : c0 = gimple_cond_lhs (cond);
783 : 14956898 : cmp = gimple_cond_code (cond);
784 : 14956898 : c1 = gimple_cond_rhs (cond);
785 : :
786 : 14956898 : if (e->flags & EDGE_FALSE_VALUE)
787 : 8255190 : cmp = invert_tree_comparison (cmp, false);
788 : :
789 : 14956898 : refine_bounds_using_guard (type, varx, offx, vary, offy,
790 : : c0, cmp, c1, bnds);
791 : 14956898 : ++cnt;
792 : : }
793 : :
794 : 4578525 : end:
795 : 10316525 : mpz_clear (offx);
796 : 10316525 : mpz_clear (offy);
797 : 10316525 : }
798 : :
799 : : /* Update the bounds in BNDS that restrict the value of X to the bounds
800 : : that restrict the value of X + DELTA. X can be obtained as a
801 : : difference of two values in TYPE. */
802 : :
803 : : static void
804 : 1623693 : bounds_add (bounds *bnds, const widest_int &delta, tree type)
805 : : {
806 : 1623693 : mpz_t mdelta, max;
807 : :
808 : 1623693 : mpz_init (mdelta);
809 : 1623693 : wi::to_mpz (delta, mdelta, SIGNED);
810 : :
811 : 1623693 : mpz_init (max);
812 : 1623693 : wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED);
813 : :
814 : 1623693 : mpz_add (bnds->up, bnds->up, mdelta);
815 : 1623693 : mpz_add (bnds->below, bnds->below, mdelta);
816 : :
817 : 1623693 : if (mpz_cmp (bnds->up, max) > 0)
818 : 106027 : mpz_set (bnds->up, max);
819 : :
820 : 1623693 : mpz_neg (max, max);
821 : 1623693 : if (mpz_cmp (bnds->below, max) < 0)
822 : 6609 : mpz_set (bnds->below, max);
823 : :
824 : 1623693 : mpz_clear (mdelta);
825 : 1623693 : mpz_clear (max);
826 : 1623693 : }
827 : :
828 : : /* Update the bounds in BNDS that restrict the value of X to the bounds
829 : : that restrict the value of -X. */
830 : :
831 : : static void
832 : 2019381 : bounds_negate (bounds *bnds)
833 : : {
834 : 2019381 : mpz_t tmp;
835 : :
836 : 2019381 : mpz_init_set (tmp, bnds->up);
837 : 2019381 : mpz_neg (bnds->up, bnds->below);
838 : 2019381 : mpz_neg (bnds->below, tmp);
839 : 2019381 : mpz_clear (tmp);
840 : 2019381 : }
841 : :
842 : : /* Returns inverse of X modulo 2^s, where MASK = 2^s-1. */
843 : :
844 : : static tree
845 : 132345 : inverse (tree x, tree mask)
846 : : {
847 : 132345 : tree type = TREE_TYPE (x);
848 : 132345 : tree rslt;
849 : 132345 : unsigned ctr = tree_floor_log2 (mask);
850 : :
851 : 132345 : if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT)
852 : : {
853 : 132287 : unsigned HOST_WIDE_INT ix;
854 : 132287 : unsigned HOST_WIDE_INT imask;
855 : 132287 : unsigned HOST_WIDE_INT irslt = 1;
856 : :
857 : 132287 : gcc_assert (cst_and_fits_in_hwi (x));
858 : 132287 : gcc_assert (cst_and_fits_in_hwi (mask));
859 : :
860 : 132287 : ix = int_cst_value (x);
861 : 132287 : imask = int_cst_value (mask);
862 : :
863 : 7493886 : for (; ctr; ctr--)
864 : : {
865 : 7229312 : irslt *= ix;
866 : 7229312 : ix *= ix;
867 : : }
868 : 132287 : irslt &= imask;
869 : :
870 : 132287 : rslt = build_int_cst_type (type, irslt);
871 : : }
872 : : else
873 : : {
874 : 58 : rslt = build_int_cst (type, 1);
875 : 7424 : for (; ctr; ctr--)
876 : : {
877 : 7366 : rslt = int_const_binop (MULT_EXPR, rslt, x);
878 : 7366 : x = int_const_binop (MULT_EXPR, x, x);
879 : : }
880 : 58 : rslt = int_const_binop (BIT_AND_EXPR, rslt, mask);
881 : : }
882 : :
883 : 132345 : return rslt;
884 : : }
885 : :
886 : : /* Derives the upper bound BND on the number of executions of loop with exit
887 : : condition S * i <> C. If NO_OVERFLOW is true, then the control variable of
888 : : the loop does not overflow. EXIT_MUST_BE_TAKEN is true if we are guaranteed
889 : : that the loop ends through this exit, i.e., the induction variable ever
890 : : reaches the value of C.
891 : :
892 : : The value C is equal to final - base, where final and base are the final and
893 : : initial value of the actual induction variable in the analysed loop. BNDS
894 : : bounds the value of this difference when computed in signed type with
895 : : unbounded range, while the computation of C is performed in an unsigned
896 : : type with the range matching the range of the type of the induction variable.
897 : : In particular, BNDS.up contains an upper bound on C in the following cases:
898 : : -- if the iv must reach its final value without overflow, i.e., if
899 : : NO_OVERFLOW && EXIT_MUST_BE_TAKEN is true, or
900 : : -- if final >= base, which we know to hold when BNDS.below >= 0. */
901 : :
902 : : static void
903 : 5965562 : number_of_iterations_ne_max (mpz_t bnd, bool no_overflow, tree c, tree s,
904 : : bounds *bnds, bool exit_must_be_taken)
905 : : {
906 : 5965562 : widest_int max;
907 : 5965562 : mpz_t d;
908 : 5965562 : tree type = TREE_TYPE (c);
909 : 11931124 : bool bnds_u_valid = ((no_overflow && exit_must_be_taken)
910 : 5965562 : || mpz_sgn (bnds->below) >= 0);
911 : :
912 : 5965562 : if (integer_onep (s)
913 : 635090 : || (TREE_CODE (c) == INTEGER_CST
914 : 266766 : && TREE_CODE (s) == INTEGER_CST
915 : 902324 : && wi::mod_trunc (wi::to_wide (c), wi::to_wide (s),
916 : 6499094 : TYPE_SIGN (type)) == 0)
917 : 6334354 : || (TYPE_OVERFLOW_UNDEFINED (type)
918 : 0 : && multiple_of_p (type, c, s)))
919 : : {
920 : : /* If C is an exact multiple of S, then its value will be reached before
921 : : the induction variable overflows (unless the loop is exited in some
922 : : other way before). Note that the actual induction variable in the
923 : : loop (which ranges from base to final instead of from 0 to C) may
924 : : overflow, in which case BNDS.up will not be giving a correct upper
925 : : bound on C; thus, BNDS_U_VALID had to be computed in advance. */
926 : 5596770 : no_overflow = true;
927 : 5596770 : exit_must_be_taken = true;
928 : : }
929 : :
930 : : /* If the induction variable can overflow, the number of iterations is at
931 : : most the period of the control variable (or infinite, but in that case
932 : : the whole # of iterations analysis will fail). */
933 : 5965562 : if (!no_overflow)
934 : : {
935 : 95724 : max = wi::mask <widest_int> (TYPE_PRECISION (type)
936 : 95724 : - wi::ctz (wi::to_wide (s)), false);
937 : 47862 : wi::to_mpz (max, bnd, UNSIGNED);
938 : 47862 : return;
939 : : }
940 : :
941 : : /* Now we know that the induction variable does not overflow, so the loop
942 : : iterates at most (range of type / S) times. */
943 : 5917700 : wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), bnd, UNSIGNED);
944 : :
945 : : /* If the induction variable is guaranteed to reach the value of C before
946 : : overflow, ... */
947 : 5917700 : if (exit_must_be_taken)
948 : : {
949 : : /* ... then we can strengthen this to C / S, and possibly we can use
950 : : the upper bound on C given by BNDS. */
951 : 5714298 : if (TREE_CODE (c) == INTEGER_CST)
952 : 5087785 : wi::to_mpz (wi::to_wide (c), bnd, UNSIGNED);
953 : 626513 : else if (bnds_u_valid)
954 : 481262 : mpz_set (bnd, bnds->up);
955 : : }
956 : :
957 : 5917700 : mpz_init (d);
958 : 5917700 : wi::to_mpz (wi::to_wide (s), d, UNSIGNED);
959 : 5917700 : mpz_fdiv_q (bnd, bnd, d);
960 : 5917700 : mpz_clear (d);
961 : 5965562 : }
962 : :
963 : : /* Determines number of iterations of loop whose ending condition
964 : : is IV <> FINAL. TYPE is the type of the iv. The number of
965 : : iterations is stored to NITER. EXIT_MUST_BE_TAKEN is true if
966 : : we know that the exit must be taken eventually, i.e., that the IV
967 : : ever reaches the value FINAL (we derived this earlier, and possibly set
968 : : NITER->assumptions to make sure this is the case). BNDS contains the
969 : : bounds on the difference FINAL - IV->base. */
970 : :
971 : : static bool
972 : 5965562 : number_of_iterations_ne (class loop *loop, tree type, affine_iv *iv,
973 : : tree final, class tree_niter_desc *niter,
974 : : bool exit_must_be_taken, bounds *bnds)
975 : : {
976 : 5965562 : tree niter_type = unsigned_type_for (type);
977 : 5965562 : tree s, c, d, bits, assumption, tmp, bound;
978 : :
979 : 5965562 : niter->control = *iv;
980 : 5965562 : niter->bound = final;
981 : 5965562 : niter->cmp = NE_EXPR;
982 : :
983 : : /* Rearrange the terms so that we get inequality S * i <> C, with S
984 : : positive. Also cast everything to the unsigned type. If IV does
985 : : not overflow, BNDS bounds the value of C. Also, this is the
986 : : case if the computation |FINAL - IV->base| does not overflow, i.e.,
987 : : if BNDS->below in the result is nonnegative. */
988 : 5965562 : if (tree_int_cst_sign_bit (iv->step))
989 : : {
990 : 2019381 : s = fold_convert (niter_type,
991 : : fold_build1 (NEGATE_EXPR, type, iv->step));
992 : 2019381 : c = fold_build2 (MINUS_EXPR, niter_type,
993 : : fold_convert (niter_type, iv->base),
994 : : fold_convert (niter_type, final));
995 : 2019381 : bounds_negate (bnds);
996 : : }
997 : : else
998 : : {
999 : 3946181 : s = fold_convert (niter_type, iv->step);
1000 : 3946181 : c = fold_build2 (MINUS_EXPR, niter_type,
1001 : : fold_convert (niter_type, final),
1002 : : fold_convert (niter_type, iv->base));
1003 : : }
1004 : :
1005 : 5965562 : auto_mpz max;
1006 : 5965562 : number_of_iterations_ne_max (max, iv->no_overflow, c, s, bnds,
1007 : : exit_must_be_taken);
1008 : 11931124 : niter->max = widest_int::from (wi::from_mpz (niter_type, max, false),
1009 : 11931124 : TYPE_SIGN (niter_type));
1010 : :
1011 : : /* Compute no-overflow information for the control iv. This can be
1012 : : proven when below two conditions are satisfied:
1013 : :
1014 : : 1) IV evaluates toward FINAL at beginning, i.e:
1015 : : base <= FINAL ; step > 0
1016 : : base >= FINAL ; step < 0
1017 : :
1018 : : 2) |FINAL - base| is an exact multiple of step.
1019 : :
1020 : : Unfortunately, it's hard to prove above conditions after pass loop-ch
1021 : : because loop with exit condition (IV != FINAL) usually will be guarded
1022 : : by initial-condition (IV.base - IV.step != FINAL). In this case, we
1023 : : can alternatively try to prove below conditions:
1024 : :
1025 : : 1') IV evaluates toward FINAL at beginning, i.e:
1026 : : new_base = base - step < FINAL ; step > 0
1027 : : && base - step doesn't underflow
1028 : : new_base = base - step > FINAL ; step < 0
1029 : : && base - step doesn't overflow
1030 : :
1031 : : Please refer to PR34114 as an example of loop-ch's impact.
1032 : :
1033 : : Note, for NE_EXPR, base equals to FINAL is a special case, in
1034 : : which the loop exits immediately, and the iv does not overflow.
1035 : :
1036 : : Also note, we prove condition 2) by checking base and final seperately
1037 : : along with condition 1) or 1'). Since we ensure the difference
1038 : : computation of c does not wrap with cond below and the adjusted s
1039 : : will fit a signed type as well as an unsigned we can safely do
1040 : : this using the type of the IV if it is not pointer typed. */
1041 : 5965562 : tree mtype = type;
1042 : 5965562 : if (POINTER_TYPE_P (type))
1043 : 430534 : mtype = niter_type;
1044 : 5965562 : if (!niter->control.no_overflow
1045 : 5965562 : && (integer_onep (s)
1046 : 162024 : || (multiple_of_p (mtype, fold_convert (mtype, iv->base),
1047 : 162024 : fold_convert (mtype, s), false)
1048 : 25506 : && multiple_of_p (mtype, fold_convert (mtype, final),
1049 : 25506 : fold_convert (mtype, s), false))))
1050 : : {
1051 : 1862587 : tree t, cond, relaxed_cond = boolean_false_node;
1052 : :
1053 : 1862587 : if (tree_int_cst_sign_bit (iv->step))
1054 : : {
1055 : 1796910 : cond = fold_build2 (GE_EXPR, boolean_type_node, iv->base, final);
1056 : 1796910 : if (TREE_CODE (type) == INTEGER_TYPE)
1057 : : {
1058 : : /* Only when base - step doesn't overflow. */
1059 : 1796910 : t = TYPE_MAX_VALUE (type);
1060 : 1796910 : t = fold_build2 (PLUS_EXPR, type, t, iv->step);
1061 : 1796910 : t = fold_build2 (GE_EXPR, boolean_type_node, t, iv->base);
1062 : 1796910 : if (integer_nonzerop (t))
1063 : : {
1064 : 1655440 : t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
1065 : 1655440 : relaxed_cond = fold_build2 (GT_EXPR, boolean_type_node, t,
1066 : : final);
1067 : : }
1068 : : }
1069 : : }
1070 : : else
1071 : : {
1072 : 65677 : cond = fold_build2 (LE_EXPR, boolean_type_node, iv->base, final);
1073 : 65677 : if (TREE_CODE (type) == INTEGER_TYPE)
1074 : : {
1075 : : /* Only when base - step doesn't underflow. */
1076 : 65677 : t = TYPE_MIN_VALUE (type);
1077 : 65677 : t = fold_build2 (PLUS_EXPR, type, t, iv->step);
1078 : 65677 : t = fold_build2 (LE_EXPR, boolean_type_node, t, iv->base);
1079 : 65677 : if (integer_nonzerop (t))
1080 : : {
1081 : 40381 : t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
1082 : 40381 : relaxed_cond = fold_build2 (LT_EXPR, boolean_type_node, t,
1083 : : final);
1084 : : }
1085 : : }
1086 : : }
1087 : :
1088 : 1862587 : t = simplify_using_initial_conditions (loop, cond);
1089 : 1862587 : if (!t || !integer_onep (t))
1090 : 55126 : t = simplify_using_initial_conditions (loop, relaxed_cond);
1091 : :
1092 : 1862587 : if (t && integer_onep (t))
1093 : : {
1094 : 1807623 : niter->control.no_overflow = true;
1095 : 1807623 : niter->niter = fold_build2 (EXACT_DIV_EXPR, niter_type, c, s);
1096 : 1807623 : return true;
1097 : : }
1098 : : }
1099 : :
1100 : : /* Let nsd (step, size of mode) = d. If d does not divide c, the loop
1101 : : is infinite. Otherwise, the number of iterations is
1102 : : (inverse(s/d) * (c/d)) mod (size of mode/d). */
1103 : 4157939 : bits = num_ending_zeros (s);
1104 : 12473817 : bound = build_low_bits_mask (niter_type,
1105 : 4157939 : (TYPE_PRECISION (niter_type)
1106 : 4157939 : - tree_to_uhwi (bits)));
1107 : :
1108 : 4157939 : d = fold_binary_to_constant (LSHIFT_EXPR, niter_type,
1109 : 4157939 : build_int_cst (niter_type, 1), bits);
1110 : 4157939 : s = fold_binary_to_constant (RSHIFT_EXPR, niter_type, s, bits);
1111 : :
1112 : 4157939 : if (!exit_must_be_taken)
1113 : : {
1114 : : /* If we cannot assume that the exit is taken eventually, record the
1115 : : assumptions for divisibility of c. */
1116 : 1528750 : assumption = fold_build2 (FLOOR_MOD_EXPR, niter_type, c, d);
1117 : 1528750 : assumption = fold_build2 (EQ_EXPR, boolean_type_node,
1118 : : assumption, build_int_cst (niter_type, 0));
1119 : 1528750 : if (!integer_nonzerop (assumption))
1120 : 239377 : niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1121 : : niter->assumptions, assumption);
1122 : : }
1123 : :
1124 : 4157939 : c = fold_build2 (EXACT_DIV_EXPR, niter_type, c, d);
1125 : 4157939 : if (integer_onep (s))
1126 : : {
1127 : 4025594 : niter->niter = c;
1128 : : }
1129 : : else
1130 : : {
1131 : 132345 : tmp = fold_build2 (MULT_EXPR, niter_type, c, inverse (s, bound));
1132 : 132345 : niter->niter = fold_build2 (BIT_AND_EXPR, niter_type, tmp, bound);
1133 : : }
1134 : : return true;
1135 : 5965562 : }
1136 : :
1137 : : /* Checks whether we can determine the final value of the control variable
1138 : : of the loop with ending condition IV0 < IV1 (computed in TYPE).
1139 : : DELTA is the difference IV1->base - IV0->base, STEP is the absolute value
1140 : : of the step. The assumptions necessary to ensure that the computation
1141 : : of the final value does not overflow are recorded in NITER. If we
1142 : : find the final value, we adjust DELTA and return TRUE. Otherwise
1143 : : we return false. BNDS bounds the value of IV1->base - IV0->base,
1144 : : and will be updated by the same amount as DELTA. EXIT_MUST_BE_TAKEN is
1145 : : true if we know that the exit must be taken eventually. */
1146 : :
1147 : : static bool
1148 : 300906 : number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1,
1149 : : class tree_niter_desc *niter,
1150 : : tree *delta, tree step,
1151 : : bool exit_must_be_taken, bounds *bnds)
1152 : : {
1153 : 300906 : tree niter_type = TREE_TYPE (step);
1154 : 300906 : tree mod = fold_build2 (FLOOR_MOD_EXPR, niter_type, *delta, step);
1155 : 300906 : tree tmod;
1156 : 300906 : tree assumption = boolean_true_node, bound, noloop;
1157 : 300906 : bool fv_comp_no_overflow;
1158 : 300906 : tree type1 = type;
1159 : 300906 : if (POINTER_TYPE_P (type))
1160 : 92363 : type1 = sizetype;
1161 : :
1162 : 300906 : if (TREE_CODE (mod) != INTEGER_CST)
1163 : : return false;
1164 : 26287 : if (integer_nonzerop (mod))
1165 : 11750 : mod = fold_build2 (MINUS_EXPR, niter_type, step, mod);
1166 : 26287 : tmod = fold_convert (type1, mod);
1167 : :
1168 : 26287 : auto_mpz mmod;
1169 : 26287 : wi::to_mpz (wi::to_wide (mod), mmod, UNSIGNED);
1170 : 26287 : mpz_neg (mmod, mmod);
1171 : :
1172 : : /* If the induction variable does not overflow and the exit is taken,
1173 : : then the computation of the final value does not overflow. This is
1174 : : also obviously the case if the new final value is equal to the
1175 : : current one. Finally, we postulate this for pointer type variables,
1176 : : as the code cannot rely on the object to that the pointer points being
1177 : : placed at the end of the address space (and more pragmatically,
1178 : : TYPE_{MIN,MAX}_VALUE is not defined for pointers). */
1179 : 26287 : if (integer_zerop (mod) || POINTER_TYPE_P (type))
1180 : : fv_comp_no_overflow = true;
1181 : 10999 : else if (!exit_must_be_taken)
1182 : : fv_comp_no_overflow = false;
1183 : : else
1184 : 5180 : fv_comp_no_overflow =
1185 : 5180 : (iv0->no_overflow && integer_nonzerop (iv0->step))
1186 : 6054 : || (iv1->no_overflow && integer_nonzerop (iv1->step));
1187 : :
1188 : 26287 : if (integer_nonzerop (iv0->step))
1189 : : {
1190 : : /* The final value of the iv is iv1->base + MOD, assuming that this
1191 : : computation does not overflow, and that
1192 : : iv0->base <= iv1->base + MOD. */
1193 : 22196 : if (!fv_comp_no_overflow)
1194 : : {
1195 : 4309 : bound = fold_build2 (MINUS_EXPR, type1,
1196 : : TYPE_MAX_VALUE (type1), tmod);
1197 : 4309 : assumption = fold_build2 (LE_EXPR, boolean_type_node,
1198 : : iv1->base, bound);
1199 : 4309 : if (integer_zerop (assumption))
1200 : : return false;
1201 : : }
1202 : 22196 : if (mpz_cmp (mmod, bnds->below) < 0)
1203 : 14878 : noloop = boolean_false_node;
1204 : 7318 : else if (POINTER_TYPE_P (type))
1205 : 4769 : noloop = fold_build2 (GT_EXPR, boolean_type_node,
1206 : : iv0->base,
1207 : : fold_build_pointer_plus (iv1->base, tmod));
1208 : : else
1209 : 2549 : noloop = fold_build2 (GT_EXPR, boolean_type_node,
1210 : : iv0->base,
1211 : : fold_build2 (PLUS_EXPR, type1,
1212 : : iv1->base, tmod));
1213 : : }
1214 : : else
1215 : : {
1216 : : /* The final value of the iv is iv0->base - MOD, assuming that this
1217 : : computation does not overflow, and that
1218 : : iv0->base - MOD <= iv1->base. */
1219 : 4091 : if (!fv_comp_no_overflow)
1220 : : {
1221 : 1510 : bound = fold_build2 (PLUS_EXPR, type1,
1222 : : TYPE_MIN_VALUE (type1), tmod);
1223 : 1510 : assumption = fold_build2 (GE_EXPR, boolean_type_node,
1224 : : iv0->base, bound);
1225 : 1510 : if (integer_zerop (assumption))
1226 : : return false;
1227 : : }
1228 : 4048 : if (mpz_cmp (mmod, bnds->below) < 0)
1229 : 3081 : noloop = boolean_false_node;
1230 : 967 : else if (POINTER_TYPE_P (type))
1231 : 854 : noloop = fold_build2 (GT_EXPR, boolean_type_node,
1232 : : fold_build_pointer_plus (iv0->base,
1233 : : fold_build1 (NEGATE_EXPR,
1234 : : type1, tmod)),
1235 : : iv1->base);
1236 : : else
1237 : 113 : noloop = fold_build2 (GT_EXPR, boolean_type_node,
1238 : : fold_build2 (MINUS_EXPR, type1,
1239 : : iv0->base, tmod),
1240 : : iv1->base);
1241 : : }
1242 : :
1243 : 26244 : if (!integer_nonzerop (assumption))
1244 : 196 : niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1245 : : niter->assumptions,
1246 : : assumption);
1247 : 26244 : if (!integer_zerop (noloop))
1248 : 2999 : niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1249 : : niter->may_be_zero,
1250 : : noloop);
1251 : 26244 : bounds_add (bnds, wi::to_widest (mod), type);
1252 : 26244 : *delta = fold_build2 (PLUS_EXPR, niter_type, *delta, mod);
1253 : :
1254 : 26244 : return true;
1255 : 26287 : }
1256 : :
1257 : : /* Add assertions to NITER that ensure that the control variable of the loop
1258 : : with ending condition IV0 < IV1 does not overflow. Types of IV0 and IV1
1259 : : are TYPE. Returns false if we can prove that there is an overflow, true
1260 : : otherwise. STEP is the absolute value of the step. */
1261 : :
1262 : : static bool
1263 : 274662 : assert_no_overflow_lt (tree type, affine_iv *iv0, affine_iv *iv1,
1264 : : class tree_niter_desc *niter, tree step)
1265 : : {
1266 : 274662 : tree bound, d, assumption, diff;
1267 : 274662 : tree niter_type = TREE_TYPE (step);
1268 : :
1269 : 274662 : if (integer_nonzerop (iv0->step))
1270 : : {
1271 : : /* for (i = iv0->base; i < iv1->base; i += iv0->step) */
1272 : 220478 : if (iv0->no_overflow)
1273 : : return true;
1274 : :
1275 : : /* If iv0->base is a constant, we can determine the last value before
1276 : : overflow precisely; otherwise we conservatively assume
1277 : : MAX - STEP + 1. */
1278 : :
1279 : 30668 : if (TREE_CODE (iv0->base) == INTEGER_CST)
1280 : : {
1281 : 10824 : d = fold_build2 (MINUS_EXPR, niter_type,
1282 : : fold_convert (niter_type, TYPE_MAX_VALUE (type)),
1283 : : fold_convert (niter_type, iv0->base));
1284 : 10824 : diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
1285 : : }
1286 : : else
1287 : 19844 : diff = fold_build2 (MINUS_EXPR, niter_type, step,
1288 : : build_int_cst (niter_type, 1));
1289 : 30668 : bound = fold_build2 (MINUS_EXPR, type,
1290 : : TYPE_MAX_VALUE (type), fold_convert (type, diff));
1291 : 30668 : assumption = fold_build2 (LE_EXPR, boolean_type_node,
1292 : : iv1->base, bound);
1293 : : }
1294 : : else
1295 : : {
1296 : : /* for (i = iv1->base; i > iv0->base; i += iv1->step) */
1297 : 54184 : if (iv1->no_overflow)
1298 : : return true;
1299 : :
1300 : 25920 : if (TREE_CODE (iv1->base) == INTEGER_CST)
1301 : : {
1302 : 43 : d = fold_build2 (MINUS_EXPR, niter_type,
1303 : : fold_convert (niter_type, iv1->base),
1304 : : fold_convert (niter_type, TYPE_MIN_VALUE (type)));
1305 : 43 : diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
1306 : : }
1307 : : else
1308 : 25877 : diff = fold_build2 (MINUS_EXPR, niter_type, step,
1309 : : build_int_cst (niter_type, 1));
1310 : 25920 : bound = fold_build2 (PLUS_EXPR, type,
1311 : : TYPE_MIN_VALUE (type), fold_convert (type, diff));
1312 : 25920 : assumption = fold_build2 (GE_EXPR, boolean_type_node,
1313 : : iv0->base, bound);
1314 : : }
1315 : :
1316 : 56588 : if (integer_zerop (assumption))
1317 : : return false;
1318 : 56537 : if (!integer_nonzerop (assumption))
1319 : 39066 : niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1320 : : niter->assumptions, assumption);
1321 : :
1322 : 56537 : iv0->no_overflow = true;
1323 : 56537 : iv1->no_overflow = true;
1324 : 56537 : return true;
1325 : : }
1326 : :
1327 : : /* Add an assumption to NITER that a loop whose ending condition
1328 : : is IV0 < IV1 rolls. TYPE is the type of the control iv. BNDS
1329 : : bounds the value of IV1->base - IV0->base. */
1330 : :
1331 : : static void
1332 : 274611 : assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
1333 : : class tree_niter_desc *niter, bounds *bnds)
1334 : : {
1335 : 274611 : tree assumption = boolean_true_node, bound, diff;
1336 : 274611 : tree mbz, mbzl, mbzr, type1;
1337 : 274611 : bool rolls_p, no_overflow_p;
1338 : 274611 : widest_int dstep;
1339 : 274611 : mpz_t mstep, max;
1340 : :
1341 : : /* We are going to compute the number of iterations as
1342 : : (iv1->base - iv0->base + step - 1) / step, computed in the unsigned
1343 : : variant of TYPE. This formula only works if
1344 : :
1345 : : -step + 1 <= (iv1->base - iv0->base) <= MAX - step + 1
1346 : :
1347 : : (where MAX is the maximum value of the unsigned variant of TYPE, and
1348 : : the computations in this formula are performed in full precision,
1349 : : i.e., without overflows).
1350 : :
1351 : : Usually, for loops with exit condition iv0->base + step * i < iv1->base,
1352 : : we have a condition of the form iv0->base - step < iv1->base before the loop,
1353 : : and for loops iv0->base < iv1->base - step * i the condition
1354 : : iv0->base < iv1->base + step, due to loop header copying, which enable us
1355 : : to prove the lower bound.
1356 : :
1357 : : The upper bound is more complicated. Unless the expressions for initial
1358 : : and final value themselves contain enough information, we usually cannot
1359 : : derive it from the context. */
1360 : :
1361 : : /* First check whether the answer does not follow from the bounds we gathered
1362 : : before. */
1363 : 274611 : if (integer_nonzerop (iv0->step))
1364 : 220478 : dstep = wi::to_widest (iv0->step);
1365 : : else
1366 : : {
1367 : 54133 : dstep = wi::sext (wi::to_widest (iv1->step), TYPE_PRECISION (type));
1368 : 54133 : dstep = -dstep;
1369 : : }
1370 : :
1371 : 274611 : mpz_init (mstep);
1372 : 274611 : wi::to_mpz (dstep, mstep, UNSIGNED);
1373 : 274611 : mpz_neg (mstep, mstep);
1374 : 274611 : mpz_add_ui (mstep, mstep, 1);
1375 : :
1376 : 274611 : rolls_p = mpz_cmp (mstep, bnds->below) <= 0;
1377 : :
1378 : 274611 : mpz_init (max);
1379 : 274611 : wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED);
1380 : 274611 : mpz_add (max, max, mstep);
1381 : 549222 : no_overflow_p = (mpz_cmp (bnds->up, max) <= 0
1382 : : /* For pointers, only values lying inside a single object
1383 : : can be compared or manipulated by pointer arithmetics.
1384 : : Gcc in general does not allow or handle objects larger
1385 : : than half of the address space, hence the upper bound
1386 : : is satisfied for pointers. */
1387 : 274611 : || POINTER_TYPE_P (type));
1388 : 274611 : mpz_clear (mstep);
1389 : 274611 : mpz_clear (max);
1390 : :
1391 : 274611 : if (rolls_p && no_overflow_p)
1392 : 94422 : return;
1393 : :
1394 : 180189 : type1 = type;
1395 : 180189 : if (POINTER_TYPE_P (type))
1396 : 55384 : type1 = sizetype;
1397 : :
1398 : : /* Now the hard part; we must formulate the assumption(s) as expressions, and
1399 : : we must be careful not to introduce overflow. */
1400 : :
1401 : 180189 : if (integer_nonzerop (iv0->step))
1402 : : {
1403 : 144956 : diff = fold_build2 (MINUS_EXPR, type1,
1404 : : iv0->step, build_int_cst (type1, 1));
1405 : :
1406 : : /* We need to know that iv0->base >= MIN + iv0->step - 1. Since
1407 : : 0 address never belongs to any object, we can assume this for
1408 : : pointers. */
1409 : 144956 : if (!POINTER_TYPE_P (type))
1410 : : {
1411 : 93091 : bound = fold_build2 (PLUS_EXPR, type1,
1412 : : TYPE_MIN_VALUE (type), diff);
1413 : 93091 : assumption = fold_build2 (GE_EXPR, boolean_type_node,
1414 : : iv0->base, bound);
1415 : : }
1416 : :
1417 : : /* And then we can compute iv0->base - diff, and compare it with
1418 : : iv1->base. */
1419 : 144956 : mbzl = fold_build2 (MINUS_EXPR, type1,
1420 : : fold_convert (type1, iv0->base), diff);
1421 : 144956 : mbzr = fold_convert (type1, iv1->base);
1422 : : }
1423 : : else
1424 : : {
1425 : 35233 : diff = fold_build2 (PLUS_EXPR, type1,
1426 : : iv1->step, build_int_cst (type1, 1));
1427 : :
1428 : 35233 : if (!POINTER_TYPE_P (type))
1429 : : {
1430 : 31714 : bound = fold_build2 (PLUS_EXPR, type1,
1431 : : TYPE_MAX_VALUE (type), diff);
1432 : 31714 : assumption = fold_build2 (LE_EXPR, boolean_type_node,
1433 : : iv1->base, bound);
1434 : : }
1435 : :
1436 : 35233 : mbzl = fold_convert (type1, iv0->base);
1437 : 35233 : mbzr = fold_build2 (MINUS_EXPR, type1,
1438 : : fold_convert (type1, iv1->base), diff);
1439 : : }
1440 : :
1441 : 180189 : if (!integer_nonzerop (assumption))
1442 : 69457 : niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1443 : : niter->assumptions, assumption);
1444 : 180189 : if (!rolls_p)
1445 : : {
1446 : 171708 : mbz = fold_build2 (GT_EXPR, boolean_type_node, mbzl, mbzr);
1447 : 171708 : niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1448 : : niter->may_be_zero, mbz);
1449 : : }
1450 : 274611 : }
1451 : :
1452 : : /* Determines number of iterations of loop whose ending condition
1453 : : is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
1454 : : The number of iterations is stored to NITER. */
1455 : :
1456 : : static bool
1457 : 65168 : number_of_iterations_until_wrap (class loop *loop, tree type, affine_iv *iv0,
1458 : : affine_iv *iv1, class tree_niter_desc *niter)
1459 : : {
1460 : 65168 : tree niter_type = unsigned_type_for (type);
1461 : 65168 : tree step, num, assumptions, may_be_zero, span;
1462 : 65168 : wide_int high, low, max, min;
1463 : :
1464 : 65168 : may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
1465 : 65168 : if (integer_onep (may_be_zero))
1466 : : return false;
1467 : :
1468 : 64958 : int prec = TYPE_PRECISION (type);
1469 : 64958 : signop sgn = TYPE_SIGN (type);
1470 : 64958 : min = wi::min_value (prec, sgn);
1471 : 64958 : max = wi::max_value (prec, sgn);
1472 : :
1473 : : /* n < {base, C}. */
1474 : 64958 : if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit (iv1->step))
1475 : : {
1476 : 40177 : step = iv1->step;
1477 : : /* MIN + C - 1 <= n. */
1478 : 40177 : tree last = wide_int_to_tree (type, min + wi::to_wide (step) - 1);
1479 : 40177 : assumptions = fold_build2 (LE_EXPR, boolean_type_node, last, iv0->base);
1480 : 40177 : if (integer_zerop (assumptions))
1481 : : return false;
1482 : :
1483 : 40175 : num = fold_build2 (MINUS_EXPR, niter_type,
1484 : : wide_int_to_tree (niter_type, max),
1485 : : fold_convert (niter_type, iv1->base));
1486 : :
1487 : : /* When base has the form iv + 1, if we know iv >= n, then iv + 1 < n
1488 : : only when iv + 1 overflows, i.e. when iv == TYPE_VALUE_MAX. */
1489 : 40175 : if (sgn == UNSIGNED
1490 : 4988 : && integer_onep (step)
1491 : 827 : && TREE_CODE (iv1->base) == PLUS_EXPR
1492 : 40755 : && integer_onep (TREE_OPERAND (iv1->base, 1)))
1493 : : {
1494 : 458 : tree cond = fold_build2 (GE_EXPR, boolean_type_node,
1495 : : TREE_OPERAND (iv1->base, 0), iv0->base);
1496 : 458 : cond = simplify_using_initial_conditions (loop, cond);
1497 : 458 : if (integer_onep (cond))
1498 : 12 : may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node,
1499 : : TREE_OPERAND (iv1->base, 0),
1500 : : TYPE_MAX_VALUE (type));
1501 : : }
1502 : :
1503 : 40175 : high = max;
1504 : 40175 : if (TREE_CODE (iv1->base) == INTEGER_CST)
1505 : 28920 : low = wi::to_wide (iv1->base) - 1;
1506 : 11255 : else if (TREE_CODE (iv0->base) == INTEGER_CST)
1507 : 5393 : low = wi::to_wide (iv0->base);
1508 : : else
1509 : 5862 : low = min;
1510 : : }
1511 : : /* {base, -C} < n. */
1512 : 24781 : else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
1513 : : {
1514 : 24781 : step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step);
1515 : : /* MAX - C + 1 >= n. */
1516 : 24781 : tree last = wide_int_to_tree (type, max - wi::to_wide (step) + 1);
1517 : 24781 : assumptions = fold_build2 (GE_EXPR, boolean_type_node, last, iv1->base);
1518 : 24781 : if (integer_zerop (assumptions))
1519 : : return false;
1520 : :
1521 : 24781 : num = fold_build2 (MINUS_EXPR, niter_type,
1522 : : fold_convert (niter_type, iv0->base),
1523 : : wide_int_to_tree (niter_type, min));
1524 : 24781 : low = min;
1525 : 24781 : if (TREE_CODE (iv0->base) == INTEGER_CST)
1526 : 1276 : high = wi::to_wide (iv0->base) + 1;
1527 : 23505 : else if (TREE_CODE (iv1->base) == INTEGER_CST)
1528 : 1995 : high = wi::to_wide (iv1->base);
1529 : : else
1530 : 21510 : high = max;
1531 : : }
1532 : : else
1533 : 0 : return false;
1534 : :
1535 : : /* (delta + step - 1) / step */
1536 : 64956 : step = fold_convert (niter_type, step);
1537 : 64956 : num = fold_build2 (PLUS_EXPR, niter_type, num, step);
1538 : 64956 : niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, step);
1539 : :
1540 : 64956 : widest_int delta, s;
1541 : 64956 : delta = widest_int::from (high, sgn) - widest_int::from (low, sgn);
1542 : 64956 : s = wi::to_widest (step);
1543 : 64956 : delta = delta + s - 1;
1544 : 64956 : niter->max = wi::udiv_floor (delta, s);
1545 : :
1546 : 64956 : niter->may_be_zero = may_be_zero;
1547 : :
1548 : 64956 : if (!integer_nonzerop (assumptions))
1549 : 9190 : niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1550 : : niter->assumptions, assumptions);
1551 : :
1552 : 64956 : niter->control.no_overflow = false;
1553 : :
1554 : : /* Update bound and exit condition as:
1555 : : bound = niter * STEP + (IVbase - STEP).
1556 : : { IVbase - STEP, +, STEP } != bound
1557 : : Here, biasing IVbase by 1 step makes 'bound' be the value before wrap.
1558 : : */
1559 : 64956 : tree base_type = TREE_TYPE (niter->control.base);
1560 : 64956 : if (POINTER_TYPE_P (base_type))
1561 : : {
1562 : 6571 : tree utype = unsigned_type_for (base_type);
1563 : 6571 : niter->control.base
1564 : 6571 : = fold_build2 (MINUS_EXPR, utype,
1565 : : fold_convert (utype, niter->control.base),
1566 : : fold_convert (utype, niter->control.step));
1567 : 6571 : niter->control.base = fold_convert (base_type, niter->control.base);
1568 : : }
1569 : : else
1570 : 58385 : niter->control.base
1571 : 58385 : = fold_build2 (MINUS_EXPR, base_type, niter->control.base,
1572 : : niter->control.step);
1573 : :
1574 : 64956 : span = fold_build2 (MULT_EXPR, niter_type, niter->niter,
1575 : : fold_convert (niter_type, niter->control.step));
1576 : 64956 : niter->bound = fold_build2 (PLUS_EXPR, niter_type, span,
1577 : : fold_convert (niter_type, niter->control.base));
1578 : 64956 : niter->bound = fold_convert (type, niter->bound);
1579 : 64956 : niter->cmp = NE_EXPR;
1580 : :
1581 : 64956 : return true;
1582 : 130124 : }
1583 : :
1584 : : /* Determines number of iterations of loop whose ending condition
1585 : : is IV0 < IV1. TYPE is the type of the iv. The number of
1586 : : iterations is stored to NITER. BNDS bounds the difference
1587 : : IV1->base - IV0->base. EXIT_MUST_BE_TAKEN is true if we know
1588 : : that the exit must be taken eventually. */
1589 : :
1590 : : static bool
1591 : 4377181 : number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0,
1592 : : affine_iv *iv1, class tree_niter_desc *niter,
1593 : : bool exit_must_be_taken, bounds *bnds)
1594 : : {
1595 : 4377181 : tree niter_type = unsigned_type_for (type);
1596 : 4377181 : tree delta, step, s;
1597 : 4377181 : mpz_t mstep, tmp;
1598 : :
1599 : 4377181 : if (integer_nonzerop (iv0->step))
1600 : : {
1601 : 4059765 : niter->control = *iv0;
1602 : 4059765 : niter->cmp = LT_EXPR;
1603 : 4059765 : niter->bound = iv1->base;
1604 : : }
1605 : : else
1606 : : {
1607 : 317416 : niter->control = *iv1;
1608 : 317416 : niter->cmp = GT_EXPR;
1609 : 317416 : niter->bound = iv0->base;
1610 : : }
1611 : :
1612 : : /* {base, -C} < n, or n < {base, C} */
1613 : 4377181 : if (tree_int_cst_sign_bit (iv0->step)
1614 : 4377181 : || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step)))
1615 : 65168 : return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter);
1616 : :
1617 : 4312013 : delta = fold_build2 (MINUS_EXPR, niter_type,
1618 : : fold_convert (niter_type, iv1->base),
1619 : : fold_convert (niter_type, iv0->base));
1620 : :
1621 : : /* First handle the special case that the step is +-1. */
1622 : 8104323 : if ((integer_onep (iv0->step) && integer_zerop (iv1->step))
1623 : 4312013 : || (integer_all_onesp (iv1->step) && integer_zerop (iv0->step)))
1624 : : {
1625 : : /* for (i = iv0->base; i < iv1->base; i++)
1626 : :
1627 : : or
1628 : :
1629 : : for (i = iv1->base; i > iv0->base; i--).
1630 : :
1631 : : In both cases # of iterations is iv1->base - iv0->base, assuming that
1632 : : iv1->base >= iv0->base.
1633 : :
1634 : : First try to derive a lower bound on the value of
1635 : : iv1->base - iv0->base, computed in full precision. If the difference
1636 : : is nonnegative, we are done, otherwise we must record the
1637 : : condition. */
1638 : :
1639 : 4011107 : if (mpz_sgn (bnds->below) < 0)
1640 : 1639625 : niter->may_be_zero = fold_build2 (LT_EXPR, boolean_type_node,
1641 : : iv1->base, iv0->base);
1642 : 4011107 : niter->niter = delta;
1643 : 8022214 : niter->max = widest_int::from (wi::from_mpz (niter_type, bnds->up, false),
1644 : 8022214 : TYPE_SIGN (niter_type));
1645 : 4011107 : niter->control.no_overflow = true;
1646 : 4011107 : return true;
1647 : : }
1648 : :
1649 : 300906 : if (integer_nonzerop (iv0->step))
1650 : 242674 : step = fold_convert (niter_type, iv0->step);
1651 : : else
1652 : 58232 : step = fold_convert (niter_type,
1653 : : fold_build1 (NEGATE_EXPR, type, iv1->step));
1654 : :
1655 : : /* If we can determine the final value of the control iv exactly, we can
1656 : : transform the condition to != comparison. In particular, this will be
1657 : : the case if DELTA is constant. */
1658 : 300906 : if (number_of_iterations_lt_to_ne (type, iv0, iv1, niter, &delta, step,
1659 : : exit_must_be_taken, bnds))
1660 : : {
1661 : 26244 : affine_iv zps;
1662 : :
1663 : 26244 : zps.base = build_int_cst (niter_type, 0);
1664 : 26244 : zps.step = step;
1665 : : /* number_of_iterations_lt_to_ne will add assumptions that ensure that
1666 : : zps does not overflow. */
1667 : 26244 : zps.no_overflow = true;
1668 : :
1669 : 26244 : return number_of_iterations_ne (loop, type, &zps,
1670 : : delta, niter, true, bnds);
1671 : : }
1672 : :
1673 : : /* Make sure that the control iv does not overflow. */
1674 : 274662 : if (!assert_no_overflow_lt (type, iv0, iv1, niter, step))
1675 : : return false;
1676 : :
1677 : : /* We determine the number of iterations as (delta + step - 1) / step. For
1678 : : this to work, we must know that iv1->base >= iv0->base - step + 1,
1679 : : otherwise the loop does not roll. */
1680 : 274611 : assert_loop_rolls_lt (type, iv0, iv1, niter, bnds);
1681 : :
1682 : 274611 : s = fold_build2 (MINUS_EXPR, niter_type,
1683 : : step, build_int_cst (niter_type, 1));
1684 : 274611 : delta = fold_build2 (PLUS_EXPR, niter_type, delta, s);
1685 : 274611 : niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, delta, step);
1686 : :
1687 : 274611 : mpz_init (mstep);
1688 : 274611 : mpz_init (tmp);
1689 : 274611 : wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
1690 : 274611 : mpz_add (tmp, bnds->up, mstep);
1691 : 274611 : mpz_sub_ui (tmp, tmp, 1);
1692 : 274611 : mpz_fdiv_q (tmp, tmp, mstep);
1693 : 549222 : niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false),
1694 : 549222 : TYPE_SIGN (niter_type));
1695 : 274611 : mpz_clear (mstep);
1696 : 274611 : mpz_clear (tmp);
1697 : :
1698 : 274611 : return true;
1699 : : }
1700 : :
1701 : : /* Determines number of iterations of loop whose ending condition
1702 : : is IV0 <= IV1. TYPE is the type of the iv. The number of
1703 : : iterations is stored to NITER. EXIT_MUST_BE_TAKEN is true if
1704 : : we know that this condition must eventually become false (we derived this
1705 : : earlier, and possibly set NITER->assumptions to make sure this
1706 : : is the case). BNDS bounds the difference IV1->base - IV0->base. */
1707 : :
1708 : : static bool
1709 : 1597475 : number_of_iterations_le (class loop *loop, tree type, affine_iv *iv0,
1710 : : affine_iv *iv1, class tree_niter_desc *niter,
1711 : : bool exit_must_be_taken, bounds *bnds)
1712 : : {
1713 : 1597475 : tree assumption;
1714 : 1597475 : tree type1 = type;
1715 : 1597475 : if (POINTER_TYPE_P (type))
1716 : 5325 : type1 = sizetype;
1717 : :
1718 : : /* Say that IV0 is the control variable. Then IV0 <= IV1 iff
1719 : : IV0 < IV1 + 1, assuming that IV1 is not equal to the greatest
1720 : : value of the type. This we must know anyway, since if it is
1721 : : equal to this value, the loop rolls forever. We do not check
1722 : : this condition for pointer type ivs, as the code cannot rely on
1723 : : the object to that the pointer points being placed at the end of
1724 : : the address space (and more pragmatically, TYPE_{MIN,MAX}_VALUE is
1725 : : not defined for pointers). */
1726 : :
1727 : 1597475 : if (!exit_must_be_taken && !POINTER_TYPE_P (type))
1728 : : {
1729 : 502780 : if (integer_nonzerop (iv0->step))
1730 : 441119 : assumption = fold_build2 (NE_EXPR, boolean_type_node,
1731 : : iv1->base, TYPE_MAX_VALUE (type));
1732 : : else
1733 : 61661 : assumption = fold_build2 (NE_EXPR, boolean_type_node,
1734 : : iv0->base, TYPE_MIN_VALUE (type));
1735 : :
1736 : 502780 : if (integer_zerop (assumption))
1737 : : return false;
1738 : 502754 : if (!integer_nonzerop (assumption))
1739 : 194247 : niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1740 : : niter->assumptions, assumption);
1741 : : }
1742 : :
1743 : 1597449 : if (integer_nonzerop (iv0->step))
1744 : : {
1745 : 1503582 : if (POINTER_TYPE_P (type))
1746 : 1337 : iv1->base = fold_build_pointer_plus_hwi (iv1->base, 1);
1747 : : else
1748 : 1502245 : iv1->base = fold_build2 (PLUS_EXPR, type1, iv1->base,
1749 : : build_int_cst (type1, 1));
1750 : : }
1751 : 93867 : else if (POINTER_TYPE_P (type))
1752 : 3988 : iv0->base = fold_build_pointer_plus_hwi (iv0->base, -1);
1753 : : else
1754 : 89879 : iv0->base = fold_build2 (MINUS_EXPR, type1,
1755 : : iv0->base, build_int_cst (type1, 1));
1756 : :
1757 : 1597449 : bounds_add (bnds, 1, type1);
1758 : :
1759 : 1597449 : return number_of_iterations_lt (loop, type, iv0, iv1, niter, exit_must_be_taken,
1760 : 1597449 : bnds);
1761 : : }
1762 : :
1763 : : /* Dumps description of affine induction variable IV to FILE. */
1764 : :
1765 : : static void
1766 : 73042 : dump_affine_iv (FILE *file, affine_iv *iv)
1767 : : {
1768 : 73042 : if (!integer_zerop (iv->step))
1769 : 36521 : fprintf (file, "[");
1770 : :
1771 : 73042 : print_generic_expr (dump_file, iv->base, TDF_SLIM);
1772 : :
1773 : 73042 : if (!integer_zerop (iv->step))
1774 : : {
1775 : 36521 : fprintf (file, ", + , ");
1776 : 36521 : print_generic_expr (dump_file, iv->step, TDF_SLIM);
1777 : 65435 : fprintf (file, "]%s", iv->no_overflow ? "(no_overflow)" : "");
1778 : : }
1779 : 73042 : }
1780 : :
1781 : : /* Determine the number of iterations according to condition (for staying
1782 : : inside loop) which compares two induction variables using comparison
1783 : : operator CODE. The induction variable on left side of the comparison
1784 : : is IV0, the right-hand side is IV1. Both induction variables must have
1785 : : type TYPE, which must be an integer or pointer type. The steps of the
1786 : : ivs must be constants (or NULL_TREE, which is interpreted as constant zero).
1787 : :
1788 : : LOOP is the loop whose number of iterations we are determining.
1789 : :
1790 : : ONLY_EXIT is true if we are sure this is the only way the loop could be
1791 : : exited (including possibly non-returning function calls, exceptions, etc.)
1792 : : -- in this case we can use the information whether the control induction
1793 : : variables can overflow or not in a more efficient way.
1794 : :
1795 : : if EVERY_ITERATION is true, we know the test is executed on every iteration.
1796 : :
1797 : : The results (number of iterations and assumptions as described in
1798 : : comments at class tree_niter_desc in tree-ssa-loop.h) are stored to NITER.
1799 : : Returns false if it fails to determine number of iterations, true if it
1800 : : was determined (possibly with some assumptions). */
1801 : :
1802 : : static bool
1803 : 10480574 : number_of_iterations_cond (class loop *loop,
1804 : : tree type, affine_iv *iv0, enum tree_code code,
1805 : : affine_iv *iv1, class tree_niter_desc *niter,
1806 : : bool only_exit, bool every_iteration)
1807 : : {
1808 : 10480574 : bool exit_must_be_taken = false, ret;
1809 : 10480574 : bounds bnds;
1810 : :
1811 : : /* If the test is not executed every iteration, wrapping may make the test
1812 : : to pass again.
1813 : : TODO: the overflow case can be still used as unreliable estimate of upper
1814 : : bound. But we have no API to pass it down to number of iterations code
1815 : : and, at present, it will not use it anyway. */
1816 : 10480574 : if (!every_iteration
1817 : 53132 : && (!iv0->no_overflow || !iv1->no_overflow
1818 : 38087 : || code == NE_EXPR || code == EQ_EXPR))
1819 : : return false;
1820 : :
1821 : : /* The meaning of these assumptions is this:
1822 : : if !assumptions
1823 : : then the rest of information does not have to be valid
1824 : : if may_be_zero then the loop does not roll, even if
1825 : : niter != 0. */
1826 : 10445286 : niter->assumptions = boolean_true_node;
1827 : 10445286 : niter->may_be_zero = boolean_false_node;
1828 : 10445286 : niter->niter = NULL_TREE;
1829 : 10445286 : niter->max = 0;
1830 : 10445286 : niter->bound = NULL_TREE;
1831 : 10445286 : niter->cmp = ERROR_MARK;
1832 : :
1833 : : /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
1834 : : the control variable is on lhs. */
1835 : 10445286 : if (code == GE_EXPR || code == GT_EXPR
1836 : 10445286 : || (code == NE_EXPR && integer_zerop (iv0->step)))
1837 : : {
1838 : 2400744 : std::swap (iv0, iv1);
1839 : 2400744 : code = swap_tree_comparison (code);
1840 : : }
1841 : :
1842 : 10445286 : if (POINTER_TYPE_P (type))
1843 : : {
1844 : : /* Comparison of pointers is undefined unless both iv0 and iv1 point
1845 : : to the same object. If they do, the control variable cannot wrap
1846 : : (as wrap around the bounds of memory will never return a pointer
1847 : : that would be guaranteed to point to the same object, even if we
1848 : : avoid undefined behavior by casting to size_t and back). */
1849 : 568820 : iv0->no_overflow = true;
1850 : 568820 : iv1->no_overflow = true;
1851 : : }
1852 : :
1853 : : /* If the control induction variable does not overflow and the only exit
1854 : : from the loop is the one that we analyze, we know it must be taken
1855 : : eventually. */
1856 : 10445286 : if (only_exit)
1857 : : {
1858 : 6761515 : if (!integer_zerop (iv0->step) && iv0->no_overflow)
1859 : : exit_must_be_taken = true;
1860 : 1715817 : else if (!integer_zerop (iv1->step) && iv1->no_overflow)
1861 : 5182689 : exit_must_be_taken = true;
1862 : : }
1863 : :
1864 : : /* We can handle cases which neither of the sides of the comparison is
1865 : : invariant:
1866 : :
1867 : : {iv0.base, iv0.step} cmp_code {iv1.base, iv1.step}
1868 : : as if:
1869 : : {iv0.base, iv0.step - iv1.step} cmp_code {iv1.base, 0}
1870 : :
1871 : : provided that either below condition is satisfied:
1872 : :
1873 : : a) the test is NE_EXPR;
1874 : : b) iv0 and iv1 do not overflow and iv0.step - iv1.step is of
1875 : : the same sign and of less or equal magnitude than iv0.step
1876 : :
1877 : : This rarely occurs in practice, but it is simple enough to manage. */
1878 : 10445286 : if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1879 : : {
1880 : 4918 : tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
1881 : 4918 : tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
1882 : : iv0->step, iv1->step);
1883 : :
1884 : : /* For code other than NE_EXPR we have to ensure moving the evolution
1885 : : of IV1 to that of IV0 does not introduce overflow. */
1886 : 4918 : if (TREE_CODE (step) != INTEGER_CST
1887 : 4918 : || !iv0->no_overflow || !iv1->no_overflow)
1888 : : {
1889 : 1006 : if (code != NE_EXPR)
1890 : : return false;
1891 : 0 : iv0->no_overflow = false;
1892 : : }
1893 : : /* If the new step of IV0 has changed sign or is of greater
1894 : : magnitude then we do not know whether IV0 does overflow
1895 : : and thus the transform is not valid for code other than NE_EXPR. */
1896 : 3912 : else if (tree_int_cst_sign_bit (step) != tree_int_cst_sign_bit (iv0->step)
1897 : 10514 : || wi::gtu_p (wi::abs (wi::to_widest (step)),
1898 : 10514 : wi::abs (wi::to_widest (iv0->step))))
1899 : : {
1900 : 2868 : if (POINTER_TYPE_P (type) && code != NE_EXPR)
1901 : : /* For relational pointer compares we have further guarantees
1902 : : that the pointers always point to the same object (or one
1903 : : after it) and that objects do not cross the zero page. So
1904 : : not only is the transform always valid for relational
1905 : : pointer compares, we also know the resulting IV does not
1906 : : overflow. */
1907 : : ;
1908 : 750 : else if (code != NE_EXPR)
1909 : : return false;
1910 : : else
1911 : 622 : iv0->no_overflow = false;
1912 : : }
1913 : :
1914 : 3162 : iv0->step = step;
1915 : 3162 : iv1->step = build_int_cst (step_type, 0);
1916 : 3162 : iv1->no_overflow = true;
1917 : : }
1918 : :
1919 : : /* If the result of the comparison is a constant, the loop is weird. More
1920 : : precise handling would be possible, but the situation is not common enough
1921 : : to waste time on it. */
1922 : 10443530 : if (integer_zerop (iv0->step) && integer_zerop (iv1->step))
1923 : : return false;
1924 : :
1925 : : /* If the loop exits immediately, there is nothing to do. */
1926 : 10375664 : tree tem = fold_binary (code, boolean_type_node, iv0->base, iv1->base);
1927 : 10375664 : if (tem && integer_zerop (tem))
1928 : : {
1929 : 59139 : if (!every_iteration)
1930 : : return false;
1931 : 59074 : niter->niter = build_int_cst (unsigned_type_for (type), 0);
1932 : 59074 : niter->max = 0;
1933 : 59074 : return true;
1934 : : }
1935 : :
1936 : : /* OK, now we know we have a senseful loop. Handle several cases, depending
1937 : : on what comparison operator is used. */
1938 : 10316525 : bound_difference (loop, iv1->base, iv0->base, &bnds);
1939 : :
1940 : 10316525 : if (dump_file && (dump_flags & TDF_DETAILS))
1941 : : {
1942 : 36521 : fprintf (dump_file,
1943 : : "Analyzing # of iterations of loop %d\n", loop->num);
1944 : :
1945 : 36521 : fprintf (dump_file, " exit condition ");
1946 : 36521 : dump_affine_iv (dump_file, iv0);
1947 : 43323 : fprintf (dump_file, " %s ",
1948 : : code == NE_EXPR ? "!="
1949 : 6802 : : code == LT_EXPR ? "<"
1950 : : : "<=");
1951 : 36521 : dump_affine_iv (dump_file, iv1);
1952 : 36521 : fprintf (dump_file, "\n");
1953 : :
1954 : 36521 : fprintf (dump_file, " bounds on difference of bases: ");
1955 : 36521 : mpz_out_str (dump_file, 10, bnds.below);
1956 : 36521 : fprintf (dump_file, " ... ");
1957 : 36521 : mpz_out_str (dump_file, 10, bnds.up);
1958 : 36521 : fprintf (dump_file, "\n");
1959 : : }
1960 : :
1961 : 10316525 : switch (code)
1962 : : {
1963 : 5939318 : case NE_EXPR:
1964 : 5939318 : gcc_assert (integer_zerop (iv1->step));
1965 : 5939318 : ret = number_of_iterations_ne (loop, type, iv0, iv1->base, niter,
1966 : : exit_must_be_taken, &bnds);
1967 : 5939318 : break;
1968 : :
1969 : 2779732 : case LT_EXPR:
1970 : 2779732 : ret = number_of_iterations_lt (loop, type, iv0, iv1, niter,
1971 : : exit_must_be_taken, &bnds);
1972 : 2779732 : break;
1973 : :
1974 : 1597475 : case LE_EXPR:
1975 : 1597475 : ret = number_of_iterations_le (loop, type, iv0, iv1, niter,
1976 : : exit_must_be_taken, &bnds);
1977 : 1597475 : break;
1978 : :
1979 : 0 : default:
1980 : 0 : gcc_unreachable ();
1981 : : }
1982 : :
1983 : 10316525 : mpz_clear (bnds.up);
1984 : 10316525 : mpz_clear (bnds.below);
1985 : :
1986 : 10316525 : if (dump_file && (dump_flags & TDF_DETAILS))
1987 : : {
1988 : 36521 : if (ret)
1989 : : {
1990 : 36521 : fprintf (dump_file, " result:\n");
1991 : 36521 : if (!integer_nonzerop (niter->assumptions))
1992 : : {
1993 : 152 : fprintf (dump_file, " under assumptions ");
1994 : 152 : print_generic_expr (dump_file, niter->assumptions, TDF_SLIM);
1995 : 152 : fprintf (dump_file, "\n");
1996 : : }
1997 : :
1998 : 36521 : if (!integer_zerop (niter->may_be_zero))
1999 : : {
2000 : 867 : fprintf (dump_file, " zero if ");
2001 : 867 : print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
2002 : 867 : fprintf (dump_file, "\n");
2003 : : }
2004 : :
2005 : 36521 : fprintf (dump_file, " # of iterations ");
2006 : 36521 : print_generic_expr (dump_file, niter->niter, TDF_SLIM);
2007 : 36521 : fprintf (dump_file, ", bounded by ");
2008 : 36521 : print_decu (niter->max, dump_file);
2009 : 36521 : fprintf (dump_file, "\n");
2010 : : }
2011 : : else
2012 : 0 : fprintf (dump_file, " failed\n\n");
2013 : : }
2014 : : return ret;
2015 : : }
2016 : :
2017 : : /* Return an expression that computes the popcount of src. */
2018 : :
2019 : : static tree
2020 : 880 : build_popcount_expr (tree src)
2021 : : {
2022 : 880 : tree fn;
2023 : 880 : bool use_ifn = false;
2024 : 880 : int prec = TYPE_PRECISION (TREE_TYPE (src));
2025 : 880 : int i_prec = TYPE_PRECISION (integer_type_node);
2026 : 880 : int li_prec = TYPE_PRECISION (long_integer_type_node);
2027 : 880 : int lli_prec = TYPE_PRECISION (long_long_integer_type_node);
2028 : :
2029 : 880 : tree utype = unsigned_type_for (TREE_TYPE (src));
2030 : 880 : src = fold_convert (utype, src);
2031 : :
2032 : 880 : if (direct_internal_fn_supported_p (IFN_POPCOUNT, utype, OPTIMIZE_FOR_BOTH))
2033 : : use_ifn = true;
2034 : 832 : else if (prec <= i_prec)
2035 : 748 : fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
2036 : 84 : else if (prec == li_prec)
2037 : 68 : fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
2038 : 16 : else if (prec == lli_prec || prec == 2 * lli_prec)
2039 : 16 : fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
2040 : : else
2041 : : return NULL_TREE;
2042 : :
2043 : 48 : tree call;
2044 : 48 : if (use_ifn)
2045 : 48 : call = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_POPCOUNT,
2046 : : integer_type_node, 1, src);
2047 : 832 : else if (prec == 2 * lli_prec)
2048 : : {
2049 : 16 : tree src1 = fold_convert (long_long_unsigned_type_node,
2050 : : fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
2051 : : unshare_expr (src),
2052 : : build_int_cst (integer_type_node,
2053 : : lli_prec)));
2054 : 16 : tree src2 = fold_convert (long_long_unsigned_type_node, src);
2055 : 16 : tree call1 = build_call_expr (fn, 1, src1);
2056 : 16 : tree call2 = build_call_expr (fn, 1, src2);
2057 : 16 : call = fold_build2 (PLUS_EXPR, integer_type_node, call1, call2);
2058 : : }
2059 : : else
2060 : : {
2061 : 816 : if (prec < i_prec)
2062 : 280 : src = fold_convert (unsigned_type_node, src);
2063 : :
2064 : 816 : call = build_call_expr (fn, 1, src);
2065 : : }
2066 : :
2067 : : return call;
2068 : : }
2069 : :
2070 : : /* Utility function to check if OP is defined by a stmt
2071 : : that is a val - 1. */
2072 : :
2073 : : static bool
2074 : 111363 : ssa_defined_by_minus_one_stmt_p (tree op, tree val)
2075 : : {
2076 : 111363 : gimple *stmt;
2077 : 111363 : return (TREE_CODE (op) == SSA_NAME
2078 : 70347 : && (stmt = SSA_NAME_DEF_STMT (op))
2079 : 70347 : && is_gimple_assign (stmt)
2080 : 59870 : && (gimple_assign_rhs_code (stmt) == PLUS_EXPR)
2081 : 5532 : && val == gimple_assign_rhs1 (stmt)
2082 : 112493 : && integer_minus_onep (gimple_assign_rhs2 (stmt)));
2083 : : }
2084 : :
2085 : : /* See comment below for number_of_iterations_bitcount.
2086 : : For popcount, we have:
2087 : :
2088 : : modify:
2089 : : _1 = iv_1 + -1
2090 : : iv_2 = iv_1 & _1
2091 : :
2092 : : test:
2093 : : if (iv != 0)
2094 : :
2095 : : modification count:
2096 : : popcount (src)
2097 : :
2098 : : */
2099 : :
2100 : : static bool
2101 : 2972131 : number_of_iterations_popcount (loop_p loop, edge exit,
2102 : : enum tree_code code,
2103 : : class tree_niter_desc *niter)
2104 : : {
2105 : 2972131 : bool modify_before_test = true;
2106 : 2972131 : HOST_WIDE_INT max;
2107 : :
2108 : : /* Check that condition for staying inside the loop is like
2109 : : if (iv != 0). */
2110 : 5944262 : gcond *cond_stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (exit->src));
2111 : 2972131 : if (!cond_stmt
2112 : 2972131 : || code != NE_EXPR
2113 : 1453850 : || !integer_zerop (gimple_cond_rhs (cond_stmt))
2114 : 4064038 : || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
2115 : 1880224 : return false;
2116 : :
2117 : 1091907 : tree iv_2 = gimple_cond_lhs (cond_stmt);
2118 : 1091907 : gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
2119 : :
2120 : : /* If the test comes before the iv modification, then these will actually be
2121 : : iv_1 and a phi node. */
2122 : 1091907 : if (gimple_code (iv_2_stmt) == GIMPLE_PHI
2123 : 320643 : && gimple_bb (iv_2_stmt) == loop->header
2124 : 226789 : && gimple_phi_num_args (iv_2_stmt) == 2
2125 : 1318696 : && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
2126 : : loop_latch_edge (loop)->dest_idx))
2127 : : == SSA_NAME))
2128 : : {
2129 : : /* iv_2 is actually one of the inputs to the phi. */
2130 : 211853 : iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx);
2131 : 211853 : iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
2132 : 211853 : modify_before_test = false;
2133 : : }
2134 : :
2135 : : /* Make sure iv_2_stmt is an and stmt (iv_2 = _1 & iv_1). */
2136 : 1091907 : if (!is_gimple_assign (iv_2_stmt)
2137 : 1091907 : || gimple_assign_rhs_code (iv_2_stmt) != BIT_AND_EXPR)
2138 : : return false;
2139 : :
2140 : 56092 : tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
2141 : 56092 : tree _1 = gimple_assign_rhs2 (iv_2_stmt);
2142 : :
2143 : : /* Check that _1 is defined by (_1 = iv_1 + -1).
2144 : : Also make sure that _1 is the same in and_stmt and _1 defining stmt.
2145 : : Also canonicalize if _1 and _b11 are revrsed. */
2146 : 56092 : if (ssa_defined_by_minus_one_stmt_p (iv_1, _1))
2147 : : std::swap (iv_1, _1);
2148 : 55271 : else if (ssa_defined_by_minus_one_stmt_p (_1, iv_1))
2149 : : ;
2150 : : else
2151 : : return false;
2152 : :
2153 : : /* Check the recurrence. */
2154 : 899 : gimple *phi = SSA_NAME_DEF_STMT (iv_1);
2155 : 899 : if (gimple_code (phi) != GIMPLE_PHI
2156 : 899 : || (gimple_bb (phi) != loop_latch_edge (loop)->dest)
2157 : 1779 : || (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
2158 : 19 : return false;
2159 : :
2160 : : /* We found a match. */
2161 : 880 : tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
2162 : 880 : int src_precision = TYPE_PRECISION (TREE_TYPE (src));
2163 : :
2164 : : /* Get the corresponding popcount builtin. */
2165 : 880 : tree expr = build_popcount_expr (src);
2166 : :
2167 : 880 : if (!expr)
2168 : : return false;
2169 : :
2170 : 880 : max = src_precision;
2171 : :
2172 : 880 : tree may_be_zero = boolean_false_node;
2173 : :
2174 : 880 : if (modify_before_test)
2175 : : {
2176 : 392 : expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
2177 : : integer_one_node);
2178 : 392 : max = max - 1;
2179 : 392 : may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
2180 : : build_zero_cst (TREE_TYPE (src)));
2181 : : }
2182 : :
2183 : 880 : expr = fold_convert (unsigned_type_node, expr);
2184 : :
2185 : 880 : niter->assumptions = boolean_true_node;
2186 : 880 : niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
2187 : 880 : niter->niter = simplify_using_initial_conditions(loop, expr);
2188 : :
2189 : 880 : if (TREE_CODE (niter->niter) == INTEGER_CST)
2190 : 34 : niter->max = tree_to_uhwi (niter->niter);
2191 : : else
2192 : 846 : niter->max = max;
2193 : :
2194 : 880 : niter->bound = NULL_TREE;
2195 : 880 : niter->cmp = ERROR_MARK;
2196 : 880 : return true;
2197 : : }
2198 : :
2199 : : /* Return an expression that counts the leading/trailing zeroes of src.
2200 : :
2201 : : If define_at_zero is true, then the built expression will be defined to
2202 : : return the precision of src when src == 0 (using either a conditional
2203 : : expression or a suitable internal function).
2204 : : Otherwise, we can elide the conditional expression and let src = 0 invoke
2205 : : undefined behaviour. */
2206 : :
2207 : : static tree
2208 : 4363 : build_cltz_expr (tree src, bool leading, bool define_at_zero)
2209 : : {
2210 : 4363 : tree fn;
2211 : 4363 : internal_fn ifn = leading ? IFN_CLZ : IFN_CTZ;
2212 : 4363 : bool use_ifn = false;
2213 : 4363 : int prec = TYPE_PRECISION (TREE_TYPE (src));
2214 : 4363 : int i_prec = TYPE_PRECISION (integer_type_node);
2215 : 4363 : int li_prec = TYPE_PRECISION (long_integer_type_node);
2216 : 4363 : int lli_prec = TYPE_PRECISION (long_long_integer_type_node);
2217 : :
2218 : 4363 : tree utype = unsigned_type_for (TREE_TYPE (src));
2219 : 4363 : src = fold_convert (utype, src);
2220 : :
2221 : 4363 : if (direct_internal_fn_supported_p (ifn, utype, OPTIMIZE_FOR_BOTH))
2222 : : use_ifn = true;
2223 : 1389 : else if (prec <= i_prec)
2224 : 452 : fn = leading ? builtin_decl_implicit (BUILT_IN_CLZ)
2225 : 1389 : : builtin_decl_implicit (BUILT_IN_CTZ);
2226 : 937 : else if (prec == li_prec)
2227 : 0 : fn = leading ? builtin_decl_implicit (BUILT_IN_CLZL)
2228 : 1389 : : builtin_decl_implicit (BUILT_IN_CTZL);
2229 : 937 : else if (prec == lli_prec || prec == 2 * lli_prec)
2230 : 937 : fn = leading ? builtin_decl_implicit (BUILT_IN_CLZLL)
2231 : 1389 : : builtin_decl_implicit (BUILT_IN_CTZLL);
2232 : : else
2233 : : return NULL_TREE;
2234 : :
2235 : 2974 : tree call;
2236 : 2974 : if (use_ifn)
2237 : : {
2238 : 2974 : int val;
2239 : 2974 : int optab_defined_at_zero
2240 : : = (leading
2241 : 5128 : ? CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (utype), val)
2242 : 1640 : : CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (utype), val));
2243 : 2974 : tree arg2 = NULL_TREE;
2244 : 2974 : if (define_at_zero && optab_defined_at_zero == 2 && val == prec)
2245 : 0 : arg2 = build_int_cst (integer_type_node, val);
2246 : 2974 : call = build_call_expr_internal_loc (UNKNOWN_LOCATION, ifn,
2247 : : integer_type_node, arg2 ? 2 : 1,
2248 : : src, arg2);
2249 : 2974 : if (define_at_zero && arg2 == NULL_TREE)
2250 : : {
2251 : 2147 : tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
2252 : : build_zero_cst (TREE_TYPE (src)));
2253 : 2147 : call = fold_build3 (COND_EXPR, integer_type_node, is_zero, call,
2254 : : build_int_cst (integer_type_node, prec));
2255 : : }
2256 : : }
2257 : 1389 : else if (prec == 2 * lli_prec)
2258 : : {
2259 : 356 : tree src1 = fold_convert (long_long_unsigned_type_node,
2260 : : fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
2261 : : unshare_expr (src),
2262 : : build_int_cst (integer_type_node,
2263 : : lli_prec)));
2264 : 356 : tree src2 = fold_convert (long_long_unsigned_type_node, src);
2265 : : /* We count the zeroes in src1, and add the number in src2 when src1
2266 : : is 0. */
2267 : 356 : if (!leading)
2268 : 0 : std::swap (src1, src2);
2269 : 356 : tree call1 = build_call_expr (fn, 1, src1);
2270 : 356 : tree call2 = build_call_expr (fn, 1, src2);
2271 : 356 : if (define_at_zero)
2272 : : {
2273 : 356 : tree is_zero2 = fold_build2 (NE_EXPR, boolean_type_node, src2,
2274 : : build_zero_cst (TREE_TYPE (src2)));
2275 : 356 : call2 = fold_build3 (COND_EXPR, integer_type_node, is_zero2, call2,
2276 : : build_int_cst (integer_type_node, lli_prec));
2277 : : }
2278 : 356 : tree is_zero1 = fold_build2 (NE_EXPR, boolean_type_node, src1,
2279 : : build_zero_cst (TREE_TYPE (src1)));
2280 : 356 : call = fold_build3 (COND_EXPR, integer_type_node, is_zero1, call1,
2281 : : fold_build2 (PLUS_EXPR, integer_type_node, call2,
2282 : : build_int_cst (integer_type_node,
2283 : : lli_prec)));
2284 : : }
2285 : : else
2286 : : {
2287 : 1033 : if (prec < i_prec)
2288 : 452 : src = fold_convert (unsigned_type_node, src);
2289 : :
2290 : 1033 : call = build_call_expr (fn, 1, src);
2291 : 1033 : if (leading && prec < i_prec)
2292 : 375 : call = fold_build2 (MINUS_EXPR, integer_type_node, call,
2293 : : build_int_cst (integer_type_node, i_prec - prec));
2294 : 1033 : if (define_at_zero)
2295 : : {
2296 : 900 : tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
2297 : : build_zero_cst (TREE_TYPE (src)));
2298 : 900 : call = fold_build3 (COND_EXPR, integer_type_node, is_zero, call,
2299 : : build_int_cst (integer_type_node, prec));
2300 : : }
2301 : : }
2302 : :
2303 : : return call;
2304 : : }
2305 : :
2306 : : /* See comment below for number_of_iterations_bitcount.
2307 : : For c[lt]z, we have:
2308 : :
2309 : : modify:
2310 : : iv_2 = iv_1 << 1 OR iv_1 >> 1
2311 : :
2312 : : test:
2313 : : if (iv & 1 << (prec-1)) OR (iv & 1)
2314 : :
2315 : : modification count:
2316 : : src precision - c[lt]z (src)
2317 : :
2318 : : */
2319 : :
2320 : : static bool
2321 : 5596078 : number_of_iterations_cltz (loop_p loop, edge exit,
2322 : : enum tree_code code,
2323 : : class tree_niter_desc *niter)
2324 : : {
2325 : 5596078 : bool modify_before_test = true;
2326 : 5596078 : HOST_WIDE_INT max;
2327 : 5596078 : int checked_bit;
2328 : 5596078 : tree iv_2;
2329 : :
2330 : : /* Check that condition for staying inside the loop is like
2331 : : if (iv == 0). */
2332 : 11192156 : gcond *cond_stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (exit->src));
2333 : 5596078 : if (!cond_stmt
2334 : 5596078 : || (code != EQ_EXPR && code != GE_EXPR)
2335 : 2860216 : || !integer_zerop (gimple_cond_rhs (cond_stmt))
2336 : 1153739 : || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
2337 : 4442516 : return false;
2338 : :
2339 : 1153562 : if (code == EQ_EXPR)
2340 : : {
2341 : : /* Make sure we check a bitwise and with a suitable constant */
2342 : 1079828 : gimple *and_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (cond_stmt));
2343 : 1079828 : if (!is_gimple_assign (and_stmt)
2344 : 621048 : || gimple_assign_rhs_code (and_stmt) != BIT_AND_EXPR
2345 : 121209 : || !integer_pow2p (gimple_assign_rhs2 (and_stmt))
2346 : 1098369 : || TREE_CODE (gimple_assign_rhs1 (and_stmt)) != SSA_NAME)
2347 : 1061287 : return false;
2348 : :
2349 : 18541 : checked_bit = tree_log2 (gimple_assign_rhs2 (and_stmt));
2350 : :
2351 : 18541 : iv_2 = gimple_assign_rhs1 (and_stmt);
2352 : : }
2353 : : else
2354 : : {
2355 : : /* We have a GE_EXPR - a signed comparison with zero is equivalent to
2356 : : testing the leading bit, so check for this pattern too. */
2357 : :
2358 : 73734 : iv_2 = gimple_cond_lhs (cond_stmt);
2359 : 73734 : tree test_value_type = TREE_TYPE (iv_2);
2360 : :
2361 : 73734 : if (TYPE_UNSIGNED (test_value_type))
2362 : : return false;
2363 : :
2364 : 73734 : gimple *test_value_stmt = SSA_NAME_DEF_STMT (iv_2);
2365 : :
2366 : 73734 : if (is_gimple_assign (test_value_stmt)
2367 : 73734 : && gimple_assign_rhs_code (test_value_stmt) == NOP_EXPR)
2368 : : {
2369 : : /* If the test value comes from a NOP_EXPR, then we need to unwrap
2370 : : this. We conservatively require that both types have the same
2371 : : precision. */
2372 : 7555 : iv_2 = gimple_assign_rhs1 (test_value_stmt);
2373 : 7555 : tree rhs_type = TREE_TYPE (iv_2);
2374 : 7555 : if (TREE_CODE (iv_2) != SSA_NAME
2375 : 7555 : || TREE_CODE (rhs_type) != INTEGER_TYPE
2376 : 15110 : || (TYPE_PRECISION (rhs_type)
2377 : 7555 : != TYPE_PRECISION (test_value_type)))
2378 : : return false;
2379 : : }
2380 : :
2381 : 73336 : checked_bit = TYPE_PRECISION (test_value_type) - 1;
2382 : : }
2383 : :
2384 : 91877 : gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
2385 : :
2386 : : /* If the test comes before the iv modification, then these will actually be
2387 : : iv_1 and a phi node. */
2388 : 91877 : if (gimple_code (iv_2_stmt) == GIMPLE_PHI
2389 : 25751 : && gimple_bb (iv_2_stmt) == loop->header
2390 : 15192 : && gimple_phi_num_args (iv_2_stmt) == 2
2391 : 107069 : && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
2392 : : loop_latch_edge (loop)->dest_idx))
2393 : : == SSA_NAME))
2394 : : {
2395 : : /* iv_2 is actually one of the inputs to the phi. */
2396 : 15180 : iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx);
2397 : 15180 : iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
2398 : 15180 : modify_before_test = false;
2399 : : }
2400 : :
2401 : : /* Make sure iv_2_stmt is a logical shift by one stmt:
2402 : : iv_2 = iv_1 {<<|>>} 1 */
2403 : 91877 : if (!is_gimple_assign (iv_2_stmt)
2404 : 62388 : || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR
2405 : 62093 : && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR
2406 : 6082 : || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
2407 : 96600 : || !integer_onep (gimple_assign_rhs2 (iv_2_stmt)))
2408 : 90917 : return false;
2409 : :
2410 : 960 : bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR);
2411 : :
2412 : 960 : tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
2413 : :
2414 : : /* Check the recurrence. */
2415 : 960 : gimple *phi = SSA_NAME_DEF_STMT (iv_1);
2416 : 960 : if (gimple_code (phi) != GIMPLE_PHI
2417 : 960 : || (gimple_bb (phi) != loop_latch_edge (loop)->dest)
2418 : 1920 : || (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
2419 : 0 : return false;
2420 : :
2421 : : /* We found a match. */
2422 : 960 : tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
2423 : 960 : int src_precision = TYPE_PRECISION (TREE_TYPE (src));
2424 : :
2425 : : /* Apply any needed preprocessing to src. */
2426 : 960 : int num_ignored_bits;
2427 : 960 : if (left_shift)
2428 : 295 : num_ignored_bits = src_precision - checked_bit - 1;
2429 : : else
2430 : : num_ignored_bits = checked_bit;
2431 : :
2432 : 960 : if (modify_before_test)
2433 : 518 : num_ignored_bits++;
2434 : :
2435 : 960 : if (num_ignored_bits != 0)
2436 : 1098 : src = fold_build2 (left_shift ? LSHIFT_EXPR : RSHIFT_EXPR,
2437 : : TREE_TYPE (src), src,
2438 : : build_int_cst (integer_type_node, num_ignored_bits));
2439 : :
2440 : : /* Get the corresponding c[lt]z builtin. */
2441 : 960 : tree expr = build_cltz_expr (src, left_shift, false);
2442 : :
2443 : 960 : if (!expr)
2444 : : return false;
2445 : :
2446 : 960 : max = src_precision - num_ignored_bits - 1;
2447 : :
2448 : 960 : expr = fold_convert (unsigned_type_node, expr);
2449 : :
2450 : 960 : tree assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
2451 : : build_zero_cst (TREE_TYPE (src)));
2452 : :
2453 : 960 : niter->assumptions = simplify_using_initial_conditions (loop, assumptions);
2454 : 960 : niter->may_be_zero = boolean_false_node;
2455 : 960 : niter->niter = simplify_using_initial_conditions (loop, expr);
2456 : :
2457 : 960 : if (TREE_CODE (niter->niter) == INTEGER_CST)
2458 : 0 : niter->max = tree_to_uhwi (niter->niter);
2459 : : else
2460 : 960 : niter->max = max;
2461 : :
2462 : 960 : niter->bound = NULL_TREE;
2463 : 960 : niter->cmp = ERROR_MARK;
2464 : :
2465 : 960 : return true;
2466 : : }
2467 : :
2468 : : /* See comment below for number_of_iterations_bitcount.
2469 : : For c[lt]z complement, we have:
2470 : :
2471 : : modify:
2472 : : iv_2 = iv_1 >> 1 OR iv_1 << 1
2473 : :
2474 : : test:
2475 : : if (iv != 0)
2476 : :
2477 : : modification count:
2478 : : src precision - c[lt]z (src)
2479 : :
2480 : : */
2481 : :
2482 : : static bool
2483 : 2971156 : number_of_iterations_cltz_complement (loop_p loop, edge exit,
2484 : : enum tree_code code,
2485 : : class tree_niter_desc *niter)
2486 : : {
2487 : 2971156 : bool modify_before_test = true;
2488 : 2971156 : HOST_WIDE_INT max;
2489 : :
2490 : : /* Check that condition for staying inside the loop is like
2491 : : if (iv != 0). */
2492 : 5942312 : gcond *cond_stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (exit->src));
2493 : 2971156 : if (!cond_stmt
2494 : 2971156 : || code != NE_EXPR
2495 : 1452970 : || !integer_zerop (gimple_cond_rhs (cond_stmt))
2496 : 4062183 : || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
2497 : 1880129 : return false;
2498 : :
2499 : 1091027 : tree iv_2 = gimple_cond_lhs (cond_stmt);
2500 : 1091027 : gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
2501 : :
2502 : : /* If the test comes before the iv modification, then these will actually be
2503 : : iv_1 and a phi node. */
2504 : 1091027 : if (gimple_code (iv_2_stmt) == GIMPLE_PHI
2505 : 320155 : && gimple_bb (iv_2_stmt) == loop->header
2506 : 226301 : && gimple_phi_num_args (iv_2_stmt) == 2
2507 : 1317328 : && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
2508 : : loop_latch_edge (loop)->dest_idx))
2509 : : == SSA_NAME))
2510 : : {
2511 : : /* iv_2 is actually one of the inputs to the phi. */
2512 : 211365 : iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx);
2513 : 211365 : iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
2514 : 211365 : modify_before_test = false;
2515 : : }
2516 : :
2517 : : /* Make sure iv_2_stmt is a logical shift by one stmt:
2518 : : iv_2 = iv_1 {>>|<<} 1 */
2519 : 1091027 : if (!is_gimple_assign (iv_2_stmt)
2520 : 781927 : || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR
2521 : 781038 : && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR
2522 : 8550 : || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
2523 : 1099573 : || !integer_onep (gimple_assign_rhs2 (iv_2_stmt)))
2524 : 1086817 : return false;
2525 : :
2526 : 4210 : bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR);
2527 : :
2528 : 4210 : tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
2529 : :
2530 : : /* Check the recurrence. */
2531 : 4210 : gimple *phi = SSA_NAME_DEF_STMT (iv_1);
2532 : 4210 : if (gimple_code (phi) != GIMPLE_PHI
2533 : 3576 : || (gimple_bb (phi) != loop_latch_edge (loop)->dest)
2534 : 7778 : || (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
2535 : 807 : return false;
2536 : :
2537 : : /* We found a match. */
2538 : 3403 : tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
2539 : 3403 : int src_precision = TYPE_PRECISION (TREE_TYPE (src));
2540 : :
2541 : : /* Get the corresponding c[lt]z builtin. */
2542 : 3403 : tree expr = build_cltz_expr (src, !left_shift, true);
2543 : :
2544 : 3403 : if (!expr)
2545 : : return false;
2546 : :
2547 : 3403 : expr = fold_build2 (MINUS_EXPR, integer_type_node,
2548 : : build_int_cst (integer_type_node, src_precision),
2549 : : expr);
2550 : :
2551 : 3403 : max = src_precision;
2552 : :
2553 : 3403 : tree may_be_zero = boolean_false_node;
2554 : :
2555 : 3403 : if (modify_before_test)
2556 : : {
2557 : 2297 : expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
2558 : : integer_one_node);
2559 : 2297 : max = max - 1;
2560 : 2297 : may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
2561 : : build_zero_cst (TREE_TYPE (src)));
2562 : : }
2563 : :
2564 : 3403 : expr = fold_convert (unsigned_type_node, expr);
2565 : :
2566 : 3403 : niter->assumptions = boolean_true_node;
2567 : 3403 : niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
2568 : 3403 : niter->niter = simplify_using_initial_conditions (loop, expr);
2569 : :
2570 : 3403 : if (TREE_CODE (niter->niter) == INTEGER_CST)
2571 : 51 : niter->max = tree_to_uhwi (niter->niter);
2572 : : else
2573 : 3352 : niter->max = max;
2574 : :
2575 : 3403 : niter->bound = NULL_TREE;
2576 : 3403 : niter->cmp = ERROR_MARK;
2577 : 3403 : return true;
2578 : : }
2579 : :
2580 : : /* See if LOOP contains a bit counting idiom. The idiom consists of two parts:
2581 : : 1. A modification to the induction variabler;.
2582 : : 2. A test to determine whether or not to exit the loop.
2583 : :
2584 : : These can come in either order - i.e.:
2585 : :
2586 : : <bb 3>
2587 : : iv_1 = PHI <src(2), iv_2(4)>
2588 : : if (test (iv_1))
2589 : : goto <bb 4>
2590 : : else
2591 : : goto <bb 5>
2592 : :
2593 : : <bb 4>
2594 : : iv_2 = modify (iv_1)
2595 : : goto <bb 3>
2596 : :
2597 : : OR
2598 : :
2599 : : <bb 3>
2600 : : iv_1 = PHI <src(2), iv_2(4)>
2601 : : iv_2 = modify (iv_1)
2602 : :
2603 : : <bb 4>
2604 : : if (test (iv_2))
2605 : : goto <bb 3>
2606 : : else
2607 : : goto <bb 5>
2608 : :
2609 : : The second form can be generated by copying the loop header out of the loop.
2610 : :
2611 : : In the first case, the number of latch executions will be equal to the
2612 : : number of induction variable modifications required before the test fails.
2613 : :
2614 : : In the second case (modify_before_test), if we assume that the number of
2615 : : modifications required before the test fails is nonzero, then the number of
2616 : : latch executions will be one less than this number.
2617 : :
2618 : : If we recognise the pattern, then we update niter accordingly, and return
2619 : : true. */
2620 : :
2621 : : static bool
2622 : 2972131 : number_of_iterations_bitcount (loop_p loop, edge exit,
2623 : : enum tree_code code,
2624 : : class tree_niter_desc *niter)
2625 : : {
2626 : 2972131 : return (number_of_iterations_popcount (loop, exit, code, niter)
2627 : 2971251 : || number_of_iterations_cltz (loop, exit, code, niter)
2628 : 5943287 : || number_of_iterations_cltz_complement (loop, exit, code, niter));
2629 : : }
2630 : :
2631 : : /* Substitute NEW_TREE for OLD in EXPR and fold the result.
2632 : : If VALUEIZE is non-NULL then OLD and NEW_TREE are ignored and instead
2633 : : all SSA names are replaced with the result of calling the VALUEIZE
2634 : : function with the SSA name as argument. */
2635 : :
2636 : : tree
2637 : 91217822 : simplify_replace_tree (tree expr, tree old, tree new_tree,
2638 : : tree (*valueize) (tree, void*), void *context,
2639 : : bool do_fold)
2640 : : {
2641 : 91217822 : unsigned i, n;
2642 : 91217822 : tree ret = NULL_TREE, e, se;
2643 : :
2644 : 91217822 : if (!expr)
2645 : : return NULL_TREE;
2646 : :
2647 : : /* Do not bother to replace constants. */
2648 : 25404095 : if (CONSTANT_CLASS_P (expr))
2649 : : return expr;
2650 : :
2651 : 19219063 : if (valueize)
2652 : : {
2653 : 89391 : if (TREE_CODE (expr) == SSA_NAME)
2654 : : {
2655 : 31523 : new_tree = valueize (expr, context);
2656 : 31523 : if (new_tree != expr)
2657 : : return new_tree;
2658 : : }
2659 : : }
2660 : 19129672 : else if (expr == old
2661 : 19129672 : || operand_equal_p (expr, old, 0))
2662 : 193270 : return unshare_expr (new_tree);
2663 : :
2664 : 19024643 : if (!EXPR_P (expr))
2665 : : return expr;
2666 : :
2667 : 10108468 : n = TREE_OPERAND_LENGTH (expr);
2668 : 28455723 : for (i = 0; i < n; i++)
2669 : : {
2670 : 18347255 : e = TREE_OPERAND (expr, i);
2671 : 18347255 : se = simplify_replace_tree (e, old, new_tree, valueize, context, do_fold);
2672 : 18347255 : if (e == se)
2673 : 18097886 : continue;
2674 : :
2675 : 249369 : if (!ret)
2676 : 248769 : ret = copy_node (expr);
2677 : :
2678 : 249369 : TREE_OPERAND (ret, i) = se;
2679 : : }
2680 : :
2681 : 10108468 : return (ret ? (do_fold ? fold (ret) : ret) : expr);
2682 : : }
2683 : :
2684 : : /* Expand definitions of ssa names in EXPR as long as they are simple
2685 : : enough, and return the new expression. If STOP is specified, stop
2686 : : expanding if EXPR equals to it. */
2687 : :
2688 : : static tree
2689 : 98163620 : expand_simple_operations (tree expr, tree stop, hash_map<tree, tree> &cache)
2690 : : {
2691 : 98701588 : unsigned i, n;
2692 : 98701588 : tree ret = NULL_TREE, e, ee, e1;
2693 : 98701588 : enum tree_code code;
2694 : 98701588 : gimple *stmt;
2695 : :
2696 : 98701588 : if (expr == NULL_TREE)
2697 : : return expr;
2698 : :
2699 : 98701588 : if (is_gimple_min_invariant (expr))
2700 : : return expr;
2701 : :
2702 : 67600082 : code = TREE_CODE (expr);
2703 : 67600082 : if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
2704 : : {
2705 : 17327386 : n = TREE_OPERAND_LENGTH (expr);
2706 : 48095780 : for (i = 0; i < n; i++)
2707 : : {
2708 : 30768394 : e = TREE_OPERAND (expr, i);
2709 : 30768394 : if (!e)
2710 : 27803940 : continue;
2711 : : /* SCEV analysis feeds us with a proper expression
2712 : : graph matching the SSA graph. Avoid turning it
2713 : : into a tree here, thus handle tree sharing
2714 : : properly.
2715 : : ??? The SSA walk below still turns the SSA graph
2716 : : into a tree but until we find a testcase do not
2717 : : introduce additional tree sharing here. */
2718 : 30759842 : bool existed_p;
2719 : 30759842 : tree &cee = cache.get_or_insert (e, &existed_p);
2720 : 30759842 : if (existed_p)
2721 : 153218 : ee = cee;
2722 : : else
2723 : : {
2724 : 30606624 : cee = e;
2725 : 30606624 : ee = expand_simple_operations (e, stop, cache);
2726 : 30606624 : if (ee != e)
2727 : 2963859 : *cache.get (e) = ee;
2728 : : }
2729 : 30759842 : if (e == ee)
2730 : 27795388 : continue;
2731 : :
2732 : 2964454 : if (!ret)
2733 : 2800607 : ret = copy_node (expr);
2734 : :
2735 : 2964454 : TREE_OPERAND (ret, i) = ee;
2736 : : }
2737 : :
2738 : 17327386 : if (!ret)
2739 : : return expr;
2740 : :
2741 : 2800607 : fold_defer_overflow_warnings ();
2742 : 2800607 : ret = fold (ret);
2743 : 2800607 : fold_undefer_and_ignore_overflow_warnings ();
2744 : 2800607 : return ret;
2745 : : }
2746 : :
2747 : : /* Stop if it's not ssa name or the one we don't want to expand. */
2748 : 50272696 : if (TREE_CODE (expr) != SSA_NAME || expr == stop)
2749 : : return expr;
2750 : :
2751 : 50090981 : stmt = SSA_NAME_DEF_STMT (expr);
2752 : 50090981 : if (gimple_code (stmt) == GIMPLE_PHI)
2753 : : {
2754 : 14106162 : basic_block src, dest;
2755 : :
2756 : 14106162 : if (gimple_phi_num_args (stmt) != 1)
2757 : : return expr;
2758 : 471660 : e = PHI_ARG_DEF (stmt, 0);
2759 : :
2760 : : /* Avoid propagating through loop exit phi nodes, which
2761 : : could break loop-closed SSA form restrictions. */
2762 : 471660 : dest = gimple_bb (stmt);
2763 : 471660 : src = single_pred (dest);
2764 : 471660 : if (TREE_CODE (e) == SSA_NAME
2765 : 470868 : && src->loop_father != dest->loop_father)
2766 : : return expr;
2767 : :
2768 : : return expand_simple_operations (e, stop, cache);
2769 : : }
2770 : 35984819 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
2771 : : return expr;
2772 : :
2773 : : /* Avoid expanding to expressions that contain SSA names that need
2774 : : to take part in abnormal coalescing. */
2775 : 31473898 : ssa_op_iter iter;
2776 : 64817964 : FOR_EACH_SSA_TREE_OPERAND (e, stmt, iter, SSA_OP_USE)
2777 : 33344430 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (e))
2778 : : return expr;
2779 : :
2780 : 31473534 : e = gimple_assign_rhs1 (stmt);
2781 : 31473534 : code = gimple_assign_rhs_code (stmt);
2782 : 31473534 : if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
2783 : : {
2784 : 12540446 : if (is_gimple_min_invariant (e))
2785 : : return e;
2786 : :
2787 : 12506529 : if (code == SSA_NAME)
2788 : : return expand_simple_operations (e, stop, cache);
2789 : 12175959 : else if (code == ADDR_EXPR)
2790 : : {
2791 : 9536 : poly_int64 offset;
2792 : 9536 : tree base = get_addr_base_and_unit_offset (TREE_OPERAND (e, 0),
2793 : : &offset);
2794 : 9536 : if (base
2795 : 7910 : && TREE_CODE (base) == MEM_REF)
2796 : : {
2797 : 7910 : ee = expand_simple_operations (TREE_OPERAND (base, 0), stop,
2798 : : cache);
2799 : 7910 : return fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (expr), ee,
2800 : : wide_int_to_tree (sizetype,
2801 : : mem_ref_offset (base)
2802 : : + offset));
2803 : : }
2804 : : }
2805 : :
2806 : 12168049 : return expr;
2807 : : }
2808 : :
2809 : 18933088 : switch (code)
2810 : : {
2811 : 4247626 : CASE_CONVERT:
2812 : : /* Casts are simple. */
2813 : 4247626 : ee = expand_simple_operations (e, stop, cache);
2814 : 4247626 : return fold_build1 (code, TREE_TYPE (expr), ee);
2815 : :
2816 : 10915800 : case PLUS_EXPR:
2817 : 10915800 : case MINUS_EXPR:
2818 : 10915800 : case MULT_EXPR:
2819 : 21831544 : if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (expr))
2820 : 21827079 : && TYPE_OVERFLOW_TRAPS (TREE_TYPE (expr)))
2821 : : return expr;
2822 : : /* Fallthru. */
2823 : 11414466 : case POINTER_PLUS_EXPR:
2824 : : /* And increments and decrements by a constant are simple. */
2825 : 11414466 : e1 = gimple_assign_rhs2 (stmt);
2826 : 11414466 : if (!is_gimple_min_invariant (e1))
2827 : : return expr;
2828 : :
2829 : 5719689 : ee = expand_simple_operations (e, stop, cache);
2830 : 5719689 : return fold_build2 (code, TREE_TYPE (expr), ee, e1);
2831 : :
2832 : : default:
2833 : : return expr;
2834 : : }
2835 : : }
2836 : :
2837 : : tree
2838 : 57581771 : expand_simple_operations (tree expr, tree stop)
2839 : : {
2840 : 57581771 : hash_map<tree, tree> cache;
2841 : 57581771 : return expand_simple_operations (expr, stop, cache);
2842 : 57581771 : }
2843 : :
2844 : : /* Tries to simplify EXPR using the condition COND. Returns the simplified
2845 : : expression (or EXPR unchanged, if no simplification was possible). */
2846 : :
2847 : : static tree
2848 : 7784155 : tree_simplify_using_condition_1 (tree cond, tree expr)
2849 : : {
2850 : 7784155 : bool changed;
2851 : 7784155 : tree e, e0, e1, e2, notcond;
2852 : 7784155 : enum tree_code code = TREE_CODE (expr);
2853 : :
2854 : 7784155 : if (code == INTEGER_CST)
2855 : : return expr;
2856 : :
2857 : 7780483 : if (code == TRUTH_OR_EXPR
2858 : 7780483 : || code == TRUTH_AND_EXPR
2859 : 7780483 : || code == COND_EXPR)
2860 : : {
2861 : 68146 : changed = false;
2862 : :
2863 : 68146 : e0 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 0));
2864 : 68146 : if (TREE_OPERAND (expr, 0) != e0)
2865 : 659 : changed = true;
2866 : :
2867 : 68146 : e1 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 1));
2868 : 68146 : if (TREE_OPERAND (expr, 1) != e1)
2869 : 0 : changed = true;
2870 : :
2871 : 68146 : if (code == COND_EXPR)
2872 : : {
2873 : 2791 : e2 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 2));
2874 : 2791 : if (TREE_OPERAND (expr, 2) != e2)
2875 : : changed = true;
2876 : : }
2877 : : else
2878 : : e2 = NULL_TREE;
2879 : :
2880 : 68146 : if (changed)
2881 : : {
2882 : 659 : if (code == COND_EXPR)
2883 : 526 : expr = fold_build3 (code, boolean_type_node, e0, e1, e2);
2884 : : else
2885 : 133 : expr = fold_build2 (code, boolean_type_node, e0, e1);
2886 : : }
2887 : :
2888 : 68146 : return expr;
2889 : : }
2890 : :
2891 : : /* In case COND is equality, we may be able to simplify EXPR by copy/constant
2892 : : propagation, and vice versa. Fold does not handle this, since it is
2893 : : considered too expensive. */
2894 : 7712337 : if (TREE_CODE (cond) == EQ_EXPR)
2895 : : {
2896 : 1782855 : e0 = TREE_OPERAND (cond, 0);
2897 : 1782855 : e1 = TREE_OPERAND (cond, 1);
2898 : :
2899 : : /* We know that e0 == e1. Check whether we cannot simplify expr
2900 : : using this fact. */
2901 : 1782855 : e = simplify_replace_tree (expr, e0, e1);
2902 : 1782855 : if (integer_zerop (e) || integer_nonzerop (e))
2903 : 2247 : return e;
2904 : :
2905 : 1780608 : e = simplify_replace_tree (expr, e1, e0);
2906 : 1780608 : if (integer_zerop (e) || integer_nonzerop (e))
2907 : 1104 : return e;
2908 : : }
2909 : 7708986 : if (TREE_CODE (expr) == EQ_EXPR)
2910 : : {
2911 : 770948 : e0 = TREE_OPERAND (expr, 0);
2912 : 770948 : e1 = TREE_OPERAND (expr, 1);
2913 : :
2914 : : /* If e0 == e1 (EXPR) implies !COND, then EXPR cannot be true. */
2915 : 770948 : e = simplify_replace_tree (cond, e0, e1);
2916 : 770948 : if (integer_zerop (e))
2917 : : return e;
2918 : 746600 : e = simplify_replace_tree (cond, e1, e0);
2919 : 746600 : if (integer_zerop (e))
2920 : : return e;
2921 : : }
2922 : 7684638 : if (TREE_CODE (expr) == NE_EXPR)
2923 : : {
2924 : 921385 : e0 = TREE_OPERAND (expr, 0);
2925 : 921385 : e1 = TREE_OPERAND (expr, 1);
2926 : :
2927 : : /* If e0 == e1 (!EXPR) implies !COND, then EXPR must be true. */
2928 : 921385 : e = simplify_replace_tree (cond, e0, e1);
2929 : 921385 : if (integer_zerop (e))
2930 : 19143 : return boolean_true_node;
2931 : 902242 : e = simplify_replace_tree (cond, e1, e0);
2932 : 902242 : if (integer_zerop (e))
2933 : 0 : return boolean_true_node;
2934 : : }
2935 : :
2936 : : /* Check whether COND ==> EXPR. */
2937 : 7665495 : notcond = invert_truthvalue (cond);
2938 : 7665495 : e = fold_binary (TRUTH_OR_EXPR, boolean_type_node, notcond, expr);
2939 : 7665495 : if (e && integer_nonzerop (e))
2940 : : return e;
2941 : :
2942 : : /* Check whether COND ==> not EXPR. */
2943 : 7660176 : e = fold_binary (TRUTH_AND_EXPR, boolean_type_node, cond, expr);
2944 : 7660176 : if (e && integer_zerop (e))
2945 : : return e;
2946 : :
2947 : : return expr;
2948 : : }
2949 : :
2950 : : /* Tries to simplify EXPR using the condition COND. Returns the simplified
2951 : : expression (or EXPR unchanged, if no simplification was possible).
2952 : : Wrapper around tree_simplify_using_condition_1 that ensures that chains
2953 : : of simple operations in definitions of ssa names in COND are expanded,
2954 : : so that things like casts or incrementing the value of the bound before
2955 : : the loop do not cause us to fail. */
2956 : :
2957 : : static tree
2958 : 7645072 : tree_simplify_using_condition (tree cond, tree expr)
2959 : : {
2960 : 7645072 : cond = expand_simple_operations (cond);
2961 : :
2962 : 7645072 : return tree_simplify_using_condition_1 (cond, expr);
2963 : : }
2964 : :
2965 : : /* Tries to simplify EXPR using the conditions on entry to LOOP.
2966 : : Returns the simplified expression (or EXPR unchanged, if no
2967 : : simplification was possible). */
2968 : :
2969 : : tree
2970 : 22734849 : simplify_using_initial_conditions (class loop *loop, tree expr)
2971 : : {
2972 : 22734849 : edge e;
2973 : 22734849 : basic_block bb;
2974 : 22734849 : tree cond, expanded, backup;
2975 : 22734849 : int cnt = 0;
2976 : :
2977 : 22734849 : if (TREE_CODE (expr) == INTEGER_CST)
2978 : : return expr;
2979 : :
2980 : 2475752 : backup = expanded = expand_simple_operations (expr);
2981 : :
2982 : : /* Limit walking the dominators to avoid quadraticness in
2983 : : the number of BBs times the number of loops in degenerate
2984 : : cases. */
2985 : 2475752 : for (bb = loop->header;
2986 : 24486419 : bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
2987 : 22010667 : bb = get_immediate_dominator (CDI_DOMINATORS, bb))
2988 : : {
2989 : 44204584 : if (!single_pred_p (bb))
2990 : 11120623 : continue;
2991 : 10981669 : e = single_pred_edge (bb);
2992 : :
2993 : 10981669 : if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2994 : 3336597 : continue;
2995 : :
2996 : 15290144 : gcond *stmt = as_a <gcond *> (*gsi_last_bb (e->src));
2997 : 7645072 : cond = fold_build2 (gimple_cond_code (stmt),
2998 : : boolean_type_node,
2999 : : gimple_cond_lhs (stmt),
3000 : : gimple_cond_rhs (stmt));
3001 : 7645072 : if (e->flags & EDGE_FALSE_VALUE)
3002 : 4322798 : cond = invert_truthvalue (cond);
3003 : 7645072 : expanded = tree_simplify_using_condition (cond, expanded);
3004 : : /* Break if EXPR is simplified to const values. */
3005 : 7645072 : if (expanded
3006 : 7645072 : && (integer_zerop (expanded) || integer_nonzerop (expanded)))
3007 : 91625 : return expanded;
3008 : :
3009 : 7553447 : ++cnt;
3010 : : }
3011 : :
3012 : : /* Return the original expression if no simplification is done. */
3013 : 2384127 : return operand_equal_p (backup, expanded, 0) ? expr : expanded;
3014 : : }
3015 : :
3016 : : /* Tries to simplify EXPR using the evolutions of the loop invariants
3017 : : in the superloops of LOOP. Returns the simplified expression
3018 : : (or EXPR unchanged, if no simplification was possible). */
3019 : :
3020 : : static tree
3021 : 4891398 : simplify_using_outer_evolutions (class loop *loop, tree expr)
3022 : : {
3023 : 4891398 : enum tree_code code = TREE_CODE (expr);
3024 : 4891398 : bool changed;
3025 : 4891398 : tree e, e0, e1, e2;
3026 : :
3027 : 4891398 : if (is_gimple_min_invariant (expr))
3028 : : return expr;
3029 : :
3030 : 1163490 : if (code == TRUTH_OR_EXPR
3031 : 1163490 : || code == TRUTH_AND_EXPR
3032 : 1163490 : || code == COND_EXPR)
3033 : : {
3034 : 7818 : changed = false;
3035 : :
3036 : 7818 : e0 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 0));
3037 : 7818 : if (TREE_OPERAND (expr, 0) != e0)
3038 : 0 : changed = true;
3039 : :
3040 : 7818 : e1 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 1));
3041 : 7818 : if (TREE_OPERAND (expr, 1) != e1)
3042 : 0 : changed = true;
3043 : :
3044 : 7818 : if (code == COND_EXPR)
3045 : : {
3046 : 0 : e2 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 2));
3047 : 0 : if (TREE_OPERAND (expr, 2) != e2)
3048 : : changed = true;
3049 : : }
3050 : : else
3051 : : e2 = NULL_TREE;
3052 : :
3053 : 7818 : if (changed)
3054 : : {
3055 : 0 : if (code == COND_EXPR)
3056 : 0 : expr = fold_build3 (code, boolean_type_node, e0, e1, e2);
3057 : : else
3058 : 0 : expr = fold_build2 (code, boolean_type_node, e0, e1);
3059 : : }
3060 : :
3061 : 7818 : return expr;
3062 : : }
3063 : :
3064 : 1155672 : e = instantiate_parameters (loop, expr);
3065 : 1155672 : if (is_gimple_min_invariant (e))
3066 : : return e;
3067 : :
3068 : : return expr;
3069 : : }
3070 : :
3071 : : /* Returns true if EXIT is the only possible exit from LOOP. */
3072 : :
3073 : : bool
3074 : 10831490 : loop_only_exit_p (const class loop *loop, basic_block *body, const_edge exit)
3075 : : {
3076 : 10831490 : gimple_stmt_iterator bsi;
3077 : 10831490 : unsigned i;
3078 : :
3079 : 10831490 : if (exit != single_exit (loop))
3080 : : return false;
3081 : :
3082 : 35066124 : for (i = 0; i < loop->num_nodes; i++)
3083 : 235699503 : for (bsi = gsi_start_bb (body[i]); !gsi_end_p (bsi); gsi_next (&bsi))
3084 : 180842316 : if (stmt_can_terminate_bb_p (gsi_stmt (bsi)))
3085 : : return false;
3086 : :
3087 : : return true;
3088 : : }
3089 : :
3090 : : /* Stores description of number of iterations of LOOP derived from
3091 : : EXIT (an exit edge of the LOOP) in NITER. Returns true if some useful
3092 : : information could be derived (and fields of NITER have meaning described
3093 : : in comments at class tree_niter_desc declaration), false otherwise.
3094 : : When EVERY_ITERATION is true, only tests that are known to be executed
3095 : : every iteration are considered (i.e. only test that alone bounds the loop).
3096 : : If AT_STMT is not NULL, this function stores LOOP's condition statement in
3097 : : it when returning true. */
3098 : :
3099 : : bool
3100 : 21232776 : number_of_iterations_exit_assumptions (class loop *loop, edge exit,
3101 : : class tree_niter_desc *niter,
3102 : : gcond **at_stmt, bool every_iteration,
3103 : : basic_block *body)
3104 : : {
3105 : 21232776 : tree type;
3106 : 21232776 : tree op0, op1;
3107 : 21232776 : enum tree_code code;
3108 : 21232776 : affine_iv iv0, iv1;
3109 : 21232776 : bool safe;
3110 : :
3111 : : /* The condition at a fake exit (if it exists) does not control its
3112 : : execution. */
3113 : 21232776 : if (exit->flags & EDGE_FAKE)
3114 : : return false;
3115 : :
3116 : : /* Nothing to analyze if the loop is known to be infinite. */
3117 : 21232375 : if (loop_constraint_set_p (loop, LOOP_C_INFINITE))
3118 : : return false;
3119 : :
3120 : 21232375 : safe = dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src);
3121 : :
3122 : 21232375 : if (every_iteration && !safe)
3123 : : return false;
3124 : :
3125 : 20018964 : niter->assumptions = boolean_false_node;
3126 : 20018964 : niter->control.base = NULL_TREE;
3127 : 20018964 : niter->control.step = NULL_TREE;
3128 : 20018964 : niter->control.no_overflow = false;
3129 : 45298436 : gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (exit->src));
3130 : 17374244 : if (!stmt)
3131 : : return false;
3132 : :
3133 : 17374244 : if (at_stmt)
3134 : 16852063 : *at_stmt = stmt;
3135 : :
3136 : : /* We want the condition for staying inside loop. */
3137 : 17374244 : code = gimple_cond_code (stmt);
3138 : 17374244 : if (exit->flags & EDGE_TRUE_VALUE)
3139 : 6369916 : code = invert_tree_comparison (code, false);
3140 : :
3141 : 17374244 : switch (code)
3142 : : {
3143 : 14749085 : case GT_EXPR:
3144 : 14749085 : case GE_EXPR:
3145 : 14749085 : case LT_EXPR:
3146 : 14749085 : case LE_EXPR:
3147 : 14749085 : case NE_EXPR:
3148 : 14749085 : break;
3149 : :
3150 : 2624827 : case EQ_EXPR:
3151 : 2624827 : return number_of_iterations_cltz (loop, exit, code, niter);
3152 : :
3153 : : default:
3154 : : return false;
3155 : : }
3156 : :
3157 : 14749085 : op0 = gimple_cond_lhs (stmt);
3158 : 14749085 : op1 = gimple_cond_rhs (stmt);
3159 : 14749085 : type = TREE_TYPE (op0);
3160 : :
3161 : 14749085 : if (TREE_CODE (type) != INTEGER_TYPE
3162 : 2272237 : && !POINTER_TYPE_P (type))
3163 : : return false;
3164 : :
3165 : 13987290 : tree iv0_niters = NULL_TREE;
3166 : 27974580 : if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
3167 : : op0, &iv0, safe ? &iv0_niters : NULL, false))
3168 : 2972131 : return number_of_iterations_bitcount (loop, exit, code, niter);
3169 : 11015159 : tree iv1_niters = NULL_TREE;
3170 : 22030318 : if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
3171 : : op1, &iv1, safe ? &iv1_niters : NULL, false))
3172 : : return false;
3173 : : /* Give up on complicated case. */
3174 : 10480574 : if (iv0_niters && iv1_niters)
3175 : : return false;
3176 : :
3177 : : /* We don't want to see undefined signed overflow warnings while
3178 : : computing the number of iterations. */
3179 : 10480574 : fold_defer_overflow_warnings ();
3180 : :
3181 : 10480574 : iv0.base = expand_simple_operations (iv0.base);
3182 : 10480574 : iv1.base = expand_simple_operations (iv1.base);
3183 : 10480574 : bool body_from_caller = true;
3184 : 10480574 : if (!body)
3185 : : {
3186 : 6243624 : body = get_loop_body (loop);
3187 : 6243624 : body_from_caller = false;
3188 : : }
3189 : 10480574 : bool only_exit_p = loop_only_exit_p (loop, body, exit);
3190 : 10480574 : if (!body_from_caller)
3191 : 6243624 : free (body);
3192 : 10480574 : if (!number_of_iterations_cond (loop, type, &iv0, code, &iv1, niter,
3193 : : only_exit_p, safe))
3194 : : {
3195 : 105264 : fold_undefer_and_ignore_overflow_warnings ();
3196 : 105264 : return false;
3197 : : }
3198 : :
3199 : : /* Incorporate additional assumption implied by control iv. */
3200 : 10375310 : tree iv_niters = iv0_niters ? iv0_niters : iv1_niters;
3201 : 10375310 : if (iv_niters)
3202 : : {
3203 : 2085 : tree assumption = fold_build2 (LE_EXPR, boolean_type_node, niter->niter,
3204 : : fold_convert (TREE_TYPE (niter->niter),
3205 : : iv_niters));
3206 : :
3207 : 2085 : if (!integer_nonzerop (assumption))
3208 : 2072 : niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3209 : : niter->assumptions, assumption);
3210 : :
3211 : : /* Refine upper bound if possible. */
3212 : 2085 : if (TREE_CODE (iv_niters) == INTEGER_CST
3213 : 4170 : && niter->max > wi::to_widest (iv_niters))
3214 : 1855 : niter->max = wi::to_widest (iv_niters);
3215 : : }
3216 : :
3217 : : /* There is no assumptions if the loop is known to be finite. */
3218 : 10375310 : if (!integer_zerop (niter->assumptions)
3219 : 10375310 : && loop_constraint_set_p (loop, LOOP_C_FINITE))
3220 : 6055 : niter->assumptions = boolean_true_node;
3221 : :
3222 : 10375310 : if (optimize >= 3)
3223 : : {
3224 : 1625254 : niter->assumptions = simplify_using_outer_evolutions (loop,
3225 : : niter->assumptions);
3226 : 1625254 : niter->may_be_zero = simplify_using_outer_evolutions (loop,
3227 : : niter->may_be_zero);
3228 : 1625254 : niter->niter = simplify_using_outer_evolutions (loop, niter->niter);
3229 : : }
3230 : :
3231 : 10375310 : niter->assumptions
3232 : 10375310 : = simplify_using_initial_conditions (loop,
3233 : : niter->assumptions);
3234 : 10375310 : niter->may_be_zero
3235 : 10375310 : = simplify_using_initial_conditions (loop,
3236 : : niter->may_be_zero);
3237 : :
3238 : 10375310 : fold_undefer_and_ignore_overflow_warnings ();
3239 : :
3240 : : /* If NITER has simplified into a constant, update MAX. */
3241 : 10375310 : if (TREE_CODE (niter->niter) == INTEGER_CST)
3242 : 5962727 : niter->max = wi::to_widest (niter->niter);
3243 : :
3244 : 10375310 : return (!integer_zerop (niter->assumptions));
3245 : : }
3246 : :
3247 : : /* Like number_of_iterations_exit_assumptions, but return TRUE only if
3248 : : the niter information holds unconditionally. */
3249 : :
3250 : : bool
3251 : 20605197 : number_of_iterations_exit (class loop *loop, edge exit,
3252 : : class tree_niter_desc *niter,
3253 : : bool warn, bool every_iteration,
3254 : : basic_block *body)
3255 : : {
3256 : 20605197 : gcond *stmt;
3257 : 20605197 : if (!number_of_iterations_exit_assumptions (loop, exit, niter,
3258 : : &stmt, every_iteration, body))
3259 : : return false;
3260 : :
3261 : 10079187 : if (integer_nonzerop (niter->assumptions))
3262 : : return true;
3263 : :
3264 : 470517 : if (warn && dump_enabled_p ())
3265 : 5 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt,
3266 : : "missed loop optimization: niters analysis ends up "
3267 : : "with assumptions.\n");
3268 : :
3269 : : return false;
3270 : : }
3271 : :
3272 : : /* Try to determine the number of iterations of LOOP. If we succeed,
3273 : : expression giving number of iterations is returned and *EXIT is
3274 : : set to the edge from that the information is obtained. Otherwise
3275 : : chrec_dont_know is returned. */
3276 : :
3277 : : tree
3278 : 624904 : find_loop_niter (class loop *loop, edge *exit)
3279 : : {
3280 : 624904 : unsigned i;
3281 : 624904 : auto_vec<edge> exits = get_loop_exit_edges (loop);
3282 : 624904 : edge ex;
3283 : 624904 : tree niter = NULL_TREE, aniter;
3284 : 624904 : class tree_niter_desc desc;
3285 : :
3286 : 624904 : *exit = NULL;
3287 : 2976706 : FOR_EACH_VEC_ELT (exits, i, ex)
3288 : : {
3289 : 2351802 : if (!number_of_iterations_exit (loop, ex, &desc, false))
3290 : 1935896 : continue;
3291 : :
3292 : 415906 : if (integer_nonzerop (desc.may_be_zero))
3293 : : {
3294 : : /* We exit in the first iteration through this exit.
3295 : : We won't find anything better. */
3296 : 0 : niter = build_int_cst (unsigned_type_node, 0);
3297 : 0 : *exit = ex;
3298 : 0 : break;
3299 : : }
3300 : :
3301 : 415906 : if (!integer_zerop (desc.may_be_zero))
3302 : 55679 : continue;
3303 : :
3304 : 360227 : aniter = desc.niter;
3305 : :
3306 : 360227 : if (!niter)
3307 : : {
3308 : : /* Nothing recorded yet. */
3309 : 353752 : niter = aniter;
3310 : 353752 : *exit = ex;
3311 : 353752 : continue;
3312 : : }
3313 : :
3314 : : /* Prefer constants, the lower the better. */
3315 : 6475 : if (TREE_CODE (aniter) != INTEGER_CST)
3316 : 3807 : continue;
3317 : :
3318 : 2668 : if (TREE_CODE (niter) != INTEGER_CST)
3319 : : {
3320 : 739 : niter = aniter;
3321 : 739 : *exit = ex;
3322 : 739 : continue;
3323 : : }
3324 : :
3325 : 1929 : if (tree_int_cst_lt (aniter, niter))
3326 : : {
3327 : 155 : niter = aniter;
3328 : 155 : *exit = ex;
3329 : 155 : continue;
3330 : : }
3331 : : }
3332 : :
3333 : 624904 : return niter ? niter : chrec_dont_know;
3334 : 624904 : }
3335 : :
3336 : : /* Return true if loop is known to have bounded number of iterations. */
3337 : :
3338 : : bool
3339 : 2563811 : finite_loop_p (class loop *loop)
3340 : : {
3341 : 2563811 : widest_int nit;
3342 : 2563811 : int flags;
3343 : :
3344 : 2563811 : if (loop->finite_p)
3345 : : {
3346 : 1866398 : unsigned i;
3347 : 1866398 : auto_vec<edge> exits = get_loop_exit_edges (loop);
3348 : 1866398 : edge ex;
3349 : :
3350 : : /* If the loop has a normal exit, we can assume it will terminate. */
3351 : 3752172 : FOR_EACH_VEC_ELT (exits, i, ex)
3352 : 1881078 : if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_FAKE)))
3353 : : {
3354 : 1862029 : if (dump_file)
3355 : 135 : fprintf (dump_file, "Assume loop %i to be finite: it has an exit "
3356 : : "and -ffinite-loops is on or loop was "
3357 : : "previously finite.\n",
3358 : : loop->num);
3359 : 1862029 : return true;
3360 : : }
3361 : 1866398 : }
3362 : :
3363 : 701782 : flags = flags_from_decl_or_type (current_function_decl);
3364 : 701782 : if ((flags & (ECF_CONST|ECF_PURE)) && !(flags & ECF_LOOPING_CONST_OR_PURE))
3365 : : {
3366 : 2217 : if (dump_file && (dump_flags & TDF_DETAILS))
3367 : 0 : fprintf (dump_file,
3368 : : "Found loop %i to be finite: it is within "
3369 : : "pure or const function.\n",
3370 : : loop->num);
3371 : 2217 : loop->finite_p = true;
3372 : 2217 : return true;
3373 : : }
3374 : :
3375 : 699565 : if (loop->any_upper_bound
3376 : : /* Loop with no normal exit will not pass max_loop_iterations. */
3377 : 699565 : || (!loop->finite_p && max_loop_iterations (loop, &nit)))
3378 : : {
3379 : 344835 : if (dump_file && (dump_flags & TDF_DETAILS))
3380 : 3 : fprintf (dump_file, "Found loop %i to be finite: upper bound found.\n",
3381 : : loop->num);
3382 : 344835 : loop->finite_p = true;
3383 : 344835 : return true;
3384 : : }
3385 : :
3386 : : return false;
3387 : 2563811 : }
3388 : :
3389 : : /*
3390 : :
3391 : : Analysis of a number of iterations of a loop by a brute-force evaluation.
3392 : :
3393 : : */
3394 : :
3395 : : /* Bound on the number of iterations we try to evaluate. */
3396 : :
3397 : : #define MAX_ITERATIONS_TO_TRACK \
3398 : : ((unsigned) param_max_iterations_to_track)
3399 : :
3400 : : /* Returns the loop phi node of LOOP such that ssa name X is derived from its
3401 : : result by a chain of operations such that all but exactly one of their
3402 : : operands are constants. */
3403 : :
3404 : : static gphi *
3405 : 1073582 : chain_of_csts_start (class loop *loop, tree x)
3406 : : {
3407 : 1304533 : gimple *stmt = SSA_NAME_DEF_STMT (x);
3408 : 1304533 : tree use;
3409 : 1304533 : basic_block bb = gimple_bb (stmt);
3410 : 1304533 : enum tree_code code;
3411 : :
3412 : 1304533 : if (!bb
3413 : 1304533 : || !flow_bb_inside_loop_p (loop, bb))
3414 : 294638 : return NULL;
3415 : :
3416 : 1009895 : if (gimple_code (stmt) == GIMPLE_PHI)
3417 : : {
3418 : 393155 : if (bb == loop->header)
3419 : 359602 : return as_a <gphi *> (stmt);
3420 : :
3421 : : return NULL;
3422 : : }
3423 : :
3424 : 616740 : if (gimple_code (stmt) != GIMPLE_ASSIGN
3425 : 1148511 : || gimple_assign_rhs_class (stmt) == GIMPLE_TERNARY_RHS)
3426 : : return NULL;
3427 : :
3428 : 531517 : code = gimple_assign_rhs_code (stmt);
3429 : 531517 : if (gimple_references_memory_p (stmt)
3430 : 311272 : || TREE_CODE_CLASS (code) == tcc_reference
3431 : 295023 : || (code == ADDR_EXPR
3432 : 473 : && !is_gimple_min_invariant (gimple_assign_rhs1 (stmt))))
3433 : 236967 : return NULL;
3434 : :
3435 : 294550 : use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
3436 : 294550 : if (use == NULL_TREE)
3437 : : return NULL;
3438 : :
3439 : : return chain_of_csts_start (loop, use);
3440 : : }
3441 : :
3442 : : /* Determines whether the expression X is derived from a result of a phi node
3443 : : in header of LOOP such that
3444 : :
3445 : : * the derivation of X consists only from operations with constants
3446 : : * the initial value of the phi node is constant
3447 : : * the value of the phi node in the next iteration can be derived from the
3448 : : value in the current iteration by a chain of operations with constants,
3449 : : or is also a constant
3450 : :
3451 : : If such phi node exists, it is returned, otherwise NULL is returned. */
3452 : :
3453 : : static gphi *
3454 : 969785 : get_base_for (class loop *loop, tree x)
3455 : : {
3456 : 969785 : gphi *phi;
3457 : 969785 : tree init, next;
3458 : :
3459 : 969785 : if (is_gimple_min_invariant (x))
3460 : : return NULL;
3461 : :
3462 : 969785 : phi = chain_of_csts_start (loop, x);
3463 : 969785 : if (!phi)
3464 : : return NULL;
3465 : :
3466 : 262941 : init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
3467 : 262941 : next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3468 : :
3469 : 262941 : if (!is_gimple_min_invariant (init))
3470 : : return NULL;
3471 : :
3472 : 115688 : if (TREE_CODE (next) == SSA_NAME
3473 : 115688 : && chain_of_csts_start (loop, next) != phi)
3474 : : return NULL;
3475 : :
3476 : : return phi;
3477 : : }
3478 : :
3479 : : /* Given an expression X, then
3480 : :
3481 : : * if X is NULL_TREE, we return the constant BASE.
3482 : : * if X is a constant, we return the constant X.
3483 : : * otherwise X is a SSA name, whose value in the considered loop is derived
3484 : : by a chain of operations with constant from a result of a phi node in
3485 : : the header of the loop. Then we return value of X when the value of the
3486 : : result of this phi node is given by the constant BASE. */
3487 : :
3488 : : static tree
3489 : 5046062 : get_val_for (tree x, tree base)
3490 : : {
3491 : 5052899 : gimple *stmt;
3492 : :
3493 : 5052899 : gcc_checking_assert (is_gimple_min_invariant (base));
3494 : :
3495 : 5052899 : if (!x)
3496 : : return base;
3497 : 3549625 : else if (is_gimple_min_invariant (x))
3498 : : return x;
3499 : :
3500 : 3537947 : stmt = SSA_NAME_DEF_STMT (x);
3501 : 3537947 : if (gimple_code (stmt) == GIMPLE_PHI)
3502 : : return base;
3503 : :
3504 : 1842698 : gcc_checking_assert (is_gimple_assign (stmt));
3505 : :
3506 : : /* STMT must be either an assignment of a single SSA name or an
3507 : : expression involving an SSA name and a constant. Try to fold that
3508 : : expression using the value for the SSA name. */
3509 : 1842698 : if (gimple_assign_ssa_name_copy_p (stmt))
3510 : 6837 : return get_val_for (gimple_assign_rhs1 (stmt), base);
3511 : 1835861 : else if (gimple_assign_rhs_class (stmt) == GIMPLE_UNARY_RHS
3512 : 1835861 : && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
3513 : 113697 : return fold_build1 (gimple_assign_rhs_code (stmt),
3514 : : TREE_TYPE (gimple_assign_lhs (stmt)),
3515 : : get_val_for (gimple_assign_rhs1 (stmt), base));
3516 : 1722164 : else if (gimple_assign_rhs_class (stmt) == GIMPLE_BINARY_RHS)
3517 : : {
3518 : 1722164 : tree rhs1 = gimple_assign_rhs1 (stmt);
3519 : 1722164 : tree rhs2 = gimple_assign_rhs2 (stmt);
3520 : 1722164 : if (TREE_CODE (rhs1) == SSA_NAME)
3521 : 1681594 : rhs1 = get_val_for (rhs1, base);
3522 : 40570 : else if (TREE_CODE (rhs2) == SSA_NAME)
3523 : 40570 : rhs2 = get_val_for (rhs2, base);
3524 : : else
3525 : 0 : gcc_unreachable ();
3526 : 1722164 : return fold_build2 (gimple_assign_rhs_code (stmt),
3527 : : TREE_TYPE (gimple_assign_lhs (stmt)), rhs1, rhs2);
3528 : : }
3529 : : else
3530 : 0 : gcc_unreachable ();
3531 : : }
3532 : :
3533 : :
3534 : : /* Tries to count the number of iterations of LOOP till it exits by EXIT
3535 : : by brute force -- i.e. by determining the value of the operands of the
3536 : : condition at EXIT in first few iterations of the loop (assuming that
3537 : : these values are constant) and determining the first one in that the
3538 : : condition is not satisfied. Returns the constant giving the number
3539 : : of the iterations of LOOP if successful, chrec_dont_know otherwise. */
3540 : :
3541 : : tree
3542 : 961133 : loop_niter_by_eval (class loop *loop, edge exit)
3543 : : {
3544 : 961133 : tree acnd;
3545 : 961133 : tree op[2], val[2], next[2], aval[2];
3546 : 961133 : gphi *phi;
3547 : 961133 : unsigned i, j;
3548 : 961133 : enum tree_code cmp;
3549 : :
3550 : 1922266 : gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (exit->src));
3551 : 961133 : if (!cond)
3552 : 84695 : return chrec_dont_know;
3553 : :
3554 : 876438 : cmp = gimple_cond_code (cond);
3555 : 876438 : if (exit->flags & EDGE_TRUE_VALUE)
3556 : 322720 : cmp = invert_tree_comparison (cmp, false);
3557 : :
3558 : 876438 : switch (cmp)
3559 : : {
3560 : 876411 : case EQ_EXPR:
3561 : 876411 : case NE_EXPR:
3562 : 876411 : case GT_EXPR:
3563 : 876411 : case GE_EXPR:
3564 : 876411 : case LT_EXPR:
3565 : 876411 : case LE_EXPR:
3566 : 876411 : op[0] = gimple_cond_lhs (cond);
3567 : 876411 : op[1] = gimple_cond_rhs (cond);
3568 : 876411 : break;
3569 : :
3570 : 27 : default:
3571 : 27 : return chrec_dont_know;
3572 : : }
3573 : :
3574 : 1002437 : for (j = 0; j < 2; j++)
3575 : : {
3576 : 987278 : if (is_gimple_min_invariant (op[j]))
3577 : : {
3578 : 17493 : val[j] = op[j];
3579 : 17493 : next[j] = NULL_TREE;
3580 : 17493 : op[j] = NULL_TREE;
3581 : : }
3582 : : else
3583 : : {
3584 : 969785 : phi = get_base_for (loop, op[j]);
3585 : 969785 : if (!phi)
3586 : 861252 : return chrec_dont_know;
3587 : 108533 : val[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
3588 : 108533 : next[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3589 : : }
3590 : : }
3591 : :
3592 : : /* Don't issue signed overflow warnings. */
3593 : 15159 : fold_defer_overflow_warnings ();
3594 : :
3595 : 825432 : for (i = 0; i < MAX_ITERATIONS_TO_TRACK; i++)
3596 : : {
3597 : 2429004 : for (j = 0; j < 2; j++)
3598 : 1619336 : aval[j] = get_val_for (op[j], val[j]);
3599 : :
3600 : 809668 : acnd = fold_binary (cmp, boolean_type_node, aval[0], aval[1]);
3601 : 809668 : if (acnd && integer_zerop (acnd))
3602 : : {
3603 : 14080 : fold_undefer_and_ignore_overflow_warnings ();
3604 : 14080 : if (dump_file && (dump_flags & TDF_DETAILS))
3605 : 3 : fprintf (dump_file,
3606 : : "Proved that loop %d iterates %d times using brute force.\n",
3607 : : loop->num, i);
3608 : 14080 : return build_int_cst (unsigned_type_node, i);
3609 : : }
3610 : :
3611 : 2386013 : for (j = 0; j < 2; j++)
3612 : : {
3613 : 1590865 : aval[j] = val[j];
3614 : 1590865 : val[j] = get_val_for (next[j], val[j]);
3615 : 1590865 : if (!is_gimple_min_invariant (val[j]))
3616 : : {
3617 : 440 : fold_undefer_and_ignore_overflow_warnings ();
3618 : 440 : return chrec_dont_know;
3619 : : }
3620 : : }
3621 : :
3622 : : /* If the next iteration would use the same base values
3623 : : as the current one, there is no point looping further,
3624 : : all following iterations will be the same as this one. */
3625 : 795148 : if (val[0] == aval[0] && val[1] == aval[1])
3626 : : break;
3627 : : }
3628 : :
3629 : 639 : fold_undefer_and_ignore_overflow_warnings ();
3630 : :
3631 : 639 : return chrec_dont_know;
3632 : : }
3633 : :
3634 : : /* Finds the exit of the LOOP by that the loop exits after a constant
3635 : : number of iterations and stores the exit edge to *EXIT. The constant
3636 : : giving the number of iterations of LOOP is returned. The number of
3637 : : iterations is determined using loop_niter_by_eval (i.e. by brute force
3638 : : evaluation). If we are unable to find the exit for that loop_niter_by_eval
3639 : : determines the number of iterations, chrec_dont_know is returned. */
3640 : :
3641 : : tree
3642 : 322564 : find_loop_niter_by_eval (class loop *loop, edge *exit)
3643 : : {
3644 : 322564 : unsigned i;
3645 : 322564 : auto_vec<edge> exits = get_loop_exit_edges (loop);
3646 : 322564 : edge ex;
3647 : 322564 : tree niter = NULL_TREE, aniter;
3648 : :
3649 : 322564 : *exit = NULL;
3650 : :
3651 : : /* Loops with multiple exits are expensive to handle and less important. */
3652 : 322564 : if (!flag_expensive_optimizations
3653 : 339757 : && exits.length () > 1)
3654 : 3536 : return chrec_dont_know;
3655 : :
3656 : 1015385 : FOR_EACH_VEC_ELT (exits, i, ex)
3657 : : {
3658 : 696357 : if (!just_once_each_iteration_p (loop, ex->src))
3659 : 239599 : continue;
3660 : :
3661 : 456758 : aniter = loop_niter_by_eval (loop, ex);
3662 : 456758 : if (chrec_contains_undetermined (aniter))
3663 : 454069 : continue;
3664 : :
3665 : 2733 : if (niter
3666 : 2689 : && !tree_int_cst_lt (aniter, niter))
3667 : 44 : continue;
3668 : :
3669 : 2645 : niter = aniter;
3670 : 2645 : *exit = ex;
3671 : : }
3672 : :
3673 : 319028 : return niter ? niter : chrec_dont_know;
3674 : 322564 : }
3675 : :
3676 : : /*
3677 : :
3678 : : Analysis of upper bounds on number of iterations of a loop.
3679 : :
3680 : : */
3681 : :
3682 : : static widest_int derive_constant_upper_bound_ops (tree, tree,
3683 : : enum tree_code, tree);
3684 : :
3685 : : /* Returns a constant upper bound on the value of the right-hand side of
3686 : : an assignment statement STMT. */
3687 : :
3688 : : static widest_int
3689 : 279 : derive_constant_upper_bound_assign (gimple *stmt)
3690 : : {
3691 : 279 : enum tree_code code = gimple_assign_rhs_code (stmt);
3692 : 279 : tree op0 = gimple_assign_rhs1 (stmt);
3693 : 279 : tree op1 = gimple_assign_rhs2 (stmt);
3694 : :
3695 : 279 : return derive_constant_upper_bound_ops (TREE_TYPE (gimple_assign_lhs (stmt)),
3696 : 279 : op0, code, op1);
3697 : : }
3698 : :
3699 : : /* Returns a constant upper bound on the value of expression VAL. VAL
3700 : : is considered to be unsigned. If its type is signed, its value must
3701 : : be nonnegative. */
3702 : :
3703 : : static widest_int
3704 : 9338770 : derive_constant_upper_bound (tree val)
3705 : : {
3706 : 9338770 : enum tree_code code;
3707 : 9338770 : tree op0, op1, op2;
3708 : :
3709 : 9338770 : extract_ops_from_tree (val, &code, &op0, &op1, &op2);
3710 : 9338770 : return derive_constant_upper_bound_ops (TREE_TYPE (val), op0, code, op1);
3711 : : }
3712 : :
3713 : : /* Returns a constant upper bound on the value of expression OP0 CODE OP1,
3714 : : whose type is TYPE. The expression is considered to be unsigned. If
3715 : : its type is signed, its value must be nonnegative. */
3716 : :
3717 : : static widest_int
3718 : 9339049 : derive_constant_upper_bound_ops (tree type, tree op0,
3719 : : enum tree_code code, tree op1)
3720 : : {
3721 : 9339049 : tree subtype, maxt;
3722 : 9339049 : widest_int bnd, max, cst;
3723 : 9339049 : gimple *stmt;
3724 : :
3725 : 9339049 : if (INTEGRAL_TYPE_P (type))
3726 : 9339049 : maxt = TYPE_MAX_VALUE (type);
3727 : : else
3728 : 0 : maxt = upper_bound_in_type (type, type);
3729 : :
3730 : 9339049 : max = wi::to_widest (maxt);
3731 : :
3732 : 9339049 : switch (code)
3733 : : {
3734 : 9295203 : case INTEGER_CST:
3735 : 9295203 : return wi::to_widest (op0);
3736 : :
3737 : 1044 : CASE_CONVERT:
3738 : 1044 : subtype = TREE_TYPE (op0);
3739 : 1044 : if (!TYPE_UNSIGNED (subtype)
3740 : : /* If TYPE is also signed, the fact that VAL is nonnegative implies
3741 : : that OP0 is nonnegative. */
3742 : 765 : && TYPE_UNSIGNED (type)
3743 : 1809 : && !tree_expr_nonnegative_p (op0))
3744 : : {
3745 : : /* If we cannot prove that the casted expression is nonnegative,
3746 : : we cannot establish more useful upper bound than the precision
3747 : : of the type gives us. */
3748 : 742 : return max;
3749 : : }
3750 : :
3751 : : /* We now know that op0 is an nonnegative value. Try deriving an upper
3752 : : bound for it. */
3753 : 302 : bnd = derive_constant_upper_bound (op0);
3754 : :
3755 : : /* If the bound does not fit in TYPE, max. value of TYPE could be
3756 : : attained. */
3757 : 302 : if (wi::ltu_p (max, bnd))
3758 : 0 : return max;
3759 : :
3760 : 302 : return bnd;
3761 : :
3762 : 27215 : case PLUS_EXPR:
3763 : 27215 : case POINTER_PLUS_EXPR:
3764 : 27215 : case MINUS_EXPR:
3765 : 27215 : if (TREE_CODE (op1) != INTEGER_CST
3766 : 27215 : || !tree_expr_nonnegative_p (op0))
3767 : 26802 : return max;
3768 : :
3769 : : /* Canonicalize to OP0 - CST. Consider CST to be signed, in order to
3770 : : choose the most logical way how to treat this constant regardless
3771 : : of the signedness of the type. */
3772 : 413 : cst = wi::sext (wi::to_widest (op1), TYPE_PRECISION (type));
3773 : 413 : if (code != MINUS_EXPR)
3774 : 413 : cst = -cst;
3775 : :
3776 : 413 : bnd = derive_constant_upper_bound (op0);
3777 : :
3778 : 413 : if (wi::neg_p (cst))
3779 : : {
3780 : 29 : cst = -cst;
3781 : : /* Avoid CST == 0x80000... */
3782 : 29 : if (wi::neg_p (cst))
3783 : 0 : return max;
3784 : :
3785 : : /* OP0 + CST. We need to check that
3786 : : BND <= MAX (type) - CST. */
3787 : :
3788 : 29 : widest_int mmax = max - cst;
3789 : 29 : if (wi::leu_p (bnd, mmax))
3790 : 0 : return max;
3791 : :
3792 : 29 : return bnd + cst;
3793 : 29 : }
3794 : : else
3795 : : {
3796 : : /* OP0 - CST, where CST >= 0.
3797 : :
3798 : : If TYPE is signed, we have already verified that OP0 >= 0, and we
3799 : : know that the result is nonnegative. This implies that
3800 : : VAL <= BND - CST.
3801 : :
3802 : : If TYPE is unsigned, we must additionally know that OP0 >= CST,
3803 : : otherwise the operation underflows.
3804 : : */
3805 : :
3806 : : /* This should only happen if the type is unsigned; however, for
3807 : : buggy programs that use overflowing signed arithmetics even with
3808 : : -fno-wrapv, this condition may also be true for signed values. */
3809 : 384 : if (wi::ltu_p (bnd, cst))
3810 : 0 : return max;
3811 : :
3812 : 384 : if (TYPE_UNSIGNED (type))
3813 : : {
3814 : 384 : tree tem = fold_binary (GE_EXPR, boolean_type_node, op0,
3815 : : wide_int_to_tree (type, cst));
3816 : 384 : if (!tem || integer_nonzerop (tem))
3817 : 30 : return max;
3818 : : }
3819 : :
3820 : 354 : bnd -= cst;
3821 : : }
3822 : :
3823 : 354 : return bnd;
3824 : :
3825 : 0 : case FLOOR_DIV_EXPR:
3826 : 0 : case EXACT_DIV_EXPR:
3827 : 0 : if (TREE_CODE (op1) != INTEGER_CST
3828 : 0 : || tree_int_cst_sign_bit (op1))
3829 : 0 : return max;
3830 : :
3831 : 0 : bnd = derive_constant_upper_bound (op0);
3832 : 0 : return wi::udiv_floor (bnd, wi::to_widest (op1));
3833 : :
3834 : 0 : case BIT_AND_EXPR:
3835 : 0 : if (TREE_CODE (op1) != INTEGER_CST
3836 : 0 : || tree_int_cst_sign_bit (op1))
3837 : 0 : return max;
3838 : 0 : return wi::to_widest (op1);
3839 : :
3840 : 455 : case SSA_NAME:
3841 : 455 : stmt = SSA_NAME_DEF_STMT (op0);
3842 : 455 : if (gimple_code (stmt) != GIMPLE_ASSIGN
3843 : 455 : || gimple_assign_lhs (stmt) != op0)
3844 : 176 : return max;
3845 : 279 : return derive_constant_upper_bound_assign (stmt);
3846 : :
3847 : 15132 : default:
3848 : 15132 : return max;
3849 : : }
3850 : 9339062 : }
3851 : :
3852 : : /* Emit a -Waggressive-loop-optimizations warning if needed. */
3853 : :
3854 : : static void
3855 : 8443813 : do_warn_aggressive_loop_optimizations (class loop *loop,
3856 : : widest_int i_bound, gimple *stmt)
3857 : : {
3858 : : /* Don't warn if the loop doesn't have known constant bound. */
3859 : 8443813 : if (!loop->nb_iterations
3860 : 8443813 : || TREE_CODE (loop->nb_iterations) != INTEGER_CST
3861 : 4040451 : || !warn_aggressive_loop_optimizations
3862 : : /* To avoid warning multiple times for the same loop,
3863 : : only start warning when we preserve loops. */
3864 : 4040290 : || (cfun->curr_properties & PROP_loops) == 0
3865 : : /* Only warn once per loop. */
3866 : 4040290 : || loop->warned_aggressive_loop_optimizations
3867 : : /* Only warn if undefined behavior gives us lower estimate than the
3868 : : known constant bound. */
3869 : 4039715 : || wi::cmpu (i_bound, wi::to_widest (loop->nb_iterations)) >= 0
3870 : : /* And undefined behavior happens unconditionally. */
3871 : 8443858 : || !dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (stmt)))
3872 : 8443768 : return;
3873 : :
3874 : 45 : edge e = single_exit (loop);
3875 : 45 : if (e == NULL)
3876 : : return;
3877 : :
3878 : 45 : gimple *estmt = last_nondebug_stmt (e->src);
3879 : 45 : char buf[WIDE_INT_PRINT_BUFFER_SIZE], *p;
3880 : 45 : unsigned len;
3881 : 45 : if (print_dec_buf_size (i_bound, TYPE_SIGN (TREE_TYPE (loop->nb_iterations)),
3882 : : &len))
3883 : 0 : p = XALLOCAVEC (char, len);
3884 : : else
3885 : : p = buf;
3886 : 45 : print_dec (i_bound, p, TYPE_SIGN (TREE_TYPE (loop->nb_iterations)));
3887 : 45 : auto_diagnostic_group d;
3888 : 45 : if (warning_at (gimple_location (stmt), OPT_Waggressive_loop_optimizations,
3889 : : "iteration %s invokes undefined behavior", p))
3890 : 9 : inform (gimple_location (estmt), "within this loop");
3891 : 45 : loop->warned_aggressive_loop_optimizations = true;
3892 : 45 : }
3893 : :
3894 : : /* Records that AT_STMT is executed at most BOUND + 1 times in LOOP. IS_EXIT
3895 : : is true if the loop is exited immediately after STMT, and this exit
3896 : : is taken at last when the STMT is executed BOUND + 1 times.
3897 : : REALISTIC is true if BOUND is expected to be close to the real number
3898 : : of iterations. UPPER is true if we are sure the loop iterates at most
3899 : : BOUND times. I_BOUND is a widest_int upper estimate on BOUND. */
3900 : :
3901 : : static void
3902 : 13264650 : record_estimate (class loop *loop, tree bound, const widest_int &i_bound,
3903 : : gimple *at_stmt, bool is_exit, bool realistic, bool upper)
3904 : : {
3905 : 13264650 : widest_int delta;
3906 : :
3907 : 13264650 : if (dump_file && (dump_flags & TDF_DETAILS))
3908 : : {
3909 : 98370 : fprintf (dump_file, "Statement %s", is_exit ? "(exit)" : "");
3910 : 54533 : print_gimple_stmt (dump_file, at_stmt, 0, TDF_SLIM);
3911 : 55055 : fprintf (dump_file, " is %sexecuted at most ",
3912 : : upper ? "" : "probably ");
3913 : 54533 : print_generic_expr (dump_file, bound, TDF_SLIM);
3914 : 54533 : fprintf (dump_file, " (bounded by ");
3915 : 54533 : print_decu (i_bound, dump_file);
3916 : 54533 : fprintf (dump_file, ") + 1 times in loop %d.\n", loop->num);
3917 : : }
3918 : :
3919 : : /* If the I_BOUND is just an estimate of BOUND, it rarely is close to the
3920 : : real number of iterations. */
3921 : 13264650 : if (TREE_CODE (bound) != INTEGER_CST)
3922 : : realistic = false;
3923 : : else
3924 : 11755006 : gcc_checking_assert (i_bound == wi::to_widest (bound));
3925 : :
3926 : 13264650 : if (wi::min_precision (i_bound, SIGNED) > bound_wide_int ().get_precision ())
3927 : : return;
3928 : :
3929 : : /* If we have a guaranteed upper bound, record it in the appropriate
3930 : : list, unless this is an !is_exit bound (i.e. undefined behavior in
3931 : : at_stmt) in a loop with known constant number of iterations. */
3932 : 13264637 : if (upper
3933 : 12714825 : && (is_exit
3934 : 8788230 : || loop->nb_iterations == NULL_TREE
3935 : 8788230 : || TREE_CODE (loop->nb_iterations) != INTEGER_CST))
3936 : : {
3937 : 8494455 : class nb_iter_bound *elt = ggc_alloc<nb_iter_bound> ();
3938 : :
3939 : 8494455 : elt->bound = bound_wide_int::from (i_bound, SIGNED);
3940 : 8494455 : elt->stmt = at_stmt;
3941 : 8494455 : elt->is_exit = is_exit;
3942 : 8494455 : elt->next = loop->bounds;
3943 : 8494455 : loop->bounds = elt;
3944 : : }
3945 : :
3946 : : /* If statement is executed on every path to the loop latch, we can directly
3947 : : infer the upper bound on the # of iterations of the loop. */
3948 : 13264637 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (at_stmt)))
3949 : 398317 : upper = false;
3950 : :
3951 : : /* Update the number of iteration estimates according to the bound.
3952 : : If at_stmt is an exit then the loop latch is executed at most BOUND times,
3953 : : otherwise it can be executed BOUND + 1 times. We will lower the estimate
3954 : : later if such statement must be executed on last iteration */
3955 : 13264637 : if (is_exit)
3956 : 3926597 : delta = 0;
3957 : : else
3958 : 9338040 : delta = 1;
3959 : 13264637 : widest_int new_i_bound = i_bound + delta;
3960 : :
3961 : : /* If an overflow occurred, ignore the result. */
3962 : 13264637 : if (wi::ltu_p (new_i_bound, delta))
3963 : 0 : return;
3964 : :
3965 : 13264637 : if (upper && !is_exit)
3966 : 8443813 : do_warn_aggressive_loop_optimizations (loop, new_i_bound, at_stmt);
3967 : 13264637 : record_niter_bound (loop, new_i_bound, realistic, upper);
3968 : 13264650 : }
3969 : :
3970 : : /* Records the control iv analyzed in NITER for LOOP if the iv is valid
3971 : : and doesn't overflow. */
3972 : :
3973 : : static void
3974 : 3926595 : record_control_iv (class loop *loop, class tree_niter_desc *niter)
3975 : : {
3976 : 3926595 : struct control_iv *iv;
3977 : :
3978 : 3926595 : if (!niter->control.base || !niter->control.step)
3979 : : return;
3980 : :
3981 : 3900792 : if (!integer_onep (niter->assumptions) || !niter->control.no_overflow)
3982 : : return;
3983 : :
3984 : 3774358 : iv = ggc_alloc<control_iv> ();
3985 : 3774358 : iv->base = niter->control.base;
3986 : 3774358 : iv->step = niter->control.step;
3987 : 3774358 : iv->next = loop->control_ivs;
3988 : 3774358 : loop->control_ivs = iv;
3989 : :
3990 : 3774358 : return;
3991 : : }
3992 : :
3993 : : /* This function returns TRUE if below conditions are satisfied:
3994 : : 1) VAR is SSA variable.
3995 : : 2) VAR is an IV:{base, step} in its defining loop.
3996 : : 3) IV doesn't overflow.
3997 : : 4) Both base and step are integer constants.
3998 : : 5) Base is the MIN/MAX value depends on IS_MIN.
3999 : : Store value of base to INIT correspondingly. */
4000 : :
4001 : : static bool
4002 : 99118 : get_cst_init_from_scev (tree var, wide_int *init, bool is_min)
4003 : : {
4004 : 99118 : if (TREE_CODE (var) != SSA_NAME)
4005 : : return false;
4006 : :
4007 : 99118 : gimple *def_stmt = SSA_NAME_DEF_STMT (var);
4008 : 183307 : class loop *loop = loop_containing_stmt (def_stmt);
4009 : :
4010 : 88265 : if (loop == NULL)
4011 : : return false;
4012 : :
4013 : 88265 : affine_iv iv;
4014 : 88265 : if (!simple_iv (loop, loop, var, &iv, false))
4015 : : return false;
4016 : :
4017 : 5448 : if (!iv.no_overflow)
4018 : : return false;
4019 : :
4020 : 5376 : if (TREE_CODE (iv.base) != INTEGER_CST || TREE_CODE (iv.step) != INTEGER_CST)
4021 : : return false;
4022 : :
4023 : 4380 : if (is_min == tree_int_cst_sign_bit (iv.step))
4024 : : return false;
4025 : :
4026 : 4076 : *init = wi::to_wide (iv.base);
4027 : 4076 : return true;
4028 : : }
4029 : :
4030 : : /* Record the estimate on number of iterations of LOOP based on the fact that
4031 : : the induction variable BASE + STEP * i evaluated in STMT does not wrap and
4032 : : its values belong to the range <LOW, HIGH>. REALISTIC is true if the
4033 : : estimated number of iterations is expected to be close to the real one.
4034 : : UPPER is true if we are sure the induction variable does not wrap. */
4035 : :
4036 : : static void
4037 : 9338053 : record_nonwrapping_iv (class loop *loop, tree base, tree step, gimple *stmt,
4038 : : tree low, tree high, bool realistic, bool upper)
4039 : : {
4040 : 9338053 : tree niter_bound, extreme, delta;
4041 : 9338053 : tree type = TREE_TYPE (base), unsigned_type;
4042 : 9338053 : tree orig_base = base;
4043 : :
4044 : 9338053 : if (TREE_CODE (step) != INTEGER_CST || integer_zerop (step))
4045 : 0 : return;
4046 : :
4047 : 9338053 : if (dump_file && (dump_flags & TDF_DETAILS))
4048 : : {
4049 : 43837 : fprintf (dump_file, "Induction variable (");
4050 : 43837 : print_generic_expr (dump_file, TREE_TYPE (base), TDF_SLIM);
4051 : 43837 : fprintf (dump_file, ") ");
4052 : 43837 : print_generic_expr (dump_file, base, TDF_SLIM);
4053 : 43837 : fprintf (dump_file, " + ");
4054 : 43837 : print_generic_expr (dump_file, step, TDF_SLIM);
4055 : 43837 : fprintf (dump_file, " * iteration does not wrap in statement ");
4056 : 43837 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
4057 : 43837 : fprintf (dump_file, " in loop %d.\n", loop->num);
4058 : : }
4059 : :
4060 : 9338053 : unsigned_type = unsigned_type_for (type);
4061 : 9338053 : base = fold_convert (unsigned_type, base);
4062 : 9338053 : step = fold_convert (unsigned_type, step);
4063 : :
4064 : 9338053 : if (tree_int_cst_sign_bit (step))
4065 : : {
4066 : 322137 : wide_int max;
4067 : 322137 : Value_Range base_range (TREE_TYPE (orig_base));
4068 : 644274 : if (get_range_query (cfun)->range_of_expr (base_range, orig_base)
4069 : 322137 : && !base_range.undefined_p ())
4070 : 322061 : max = base_range.upper_bound ();
4071 : 322137 : extreme = fold_convert (unsigned_type, low);
4072 : 322137 : if (TREE_CODE (orig_base) == SSA_NAME
4073 : 20282 : && TREE_CODE (high) == INTEGER_CST
4074 : 20282 : && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
4075 : 20236 : && ((!base_range.varying_p ()
4076 : 5340 : && !base_range.undefined_p ())
4077 : 14973 : || get_cst_init_from_scev (orig_base, &max, false))
4078 : 327400 : && wi::gts_p (wi::to_wide (high), max))
4079 : 1566 : base = wide_int_to_tree (unsigned_type, max);
4080 : 320571 : else if (TREE_CODE (base) != INTEGER_CST
4081 : 517175 : && dominated_by_p (CDI_DOMINATORS,
4082 : 196604 : loop->latch, gimple_bb (stmt)))
4083 : 195392 : base = fold_convert (unsigned_type, high);
4084 : 322137 : delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
4085 : 322137 : step = fold_build1 (NEGATE_EXPR, unsigned_type, step);
4086 : 322138 : }
4087 : : else
4088 : : {
4089 : 9015916 : wide_int min;
4090 : 9015916 : Value_Range base_range (TREE_TYPE (orig_base));
4091 : 18031832 : if (get_range_query (cfun)->range_of_expr (base_range, orig_base)
4092 : 9015916 : && !base_range.undefined_p ())
4093 : 9014062 : min = base_range.lower_bound ();
4094 : 9015916 : extreme = fold_convert (unsigned_type, high);
4095 : 9015916 : if (TREE_CODE (orig_base) == SSA_NAME
4096 : 737796 : && TREE_CODE (low) == INTEGER_CST
4097 : 737796 : && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
4098 : 207378 : && ((!base_range.varying_p ()
4099 : 124857 : && !base_range.undefined_p ())
4100 : 84145 : || get_cst_init_from_scev (orig_base, &min, true))
4101 : 9143225 : && wi::gts_p (min, wi::to_wide (low)))
4102 : 10664 : base = wide_int_to_tree (unsigned_type, min);
4103 : 9005252 : else if (TREE_CODE (base) != INTEGER_CST
4104 : 11820294 : && dominated_by_p (CDI_DOMINATORS,
4105 : 2815042 : loop->latch, gimple_bb (stmt)))
4106 : 2773402 : base = fold_convert (unsigned_type, low);
4107 : 9015916 : delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
4108 : 9015928 : }
4109 : :
4110 : : /* STMT is executed at most NITER_BOUND + 1 times, since otherwise the value
4111 : : would get out of the range. */
4112 : 9338053 : niter_bound = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step);
4113 : 9338053 : widest_int max = derive_constant_upper_bound (niter_bound);
4114 : 9338053 : record_estimate (loop, niter_bound, max, stmt, false, realistic, upper);
4115 : 9338053 : }
4116 : :
4117 : : /* Determine information about number of iterations a LOOP from the index
4118 : : IDX of a data reference accessed in STMT. RELIABLE is true if STMT is
4119 : : guaranteed to be executed in every iteration of LOOP. Callback for
4120 : : for_each_index. */
4121 : :
4122 : : struct ilb_data
4123 : : {
4124 : : class loop *loop;
4125 : : gimple *stmt;
4126 : : };
4127 : :
4128 : : static bool
4129 : 30132604 : idx_infer_loop_bounds (tree base, tree *idx, void *dta)
4130 : : {
4131 : 30132604 : struct ilb_data *data = (struct ilb_data *) dta;
4132 : 30132604 : tree ev, init, step;
4133 : 30132604 : tree low, high, type, next;
4134 : 30132604 : bool sign, upper = true, has_flexible_size = false;
4135 : 30132604 : class loop *loop = data->loop;
4136 : :
4137 : 30132604 : if (TREE_CODE (base) != ARRAY_REF)
4138 : : return true;
4139 : :
4140 : : /* For arrays that might have flexible sizes, it is not guaranteed that they
4141 : : do not really extend over their declared size. */
4142 : 8193432 : if (array_ref_flexible_size_p (base))
4143 : : {
4144 : 1895366 : has_flexible_size = true;
4145 : 1895366 : upper = false;
4146 : : }
4147 : :
4148 : 8193432 : class loop *dloop = loop_containing_stmt (data->stmt);
4149 : 8193432 : if (!dloop)
4150 : : return true;
4151 : :
4152 : 8193432 : ev = analyze_scalar_evolution (dloop, *idx);
4153 : 8193432 : ev = instantiate_parameters (loop, ev);
4154 : 8193432 : init = initial_condition (ev);
4155 : 8193432 : step = evolution_part_in_loop_num (ev, loop->num);
4156 : :
4157 : 8193432 : if (!init
4158 : 8193432 : || !step
4159 : 4784147 : || TREE_CODE (step) != INTEGER_CST
4160 : 3637895 : || integer_zerop (step)
4161 : 3637895 : || tree_contains_chrecs (init, NULL)
4162 : 11831327 : || chrec_contains_symbols_defined_in_loop (init, loop->num))
4163 : 4555537 : return true;
4164 : :
4165 : 3637895 : low = array_ref_low_bound (base);
4166 : 3637895 : high = array_ref_up_bound (base);
4167 : :
4168 : : /* The case of nonconstant bounds could be handled, but it would be
4169 : : complicated. */
4170 : 3637895 : if (TREE_CODE (low) != INTEGER_CST
4171 : 3637895 : || !high
4172 : 3097547 : || TREE_CODE (high) != INTEGER_CST)
4173 : : return true;
4174 : 2941863 : sign = tree_int_cst_sign_bit (step);
4175 : 2941863 : type = TREE_TYPE (step);
4176 : :
4177 : : /* The array that might have flexible size most likely extends
4178 : : beyond its bounds. */
4179 : 2941863 : if (has_flexible_size
4180 : 2941863 : && operand_equal_p (low, high, 0))
4181 : : return true;
4182 : :
4183 : : /* In case the relevant bound of the array does not fit in type, or
4184 : : it does, but bound + step (in type) still belongs into the range of the
4185 : : array, the index may wrap and still stay within the range of the array
4186 : : (consider e.g. if the array is indexed by the full range of
4187 : : unsigned char).
4188 : :
4189 : : To make things simpler, we require both bounds to fit into type, although
4190 : : there are cases where this would not be strictly necessary. */
4191 : 2923796 : if (!int_fits_type_p (high, type)
4192 : 2923796 : || !int_fits_type_p (low, type))
4193 : : return true;
4194 : 2923796 : low = fold_convert (type, low);
4195 : 2923796 : high = fold_convert (type, high);
4196 : :
4197 : 2923796 : if (sign)
4198 : 49944 : next = fold_binary (PLUS_EXPR, type, low, step);
4199 : : else
4200 : 2873852 : next = fold_binary (PLUS_EXPR, type, high, step);
4201 : :
4202 : 2923796 : if (tree_int_cst_compare (low, next) <= 0
4203 : 2923796 : && tree_int_cst_compare (next, high) <= 0)
4204 : : return true;
4205 : :
4206 : : /* If access is not executed on every iteration, we must ensure that overlow
4207 : : may not make the access valid later. */
4208 : 2923796 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (data->stmt)))
4209 : : {
4210 : 391911 : if (scev_probably_wraps_p (NULL_TREE,
4211 : 391911 : initial_condition_in_loop_num (ev, loop->num),
4212 : : step, data->stmt, loop, true))
4213 : 2923796 : upper = false;
4214 : : }
4215 : : else
4216 : 2531885 : record_nonwrapping_chrec (ev);
4217 : :
4218 : 2923796 : record_nonwrapping_iv (loop, init, step, data->stmt, low, high, false, upper);
4219 : 2923796 : return true;
4220 : : }
4221 : :
4222 : : /* Determine information about number of iterations a LOOP from the bounds
4223 : : of arrays in the data reference REF accessed in STMT. RELIABLE is true if
4224 : : STMT is guaranteed to be executed in every iteration of LOOP.*/
4225 : :
4226 : : static void
4227 : 31852984 : infer_loop_bounds_from_ref (class loop *loop, gimple *stmt, tree ref)
4228 : : {
4229 : 31852984 : struct ilb_data data;
4230 : :
4231 : 31852984 : data.loop = loop;
4232 : 31852984 : data.stmt = stmt;
4233 : 0 : for_each_index (&ref, idx_infer_loop_bounds, &data);
4234 : 0 : }
4235 : :
4236 : : /* Determine information about number of iterations of a LOOP from the way
4237 : : arrays are used in STMT. RELIABLE is true if STMT is guaranteed to be
4238 : : executed in every iteration of LOOP. */
4239 : :
4240 : : static void
4241 : 203501922 : infer_loop_bounds_from_array (class loop *loop, gimple *stmt)
4242 : : {
4243 : 203501922 : if (is_gimple_assign (stmt))
4244 : : {
4245 : 88467825 : tree op0 = gimple_assign_lhs (stmt);
4246 : 88467825 : tree op1 = gimple_assign_rhs1 (stmt);
4247 : :
4248 : : /* For each memory access, analyze its access function
4249 : : and record a bound on the loop iteration domain. */
4250 : 88467825 : if (REFERENCE_CLASS_P (op0))
4251 : 12690399 : infer_loop_bounds_from_ref (loop, stmt, op0);
4252 : :
4253 : 88467825 : if (REFERENCE_CLASS_P (op1))
4254 : 19009884 : infer_loop_bounds_from_ref (loop, stmt, op1);
4255 : : }
4256 : 115034097 : else if (is_gimple_call (stmt))
4257 : : {
4258 : 6836790 : tree arg, lhs;
4259 : 6836790 : unsigned i, n = gimple_call_num_args (stmt);
4260 : :
4261 : 6836790 : lhs = gimple_call_lhs (stmt);
4262 : 6836790 : if (lhs && REFERENCE_CLASS_P (lhs))
4263 : 6945 : infer_loop_bounds_from_ref (loop, stmt, lhs);
4264 : :
4265 : 22102257 : for (i = 0; i < n; i++)
4266 : : {
4267 : 15265467 : arg = gimple_call_arg (stmt, i);
4268 : 15265467 : if (REFERENCE_CLASS_P (arg))
4269 : 145756 : infer_loop_bounds_from_ref (loop, stmt, arg);
4270 : : }
4271 : : }
4272 : 203501922 : }
4273 : :
4274 : : /* Determine information about number of iterations of a LOOP from the fact
4275 : : that pointer arithmetics in STMT does not overflow. */
4276 : :
4277 : : static void
4278 : 97803636 : infer_loop_bounds_from_pointer_arith (class loop *loop, gimple *stmt)
4279 : : {
4280 : 97803636 : tree def, base, step, scev, type, low, high;
4281 : 97803636 : tree var, ptr;
4282 : :
4283 : 97803636 : if (!is_gimple_assign (stmt)
4284 : 97803636 : || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
4285 : : return;
4286 : :
4287 : 3792899 : def = gimple_assign_lhs (stmt);
4288 : 3792899 : if (TREE_CODE (def) != SSA_NAME)
4289 : : return;
4290 : :
4291 : 3792899 : type = TREE_TYPE (def);
4292 : 3792899 : if (!nowrap_type_p (type))
4293 : : return;
4294 : :
4295 : 3792899 : ptr = gimple_assign_rhs1 (stmt);
4296 : 3792899 : if (!expr_invariant_in_loop_p (loop, ptr))
4297 : : return;
4298 : :
4299 : 2162446 : var = gimple_assign_rhs2 (stmt);
4300 : 2162446 : if (TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (var)))
4301 : : return;
4302 : :
4303 : 2162446 : class loop *uloop = loop_containing_stmt (stmt);
4304 : 2162446 : scev = instantiate_parameters (loop, analyze_scalar_evolution (uloop, def));
4305 : 2162446 : if (chrec_contains_undetermined (scev))
4306 : : return;
4307 : :
4308 : 1839544 : base = initial_condition_in_loop_num (scev, loop->num);
4309 : 1839544 : step = evolution_part_in_loop_num (scev, loop->num);
4310 : :
4311 : 1839544 : if (!base || !step
4312 : 1799653 : || TREE_CODE (step) != INTEGER_CST
4313 : 1634088 : || tree_contains_chrecs (base, NULL)
4314 : 3473632 : || chrec_contains_symbols_defined_in_loop (base, loop->num))
4315 : 205456 : return;
4316 : :
4317 : 1634088 : low = lower_bound_in_type (type, type);
4318 : 1634088 : high = upper_bound_in_type (type, type);
4319 : :
4320 : : /* In C, pointer arithmetic p + 1 cannot use a NULL pointer, and p - 1 cannot
4321 : : produce a NULL pointer. The contrary would mean NULL points to an object,
4322 : : while NULL is supposed to compare unequal with the address of all objects.
4323 : : Furthermore, p + 1 cannot produce a NULL pointer and p - 1 cannot use a
4324 : : NULL pointer since that would mean wrapping, which we assume here not to
4325 : : happen. So, we can exclude NULL from the valid range of pointer
4326 : : arithmetic. */
4327 : 1634088 : if (flag_delete_null_pointer_checks && int_cst_value (low) == 0)
4328 : 1634088 : low = build_int_cstu (TREE_TYPE (low), TYPE_ALIGN_UNIT (TREE_TYPE (type)));
4329 : :
4330 : 1634088 : record_nonwrapping_chrec (scev);
4331 : 1634088 : record_nonwrapping_iv (loop, base, step, stmt, low, high, false, true);
4332 : : }
4333 : :
4334 : : /* Determine information about number of iterations of a LOOP from the fact
4335 : : that signed arithmetics in STMT does not overflow. */
4336 : :
4337 : : static void
4338 : 97803636 : infer_loop_bounds_from_signedness (class loop *loop, gimple *stmt)
4339 : : {
4340 : 97803636 : tree def, base, step, scev, type, low, high;
4341 : :
4342 : 97803636 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
4343 : 93023467 : return;
4344 : :
4345 : 44264577 : def = gimple_assign_lhs (stmt);
4346 : :
4347 : 44264577 : if (TREE_CODE (def) != SSA_NAME)
4348 : : return;
4349 : :
4350 : 37574145 : type = TREE_TYPE (def);
4351 : 37574145 : if (!INTEGRAL_TYPE_P (type)
4352 : 37574145 : || !TYPE_OVERFLOW_UNDEFINED (type))
4353 : : return;
4354 : :
4355 : 12206376 : scev = instantiate_parameters (loop, analyze_scalar_evolution (loop, def));
4356 : 12206376 : if (chrec_contains_undetermined (scev))
4357 : : return;
4358 : :
4359 : 6064954 : base = initial_condition_in_loop_num (scev, loop->num);
4360 : 6064954 : step = evolution_part_in_loop_num (scev, loop->num);
4361 : :
4362 : 6064954 : if (!base || !step
4363 : 5369147 : || TREE_CODE (step) != INTEGER_CST
4364 : 4780169 : || tree_contains_chrecs (base, NULL)
4365 : 10845123 : || chrec_contains_symbols_defined_in_loop (base, loop->num))
4366 : 1284785 : return;
4367 : :
4368 : 4780169 : low = lower_bound_in_type (type, type);
4369 : 4780169 : high = upper_bound_in_type (type, type);
4370 : 4780169 : Value_Range r (TREE_TYPE (def));
4371 : 9560338 : get_range_query (cfun)->range_of_expr (r, def);
4372 : 4780169 : if (!r.varying_p () && !r.undefined_p ())
4373 : : {
4374 : 4036236 : low = wide_int_to_tree (type, r.lower_bound ());
4375 : 4036248 : high = wide_int_to_tree (type, r.upper_bound ());
4376 : : }
4377 : :
4378 : 4780169 : record_nonwrapping_chrec (scev);
4379 : 4780169 : record_nonwrapping_iv (loop, base, step, stmt, low, high, false, true);
4380 : 4780169 : }
4381 : :
4382 : : /* The following analyzers are extracting informations on the bounds
4383 : : of LOOP from the following undefined behaviors:
4384 : :
4385 : : - data references should not access elements over the statically
4386 : : allocated size,
4387 : :
4388 : : - signed variables should not overflow when flag_wrapv is not set.
4389 : : */
4390 : :
4391 : : static void
4392 : 5460519 : infer_loop_bounds_from_undefined (class loop *loop, basic_block *bbs)
4393 : : {
4394 : 5460519 : unsigned i;
4395 : 5460519 : gimple_stmt_iterator bsi;
4396 : 5460519 : basic_block bb;
4397 : 5460519 : bool reliable;
4398 : :
4399 : 38056390 : for (i = 0; i < loop->num_nodes; i++)
4400 : : {
4401 : 32595871 : bb = bbs[i];
4402 : :
4403 : : /* If BB is not executed in each iteration of the loop, we cannot
4404 : : use the operations in it to infer reliable upper bound on the
4405 : : # of iterations of the loop. However, we can use it as a guess.
4406 : : Reliable guesses come only from array bounds. */
4407 : 32595871 : reliable = dominated_by_p (CDI_DOMINATORS, loop->latch, bb);
4408 : :
4409 : 268693664 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4410 : : {
4411 : 203501922 : gimple *stmt = gsi_stmt (bsi);
4412 : :
4413 : 203501922 : infer_loop_bounds_from_array (loop, stmt);
4414 : :
4415 : 203501922 : if (reliable)
4416 : : {
4417 : 97803636 : infer_loop_bounds_from_signedness (loop, stmt);
4418 : 97803636 : infer_loop_bounds_from_pointer_arith (loop, stmt);
4419 : : }
4420 : : }
4421 : :
4422 : : }
4423 : 5460519 : }
4424 : :
4425 : : /* Compare wide ints, callback for qsort. */
4426 : :
4427 : : static int
4428 : 25047 : wide_int_cmp (const void *p1, const void *p2)
4429 : : {
4430 : 25047 : const bound_wide_int *d1 = (const bound_wide_int *) p1;
4431 : 25047 : const bound_wide_int *d2 = (const bound_wide_int *) p2;
4432 : 25047 : return wi::cmpu (*d1, *d2);
4433 : : }
4434 : :
4435 : : /* Return index of BOUND in BOUNDS array sorted in increasing order.
4436 : : Lookup by binary search. */
4437 : :
4438 : : static int
4439 : 6065 : bound_index (const vec<bound_wide_int> &bounds, const bound_wide_int &bound)
4440 : : {
4441 : 6065 : unsigned int end = bounds.length ();
4442 : : unsigned int begin = 0;
4443 : :
4444 : : /* Find a matching index by means of a binary search. */
4445 : 6990 : while (begin != end)
4446 : : {
4447 : 6990 : unsigned int middle = (begin + end) / 2;
4448 : 6990 : bound_wide_int index = bounds[middle];
4449 : :
4450 : 6990 : if (index == bound)
4451 : 6065 : return middle;
4452 : 925 : else if (wi::ltu_p (index, bound))
4453 : 303 : begin = middle + 1;
4454 : : else
4455 : : end = middle;
4456 : : }
4457 : 0 : gcc_unreachable ();
4458 : : }
4459 : :
4460 : : /* We recorded loop bounds only for statements dominating loop latch (and thus
4461 : : executed each loop iteration). If there are any bounds on statements not
4462 : : dominating the loop latch we can improve the estimate by walking the loop
4463 : : body and seeing if every path from loop header to loop latch contains
4464 : : some bounded statement. */
4465 : :
4466 : : static void
4467 : 5496122 : discover_iteration_bound_by_body_walk (class loop *loop)
4468 : : {
4469 : 5496122 : class nb_iter_bound *elt;
4470 : 5496122 : auto_vec<bound_wide_int> bounds;
4471 : 5496122 : vec<vec<basic_block> > queues = vNULL;
4472 : 5496122 : vec<basic_block> queue = vNULL;
4473 : 5496122 : ptrdiff_t queue_index;
4474 : 5496122 : ptrdiff_t latch_index = 0;
4475 : :
4476 : : /* Discover what bounds may interest us. */
4477 : 13990577 : for (elt = loop->bounds; elt; elt = elt->next)
4478 : : {
4479 : 8494455 : bound_wide_int bound = elt->bound;
4480 : :
4481 : : /* Exit terminates loop at given iteration, while non-exits produce undefined
4482 : : effect on the next iteration. */
4483 : 8494455 : if (!elt->is_exit)
4484 : : {
4485 : 4567860 : bound += 1;
4486 : : /* If an overflow occurred, ignore the result. */
4487 : 4567860 : if (bound == 0)
4488 : 0 : continue;
4489 : : }
4490 : :
4491 : 8494455 : if (!loop->any_upper_bound
4492 : 8494455 : || wi::ltu_p (bound, loop->nb_iterations_upper_bound))
4493 : 6065 : bounds.safe_push (bound);
4494 : : }
4495 : :
4496 : : /* Exit early if there is nothing to do. */
4497 : 5496122 : if (!bounds.exists ())
4498 : 5492797 : return;
4499 : :
4500 : 3325 : if (dump_file && (dump_flags & TDF_DETAILS))
4501 : 13 : fprintf (dump_file, " Trying to walk loop body to reduce the bound.\n");
4502 : :
4503 : : /* Sort the bounds in decreasing order. */
4504 : 3325 : bounds.qsort (wide_int_cmp);
4505 : :
4506 : : /* For every basic block record the lowest bound that is guaranteed to
4507 : : terminate the loop. */
4508 : :
4509 : 3325 : hash_map<basic_block, ptrdiff_t> bb_bounds;
4510 : 16689 : for (elt = loop->bounds; elt; elt = elt->next)
4511 : : {
4512 : 13364 : bound_wide_int bound = elt->bound;
4513 : 13364 : if (!elt->is_exit)
4514 : : {
4515 : 10034 : bound += 1;
4516 : : /* If an overflow occurred, ignore the result. */
4517 : 10034 : if (bound == 0)
4518 : 0 : continue;
4519 : : }
4520 : :
4521 : 13364 : if (!loop->any_upper_bound
4522 : 13364 : || wi::ltu_p (bound, loop->nb_iterations_upper_bound))
4523 : : {
4524 : 6065 : ptrdiff_t index = bound_index (bounds, bound);
4525 : 6065 : ptrdiff_t *entry = bb_bounds.get (gimple_bb (elt->stmt));
4526 : 6065 : if (!entry)
4527 : 4192 : bb_bounds.put (gimple_bb (elt->stmt), index);
4528 : 1873 : else if ((ptrdiff_t)*entry > index)
4529 : 143 : *entry = index;
4530 : : }
4531 : : }
4532 : :
4533 : 3325 : hash_map<basic_block, ptrdiff_t> block_priority;
4534 : :
4535 : : /* Perform shortest path discovery loop->header ... loop->latch.
4536 : :
4537 : : The "distance" is given by the smallest loop bound of basic block
4538 : : present in the path and we look for path with largest smallest bound
4539 : : on it.
4540 : :
4541 : : To avoid the need for fibonacci heap on double ints we simply compress
4542 : : double ints into indexes to BOUNDS array and then represent the queue
4543 : : as arrays of queues for every index.
4544 : : Index of BOUNDS.length() means that the execution of given BB has
4545 : : no bounds determined.
4546 : :
4547 : : VISITED is a pointer map translating basic block into smallest index
4548 : : it was inserted into the priority queue with. */
4549 : 3325 : latch_index = -1;
4550 : :
4551 : : /* Start walk in loop header with index set to infinite bound. */
4552 : 3325 : queue_index = bounds.length ();
4553 : 3325 : queues.safe_grow_cleared (queue_index + 1, true);
4554 : 3325 : queue.safe_push (loop->header);
4555 : 3325 : queues[queue_index] = queue;
4556 : 3325 : block_priority.put (loop->header, queue_index);
4557 : :
4558 : 12715 : for (; queue_index >= 0; queue_index--)
4559 : : {
4560 : 9390 : if (latch_index < queue_index)
4561 : : {
4562 : 31366 : while (queues[queue_index].length ())
4563 : : {
4564 : 27772 : basic_block bb;
4565 : 27772 : ptrdiff_t bound_index = queue_index;
4566 : 27772 : edge e;
4567 : 27772 : edge_iterator ei;
4568 : :
4569 : 27772 : queue = queues[queue_index];
4570 : 27772 : bb = queue.pop ();
4571 : :
4572 : : /* OK, we later inserted the BB with lower priority, skip it. */
4573 : 27772 : if (*block_priority.get (bb) > queue_index)
4574 : 0 : continue;
4575 : :
4576 : : /* See if we can improve the bound. */
4577 : 27772 : ptrdiff_t *entry = bb_bounds.get (bb);
4578 : 27772 : if (entry && *entry < bound_index)
4579 : 3648 : bound_index = *entry;
4580 : :
4581 : : /* Insert succesors into the queue, watch for latch edge
4582 : : and record greatest index we saw. */
4583 : 70906 : FOR_EACH_EDGE (e, ei, bb->succs)
4584 : : {
4585 : 43134 : bool insert = false;
4586 : :
4587 : 43134 : if (loop_exit_edge_p (loop, e))
4588 : 7882 : continue;
4589 : :
4590 : 35252 : if (e == loop_latch_edge (loop)
4591 : 35252 : && latch_index < bound_index)
4592 : : latch_index = bound_index;
4593 : 31927 : else if (!(entry = block_priority.get (e->dest)))
4594 : : {
4595 : 25624 : insert = true;
4596 : 25624 : block_priority.put (e->dest, bound_index);
4597 : : }
4598 : 6303 : else if (*entry < bound_index)
4599 : : {
4600 : 161 : insert = true;
4601 : 161 : *entry = bound_index;
4602 : : }
4603 : :
4604 : 25785 : if (insert)
4605 : 25785 : queues[bound_index].safe_push (e->dest);
4606 : : }
4607 : : }
4608 : : }
4609 : 9390 : queues[queue_index].release ();
4610 : : }
4611 : :
4612 : 3325 : gcc_assert (latch_index >= 0);
4613 : 3325 : if ((unsigned)latch_index < bounds.length ())
4614 : : {
4615 : 185 : if (dump_file && (dump_flags & TDF_DETAILS))
4616 : : {
4617 : 0 : fprintf (dump_file, "Found better loop bound ");
4618 : 0 : print_decu (bounds[latch_index], dump_file);
4619 : 0 : fprintf (dump_file, "\n");
4620 : : }
4621 : 185 : record_niter_bound (loop, widest_int::from (bounds[latch_index],
4622 : : SIGNED), false, true);
4623 : : }
4624 : :
4625 : 3325 : queues.release ();
4626 : 5496122 : }
4627 : :
4628 : : /* See if every path cross the loop goes through a statement that is known
4629 : : to not execute at the last iteration. In that case we can decrese iteration
4630 : : count by 1. */
4631 : :
4632 : : static void
4633 : 5496122 : maybe_lower_iteration_bound (class loop *loop)
4634 : : {
4635 : 5496122 : hash_set<gimple *> *not_executed_last_iteration = NULL;
4636 : 5496122 : class nb_iter_bound *elt;
4637 : 5496122 : bool found_exit = false;
4638 : 5496122 : auto_vec<basic_block> queue;
4639 : 5496122 : bitmap visited;
4640 : :
4641 : : /* Collect all statements with interesting (i.e. lower than
4642 : : nb_iterations_upper_bound) bound on them.
4643 : :
4644 : : TODO: Due to the way record_estimate choose estimates to store, the bounds
4645 : : will be always nb_iterations_upper_bound-1. We can change this to record
4646 : : also statements not dominating the loop latch and update the walk bellow
4647 : : to the shortest path algorithm. */
4648 : 13990577 : for (elt = loop->bounds; elt; elt = elt->next)
4649 : : {
4650 : 8494455 : if (!elt->is_exit
4651 : 8494455 : && wi::ltu_p (elt->bound, loop->nb_iterations_upper_bound))
4652 : : {
4653 : 1775966 : if (!not_executed_last_iteration)
4654 : 1023199 : not_executed_last_iteration = new hash_set<gimple *>;
4655 : 1775966 : not_executed_last_iteration->add (elt->stmt);
4656 : : }
4657 : : }
4658 : 5496122 : if (!not_executed_last_iteration)
4659 : 4472923 : return;
4660 : :
4661 : : /* Start DFS walk in the loop header and see if we can reach the
4662 : : loop latch or any of the exits (including statements with side
4663 : : effects that may terminate the loop otherwise) without visiting
4664 : : any of the statements known to have undefined effect on the last
4665 : : iteration. */
4666 : 1023199 : queue.safe_push (loop->header);
4667 : 1023199 : visited = BITMAP_ALLOC (NULL);
4668 : 1023199 : bitmap_set_bit (visited, loop->header->index);
4669 : 1023199 : found_exit = false;
4670 : :
4671 : 1090136 : do
4672 : : {
4673 : 1090136 : basic_block bb = queue.pop ();
4674 : 1090136 : gimple_stmt_iterator gsi;
4675 : 1090136 : bool stmt_found = false;
4676 : :
4677 : : /* Loop for possible exits and statements bounding the execution. */
4678 : 5307811 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4679 : : {
4680 : 3192282 : gimple *stmt = gsi_stmt (gsi);
4681 : 3192282 : if (not_executed_last_iteration->contains (stmt))
4682 : : {
4683 : : stmt_found = true;
4684 : 64743 : break;
4685 : : }
4686 : 3151107 : if (gimple_has_side_effects (stmt))
4687 : : {
4688 : : found_exit = true;
4689 : : break;
4690 : : }
4691 : : }
4692 : 1090136 : if (found_exit)
4693 : : break;
4694 : :
4695 : : /* If no bounding statement is found, continue the walk. */
4696 : 1066568 : if (!stmt_found)
4697 : : {
4698 : 1025393 : edge e;
4699 : 1025393 : edge_iterator ei;
4700 : :
4701 : 1766701 : FOR_EACH_EDGE (e, ei, bb->succs)
4702 : : {
4703 : 1704843 : if (loop_exit_edge_p (loop, e)
4704 : 1704843 : || e == loop_latch_edge (loop))
4705 : : {
4706 : : found_exit = true;
4707 : : break;
4708 : : }
4709 : 741308 : if (bitmap_set_bit (visited, e->dest->index))
4710 : 730114 : queue.safe_push (e->dest);
4711 : : }
4712 : : }
4713 : : }
4714 : 1066568 : while (queue.length () && !found_exit);
4715 : :
4716 : : /* If every path through the loop reach bounding statement before exit,
4717 : : then we know the last iteration of the loop will have undefined effect
4718 : : and we can decrease number of iterations. */
4719 : :
4720 : 1023199 : if (!found_exit)
4721 : : {
4722 : 36096 : if (dump_file && (dump_flags & TDF_DETAILS))
4723 : 64 : fprintf (dump_file, "Reducing loop iteration estimate by 1; "
4724 : : "undefined statement must be executed at the last iteration.\n");
4725 : 72192 : record_niter_bound (loop, widest_int::from (loop->nb_iterations_upper_bound,
4726 : 108288 : SIGNED) - 1,
4727 : : false, true);
4728 : : }
4729 : :
4730 : 1023199 : BITMAP_FREE (visited);
4731 : 1023199 : delete not_executed_last_iteration;
4732 : 5496122 : }
4733 : :
4734 : : /* Get expected upper bound for number of loop iterations for
4735 : : BUILT_IN_EXPECT_WITH_PROBABILITY for a condition COND. */
4736 : :
4737 : : static tree
4738 : 4312095 : get_upper_bound_based_on_builtin_expr_with_prob (gcond *cond)
4739 : : {
4740 : 4312095 : if (cond == NULL)
4741 : : return NULL_TREE;
4742 : :
4743 : 4158818 : tree lhs = gimple_cond_lhs (cond);
4744 : 4158818 : if (TREE_CODE (lhs) != SSA_NAME)
4745 : : return NULL_TREE;
4746 : :
4747 : 4144557 : gimple *stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (cond));
4748 : 4495189 : gcall *def = dyn_cast<gcall *> (stmt);
4749 : 183096 : if (def == NULL)
4750 : : return NULL_TREE;
4751 : :
4752 : 183096 : tree decl = gimple_call_fndecl (def);
4753 : 183096 : if (!decl
4754 : 156698 : || !fndecl_built_in_p (decl, BUILT_IN_EXPECT_WITH_PROBABILITY)
4755 : 183100 : || gimple_call_num_args (stmt) != 3)
4756 : : return NULL_TREE;
4757 : :
4758 : 4 : tree c = gimple_call_arg (def, 1);
4759 : 4 : tree condt = TREE_TYPE (lhs);
4760 : 4 : tree res = fold_build2 (gimple_cond_code (cond),
4761 : : condt, c,
4762 : : gimple_cond_rhs (cond));
4763 : 4 : if (TREE_CODE (res) != INTEGER_CST)
4764 : : return NULL_TREE;
4765 : :
4766 : :
4767 : 4 : tree prob = gimple_call_arg (def, 2);
4768 : 4 : tree t = TREE_TYPE (prob);
4769 : 4 : tree one
4770 : 8 : = build_real_from_int_cst (t,
4771 : 4 : integer_one_node);
4772 : 4 : if (integer_zerop (res))
4773 : 0 : prob = fold_build2 (MINUS_EXPR, t, one, prob);
4774 : 4 : tree r = fold_build2 (RDIV_EXPR, t, one, prob);
4775 : 4 : if (TREE_CODE (r) != REAL_CST)
4776 : : return NULL_TREE;
4777 : :
4778 : 2 : HOST_WIDE_INT probi
4779 : 2 : = real_to_integer (TREE_REAL_CST_PTR (r));
4780 : 2 : return build_int_cst (condt, probi);
4781 : : }
4782 : :
4783 : : /* Records estimates on numbers of iterations of LOOP. If USE_UNDEFINED_P
4784 : : is true also use estimates derived from undefined behavior. */
4785 : :
4786 : : void
4787 : 23095580 : estimate_numbers_of_iterations (class loop *loop)
4788 : : {
4789 : 23095580 : tree niter, type;
4790 : 23095580 : unsigned i;
4791 : 23095580 : class tree_niter_desc niter_desc;
4792 : 23095580 : edge ex;
4793 : 23095580 : widest_int bound;
4794 : 23095580 : edge likely_exit;
4795 : :
4796 : : /* Give up if we already have tried to compute an estimation. */
4797 : 23095580 : if (loop->estimate_state != EST_NOT_COMPUTED)
4798 : 17599458 : return;
4799 : :
4800 : 5496122 : if (dump_file && (dump_flags & TDF_DETAILS))
4801 : 10782 : fprintf (dump_file, "Estimating # of iterations of loop %d\n", loop->num);
4802 : :
4803 : 5496122 : loop->estimate_state = EST_AVAILABLE;
4804 : :
4805 : 5496122 : sreal nit;
4806 : 5496122 : bool reliable;
4807 : :
4808 : : /* If we have a measured profile, use it to estimate the number of
4809 : : iterations. Normally this is recorded by branch_prob right after
4810 : : reading the profile. In case we however found a new loop, record the
4811 : : information here.
4812 : :
4813 : : Explicitly check for profile status so we do not report
4814 : : wrong prediction hitrates for guessed loop iterations heuristics.
4815 : : Do not recompute already recorded bounds - we ought to be better on
4816 : : updating iteration bounds than updating profile in general and thus
4817 : : recomputing iteration bounds later in the compilation process will just
4818 : : introduce random roundoff errors. */
4819 : 5496122 : if (!loop->any_estimate
4820 : 3740680 : && expected_loop_iterations_by_profile (loop, &nit, &reliable)
4821 : 8441126 : && reliable)
4822 : : {
4823 : 17 : bound = nit.to_nearest_int ();
4824 : 17 : record_niter_bound (loop, bound, true, false);
4825 : : }
4826 : :
4827 : : /* Ensure that loop->nb_iterations is computed if possible. If it turns out
4828 : : to be constant, we avoid undefined behavior implied bounds and instead
4829 : : diagnose those loops with -Waggressive-loop-optimizations. */
4830 : 5496122 : number_of_latch_executions (loop);
4831 : :
4832 : 5496122 : basic_block *body = get_loop_body (loop);
4833 : 5496122 : auto_vec<edge> exits = get_loop_exit_edges (loop, body);
4834 : 5496122 : likely_exit = single_likely_exit (loop, exits);
4835 : 16069358 : FOR_EACH_VEC_ELT (exits, i, ex)
4836 : : {
4837 : 10573236 : if (ex == likely_exit)
4838 : : {
4839 : 4312095 : gimple *stmt = *gsi_last_bb (ex->src);
4840 : 4312095 : if (stmt != NULL)
4841 : : {
4842 : 4312095 : gcond *cond = dyn_cast<gcond *> (stmt);
4843 : 4312095 : tree niter_bound
4844 : 4312095 : = get_upper_bound_based_on_builtin_expr_with_prob (cond);
4845 : 4312095 : if (niter_bound != NULL_TREE)
4846 : : {
4847 : 2 : widest_int max = derive_constant_upper_bound (niter_bound);
4848 : 2 : record_estimate (loop, niter_bound, max, cond,
4849 : : true, true, false);
4850 : 2 : }
4851 : : }
4852 : : }
4853 : :
4854 : 10573236 : if (!number_of_iterations_exit (loop, ex, &niter_desc,
4855 : : false, false, body))
4856 : 6646641 : continue;
4857 : :
4858 : 3926595 : niter = niter_desc.niter;
4859 : 3926595 : type = TREE_TYPE (niter);
4860 : 3926595 : if (TREE_CODE (niter_desc.may_be_zero) != INTEGER_CST)
4861 : 659403 : niter = build3 (COND_EXPR, type, niter_desc.may_be_zero,
4862 : 659403 : build_int_cst (type, 0),
4863 : : niter);
4864 : 3926595 : record_estimate (loop, niter, niter_desc.max,
4865 : : last_nondebug_stmt (ex->src),
4866 : : true, ex == likely_exit, true);
4867 : 3926595 : record_control_iv (loop, &niter_desc);
4868 : : }
4869 : :
4870 : 5496122 : if (flag_aggressive_loop_optimizations)
4871 : 5460519 : infer_loop_bounds_from_undefined (loop, body);
4872 : 5496122 : free (body);
4873 : :
4874 : 5496122 : discover_iteration_bound_by_body_walk (loop);
4875 : :
4876 : 5496122 : maybe_lower_iteration_bound (loop);
4877 : :
4878 : : /* If we know the exact number of iterations of this loop, try to
4879 : : not break code with undefined behavior by not recording smaller
4880 : : maximum number of iterations. */
4881 : 5496122 : if (loop->nb_iterations
4882 : 5496122 : && TREE_CODE (loop->nb_iterations) == INTEGER_CST
4883 : 10992244 : && (wi::min_precision (wi::to_widest (loop->nb_iterations), SIGNED)
4884 : 1693328 : <= bound_wide_int ().get_precision ()))
4885 : : {
4886 : 1693328 : loop->any_upper_bound = true;
4887 : 1693328 : loop->nb_iterations_upper_bound
4888 : 1693328 : = bound_wide_int::from (wi::to_widest (loop->nb_iterations), SIGNED);
4889 : : }
4890 : 23095580 : }
4891 : :
4892 : : /* Sets NIT to the estimated number of executions of the latch of the
4893 : : LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
4894 : : large as the number of iterations. If we have no reliable estimate,
4895 : : the function returns false, otherwise returns true. */
4896 : :
4897 : : bool
4898 : 7769371 : estimated_loop_iterations (class loop *loop, widest_int *nit)
4899 : : {
4900 : : /* When SCEV information is available, try to update loop iterations
4901 : : estimate. Otherwise just return whatever we recorded earlier. */
4902 : 7769371 : if (scev_initialized_p ())
4903 : 7769371 : estimate_numbers_of_iterations (loop);
4904 : :
4905 : 7769371 : return (get_estimated_loop_iterations (loop, nit));
4906 : : }
4907 : :
4908 : : /* Similar to estimated_loop_iterations, but returns the estimate only
4909 : : if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
4910 : : on the number of iterations of LOOP could not be derived, returns -1. */
4911 : :
4912 : : HOST_WIDE_INT
4913 : 7537330 : estimated_loop_iterations_int (class loop *loop)
4914 : : {
4915 : 7537330 : widest_int nit;
4916 : 7537330 : HOST_WIDE_INT hwi_nit;
4917 : :
4918 : 7537330 : if (!estimated_loop_iterations (loop, &nit))
4919 : : return -1;
4920 : :
4921 : 3285934 : if (!wi::fits_shwi_p (nit))
4922 : : return -1;
4923 : 3285926 : hwi_nit = nit.to_shwi ();
4924 : :
4925 : 3285926 : return hwi_nit < 0 ? -1 : hwi_nit;
4926 : 7537330 : }
4927 : :
4928 : :
4929 : : /* Sets NIT to an upper bound for the maximum number of executions of the
4930 : : latch of the LOOP. If we have no reliable estimate, the function returns
4931 : : false, otherwise returns true. */
4932 : :
4933 : : bool
4934 : 8346883 : max_loop_iterations (class loop *loop, widest_int *nit)
4935 : : {
4936 : : /* When SCEV information is available, try to update loop iterations
4937 : : estimate. Otherwise just return whatever we recorded earlier. */
4938 : 8346883 : if (scev_initialized_p ())
4939 : 8334374 : estimate_numbers_of_iterations (loop);
4940 : :
4941 : 8346883 : return get_max_loop_iterations (loop, nit);
4942 : : }
4943 : :
4944 : : /* Similar to max_loop_iterations, but returns the estimate only
4945 : : if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
4946 : : on the number of iterations of LOOP could not be derived, returns -1. */
4947 : :
4948 : : HOST_WIDE_INT
4949 : 1894806 : max_loop_iterations_int (class loop *loop)
4950 : : {
4951 : 1894806 : widest_int nit;
4952 : 1894806 : HOST_WIDE_INT hwi_nit;
4953 : :
4954 : 1894806 : if (!max_loop_iterations (loop, &nit))
4955 : : return -1;
4956 : :
4957 : 1414395 : if (!wi::fits_shwi_p (nit))
4958 : : return -1;
4959 : 1351621 : hwi_nit = nit.to_shwi ();
4960 : :
4961 : 1351621 : return hwi_nit < 0 ? -1 : hwi_nit;
4962 : 1894806 : }
4963 : :
4964 : : /* Sets NIT to an likely upper bound for the maximum number of executions of the
4965 : : latch of the LOOP. If we have no reliable estimate, the function returns
4966 : : false, otherwise returns true. */
4967 : :
4968 : : bool
4969 : 316663 : likely_max_loop_iterations (class loop *loop, widest_int *nit)
4970 : : {
4971 : : /* When SCEV information is available, try to update loop iterations
4972 : : estimate. Otherwise just return whatever we recorded earlier. */
4973 : 316663 : if (scev_initialized_p ())
4974 : 316663 : estimate_numbers_of_iterations (loop);
4975 : :
4976 : 316663 : return get_likely_max_loop_iterations (loop, nit);
4977 : : }
4978 : :
4979 : : /* Similar to max_loop_iterations, but returns the estimate only
4980 : : if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
4981 : : on the number of iterations of LOOP could not be derived, returns -1. */
4982 : :
4983 : : HOST_WIDE_INT
4984 : 91037 : likely_max_loop_iterations_int (class loop *loop)
4985 : : {
4986 : 91037 : widest_int nit;
4987 : 91037 : HOST_WIDE_INT hwi_nit;
4988 : :
4989 : 91037 : if (!likely_max_loop_iterations (loop, &nit))
4990 : : return -1;
4991 : :
4992 : 72710 : if (!wi::fits_shwi_p (nit))
4993 : : return -1;
4994 : 66328 : hwi_nit = nit.to_shwi ();
4995 : :
4996 : 66328 : return hwi_nit < 0 ? -1 : hwi_nit;
4997 : 91037 : }
4998 : :
4999 : : /* Returns an estimate for the number of executions of statements
5000 : : in the LOOP. For statements before the loop exit, this exceeds
5001 : : the number of execution of the latch by one. */
5002 : :
5003 : : HOST_WIDE_INT
5004 : 7397023 : estimated_stmt_executions_int (class loop *loop)
5005 : : {
5006 : 7397023 : HOST_WIDE_INT nit = estimated_loop_iterations_int (loop);
5007 : 7397023 : HOST_WIDE_INT snit;
5008 : :
5009 : 7397023 : if (nit == -1)
5010 : : return -1;
5011 : :
5012 : 3236943 : snit = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) nit + 1);
5013 : :
5014 : : /* If the computation overflows, return -1. */
5015 : 3236943 : return snit < 0 ? -1 : snit;
5016 : : }
5017 : :
5018 : : /* Sets NIT to the maximum number of executions of the latch of the
5019 : : LOOP, plus one. If we have no reliable estimate, the function returns
5020 : : false, otherwise returns true. */
5021 : :
5022 : : bool
5023 : 155 : max_stmt_executions (class loop *loop, widest_int *nit)
5024 : : {
5025 : 155 : widest_int nit_minus_one;
5026 : :
5027 : 155 : if (!max_loop_iterations (loop, nit))
5028 : : return false;
5029 : :
5030 : 155 : nit_minus_one = *nit;
5031 : :
5032 : 155 : *nit += 1;
5033 : :
5034 : 155 : return wi::gtu_p (*nit, nit_minus_one);
5035 : 155 : }
5036 : :
5037 : : /* Sets NIT to the estimated maximum number of executions of the latch of the
5038 : : LOOP, plus one. If we have no likely estimate, the function returns
5039 : : false, otherwise returns true. */
5040 : :
5041 : : bool
5042 : 225626 : likely_max_stmt_executions (class loop *loop, widest_int *nit)
5043 : : {
5044 : 225626 : widest_int nit_minus_one;
5045 : :
5046 : 225626 : if (!likely_max_loop_iterations (loop, nit))
5047 : : return false;
5048 : :
5049 : 126164 : nit_minus_one = *nit;
5050 : :
5051 : 126164 : *nit += 1;
5052 : :
5053 : 126164 : return wi::gtu_p (*nit, nit_minus_one);
5054 : 225626 : }
5055 : :
5056 : : /* Sets NIT to the estimated number of executions of the latch of the
5057 : : LOOP, plus one. If we have no reliable estimate, the function returns
5058 : : false, otherwise returns true. */
5059 : :
5060 : : bool
5061 : 232041 : estimated_stmt_executions (class loop *loop, widest_int *nit)
5062 : : {
5063 : 232041 : widest_int nit_minus_one;
5064 : :
5065 : 232041 : if (!estimated_loop_iterations (loop, nit))
5066 : : return false;
5067 : :
5068 : 5406 : nit_minus_one = *nit;
5069 : :
5070 : 5406 : *nit += 1;
5071 : :
5072 : 5406 : return wi::gtu_p (*nit, nit_minus_one);
5073 : 232041 : }
5074 : :
5075 : : /* Records estimates on numbers of iterations of loops. */
5076 : :
5077 : : void
5078 : 1107640 : estimate_numbers_of_iterations (function *fn)
5079 : : {
5080 : : /* We don't want to issue signed overflow warnings while getting
5081 : : loop iteration estimates. */
5082 : 1107640 : fold_defer_overflow_warnings ();
5083 : :
5084 : 6446115 : for (auto loop : loops_list (fn, 0))
5085 : 3123195 : estimate_numbers_of_iterations (loop);
5086 : :
5087 : 1107640 : fold_undefer_and_ignore_overflow_warnings ();
5088 : 1107640 : }
5089 : :
5090 : : /* Returns true if statement S1 dominates statement S2. */
5091 : :
5092 : : bool
5093 : 2074693 : stmt_dominates_stmt_p (gimple *s1, gimple *s2)
5094 : : {
5095 : 2074693 : basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
5096 : :
5097 : 2074693 : if (!bb1
5098 : 2074693 : || s1 == s2)
5099 : : return true;
5100 : :
5101 : 2067701 : if (bb1 == bb2)
5102 : : {
5103 : 807582 : gimple_stmt_iterator bsi;
5104 : :
5105 : 807582 : if (gimple_code (s2) == GIMPLE_PHI)
5106 : : return false;
5107 : :
5108 : 515286 : if (gimple_code (s1) == GIMPLE_PHI)
5109 : : return true;
5110 : :
5111 : 19384243 : for (bsi = gsi_start_bb (bb1); gsi_stmt (bsi) != s2; gsi_next (&bsi))
5112 : 18609853 : if (gsi_stmt (bsi) == s1)
5113 : : return true;
5114 : :
5115 : : return false;
5116 : : }
5117 : :
5118 : 1260119 : return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
5119 : : }
5120 : :
5121 : : /* Returns true when we can prove that the number of executions of
5122 : : STMT in the loop is at most NITER, according to the bound on
5123 : : the number of executions of the statement NITER_BOUND->stmt recorded in
5124 : : NITER_BOUND and fact that NITER_BOUND->stmt dominate STMT.
5125 : :
5126 : : ??? This code can become quite a CPU hog - we can have many bounds,
5127 : : and large basic block forcing stmt_dominates_stmt_p to be queried
5128 : : many times on a large basic blocks, so the whole thing is O(n^2)
5129 : : for scev_probably_wraps_p invocation (that can be done n times).
5130 : :
5131 : : It would make more sense (and give better answers) to remember BB
5132 : : bounds computed by discover_iteration_bound_by_body_walk. */
5133 : :
5134 : : static bool
5135 : 2242852 : n_of_executions_at_most (gimple *stmt,
5136 : : class nb_iter_bound *niter_bound,
5137 : : tree niter)
5138 : : {
5139 : 2242852 : widest_int bound = widest_int::from (niter_bound->bound, SIGNED);
5140 : 2242852 : tree nit_type = TREE_TYPE (niter), e;
5141 : 2242852 : enum tree_code cmp;
5142 : :
5143 : 2242852 : gcc_assert (TYPE_UNSIGNED (nit_type));
5144 : :
5145 : : /* If the bound does not even fit into NIT_TYPE, it cannot tell us that
5146 : : the number of iterations is small. */
5147 : 2242852 : if (!wi::fits_to_tree_p (bound, nit_type))
5148 : : return false;
5149 : :
5150 : : /* We know that NITER_BOUND->stmt is executed at most NITER_BOUND->bound + 1
5151 : : times. This means that:
5152 : :
5153 : : -- if NITER_BOUND->is_exit is true, then everything after
5154 : : it at most NITER_BOUND->bound times.
5155 : :
5156 : : -- If NITER_BOUND->is_exit is false, then if we can prove that when STMT
5157 : : is executed, then NITER_BOUND->stmt is executed as well in the same
5158 : : iteration then STMT is executed at most NITER_BOUND->bound + 1 times.
5159 : :
5160 : : If we can determine that NITER_BOUND->stmt is always executed
5161 : : after STMT, then STMT is executed at most NITER_BOUND->bound + 2 times.
5162 : : We conclude that if both statements belong to the same
5163 : : basic block and STMT is before NITER_BOUND->stmt and there are no
5164 : : statements with side effects in between. */
5165 : :
5166 : 2074693 : if (niter_bound->is_exit)
5167 : : {
5168 : 755854 : if (stmt == niter_bound->stmt
5169 : 755854 : || !stmt_dominates_stmt_p (niter_bound->stmt, stmt))
5170 : 665103 : return false;
5171 : : cmp = GE_EXPR;
5172 : : }
5173 : : else
5174 : : {
5175 : 1318839 : if (!stmt_dominates_stmt_p (niter_bound->stmt, stmt))
5176 : : {
5177 : 545817 : gimple_stmt_iterator bsi;
5178 : 545817 : if (gimple_bb (stmt) != gimple_bb (niter_bound->stmt)
5179 : 220848 : || gimple_code (stmt) == GIMPLE_PHI
5180 : 729424 : || gimple_code (niter_bound->stmt) == GIMPLE_PHI)
5181 : 366927 : return false;
5182 : :
5183 : : /* By stmt_dominates_stmt_p we already know that STMT appears
5184 : : before NITER_BOUND->STMT. Still need to test that the loop
5185 : : cannot be terinated by a side effect in between. */
5186 : 7864191 : for (bsi = gsi_for_stmt (stmt); gsi_stmt (bsi) != niter_bound->stmt;
5187 : 7680584 : gsi_next (&bsi))
5188 : 7681383 : if (gimple_has_side_effects (gsi_stmt (bsi)))
5189 : : return false;
5190 : 182808 : bound += 1;
5191 : 361698 : if (bound == 0
5192 : 182808 : || !wi::fits_to_tree_p (bound, nit_type))
5193 : 3918 : return false;
5194 : : }
5195 : : cmp = GT_EXPR;
5196 : : }
5197 : :
5198 : 1042663 : e = fold_binary (cmp, boolean_type_node,
5199 : : niter, wide_int_to_tree (nit_type, bound));
5200 : 1042663 : return e && integer_nonzerop (e);
5201 : 2242852 : }
5202 : :
5203 : : /* Returns true if the arithmetics in TYPE can be assumed not to wrap. */
5204 : :
5205 : : bool
5206 : 67573187 : nowrap_type_p (tree type)
5207 : : {
5208 : 8833817 : if (ANY_INTEGRAL_TYPE_P (type)
5209 : 67573187 : && TYPE_OVERFLOW_UNDEFINED (type))
5210 : : return true;
5211 : :
5212 : 33848793 : if (POINTER_TYPE_P (type))
5213 : : return true;
5214 : :
5215 : : return false;
5216 : : }
5217 : :
5218 : : /* Return true if we can prove LOOP is exited before evolution of induction
5219 : : variable {BASE, STEP} overflows with respect to its type bound. */
5220 : :
5221 : : static bool
5222 : 2924351 : loop_exits_before_overflow (tree base, tree step,
5223 : : gimple *at_stmt, class loop *loop)
5224 : : {
5225 : 2924351 : widest_int niter;
5226 : 2924351 : struct control_iv *civ;
5227 : 2924351 : class nb_iter_bound *bound;
5228 : 2924351 : tree e, delta, step_abs, unsigned_base;
5229 : 2924351 : tree type = TREE_TYPE (step);
5230 : 2924351 : tree unsigned_type, valid_niter;
5231 : :
5232 : : /* Don't issue signed overflow warnings. */
5233 : 2924351 : fold_defer_overflow_warnings ();
5234 : :
5235 : : /* Compute the number of iterations before we reach the bound of the
5236 : : type, and verify that the loop is exited before this occurs. */
5237 : 2924351 : unsigned_type = unsigned_type_for (type);
5238 : 2924351 : unsigned_base = fold_convert (unsigned_type, base);
5239 : :
5240 : 2924351 : if (tree_int_cst_sign_bit (step))
5241 : : {
5242 : 394447 : tree extreme = fold_convert (unsigned_type,
5243 : : lower_bound_in_type (type, type));
5244 : 394447 : delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme);
5245 : 394447 : step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
5246 : : fold_convert (unsigned_type, step));
5247 : : }
5248 : : else
5249 : : {
5250 : 2529904 : tree extreme = fold_convert (unsigned_type,
5251 : : upper_bound_in_type (type, type));
5252 : 2529904 : delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base);
5253 : 2529904 : step_abs = fold_convert (unsigned_type, step);
5254 : : }
5255 : :
5256 : 2924351 : valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
5257 : :
5258 : 2924351 : estimate_numbers_of_iterations (loop);
5259 : :
5260 : 2924351 : if (max_loop_iterations (loop, &niter)
5261 : 2519890 : && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
5262 : 2344811 : && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
5263 : : wide_int_to_tree (TREE_TYPE (valid_niter),
5264 : : niter))) != NULL
5265 : 5051453 : && integer_nonzerop (e))
5266 : : {
5267 : 988753 : fold_undefer_and_ignore_overflow_warnings ();
5268 : 988753 : return true;
5269 : : }
5270 : 1935598 : if (at_stmt)
5271 : 3415851 : for (bound = loop->bounds; bound; bound = bound->next)
5272 : : {
5273 : 2242852 : if (n_of_executions_at_most (at_stmt, bound, valid_niter))
5274 : : {
5275 : 11369 : fold_undefer_and_ignore_overflow_warnings ();
5276 : 11369 : return true;
5277 : : }
5278 : : }
5279 : 1924229 : fold_undefer_and_ignore_overflow_warnings ();
5280 : :
5281 : : /* Try to prove loop is exited before {base, step} overflows with the
5282 : : help of analyzed loop control IV. This is done only for IVs with
5283 : : constant step because otherwise we don't have the information. */
5284 : 1924229 : if (TREE_CODE (step) == INTEGER_CST)
5285 : : {
5286 : 2837648 : for (civ = loop->control_ivs; civ; civ = civ->next)
5287 : : {
5288 : 1128783 : enum tree_code code;
5289 : 1128783 : tree civ_type = TREE_TYPE (civ->step);
5290 : :
5291 : : /* Have to consider type difference because operand_equal_p ignores
5292 : : that for constants. */
5293 : 1128783 : if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type)
5294 : 1128783 : || element_precision (type) != element_precision (civ_type))
5295 : 629663 : continue;
5296 : :
5297 : : /* Only consider control IV with same step. */
5298 : 499120 : if (!operand_equal_p (step, civ->step, 0))
5299 : 152993 : continue;
5300 : :
5301 : : /* Done proving if this is a no-overflow control IV. */
5302 : 346127 : if (operand_equal_p (base, civ->base, 0))
5303 : : return true;
5304 : :
5305 : : /* Control IV is recorded after expanding simple operations,
5306 : : Here we expand base and compare it too. */
5307 : 193477 : tree expanded_base = expand_simple_operations (base);
5308 : 193477 : if (operand_equal_p (expanded_base, civ->base, 0))
5309 : : return true;
5310 : :
5311 : : /* If this is a before stepping control IV, in other words, we have
5312 : :
5313 : : {civ_base, step} = {base + step, step}
5314 : :
5315 : : Because civ {base + step, step} doesn't overflow during loop
5316 : : iterations, {base, step} will not overflow if we can prove the
5317 : : operation "base + step" does not overflow. Specifically, we try
5318 : : to prove below conditions are satisfied:
5319 : :
5320 : : base <= UPPER_BOUND (type) - step ;;step > 0
5321 : : base >= LOWER_BOUND (type) - step ;;step < 0
5322 : :
5323 : : by proving the reverse conditions are false using loop's initial
5324 : : condition. */
5325 : 150576 : if (POINTER_TYPE_P (TREE_TYPE (base)))
5326 : : code = POINTER_PLUS_EXPR;
5327 : : else
5328 : : code = PLUS_EXPR;
5329 : :
5330 : 150576 : tree stepped = fold_build2 (code, TREE_TYPE (base), base, step);
5331 : 150576 : tree expanded_stepped = fold_build2 (code, TREE_TYPE (base),
5332 : : expanded_base, step);
5333 : 150576 : if (operand_equal_p (stepped, civ->base, 0)
5334 : 150576 : || operand_equal_p (expanded_stepped, civ->base, 0))
5335 : : {
5336 : 31908 : tree extreme;
5337 : :
5338 : 31908 : if (tree_int_cst_sign_bit (step))
5339 : : {
5340 : 6469 : code = LT_EXPR;
5341 : 6469 : extreme = lower_bound_in_type (type, type);
5342 : : }
5343 : : else
5344 : : {
5345 : 25439 : code = GT_EXPR;
5346 : 25439 : extreme = upper_bound_in_type (type, type);
5347 : : }
5348 : 31908 : extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
5349 : 31908 : e = fold_build2 (code, boolean_type_node, base, extreme);
5350 : 31908 : e = simplify_using_initial_conditions (loop, e);
5351 : 31908 : if (integer_zerop (e))
5352 : : return true;
5353 : : }
5354 : : }
5355 : : }
5356 : :
5357 : : return false;
5358 : 2924351 : }
5359 : :
5360 : : /* VAR is scev variable whose evolution part is constant STEP, this function
5361 : : proves that VAR can't overflow by using value range info. If VAR's value
5362 : : range is [MIN, MAX], it can be proven by:
5363 : : MAX + step doesn't overflow ; if step > 0
5364 : : or
5365 : : MIN + step doesn't underflow ; if step < 0.
5366 : :
5367 : : We can only do this if var is computed in every loop iteration, i.e, var's
5368 : : definition has to dominate loop latch. Consider below example:
5369 : :
5370 : : {
5371 : : unsigned int i;
5372 : :
5373 : : <bb 3>:
5374 : :
5375 : : <bb 4>:
5376 : : # RANGE [0, 4294967294] NONZERO 65535
5377 : : # i_21 = PHI <0(3), i_18(9)>
5378 : : if (i_21 != 0)
5379 : : goto <bb 6>;
5380 : : else
5381 : : goto <bb 8>;
5382 : :
5383 : : <bb 6>:
5384 : : # RANGE [0, 65533] NONZERO 65535
5385 : : _6 = i_21 + 4294967295;
5386 : : # RANGE [0, 65533] NONZERO 65535
5387 : : _7 = (long unsigned int) _6;
5388 : : # RANGE [0, 524264] NONZERO 524280
5389 : : _8 = _7 * 8;
5390 : : # PT = nonlocal escaped
5391 : : _9 = a_14 + _8;
5392 : : *_9 = 0;
5393 : :
5394 : : <bb 8>:
5395 : : # RANGE [1, 65535] NONZERO 65535
5396 : : i_18 = i_21 + 1;
5397 : : if (i_18 >= 65535)
5398 : : goto <bb 10>;
5399 : : else
5400 : : goto <bb 9>;
5401 : :
5402 : : <bb 9>:
5403 : : goto <bb 4>;
5404 : :
5405 : : <bb 10>:
5406 : : return;
5407 : : }
5408 : :
5409 : : VAR _6 doesn't overflow only with pre-condition (i_21 != 0), here we
5410 : : can't use _6 to prove no-overlfow for _7. In fact, var _7 takes value
5411 : : sequence (4294967295, 0, 1, ..., 65533) in loop life time, rather than
5412 : : (4294967295, 4294967296, ...). */
5413 : :
5414 : : static bool
5415 : 211489 : scev_var_range_cant_overflow (tree var, tree step, class loop *loop)
5416 : : {
5417 : 211489 : tree type;
5418 : 211489 : wide_int minv, maxv, diff, step_wi;
5419 : :
5420 : 211489 : if (TREE_CODE (step) != INTEGER_CST || !INTEGRAL_TYPE_P (TREE_TYPE (var)))
5421 : : return false;
5422 : :
5423 : : /* Check if VAR evaluates in every loop iteration. It's not the case
5424 : : if VAR is default definition or does not dominate loop's latch. */
5425 : 211489 : basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
5426 : 211489 : if (!def_bb || !dominated_by_p (CDI_DOMINATORS, loop->latch, def_bb))
5427 : 1894 : return false;
5428 : :
5429 : 209595 : Value_Range r (TREE_TYPE (var));
5430 : 419190 : get_range_query (cfun)->range_of_expr (r, var);
5431 : 209595 : if (r.varying_p () || r.undefined_p ())
5432 : : return false;
5433 : :
5434 : : /* VAR is a scev whose evolution part is STEP and value range info
5435 : : is [MIN, MAX], we can prove its no-overflowness by conditions:
5436 : :
5437 : : type_MAX - MAX >= step ; if step > 0
5438 : : MIN - type_MIN >= |step| ; if step < 0.
5439 : :
5440 : : Or VAR must take value outside of value range, which is not true. */
5441 : 65794 : step_wi = wi::to_wide (step);
5442 : 65794 : type = TREE_TYPE (var);
5443 : 65794 : if (tree_int_cst_sign_bit (step))
5444 : : {
5445 : 6621 : diff = r.lower_bound () - wi::to_wide (lower_bound_in_type (type, type));
5446 : 6621 : step_wi = - step_wi;
5447 : : }
5448 : : else
5449 : 59173 : diff = wi::to_wide (upper_bound_in_type (type, type)) - r.upper_bound ();
5450 : :
5451 : 65794 : return (wi::geu_p (diff, step_wi));
5452 : 421084 : }
5453 : :
5454 : : /* Return false only when the induction variable BASE + STEP * I is
5455 : : known to not overflow: i.e. when the number of iterations is small
5456 : : enough with respect to the step and initial condition in order to
5457 : : keep the evolution confined in TYPEs bounds. Return true when the
5458 : : iv is known to overflow or when the property is not computable.
5459 : :
5460 : : USE_OVERFLOW_SEMANTICS is true if this function should assume that
5461 : : the rules for overflow of the given language apply (e.g., that signed
5462 : : arithmetics in C does not overflow).
5463 : :
5464 : : If VAR is a ssa variable, this function also returns false if VAR can
5465 : : be proven not overflow with value range info. */
5466 : :
5467 : : bool
5468 : 7495123 : scev_probably_wraps_p (tree var, tree base, tree step,
5469 : : gimple *at_stmt, class loop *loop,
5470 : : bool use_overflow_semantics)
5471 : : {
5472 : : /* FIXME: We really need something like
5473 : : http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
5474 : :
5475 : : We used to test for the following situation that frequently appears
5476 : : during address arithmetics:
5477 : :
5478 : : D.1621_13 = (long unsigned intD.4) D.1620_12;
5479 : : D.1622_14 = D.1621_13 * 8;
5480 : : D.1623_15 = (doubleD.29 *) D.1622_14;
5481 : :
5482 : : And derived that the sequence corresponding to D_14
5483 : : can be proved to not wrap because it is used for computing a
5484 : : memory access; however, this is not really the case -- for example,
5485 : : if D_12 = (unsigned char) [254,+,1], then D_14 has values
5486 : : 2032, 2040, 0, 8, ..., but the code is still legal. */
5487 : :
5488 : 7495123 : if (chrec_contains_undetermined (base)
5489 : 7495123 : || chrec_contains_undetermined (step))
5490 : 0 : return true;
5491 : :
5492 : 7495123 : if (integer_zerop (step))
5493 : : return false;
5494 : :
5495 : : /* If we can use the fact that signed and pointer arithmetics does not
5496 : : wrap, we are done. */
5497 : 7495123 : if (use_overflow_semantics && nowrap_type_p (TREE_TYPE (base)))
5498 : : return false;
5499 : :
5500 : : /* To be able to use estimates on number of iterations of the loop,
5501 : : we must have an upper bound on the absolute value of the step. */
5502 : 3349993 : if (TREE_CODE (step) != INTEGER_CST)
5503 : : return true;
5504 : :
5505 : : /* Check if var can be proven not overflow with value range info. */
5506 : 212684 : if (var && TREE_CODE (var) == SSA_NAME
5507 : 3192594 : && scev_var_range_cant_overflow (var, step, loop))
5508 : : return false;
5509 : :
5510 : 2924351 : if (loop_exits_before_overflow (base, step, at_stmt, loop))
5511 : : return false;
5512 : :
5513 : : /* Check the nonwrapping flag, which may be set by niter analysis (e.g., the
5514 : : above loop exits before overflow). */
5515 : 1708865 : if (var && nonwrapping_chrec_p (analyze_scalar_evolution (loop, var)))
5516 : : return false;
5517 : :
5518 : : /* At this point we still don't have a proof that the iv does not
5519 : : overflow: give up. */
5520 : : return true;
5521 : : }
5522 : :
5523 : : /* Frees the information on upper bounds on numbers of iterations of LOOP. */
5524 : :
5525 : : void
5526 : 48069357 : free_numbers_of_iterations_estimates (class loop *loop)
5527 : : {
5528 : 48069357 : struct control_iv *civ;
5529 : 48069357 : class nb_iter_bound *bound;
5530 : :
5531 : 48069357 : loop->nb_iterations = NULL;
5532 : 48069357 : loop->estimate_state = EST_NOT_COMPUTED;
5533 : 56344988 : for (bound = loop->bounds; bound;)
5534 : : {
5535 : 8275631 : class nb_iter_bound *next = bound->next;
5536 : 8275631 : ggc_free (bound);
5537 : 8275631 : bound = next;
5538 : : }
5539 : 48069357 : loop->bounds = NULL;
5540 : :
5541 : 51753256 : for (civ = loop->control_ivs; civ;)
5542 : : {
5543 : 3683899 : struct control_iv *next = civ->next;
5544 : 3683899 : ggc_free (civ);
5545 : 3683899 : civ = next;
5546 : : }
5547 : 48069357 : loop->control_ivs = NULL;
5548 : 48069357 : }
5549 : :
5550 : : /* Frees the information on upper bounds on numbers of iterations of loops. */
5551 : :
5552 : : void
5553 : 66224539 : free_numbers_of_iterations_estimates (function *fn)
5554 : : {
5555 : 246079140 : for (auto loop : loops_list (fn, 0))
5556 : 47405523 : free_numbers_of_iterations_estimates (loop);
5557 : 66224539 : }
5558 : :
5559 : : /* Substitute value VAL for ssa name NAME inside expressions held
5560 : : at LOOP. */
5561 : :
5562 : : void
5563 : 65886578 : substitute_in_loop_info (class loop *loop, tree name, tree val)
5564 : : {
5565 : 65886578 : loop->nb_iterations = simplify_replace_tree (loop->nb_iterations, name, val);
5566 : 65886578 : }
|