Line data Source code
1 : /* Data references and dependences detectors.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 : Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : /* This pass walks a given loop structure searching for array
22 : references. The information about the array accesses is recorded
23 : in DATA_REFERENCE structures.
24 :
25 : The basic test for determining the dependences is:
26 : given two access functions chrec1 and chrec2 to a same array, and
27 : x and y two vectors from the iteration domain, the same element of
28 : the array is accessed twice at iterations x and y if and only if:
29 : | chrec1 (x) == chrec2 (y).
30 :
31 : The goals of this analysis are:
32 :
33 : - to determine the independence: the relation between two
34 : independent accesses is qualified with the chrec_known (this
35 : information allows a loop parallelization),
36 :
37 : - when two data references access the same data, to qualify the
38 : dependence relation with classic dependence representations:
39 :
40 : - distance vectors
41 : - direction vectors
42 : - loop carried level dependence
43 : - polyhedron dependence
44 : or with the chains of recurrences based representation,
45 :
46 : - to define a knowledge base for storing the data dependence
47 : information,
48 :
49 : - to define an interface to access this data.
50 :
51 :
52 : Definitions:
53 :
54 : - subscript: given two array accesses a subscript is the tuple
55 : composed of the access functions for a given dimension. Example:
56 : Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 : (f1, g1), (f2, g2), (f3, g3).
58 :
59 : - Diophantine equation: an equation whose coefficients and
60 : solutions are integer constants, for example the equation
61 : | 3*x + 2*y = 1
62 : has an integer solution x = 1 and y = -1.
63 :
64 : References:
65 :
66 : - "Advanced Compilation for High Performance Computing" by Randy
67 : Allen and Ken Kennedy.
68 : http://citeseer.ist.psu.edu/goff91practical.html
69 :
70 : - "Loop Transformations for Restructuring Compilers - The Foundations"
71 : by Utpal Banerjee.
72 :
73 :
74 : */
75 :
76 : #define INCLUDE_ALGORITHM
77 : #include "config.h"
78 : #include "system.h"
79 : #include "coretypes.h"
80 : #include "backend.h"
81 : #include "rtl.h"
82 : #include "tree.h"
83 : #include "gimple.h"
84 : #include "gimple-pretty-print.h"
85 : #include "alias.h"
86 : #include "fold-const.h"
87 : #include "expr.h"
88 : #include "gimple-iterator.h"
89 : #include "tree-ssa-loop-niter.h"
90 : #include "tree-ssa-loop.h"
91 : #include "tree-ssa.h"
92 : #include "cfgloop.h"
93 : #include "tree-data-ref.h"
94 : #include "tree-scalar-evolution.h"
95 : #include "dumpfile.h"
96 : #include "tree-affine.h"
97 : #include "builtins.h"
98 : #include "tree-eh.h"
99 : #include "ssa.h"
100 : #include "internal-fn.h"
101 : #include "vr-values.h"
102 : #include "range-op.h"
103 : #include "tree-ssa-loop-ivopts.h"
104 : #include "calls.h"
105 :
106 : static struct datadep_stats
107 : {
108 : int num_dependence_tests;
109 : int num_dependence_dependent;
110 : int num_dependence_independent;
111 : int num_dependence_undetermined;
112 :
113 : int num_subscript_tests;
114 : int num_subscript_undetermined;
115 : int num_same_subscript_function;
116 :
117 : int num_ziv;
118 : int num_ziv_independent;
119 : int num_ziv_dependent;
120 : int num_ziv_unimplemented;
121 :
122 : int num_siv;
123 : int num_siv_independent;
124 : int num_siv_dependent;
125 : int num_siv_unimplemented;
126 :
127 : int num_miv;
128 : int num_miv_independent;
129 : int num_miv_dependent;
130 : int num_miv_unimplemented;
131 : } dependence_stats;
132 :
133 : static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
134 : unsigned int, unsigned int,
135 : class loop *);
136 : /* Returns true iff A divides B. */
137 :
138 : static inline bool
139 2030 : tree_fold_divides_p (const_tree a, const_tree b)
140 : {
141 2030 : gcc_assert (TREE_CODE (a) == INTEGER_CST);
142 2030 : gcc_assert (TREE_CODE (b) == INTEGER_CST);
143 2030 : return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
144 : }
145 :
146 : /* Returns true iff A divides B. */
147 :
148 : static inline bool
149 1687238 : int_divides_p (lambda_int a, lambda_int b)
150 : {
151 1687238 : return ((b % a) == 0);
152 : }
153 :
154 : /* Return true if reference REF contains a union access. */
155 :
156 : static bool
157 437728 : ref_contains_union_access_p (tree ref)
158 : {
159 484163 : while (handled_component_p (ref))
160 : {
161 99880 : ref = TREE_OPERAND (ref, 0);
162 199760 : if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
163 99880 : || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
164 : return true;
165 : }
166 : return false;
167 : }
168 :
169 :
170 :
171 : /* Dump into FILE all the data references from DATAREFS. */
172 :
173 : static void
174 0 : dump_data_references (FILE *file, vec<data_reference_p> datarefs)
175 : {
176 0 : for (data_reference *dr : datarefs)
177 0 : dump_data_reference (file, dr);
178 0 : }
179 :
180 : /* Unified dump into FILE all the data references from DATAREFS. */
181 :
182 : DEBUG_FUNCTION void
183 0 : debug (vec<data_reference_p> &ref)
184 : {
185 0 : dump_data_references (stderr, ref);
186 0 : }
187 :
188 : DEBUG_FUNCTION void
189 0 : debug (vec<data_reference_p> *ptr)
190 : {
191 0 : if (ptr)
192 0 : debug (*ptr);
193 : else
194 0 : fprintf (stderr, "<nil>\n");
195 0 : }
196 :
197 :
198 : /* Dump into STDERR all the data references from DATAREFS. */
199 :
200 : DEBUG_FUNCTION void
201 0 : debug_data_references (vec<data_reference_p> datarefs)
202 : {
203 0 : dump_data_references (stderr, datarefs);
204 0 : }
205 :
206 : /* Print to STDERR the data_reference DR. */
207 :
208 : DEBUG_FUNCTION void
209 0 : debug_data_reference (struct data_reference *dr)
210 : {
211 0 : dump_data_reference (stderr, dr);
212 0 : }
213 :
214 : /* Dump function for a DATA_REFERENCE structure. */
215 :
216 : void
217 3480 : dump_data_reference (FILE *outf,
218 : struct data_reference *dr)
219 : {
220 3480 : unsigned int i;
221 :
222 3480 : fprintf (outf, "#(Data Ref: \n");
223 3480 : fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
224 3480 : fprintf (outf, "# stmt: ");
225 3480 : print_gimple_stmt (outf, DR_STMT (dr), 0);
226 3480 : fprintf (outf, "# ref: ");
227 3480 : print_generic_stmt (outf, DR_REF (dr));
228 3480 : fprintf (outf, "# base_object: ");
229 3480 : print_generic_stmt (outf, DR_BASE_OBJECT (dr));
230 :
231 10786 : for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
232 : {
233 3826 : fprintf (outf, "# Access function %d: ", i);
234 3826 : print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
235 : }
236 3480 : fprintf (outf, "#)\n");
237 3480 : }
238 :
239 : /* Unified dump function for a DATA_REFERENCE structure. */
240 :
241 : DEBUG_FUNCTION void
242 0 : debug (data_reference &ref)
243 : {
244 0 : dump_data_reference (stderr, &ref);
245 0 : }
246 :
247 : DEBUG_FUNCTION void
248 0 : debug (data_reference *ptr)
249 : {
250 0 : if (ptr)
251 0 : debug (*ptr);
252 : else
253 0 : fprintf (stderr, "<nil>\n");
254 0 : }
255 :
256 :
257 : /* Dumps the affine function described by FN to the file OUTF. */
258 :
259 : DEBUG_FUNCTION void
260 32646 : dump_affine_function (FILE *outf, affine_fn fn)
261 : {
262 32646 : unsigned i;
263 32646 : tree coef;
264 :
265 32646 : print_generic_expr (outf, fn[0], TDF_SLIM);
266 68986 : for (i = 1; fn.iterate (i, &coef); i++)
267 : {
268 3694 : fprintf (outf, " + ");
269 3694 : print_generic_expr (outf, coef, TDF_SLIM);
270 3694 : fprintf (outf, " * x_%u", i);
271 : }
272 32646 : }
273 :
274 : /* Dumps the conflict function CF to the file OUTF. */
275 :
276 : DEBUG_FUNCTION void
277 159100 : dump_conflict_function (FILE *outf, conflict_function *cf)
278 : {
279 159100 : unsigned i;
280 :
281 159100 : if (cf->n == NO_DEPENDENCE)
282 120316 : fprintf (outf, "no dependence");
283 38784 : else if (cf->n == NOT_KNOWN)
284 6138 : fprintf (outf, "not known");
285 : else
286 : {
287 65292 : for (i = 0; i < cf->n; i++)
288 : {
289 32646 : if (i != 0)
290 0 : fprintf (outf, " ");
291 32646 : fprintf (outf, "[");
292 32646 : dump_affine_function (outf, cf->fns[i]);
293 32646 : fprintf (outf, "]");
294 : }
295 : }
296 159100 : }
297 :
298 : /* Dump function for a SUBSCRIPT structure. */
299 :
300 : DEBUG_FUNCTION void
301 838 : dump_subscript (FILE *outf, struct subscript *subscript)
302 : {
303 838 : conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
304 :
305 838 : fprintf (outf, "\n (subscript \n");
306 838 : fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
307 838 : dump_conflict_function (outf, cf);
308 838 : if (CF_NONTRIVIAL_P (cf))
309 : {
310 838 : tree last_iteration = SUB_LAST_CONFLICT (subscript);
311 838 : fprintf (outf, "\n last_conflict: ");
312 838 : print_generic_expr (outf, last_iteration);
313 : }
314 :
315 838 : cf = SUB_CONFLICTS_IN_B (subscript);
316 838 : fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
317 838 : dump_conflict_function (outf, cf);
318 838 : if (CF_NONTRIVIAL_P (cf))
319 : {
320 838 : tree last_iteration = SUB_LAST_CONFLICT (subscript);
321 838 : fprintf (outf, "\n last_conflict: ");
322 838 : print_generic_expr (outf, last_iteration);
323 : }
324 :
325 838 : fprintf (outf, "\n (Subscript distance: ");
326 838 : print_generic_expr (outf, SUB_DISTANCE (subscript));
327 838 : fprintf (outf, " ))\n");
328 838 : }
329 :
330 : /* Print the classic direction vector DIRV to OUTF. */
331 :
332 : DEBUG_FUNCTION void
333 777 : print_direction_vector (FILE *outf,
334 : lambda_vector dirv,
335 : int length)
336 : {
337 777 : int eq;
338 :
339 1683 : for (eq = 0; eq < length; eq++)
340 : {
341 906 : enum data_dependence_direction dir = ((enum data_dependence_direction)
342 906 : dirv[eq]);
343 :
344 906 : switch (dir)
345 : {
346 139 : case dir_positive:
347 139 : fprintf (outf, " +");
348 139 : break;
349 6 : case dir_negative:
350 6 : fprintf (outf, " -");
351 6 : break;
352 761 : case dir_equal:
353 761 : fprintf (outf, " =");
354 761 : break;
355 0 : case dir_positive_or_equal:
356 0 : fprintf (outf, " +=");
357 0 : break;
358 0 : case dir_positive_or_negative:
359 0 : fprintf (outf, " +-");
360 0 : break;
361 0 : case dir_negative_or_equal:
362 0 : fprintf (outf, " -=");
363 0 : break;
364 0 : case dir_star:
365 0 : fprintf (outf, " *");
366 0 : break;
367 0 : default:
368 0 : fprintf (outf, "indep");
369 0 : break;
370 : }
371 : }
372 777 : fprintf (outf, "\n");
373 777 : }
374 :
375 : /* Print a vector of direction vectors. */
376 :
377 : DEBUG_FUNCTION void
378 0 : print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
379 : int length)
380 : {
381 0 : for (lambda_vector v : dir_vects)
382 0 : print_direction_vector (outf, v, length);
383 0 : }
384 :
385 : /* Print out a vector VEC of length N to OUTFILE. */
386 :
387 : DEBUG_FUNCTION void
388 4840 : print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 : {
390 4840 : int i;
391 :
392 10092 : for (i = 0; i < n; i++)
393 5252 : fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
394 4840 : fprintf (outfile, "\n");
395 4840 : }
396 :
397 : /* Print a vector of distance vectors. */
398 :
399 : DEBUG_FUNCTION void
400 0 : print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
401 : int length)
402 : {
403 0 : for (lambda_vector v : dist_vects)
404 0 : print_lambda_vector (outf, v, length);
405 0 : }
406 :
407 : /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
408 :
409 : DEBUG_FUNCTION void
410 1582 : dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
411 : {
412 1582 : struct data_reference *dra, *drb;
413 :
414 1582 : fprintf (outf, "(Data Dep: \n");
415 :
416 1582 : if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
417 : {
418 399 : if (ddr)
419 : {
420 399 : dra = DDR_A (ddr);
421 399 : drb = DDR_B (ddr);
422 399 : if (dra)
423 399 : dump_data_reference (outf, dra);
424 : else
425 0 : fprintf (outf, " (nil)\n");
426 399 : if (drb)
427 399 : dump_data_reference (outf, drb);
428 : else
429 0 : fprintf (outf, " (nil)\n");
430 : }
431 399 : fprintf (outf, " (don't know)\n)\n");
432 399 : return;
433 : }
434 :
435 1183 : dra = DDR_A (ddr);
436 1183 : drb = DDR_B (ddr);
437 1183 : dump_data_reference (outf, dra);
438 1183 : dump_data_reference (outf, drb);
439 :
440 1183 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
441 426 : fprintf (outf, " (no dependence)\n");
442 :
443 757 : else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
444 : {
445 : unsigned int i;
446 : class loop *loopi;
447 :
448 : subscript *sub;
449 1595 : FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
450 : {
451 838 : fprintf (outf, " access_fn_A: ");
452 838 : print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
453 838 : fprintf (outf, " access_fn_B: ");
454 838 : print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
455 838 : dump_subscript (outf, sub);
456 : }
457 :
458 757 : fprintf (outf, " loop nest: (");
459 2374 : FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
460 860 : fprintf (outf, "%d ", loopi->num);
461 757 : fprintf (outf, ")\n");
462 :
463 3820 : for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
464 : {
465 777 : fprintf (outf, " distance_vector: ");
466 777 : print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
467 1554 : DDR_NB_LOOPS (ddr));
468 : }
469 :
470 1534 : for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
471 : {
472 777 : fprintf (outf, " direction_vector: ");
473 777 : print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
474 1554 : DDR_NB_LOOPS (ddr));
475 : }
476 : }
477 :
478 1183 : fprintf (outf, ")\n");
479 : }
480 :
481 : /* Debug version. */
482 :
483 : DEBUG_FUNCTION void
484 0 : debug_data_dependence_relation (const struct data_dependence_relation *ddr)
485 : {
486 0 : dump_data_dependence_relation (stderr, ddr);
487 0 : }
488 :
489 : /* Dump into FILE all the dependence relations from DDRS. */
490 :
491 : DEBUG_FUNCTION void
492 307 : dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
493 : {
494 2473 : for (auto ddr : ddrs)
495 1582 : dump_data_dependence_relation (file, ddr);
496 307 : }
497 :
498 : DEBUG_FUNCTION void
499 0 : debug (vec<ddr_p> &ref)
500 : {
501 0 : dump_data_dependence_relations (stderr, ref);
502 0 : }
503 :
504 : DEBUG_FUNCTION void
505 0 : debug (vec<ddr_p> *ptr)
506 : {
507 0 : if (ptr)
508 0 : debug (*ptr);
509 : else
510 0 : fprintf (stderr, "<nil>\n");
511 0 : }
512 :
513 :
514 : /* Dump to STDERR all the dependence relations from DDRS. */
515 :
516 : DEBUG_FUNCTION void
517 0 : debug_data_dependence_relations (vec<ddr_p> ddrs)
518 : {
519 0 : dump_data_dependence_relations (stderr, ddrs);
520 0 : }
521 :
522 : /* Dumps the distance and direction vectors in FILE. DDRS contains
523 : the dependence relations, and VECT_SIZE is the size of the
524 : dependence vectors, or in other words the number of loops in the
525 : considered nest. */
526 :
527 : DEBUG_FUNCTION void
528 0 : dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
529 : {
530 0 : for (data_dependence_relation *ddr : ddrs)
531 0 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
532 : {
533 0 : for (lambda_vector v : DDR_DIST_VECTS (ddr))
534 : {
535 0 : fprintf (file, "DISTANCE_V (");
536 0 : print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
537 0 : fprintf (file, ")\n");
538 : }
539 :
540 0 : for (lambda_vector v : DDR_DIR_VECTS (ddr))
541 : {
542 0 : fprintf (file, "DIRECTION_V (");
543 0 : print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
544 0 : fprintf (file, ")\n");
545 : }
546 : }
547 :
548 0 : fprintf (file, "\n\n");
549 0 : }
550 :
551 : /* Dumps the data dependence relations DDRS in FILE. */
552 :
553 : DEBUG_FUNCTION void
554 0 : dump_ddrs (FILE *file, vec<ddr_p> ddrs)
555 : {
556 0 : for (data_dependence_relation *ddr : ddrs)
557 0 : dump_data_dependence_relation (file, ddr);
558 :
559 0 : fprintf (file, "\n\n");
560 0 : }
561 :
562 : DEBUG_FUNCTION void
563 0 : debug_ddrs (vec<ddr_p> ddrs)
564 : {
565 0 : dump_ddrs (stderr, ddrs);
566 0 : }
567 :
568 : /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
569 : OP0 CODE OP1, where:
570 :
571 : - OP0 CODE OP1 has integral type TYPE
572 : - the range of OP0 is given by OP0_RANGE and
573 : - the range of OP1 is given by OP1_RANGE.
574 :
575 : Independently of RESULT_RANGE, try to compute:
576 :
577 : DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
578 : - (sizetype) (OP0 CODE OP1)
579 :
580 : as a constant and subtract DELTA from the ssizetype constant in *OFF.
581 : Return true on success, or false if DELTA is not known at compile time.
582 :
583 : Truncation and sign changes are known to distribute over CODE, i.e.
584 :
585 : (itype) (A CODE B) == (itype) A CODE (itype) B
586 :
587 : for any integral type ITYPE whose precision is no greater than the
588 : precision of A and B. */
589 :
590 : static bool
591 4597765 : compute_distributive_range (tree type, irange &op0_range,
592 : tree_code code, irange &op1_range,
593 : tree *off, irange *result_range)
594 : {
595 4597765 : gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
596 4597765 : if (result_range)
597 : {
598 1066354 : range_op_handler op (code);
599 1066354 : if (!op.fold_range (*result_range, type, op0_range, op1_range))
600 0 : result_range->set_varying (type);
601 : }
602 :
603 : /* The distributive property guarantees that if TYPE is no narrower
604 : than SIZETYPE,
605 :
606 : (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
607 :
608 : and so we can treat DELTA as zero. */
609 4597765 : if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
610 : return true;
611 :
612 : /* If overflow is undefined, we can assume that:
613 :
614 : X == (ssizetype) OP0 CODE (ssizetype) OP1
615 :
616 : is within the range of TYPE, i.e.:
617 :
618 : X == (ssizetype) (TYPE) X
619 :
620 : Distributing the (TYPE) truncation over X gives:
621 :
622 : X == (ssizetype) (OP0 CODE OP1)
623 :
624 : Casting both sides to sizetype and distributing the sizetype cast
625 : over X gives:
626 :
627 : (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
628 :
629 : and so we can treat DELTA as zero. */
630 269147 : if (TYPE_OVERFLOW_UNDEFINED (type))
631 : return true;
632 :
633 : /* Compute the range of:
634 :
635 : (ssizetype) OP0 CODE (ssizetype) OP1
636 :
637 : The distributive property guarantees that this has the same bitpattern as:
638 :
639 : (sizetype) OP0 CODE (sizetype) OP1
640 :
641 : but its range is more conducive to analysis. */
642 107514 : range_cast (op0_range, ssizetype);
643 107514 : range_cast (op1_range, ssizetype);
644 107514 : int_range_max wide_range;
645 107514 : range_op_handler op (code);
646 107514 : bool saved_flag_wrapv = flag_wrapv;
647 107514 : flag_wrapv = 1;
648 107514 : if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
649 0 : wide_range.set_varying (ssizetype);;
650 107514 : flag_wrapv = saved_flag_wrapv;
651 107514 : if (wide_range.num_pairs () != 1
652 107514 : || wide_range.varying_p () || wide_range.undefined_p ())
653 : return false;
654 :
655 87011 : wide_int lb = wide_range.lower_bound ();
656 87011 : wide_int ub = wide_range.upper_bound ();
657 :
658 : /* Calculate the number of times that each end of the range overflows or
659 : underflows TYPE. We can only calculate DELTA if the numbers match. */
660 87011 : unsigned int precision = TYPE_PRECISION (type);
661 87011 : if (!TYPE_UNSIGNED (type))
662 : {
663 210 : wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
664 210 : lb -= type_min;
665 210 : ub -= type_min;
666 210 : }
667 87011 : wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
668 87011 : lb &= upper_bits;
669 87011 : ub &= upper_bits;
670 87011 : if (lb != ub)
671 : return false;
672 :
673 : /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
674 : negative values indicating underflow. The low PRECISION bits of LB
675 : are clear, so DELTA is therefore LB (== UB). */
676 26922 : *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
677 26922 : return true;
678 107514 : }
679 :
680 : /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
681 : given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and
682 : FROM_TYPE are integral types. */
683 :
684 : static bool
685 2629585 : nop_conversion_for_offset_p (tree to_type, tree from_type, irange &range)
686 : {
687 2629585 : gcc_assert (INTEGRAL_TYPE_P (to_type)
688 : && INTEGRAL_TYPE_P (from_type)
689 : && !TYPE_OVERFLOW_TRAPS (to_type)
690 : && !TYPE_OVERFLOW_TRAPS (from_type));
691 :
692 : /* Converting to something no narrower than sizetype and then to sizetype
693 : is equivalent to converting directly to sizetype. */
694 2629585 : if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
695 : return true;
696 :
697 : /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */
698 91788 : if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
699 91788 : && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
700 : return true;
701 :
702 : /* For narrowing conversions, we could in principle test whether
703 : the bits in FROM_TYPE but not in TO_TYPE have a fixed value
704 : and apply a constant adjustment.
705 :
706 : For other conversions (which involve a sign change) we could
707 : check that the signs are always equal, and apply a constant
708 : adjustment if the signs are negative.
709 :
710 : However, both cases should be rare. */
711 75742 : return range_fits_type_p (&range, TYPE_PRECISION (to_type),
712 151484 : TYPE_SIGN (to_type));
713 : }
714 :
715 : static void
716 : split_constant_offset (tree type, tree *var, tree *off,
717 : irange *result_range,
718 : hash_map<tree, std::pair<tree, tree> > &cache,
719 : unsigned *limit);
720 :
721 : /* Helper function for split_constant_offset. If TYPE is a pointer type,
722 : try to express OP0 CODE OP1 as:
723 :
724 : POINTER_PLUS <*VAR, (sizetype) *OFF>
725 :
726 : where:
727 :
728 : - *VAR has type TYPE
729 : - *OFF is a constant of type ssizetype.
730 :
731 : If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
732 :
733 : *VAR + (sizetype) *OFF
734 :
735 : where:
736 :
737 : - *VAR has type sizetype
738 : - *OFF is a constant of type ssizetype.
739 :
740 : In both cases, OP0 CODE OP1 has type TYPE.
741 :
742 : Return true on success. A false return value indicates that we can't
743 : do better than set *OFF to zero.
744 :
745 : When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
746 : if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
747 :
748 : CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
749 : visited. LIMIT counts down the number of SSA names that we are
750 : allowed to process before giving up. */
751 :
752 : static bool
753 58162505 : split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
754 : tree *var, tree *off, irange *result_range,
755 : hash_map<tree, std::pair<tree, tree> > &cache,
756 : unsigned *limit)
757 : {
758 58162505 : tree var0, var1;
759 58162505 : tree off0, off1;
760 58162505 : int_range_max op0_range, op1_range;
761 :
762 58162505 : *var = NULL_TREE;
763 58162505 : *off = NULL_TREE;
764 :
765 58162505 : if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
766 : return false;
767 :
768 58161871 : if (TREE_CODE (op0) == SSA_NAME
769 58161871 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
770 : return false;
771 58161398 : if (op1
772 7504773 : && TREE_CODE (op1) == SSA_NAME
773 60503404 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
774 : return false;
775 :
776 58161398 : switch (code)
777 : {
778 17294784 : case INTEGER_CST:
779 17294784 : *var = size_int (0);
780 17294784 : *off = fold_convert (ssizetype, op0);
781 17294784 : if (result_range)
782 : {
783 1446296 : wide_int w = wi::to_wide (op0);
784 1446296 : result_range->set (TREE_TYPE (op0), w, w);
785 1446296 : }
786 : return true;
787 :
788 2111108 : case POINTER_PLUS_EXPR:
789 2111108 : split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
790 2111108 : split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
791 2111108 : *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
792 2111108 : *off = size_binop (PLUS_EXPR, off0, off1);
793 2111108 : return true;
794 :
795 2466403 : case PLUS_EXPR:
796 2466403 : case MINUS_EXPR:
797 2466403 : split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
798 2466403 : split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
799 2466403 : *off = size_binop (code, off0, off1);
800 2466403 : if (!compute_distributive_range (type, op0_range, code, op1_range,
801 : off, result_range))
802 : return false;
803 2406085 : *var = fold_build2 (code, sizetype, var0, var1);
804 2406085 : return true;
805 :
806 2579818 : case MULT_EXPR:
807 2579818 : if (TREE_CODE (op1) != INTEGER_CST)
808 : return false;
809 :
810 2131362 : split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
811 2131362 : op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
812 2131362 : *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
813 2131362 : if (!compute_distributive_range (type, op0_range, code, op1_range,
814 : off, result_range))
815 : return false;
816 2111088 : *var = fold_build2 (MULT_EXPR, sizetype, var0,
817 : fold_convert (sizetype, op1));
818 2111088 : return true;
819 :
820 10164554 : case ADDR_EXPR:
821 10164554 : {
822 10164554 : tree base, poffset;
823 10164554 : poly_int64 pbitsize, pbitpos, pbytepos;
824 10164554 : machine_mode pmode;
825 10164554 : int punsignedp, preversep, pvolatilep;
826 :
827 10164554 : op0 = TREE_OPERAND (op0, 0);
828 10164554 : base
829 10164554 : = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
830 : &punsignedp, &preversep, &pvolatilep);
831 :
832 10190436 : if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
833 : return false;
834 10164554 : base = build_fold_addr_expr (base);
835 10164554 : off0 = ssize_int (pbytepos);
836 :
837 10164554 : if (poffset)
838 : {
839 1615 : split_constant_offset (poffset, &poffset, &off1, nullptr,
840 : cache, limit);
841 1615 : off0 = size_binop (PLUS_EXPR, off0, off1);
842 1615 : base = fold_build_pointer_plus (base, poffset);
843 : }
844 :
845 10164554 : var0 = fold_convert (type, base);
846 :
847 : /* If variable length types are involved, punt, otherwise casts
848 : might be converted into ARRAY_REFs in gimplify_conversion.
849 : To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
850 : possibly no longer appears in current GIMPLE, might resurface.
851 : This perhaps could run
852 : if (CONVERT_EXPR_P (var0))
853 : {
854 : gimplify_conversion (&var0);
855 : // Attempt to fill in any within var0 found ARRAY_REF's
856 : // element size from corresponding op embedded ARRAY_REF,
857 : // if unsuccessful, just punt.
858 : } */
859 20731702 : while (POINTER_TYPE_P (type))
860 10567148 : type = TREE_TYPE (type);
861 10164554 : if (int_size_in_bytes (type) < 0)
862 : return false;
863 :
864 10138672 : *var = var0;
865 10138672 : *off = off0;
866 10138672 : return true;
867 : }
868 :
869 15789388 : case SSA_NAME:
870 15789388 : {
871 15789388 : gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
872 15789388 : enum tree_code subcode;
873 :
874 15789388 : if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
875 : return false;
876 :
877 8613280 : subcode = gimple_assign_rhs_code (def_stmt);
878 :
879 : /* We are using a cache to avoid un-CSEing large amounts of code. */
880 8613280 : bool use_cache = false;
881 8613280 : if (!has_single_use (op0)
882 8613280 : && (subcode == POINTER_PLUS_EXPR
883 4436049 : || subcode == PLUS_EXPR
884 : || subcode == MINUS_EXPR
885 : || subcode == MULT_EXPR
886 : || subcode == ADDR_EXPR
887 : || CONVERT_EXPR_CODE_P (subcode)))
888 : {
889 2152383 : use_cache = true;
890 2152383 : bool existed;
891 2152383 : std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
892 2152383 : if (existed)
893 : {
894 31818 : if (integer_zerop (e.second))
895 31818 : return false;
896 1194 : *var = e.first;
897 1194 : *off = e.second;
898 : /* The caller sets the range in this case. */
899 1194 : return true;
900 : }
901 2120565 : e = std::make_pair (op0, ssize_int (0));
902 : }
903 :
904 8581462 : if (*limit == 0)
905 : return false;
906 8580426 : --*limit;
907 :
908 8580426 : var0 = gimple_assign_rhs1 (def_stmt);
909 8580426 : var1 = gimple_assign_rhs2 (def_stmt);
910 :
911 8580426 : bool res = split_constant_offset_1 (type, var0, subcode, var1,
912 : var, off, nullptr, cache, limit);
913 8580426 : if (res && use_cache)
914 1895263 : *cache.get (op0) = std::make_pair (*var, *off);
915 : /* The caller sets the range in this case. */
916 : return res;
917 : }
918 4352430 : CASE_CONVERT:
919 4352430 : {
920 : /* We can only handle the following conversions:
921 :
922 : - Conversions from one pointer type to another pointer type.
923 :
924 : - Conversions from one non-trapping integral type to another
925 : non-trapping integral type. In this case, the recursive
926 : call makes sure that:
927 :
928 : (sizetype) OP0
929 :
930 : can be expressed as a sizetype operation involving VAR and OFF,
931 : and all we need to do is check whether:
932 :
933 : (sizetype) OP0 == (sizetype) (TYPE) OP0
934 :
935 : - Conversions from a non-trapping sizetype-size integral type to
936 : a like-sized pointer type. In this case, the recursive call
937 : makes sure that:
938 :
939 : (sizetype) OP0 == *VAR + (sizetype) *OFF
940 :
941 : and we can convert that to:
942 :
943 : POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
944 :
945 : - Conversions from a sizetype-sized pointer type to a like-sized
946 : non-trapping integral type. In this case, the recursive call
947 : makes sure that:
948 :
949 : OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
950 :
951 : where the POINTER_PLUS and *VAR have the same precision as
952 : TYPE (and the same precision as sizetype). Then:
953 :
954 : (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF. */
955 4352430 : tree itype = TREE_TYPE (op0);
956 4352430 : if ((POINTER_TYPE_P (itype)
957 3197259 : || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
958 4351994 : && (POINTER_TYPE_P (type)
959 3142039 : || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
960 8704424 : && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
961 1079692 : || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
962 1079692 : && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
963 : {
964 4351983 : if (POINTER_TYPE_P (type))
965 : {
966 1209944 : split_constant_offset (op0, var, off, nullptr, cache, limit);
967 1209944 : *var = fold_convert (type, *var);
968 : }
969 3142039 : else if (POINTER_TYPE_P (itype))
970 : {
971 512454 : split_constant_offset (op0, var, off, nullptr, cache, limit);
972 512454 : *var = fold_convert (sizetype, *var);
973 : }
974 : else
975 : {
976 2629585 : split_constant_offset (op0, var, off, &op0_range,
977 : cache, limit);
978 2629585 : if (!nop_conversion_for_offset_p (type, itype, op0_range))
979 : return false;
980 2569991 : if (result_range)
981 : {
982 1339968 : *result_range = op0_range;
983 1339968 : range_cast (*result_range, type);
984 : }
985 : }
986 4292389 : return true;
987 : }
988 : return false;
989 : }
990 :
991 : default:
992 : return false;
993 : }
994 58162505 : }
995 :
996 : /* If EXP has pointer type, try to express it as:
997 :
998 : POINTER_PLUS <*VAR, (sizetype) *OFF>
999 :
1000 : where:
1001 :
1002 : - *VAR has the same type as EXP
1003 : - *OFF is a constant of type ssizetype.
1004 :
1005 : If EXP has an integral type, try to express (sizetype) EXP as:
1006 :
1007 : *VAR + (sizetype) *OFF
1008 :
1009 : where:
1010 :
1011 : - *VAR has type sizetype
1012 : - *OFF is a constant of type ssizetype.
1013 :
1014 : If EXP_RANGE is nonnull, set it to the range of EXP.
1015 :
1016 : CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1017 : visited. LIMIT counts down the number of SSA names that we are
1018 : allowed to process before giving up. */
1019 :
1020 : static void
1021 49582101 : split_constant_offset (tree exp, tree *var, tree *off, irange *exp_range,
1022 : hash_map<tree, std::pair<tree, tree> > &cache,
1023 : unsigned *limit)
1024 : {
1025 49582101 : tree type = TREE_TYPE (exp), op0, op1;
1026 49582101 : enum tree_code code;
1027 :
1028 49582101 : code = TREE_CODE (exp);
1029 49582101 : if (exp_range)
1030 : {
1031 9693753 : exp_range->set_varying (type);
1032 9693753 : if (code == SSA_NAME)
1033 : {
1034 5328781 : int_range_max vr;
1035 10657562 : get_range_query (cfun)->range_of_expr (vr, exp);
1036 5328781 : if (vr.undefined_p ())
1037 4733 : vr.set_varying (TREE_TYPE (exp));
1038 5328781 : tree vr_min, vr_max;
1039 5328781 : value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
1040 5328781 : wide_int var_min = wi::to_wide (vr_min);
1041 5328781 : wide_int var_max = wi::to_wide (vr_max);
1042 5328781 : wide_int var_nonzero = get_nonzero_bits (exp);
1043 15986343 : vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1044 : &var_min, &var_max,
1045 : var_nonzero,
1046 5328781 : TYPE_SIGN (type));
1047 : /* This check for VR_VARYING is here because the old code
1048 : using get_range_info would return VR_RANGE for the entire
1049 : domain, instead of VR_VARYING. The new code normalizes
1050 : full-domain ranges to VR_VARYING. */
1051 5328781 : if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1052 5211650 : exp_range->set (type, var_min, var_max);
1053 5328781 : }
1054 : }
1055 :
1056 49582101 : if (!tree_is_chrec (exp)
1057 49582095 : && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1058 : {
1059 49582079 : extract_ops_from_tree (exp, &code, &op0, &op1);
1060 49582079 : if (split_constant_offset_1 (type, op0, code, op1, var, off,
1061 : exp_range, cache, limit))
1062 38355320 : return;
1063 : }
1064 :
1065 11226781 : *var = exp;
1066 11226781 : if (INTEGRAL_TYPE_P (type))
1067 3453720 : *var = fold_convert (sizetype, *var);
1068 11226781 : *off = ssize_int (0);
1069 :
1070 11226781 : int_range_max r;
1071 3141712 : if (exp_range && code != SSA_NAME
1072 105522 : && get_range_query (cfun)->range_of_expr (r, exp)
1073 11279542 : && !r.undefined_p ())
1074 52761 : *exp_range = r;
1075 11226781 : }
1076 :
1077 : /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same
1078 : type as EXP while OFF has type ssizetype. */
1079 :
1080 : void
1081 33942119 : split_constant_offset (tree exp, tree *var, tree *off)
1082 : {
1083 33942119 : unsigned limit = param_ssa_name_def_chain_limit;
1084 33942119 : static hash_map<tree, std::pair<tree, tree> > *cache;
1085 33942119 : if (!cache)
1086 79178 : cache = new hash_map<tree, std::pair<tree, tree> > (37);
1087 33942119 : split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1088 33942119 : *var = fold_convert (TREE_TYPE (exp), *var);
1089 33942119 : cache->empty ();
1090 33942119 : }
1091 :
1092 : /* Returns the address ADDR of an object in a canonical shape (without nop
1093 : casts, and with type of pointer to the object). */
1094 :
1095 : static tree
1096 15964623 : canonicalize_base_object_address (tree addr)
1097 : {
1098 15964623 : tree orig = addr;
1099 :
1100 15964623 : STRIP_NOPS (addr);
1101 :
1102 : /* The base address may be obtained by casting from integer, in that case
1103 : keep the cast. */
1104 15964623 : if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1105 : return orig;
1106 :
1107 15892869 : if (TREE_CODE (addr) != ADDR_EXPR)
1108 : return addr;
1109 :
1110 9475515 : return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1111 : }
1112 :
1113 : /* Analyze the behavior of memory reference REF within STMT.
1114 : There are two modes:
1115 :
1116 : - BB analysis. In this case we simply split the address into base,
1117 : init and offset components, without reference to any containing loop.
1118 : The resulting base and offset are general expressions and they can
1119 : vary arbitrarily from one iteration of the containing loop to the next.
1120 : The step is always zero.
1121 :
1122 : - loop analysis. In this case we analyze the reference both wrt LOOP
1123 : and on the basis that the reference occurs (is "used") in LOOP;
1124 : see the comment above analyze_scalar_evolution_in_loop for more
1125 : information about this distinction. The base, init, offset and
1126 : step fields are all invariant in LOOP.
1127 :
1128 : Perform BB analysis if LOOP is null, or if LOOP is the function's
1129 : dummy outermost loop. In other cases perform loop analysis.
1130 :
1131 : Return true if the analysis succeeded and store the results in DRB if so.
1132 : BB analysis can only fail for bitfield or reversed-storage accesses. */
1133 :
1134 : opt_result
1135 16528688 : dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1136 : class loop *loop, const gimple *stmt)
1137 : {
1138 16528688 : poly_int64 pbitsize, pbitpos;
1139 16528688 : tree base, poffset;
1140 16528688 : machine_mode pmode;
1141 16528688 : int punsignedp, preversep, pvolatilep;
1142 16528688 : affine_iv base_iv, offset_iv;
1143 16528688 : tree init, dinit, step;
1144 16528688 : bool in_loop = (loop && loop->num);
1145 :
1146 16528688 : if (dump_file && (dump_flags & TDF_DETAILS))
1147 67781 : fprintf (dump_file, "analyze_innermost: ");
1148 :
1149 16528688 : base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1150 : &punsignedp, &preversep, &pvolatilep);
1151 16528688 : gcc_assert (base != NULL_TREE);
1152 :
1153 16528688 : poly_int64 pbytepos;
1154 16528688 : if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1155 46264 : return opt_result::failure_at (stmt,
1156 : "failed: bit offset alignment.\n");
1157 :
1158 16482424 : if (preversep)
1159 653 : return opt_result::failure_at (stmt,
1160 : "failed: reverse storage order.\n");
1161 :
1162 : /* Calculate the alignment and misalignment for the inner reference. */
1163 16481771 : unsigned int HOST_WIDE_INT bit_base_misalignment;
1164 16481771 : unsigned int bit_base_alignment;
1165 16481771 : get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1166 :
1167 : /* There are no bitfield references remaining in BASE, so the values
1168 : we got back must be whole bytes. */
1169 16481771 : gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1170 : && bit_base_misalignment % BITS_PER_UNIT == 0);
1171 16481771 : unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1172 16481771 : poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1173 :
1174 16481771 : if (TREE_CODE (base) == MEM_REF)
1175 : {
1176 7113889 : if (!integer_zerop (TREE_OPERAND (base, 1)))
1177 : {
1178 : /* Subtract MOFF from the base and add it to POFFSET instead.
1179 : Adjust the misalignment to reflect the amount we subtracted. */
1180 1249048 : poly_offset_int moff = mem_ref_offset (base);
1181 1249048 : base_misalignment -= moff.force_shwi ();
1182 1249048 : tree mofft = wide_int_to_tree (sizetype, moff);
1183 1249048 : if (!poffset)
1184 1238894 : poffset = mofft;
1185 : else
1186 10154 : poffset = size_binop (PLUS_EXPR, poffset, mofft);
1187 : }
1188 7113889 : base = TREE_OPERAND (base, 0);
1189 : }
1190 : else
1191 : {
1192 9367882 : if (may_be_nonaddressable_p (base))
1193 2069 : return opt_result::failure_at (stmt,
1194 : "failed: base not addressable.\n");
1195 9365813 : base = build_fold_addr_expr (base);
1196 : }
1197 :
1198 16479702 : if (in_loop)
1199 : {
1200 3176592 : if (!simple_iv (loop, loop, base, &base_iv, true))
1201 430466 : return opt_result::failure_at
1202 430466 : (stmt, "failed: evolution of base is not affine.\n");
1203 : }
1204 : else
1205 : {
1206 13303110 : base_iv.base = base;
1207 13303110 : base_iv.step = ssize_int (0);
1208 13303110 : base_iv.no_overflow = true;
1209 : }
1210 :
1211 16049236 : if (!poffset)
1212 : {
1213 13265497 : offset_iv.base = ssize_int (0);
1214 13265497 : offset_iv.step = ssize_int (0);
1215 : }
1216 : else
1217 : {
1218 2783739 : if (!in_loop)
1219 : {
1220 1500136 : offset_iv.base = poffset;
1221 1500136 : offset_iv.step = ssize_int (0);
1222 : }
1223 1283603 : else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1224 84613 : return opt_result::failure_at
1225 84613 : (stmt, "failed: evolution of offset is not affine.\n");
1226 : }
1227 :
1228 15964623 : init = ssize_int (pbytepos);
1229 :
1230 : /* Subtract any constant component from the base and add it to INIT instead.
1231 : Adjust the misalignment to reflect the amount we subtracted. */
1232 15964623 : split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1233 15964623 : init = size_binop (PLUS_EXPR, init, dinit);
1234 15964623 : base_misalignment -= TREE_INT_CST_LOW (dinit);
1235 :
1236 15964623 : split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1237 15964623 : init = size_binop (PLUS_EXPR, init, dinit);
1238 :
1239 15964623 : step = size_binop (PLUS_EXPR,
1240 : fold_convert (ssizetype, base_iv.step),
1241 : fold_convert (ssizetype, offset_iv.step));
1242 :
1243 15964623 : base = canonicalize_base_object_address (base_iv.base);
1244 :
1245 : /* See if get_pointer_alignment can guarantee a higher alignment than
1246 : the one we calculated above. */
1247 15964623 : unsigned int HOST_WIDE_INT alt_misalignment;
1248 15964623 : unsigned int alt_alignment;
1249 15964623 : get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1250 :
1251 : /* As above, these values must be whole bytes. */
1252 15964623 : gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1253 : && alt_misalignment % BITS_PER_UNIT == 0);
1254 15964623 : alt_alignment /= BITS_PER_UNIT;
1255 15964623 : alt_misalignment /= BITS_PER_UNIT;
1256 :
1257 15964623 : if (base_alignment < alt_alignment)
1258 : {
1259 140778 : base_alignment = alt_alignment;
1260 140778 : base_misalignment = alt_misalignment;
1261 : }
1262 :
1263 15964623 : drb->base_address = base;
1264 15964623 : drb->offset = fold_convert (ssizetype, offset_iv.base);
1265 15964623 : drb->init = init;
1266 15964623 : drb->step = step;
1267 15964623 : if (known_misalignment (base_misalignment, base_alignment,
1268 : &drb->base_misalignment))
1269 15964623 : drb->base_alignment = base_alignment;
1270 : else
1271 : {
1272 : drb->base_alignment = known_alignment (base_misalignment);
1273 : drb->base_misalignment = 0;
1274 : }
1275 15964623 : drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1276 15964623 : drb->step_alignment = highest_pow2_factor (step);
1277 :
1278 15964623 : if (dump_file && (dump_flags & TDF_DETAILS))
1279 64381 : fprintf (dump_file, "success.\n");
1280 :
1281 15964623 : return opt_result::success ();
1282 : }
1283 :
1284 : /* Return true if OP is a valid component reference for a DR access
1285 : function. This accepts a subset of what handled_component_p accepts. */
1286 :
1287 : static bool
1288 5698244 : access_fn_component_p (tree op)
1289 : {
1290 5698244 : switch (TREE_CODE (op))
1291 : {
1292 : case REALPART_EXPR:
1293 : case IMAGPART_EXPR:
1294 : case ARRAY_REF:
1295 : return true;
1296 :
1297 1917556 : case COMPONENT_REF:
1298 1917556 : return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1299 :
1300 0 : default:
1301 0 : return false;
1302 : }
1303 : }
1304 :
1305 : /* Returns whether BASE can have a access_fn_component_p with BASE
1306 : as base. */
1307 :
1308 : static bool
1309 1709583 : base_supports_access_fn_components_p (tree base)
1310 : {
1311 1709583 : switch (TREE_CODE (TREE_TYPE (base)))
1312 : {
1313 : case COMPLEX_TYPE:
1314 : case ARRAY_TYPE:
1315 : case RECORD_TYPE:
1316 : return true;
1317 1701165 : default:
1318 1701165 : return false;
1319 : }
1320 : }
1321 :
1322 : /* Determines the base object and the list of indices of memory reference
1323 : DR, analyzed in LOOP and instantiated before NEST. */
1324 :
1325 : static void
1326 16629479 : dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1327 : {
1328 : /* If analyzing a basic-block there are no indices to analyze
1329 : and thus no access functions. */
1330 16629479 : if (!nest)
1331 : {
1332 13351404 : dri->base_object = ref;
1333 13351404 : dri->access_fns.create (0);
1334 13351404 : return;
1335 : }
1336 :
1337 3278075 : vec<tree> access_fns = vNULL;
1338 :
1339 : /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1340 : into a two element array with a constant index. The base is
1341 : then just the immediate underlying object. */
1342 3278075 : if (TREE_CODE (ref) == REALPART_EXPR)
1343 : {
1344 41654 : ref = TREE_OPERAND (ref, 0);
1345 41654 : access_fns.safe_push (integer_zero_node);
1346 : }
1347 3236421 : else if (TREE_CODE (ref) == IMAGPART_EXPR)
1348 : {
1349 39807 : ref = TREE_OPERAND (ref, 0);
1350 39807 : access_fns.safe_push (integer_one_node);
1351 : }
1352 :
1353 : /* Analyze access functions of dimensions we know to be independent.
1354 : The list of component references handled here should be kept in
1355 : sync with access_fn_component_p. */
1356 5797901 : while (handled_component_p (ref))
1357 : {
1358 2671901 : if (TREE_CODE (ref) == ARRAY_REF)
1359 : {
1360 1302373 : tree op = TREE_OPERAND (ref, 1);
1361 1302373 : tree access_fn = analyze_scalar_evolution (loop, op);
1362 1302373 : access_fn = instantiate_scev (nest, loop, access_fn);
1363 1302373 : access_fns.safe_push (access_fn);
1364 : }
1365 1369528 : else if (TREE_CODE (ref) == COMPONENT_REF
1366 1369528 : && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1367 : {
1368 : /* For COMPONENT_REFs of records (but not unions!) use the
1369 : FIELD_DECL offset as constant access function so we can
1370 : disambiguate a[i].f1 and a[i].f2. */
1371 1217453 : tree off = component_ref_field_offset (ref);
1372 1217453 : off = size_binop (PLUS_EXPR,
1373 : size_binop (MULT_EXPR,
1374 : fold_convert (bitsizetype, off),
1375 : bitsize_int (BITS_PER_UNIT)),
1376 : DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1377 1217453 : access_fns.safe_push (off);
1378 : }
1379 : else
1380 : /* If we have an unhandled component we could not translate
1381 : to an access function stop analyzing. We have determined
1382 : our base object in this case. */
1383 : break;
1384 :
1385 2519826 : ref = TREE_OPERAND (ref, 0);
1386 : }
1387 :
1388 : /* If the address operand of a MEM_REF base has an evolution in the
1389 : analyzed nest, add it as an additional independent access-function. */
1390 3278075 : if (TREE_CODE (ref) == MEM_REF)
1391 : {
1392 2303571 : tree op = TREE_OPERAND (ref, 0);
1393 2303571 : tree access_fn = analyze_scalar_evolution (loop, op);
1394 2303571 : access_fn = instantiate_scev (nest, loop, access_fn);
1395 2303571 : STRIP_NOPS (access_fn);
1396 2303571 : if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1397 : {
1398 1130300 : tree memoff = TREE_OPERAND (ref, 1);
1399 1130300 : tree base = initial_condition (access_fn);
1400 1130300 : tree orig_type = TREE_TYPE (base);
1401 1130300 : STRIP_USELESS_TYPE_CONVERSION (base);
1402 1130300 : tree off;
1403 1130300 : split_constant_offset (base, &base, &off);
1404 1130300 : STRIP_USELESS_TYPE_CONVERSION (base);
1405 : /* Fold the MEM_REF offset into the evolutions initial
1406 : value to make more bases comparable. */
1407 1130300 : if (!integer_zerop (memoff))
1408 : {
1409 111338 : off = size_binop (PLUS_EXPR, off,
1410 : fold_convert (ssizetype, memoff));
1411 111338 : memoff = build_int_cst (TREE_TYPE (memoff), 0);
1412 : }
1413 : /* Adjust the offset so it is a multiple of the access type
1414 : size and thus we separate bases that can possibly be used
1415 : to produce partial overlaps (which the access_fn machinery
1416 : cannot handle). */
1417 1130300 : wide_int rem;
1418 1130300 : if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1419 1130170 : && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1420 2260153 : && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1421 1129853 : rem = wi::mod_trunc
1422 1129853 : (wi::to_wide (off),
1423 2259706 : wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1424 1129853 : SIGNED);
1425 : else
1426 : /* If we can't compute the remainder simply force the initial
1427 : condition to zero. */
1428 447 : rem = wi::to_wide (off);
1429 1130300 : off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1430 1130300 : memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1431 : /* And finally replace the initial condition. */
1432 2260600 : access_fn = chrec_replace_initial_condition
1433 1130300 : (access_fn, fold_convert (orig_type, off));
1434 : /* ??? This is still not a suitable base object for
1435 : dr_may_alias_p - the base object needs to be an
1436 : access that covers the object as whole. With
1437 : an evolution in the pointer this cannot be
1438 : guaranteed.
1439 : As a band-aid, mark the access so we can special-case
1440 : it in dr_may_alias_p. */
1441 1130300 : tree old = ref;
1442 1130300 : ref = fold_build2_loc (EXPR_LOCATION (ref),
1443 1130300 : MEM_REF, TREE_TYPE (ref),
1444 : base, memoff);
1445 1130300 : MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1446 1130300 : MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1447 1130300 : dri->unconstrained_base = true;
1448 1130300 : access_fns.safe_push (access_fn);
1449 1130300 : }
1450 : }
1451 974504 : else if (DECL_P (ref))
1452 : {
1453 : /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1454 822429 : ref = build2 (MEM_REF, TREE_TYPE (ref),
1455 : build_fold_addr_expr (ref),
1456 : build_int_cst (reference_alias_ptr_type (ref), 0));
1457 : }
1458 :
1459 3278075 : dri->base_object = ref;
1460 3278075 : dri->access_fns = access_fns;
1461 : }
1462 :
1463 : /* Extracts the alias analysis information from the memory reference DR. */
1464 :
1465 : static void
1466 16516959 : dr_analyze_alias (struct data_reference *dr)
1467 : {
1468 16516959 : tree ref = DR_REF (dr);
1469 16516959 : tree base = get_base_address (ref), addr;
1470 :
1471 16516959 : if (INDIRECT_REF_P (base)
1472 16516959 : || TREE_CODE (base) == MEM_REF)
1473 : {
1474 7121553 : addr = TREE_OPERAND (base, 0);
1475 7121553 : if (TREE_CODE (addr) == SSA_NAME)
1476 7120182 : DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1477 : }
1478 16516959 : }
1479 :
1480 : /* Frees data reference DR. */
1481 :
1482 : void
1483 17000422 : free_data_ref (data_reference_p dr)
1484 : {
1485 17000422 : DR_ACCESS_FNS (dr).release ();
1486 17000422 : if (dr->alt_indices.base_object)
1487 112520 : dr->alt_indices.access_fns.release ();
1488 17000422 : free (dr);
1489 17000422 : }
1490 :
1491 : /* Analyze memory reference MEMREF, which is accessed in STMT.
1492 : The reference is a read if IS_READ is true, otherwise it is a write.
1493 : IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1494 : within STMT, i.e. that it might not occur even if STMT is executed
1495 : and runs to completion.
1496 :
1497 : Return the data_reference description of MEMREF. NEST is the outermost
1498 : loop in which the reference should be instantiated, LOOP is the loop
1499 : in which the data reference should be analyzed. */
1500 :
1501 : struct data_reference *
1502 16516959 : create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1503 : bool is_read, bool is_conditional_in_stmt)
1504 : {
1505 16516959 : struct data_reference *dr;
1506 :
1507 16516959 : if (dump_file && (dump_flags & TDF_DETAILS))
1508 : {
1509 66563 : fprintf (dump_file, "Creating dr for ");
1510 66563 : print_generic_expr (dump_file, memref, TDF_SLIM);
1511 66563 : fprintf (dump_file, "\n");
1512 : }
1513 :
1514 16516959 : dr = XCNEW (struct data_reference);
1515 16516959 : DR_STMT (dr) = stmt;
1516 16516959 : DR_REF (dr) = memref;
1517 16516959 : DR_IS_READ (dr) = is_read;
1518 16516959 : DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1519 :
1520 29868363 : dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1521 : nest != NULL ? loop : NULL, stmt);
1522 16516959 : dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1523 16516959 : dr_analyze_alias (dr);
1524 :
1525 16516959 : if (dump_file && (dump_flags & TDF_DETAILS))
1526 : {
1527 66563 : unsigned i;
1528 66563 : fprintf (dump_file, "\tbase_address: ");
1529 66563 : print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1530 66563 : fprintf (dump_file, "\n\toffset from base address: ");
1531 66563 : print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1532 66563 : fprintf (dump_file, "\n\tconstant offset from base address: ");
1533 66563 : print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1534 66563 : fprintf (dump_file, "\n\tstep: ");
1535 66563 : print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1536 66563 : fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1537 66563 : fprintf (dump_file, "\n\tbase misalignment: %d",
1538 : DR_BASE_MISALIGNMENT (dr));
1539 66563 : fprintf (dump_file, "\n\toffset alignment: %d",
1540 : DR_OFFSET_ALIGNMENT (dr));
1541 66563 : fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1542 66563 : fprintf (dump_file, "\n\tbase_object: ");
1543 66563 : print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1544 66563 : fprintf (dump_file, "\n");
1545 191345 : for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1546 : {
1547 58219 : fprintf (dump_file, "\tAccess function %d: ", i);
1548 58219 : print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1549 : }
1550 : }
1551 :
1552 16516959 : return dr;
1553 : }
1554 :
1555 : /* A helper function computes order between two tree expressions T1 and T2.
1556 : This is used in comparator functions sorting objects based on the order
1557 : of tree expressions. The function returns -1, 0, or 1. */
1558 :
1559 : int
1560 405732370 : data_ref_compare_tree (tree t1, tree t2)
1561 : {
1562 405732370 : int i, cmp;
1563 405732370 : enum tree_code code;
1564 405732370 : char tclass;
1565 :
1566 405732370 : if (t1 == t2)
1567 : return 0;
1568 184840655 : if (t1 == NULL)
1569 : return -1;
1570 184718234 : if (t2 == NULL)
1571 : return 1;
1572 :
1573 184644051 : STRIP_USELESS_TYPE_CONVERSION (t1);
1574 184644051 : STRIP_USELESS_TYPE_CONVERSION (t2);
1575 184644051 : if (t1 == t2)
1576 : return 0;
1577 :
1578 184088013 : if (TREE_CODE (t1) != TREE_CODE (t2)
1579 13318037 : && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1580 18857793 : return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1581 :
1582 170769976 : code = TREE_CODE (t1);
1583 170769976 : switch (code)
1584 : {
1585 50255856 : case INTEGER_CST:
1586 50255856 : return tree_int_cst_compare (t1, t2);
1587 :
1588 16 : case STRING_CST:
1589 16 : if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1590 16 : return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1591 0 : return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1592 0 : TREE_STRING_LENGTH (t1));
1593 :
1594 14536243 : case SSA_NAME:
1595 14536243 : if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1596 14536243 : return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1597 : break;
1598 :
1599 105977861 : default:
1600 105977861 : if (POLY_INT_CST_P (t1))
1601 : return compare_sizes_for_sort (wi::to_poly_widest (t1),
1602 : wi::to_poly_widest (t2));
1603 :
1604 105977861 : tclass = TREE_CODE_CLASS (code);
1605 :
1606 : /* For decls, compare their UIDs. */
1607 105977861 : if (tclass == tcc_declaration)
1608 : {
1609 20099263 : if (DECL_UID (t1) != DECL_UID (t2))
1610 20098736 : return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1611 : break;
1612 : }
1613 : /* For expressions, compare their operands recursively. */
1614 85878598 : else if (IS_EXPR_CODE_CLASS (tclass))
1615 : {
1616 152804262 : for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1617 : {
1618 99059247 : cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1619 99059247 : TREE_OPERAND (t2, i));
1620 99059247 : if (cmp != 0)
1621 : return cmp;
1622 : }
1623 : }
1624 : else
1625 0 : gcc_unreachable ();
1626 : }
1627 :
1628 : return 0;
1629 : }
1630 :
1631 : /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1632 : check. */
1633 :
1634 : opt_result
1635 198097 : runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1636 : {
1637 198097 : if (dump_enabled_p ())
1638 7573 : dump_printf (MSG_NOTE,
1639 : "consider run-time aliasing test between %T and %T\n",
1640 7573 : DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1641 :
1642 198097 : if (!speed_p)
1643 0 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1644 : "runtime alias check not supported when"
1645 : " optimizing for size.\n");
1646 :
1647 : /* FORNOW: We don't support versioning with outer-loop in either
1648 : vectorization or loop distribution. */
1649 198097 : if (loop != NULL && loop->inner != NULL)
1650 143 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1651 : "runtime alias check not supported for"
1652 : " outer loop.\n");
1653 :
1654 : /* FORNOW: We don't support handling different address spaces. */
1655 197954 : if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
1656 197954 : != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
1657 1 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1658 : "runtime alias check between different "
1659 : "address spaces not supported.\n");
1660 :
1661 197953 : return opt_result::success ();
1662 : }
1663 :
1664 : /* Operator == between two dr_with_seg_len objects.
1665 :
1666 : This equality operator is used to make sure two data refs
1667 : are the same one so that we will consider to combine the
1668 : aliasing checks of those two pairs of data dependent data
1669 : refs. */
1670 :
1671 : static bool
1672 117192 : operator == (const dr_with_seg_len& d1,
1673 : const dr_with_seg_len& d2)
1674 : {
1675 117192 : return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1676 117192 : DR_BASE_ADDRESS (d2.dr), 0)
1677 90296 : && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1678 89374 : && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1679 82773 : && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1680 82537 : && known_eq (d1.access_size, d2.access_size)
1681 196632 : && d1.align == d2.align);
1682 : }
1683 :
1684 : /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1685 : so that we can combine aliasing checks in one scan. */
1686 :
1687 : static int
1688 1038877 : comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1689 : {
1690 1038877 : const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1691 1038877 : const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1692 1038877 : const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1693 1038877 : const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1694 :
1695 : /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1696 : if a and c have the same basic address snd step, and b and d have the same
1697 : address and step. Therefore, if any a&c or b&d don't have the same address
1698 : and step, we don't care the order of those two pairs after sorting. */
1699 1038877 : int comp_res;
1700 :
1701 1038877 : if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1702 1038877 : DR_BASE_ADDRESS (b1.dr))) != 0)
1703 : return comp_res;
1704 522101 : if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1705 522101 : DR_BASE_ADDRESS (b2.dr))) != 0)
1706 : return comp_res;
1707 342194 : if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1708 342194 : DR_STEP (b1.dr))) != 0)
1709 : return comp_res;
1710 341578 : if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1711 341578 : DR_STEP (b2.dr))) != 0)
1712 : return comp_res;
1713 334015 : if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1714 334015 : DR_OFFSET (b1.dr))) != 0)
1715 : return comp_res;
1716 317754 : if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1717 317754 : DR_INIT (b1.dr))) != 0)
1718 : return comp_res;
1719 238230 : if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1720 238230 : DR_OFFSET (b2.dr))) != 0)
1721 : return comp_res;
1722 225227 : if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1723 225227 : DR_INIT (b2.dr))) != 0)
1724 : return comp_res;
1725 :
1726 : return 0;
1727 : }
1728 :
1729 : /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1730 :
1731 : static void
1732 997 : dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1733 : {
1734 1994 : dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
1735 997 : DR_REF (alias_pair->first.dr),
1736 997 : DR_REF (alias_pair->second.dr));
1737 :
1738 997 : dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1739 : alias_pair->first.seg_len);
1740 997 : if (!operand_equal_p (alias_pair->first.seg_len,
1741 997 : alias_pair->second.seg_len, 0))
1742 248 : dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1743 :
1744 997 : dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
1745 997 : dump_dec (MSG_NOTE, alias_pair->first.access_size);
1746 997 : if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1747 : {
1748 228 : dump_printf (MSG_NOTE, " vs. ");
1749 228 : dump_dec (MSG_NOTE, alias_pair->second.access_size);
1750 : }
1751 :
1752 997 : dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
1753 : alias_pair->first.align);
1754 997 : if (alias_pair->first.align != alias_pair->second.align)
1755 73 : dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1756 :
1757 997 : dump_printf (MSG_NOTE, "\n%sflags: ", indent);
1758 997 : if (alias_pair->flags & DR_ALIAS_RAW)
1759 150 : dump_printf (MSG_NOTE, " RAW");
1760 997 : if (alias_pair->flags & DR_ALIAS_WAR)
1761 791 : dump_printf (MSG_NOTE, " WAR");
1762 997 : if (alias_pair->flags & DR_ALIAS_WAW)
1763 174 : dump_printf (MSG_NOTE, " WAW");
1764 997 : if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1765 209 : dump_printf (MSG_NOTE, " ARBITRARY");
1766 997 : if (alias_pair->flags & DR_ALIAS_SWAPPED)
1767 0 : dump_printf (MSG_NOTE, " SWAPPED");
1768 997 : if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1769 0 : dump_printf (MSG_NOTE, " UNSWAPPED");
1770 997 : if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1771 0 : dump_printf (MSG_NOTE, " MIXED_STEPS");
1772 997 : if (alias_pair->flags == 0)
1773 0 : dump_printf (MSG_NOTE, " <none>");
1774 997 : dump_printf (MSG_NOTE, "\n");
1775 997 : }
1776 :
1777 : /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1778 : FACTOR is number of iterations that each data reference is accessed.
1779 :
1780 : Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1781 : we create an expression:
1782 :
1783 : ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1784 : || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1785 :
1786 : for aliasing checks. However, in some cases we can decrease the number
1787 : of checks by combining two checks into one. For example, suppose we have
1788 : another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1789 : condition is satisfied:
1790 :
1791 : load_ptr_0 < load_ptr_1 &&
1792 : load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1793 :
1794 : (this condition means, in each iteration of vectorized loop, the accessed
1795 : memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1796 : load_ptr_1.)
1797 :
1798 : we then can use only the following expression to finish the alising checks
1799 : between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1800 :
1801 : ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1802 : || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1803 :
1804 : Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1805 : basic address. */
1806 :
1807 : void
1808 19977 : prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1809 : poly_uint64)
1810 : {
1811 19977 : if (alias_pairs->is_empty ())
1812 19977 : return;
1813 :
1814 : /* Canonicalize each pair so that the base components are ordered wrt
1815 : data_ref_compare_tree. This allows the loop below to merge more
1816 : cases. */
1817 : unsigned int i;
1818 : dr_with_seg_len_pair_t *alias_pair;
1819 77256 : FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1820 : {
1821 58008 : data_reference_p dr_a = alias_pair->first.dr;
1822 58008 : data_reference_p dr_b = alias_pair->second.dr;
1823 58008 : int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1824 : DR_BASE_ADDRESS (dr_b));
1825 58008 : if (comp_res == 0)
1826 1759 : comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1827 1759 : if (comp_res == 0)
1828 122 : comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1829 58008 : if (comp_res > 0)
1830 : {
1831 19307 : std::swap (alias_pair->first, alias_pair->second);
1832 19307 : alias_pair->flags |= DR_ALIAS_SWAPPED;
1833 : }
1834 : else
1835 38701 : alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1836 : }
1837 :
1838 : /* Sort the collected data ref pairs so that we can scan them once to
1839 : combine all possible aliasing checks. */
1840 19248 : alias_pairs->qsort (comp_dr_with_seg_len_pair);
1841 :
1842 : /* Scan the sorted dr pairs and check if we can combine alias checks
1843 : of two neighboring dr pairs. */
1844 : unsigned int last = 0;
1845 58008 : for (i = 1; i < alias_pairs->length (); ++i)
1846 : {
1847 : /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1848 38760 : dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1849 38760 : dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1850 :
1851 38760 : dr_with_seg_len *dr_a1 = &alias_pair1->first;
1852 38760 : dr_with_seg_len *dr_b1 = &alias_pair1->second;
1853 38760 : dr_with_seg_len *dr_a2 = &alias_pair2->first;
1854 38760 : dr_with_seg_len *dr_b2 = &alias_pair2->second;
1855 :
1856 : /* Remove duplicate data ref pairs. */
1857 38760 : if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1858 : {
1859 17284 : if (dump_enabled_p ())
1860 1655 : dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1861 1655 : DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1862 1655 : DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1863 17284 : alias_pair1->flags |= alias_pair2->flags;
1864 56044 : continue;
1865 : }
1866 :
1867 : /* Assume that we won't be able to merge the pairs, then correct
1868 : if we do. */
1869 21476 : last += 1;
1870 21476 : if (last != i)
1871 4896 : (*alias_pairs)[last] = (*alias_pairs)[i];
1872 :
1873 21476 : if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1874 : {
1875 : /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1876 : and DR_A1 and DR_A2 are two consecutive memrefs. */
1877 18196 : if (*dr_a1 == *dr_a2)
1878 : {
1879 13338 : std::swap (dr_a1, dr_b1);
1880 13338 : std::swap (dr_a2, dr_b2);
1881 : }
1882 :
1883 18196 : poly_int64 init_a1, init_a2;
1884 : /* Only consider cases in which the distance between the initial
1885 : DR_A1 and the initial DR_A2 is known at compile time. */
1886 33397 : if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1887 18196 : DR_BASE_ADDRESS (dr_a2->dr), 0)
1888 3492 : || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1889 3492 : DR_OFFSET (dr_a2->dr), 0)
1890 2995 : || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1891 21191 : || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1892 15208 : continue;
1893 :
1894 : /* Don't combine if we can't tell which one comes first. */
1895 2995 : if (!ordered_p (init_a1, init_a2))
1896 : continue;
1897 :
1898 : /* Work out what the segment length would be if we did combine
1899 : DR_A1 and DR_A2:
1900 :
1901 : - If DR_A1 and DR_A2 have equal lengths, that length is
1902 : also the combined length.
1903 :
1904 : - If DR_A1 and DR_A2 both have negative "lengths", the combined
1905 : length is the lower bound on those lengths.
1906 :
1907 : - If DR_A1 and DR_A2 both have positive lengths, the combined
1908 : length is the upper bound on those lengths.
1909 :
1910 : Other cases are unlikely to give a useful combination.
1911 :
1912 : The lengths both have sizetype, so the sign is taken from
1913 : the step instead. */
1914 2995 : poly_uint64 new_seg_len = 0;
1915 2995 : bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1916 2995 : dr_a2->seg_len, 0);
1917 2995 : if (new_seg_len_p)
1918 : {
1919 7 : poly_uint64 seg_len_a1, seg_len_a2;
1920 7 : if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1921 7 : || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1922 7 : continue;
1923 :
1924 0 : tree indicator_a = dr_direction_indicator (dr_a1->dr);
1925 0 : if (TREE_CODE (indicator_a) != INTEGER_CST)
1926 0 : continue;
1927 :
1928 0 : tree indicator_b = dr_direction_indicator (dr_a2->dr);
1929 0 : if (TREE_CODE (indicator_b) != INTEGER_CST)
1930 0 : continue;
1931 :
1932 0 : int sign_a = tree_int_cst_sgn (indicator_a);
1933 0 : int sign_b = tree_int_cst_sgn (indicator_b);
1934 :
1935 0 : if (sign_a <= 0 && sign_b <= 0)
1936 0 : new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1937 0 : else if (sign_a >= 0 && sign_b >= 0)
1938 0 : new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1939 : else
1940 0 : continue;
1941 : }
1942 : /* At this point we're committed to merging the refs. */
1943 :
1944 : /* Make sure dr_a1 starts left of dr_a2. */
1945 2988 : if (maybe_gt (init_a1, init_a2))
1946 : {
1947 0 : std::swap (*dr_a1, *dr_a2);
1948 0 : std::swap (init_a1, init_a2);
1949 : }
1950 :
1951 : /* The DR_Bs are equal, so only the DR_As can introduce
1952 : mixed steps. */
1953 2988 : if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1954 0 : alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1955 :
1956 2988 : if (new_seg_len_p)
1957 : {
1958 0 : dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1959 0 : new_seg_len);
1960 0 : dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1961 : }
1962 :
1963 : /* This is always positive due to the swap above. */
1964 2988 : poly_uint64 diff = init_a2 - init_a1;
1965 :
1966 : /* The new check will start at DR_A1. Make sure that its access
1967 : size encompasses the initial DR_A2. */
1968 2988 : if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1969 : {
1970 1199 : dr_a1->access_size = upper_bound (dr_a1->access_size,
1971 : diff + dr_a2->access_size);
1972 1199 : unsigned int new_align = known_alignment (dr_a1->access_size);
1973 1199 : dr_a1->align = MIN (dr_a1->align, new_align);
1974 : }
1975 2988 : if (dump_enabled_p ())
1976 840 : dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1977 840 : DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1978 840 : DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1979 2988 : alias_pair1->flags |= alias_pair2->flags;
1980 2988 : last -= 1;
1981 : }
1982 : }
1983 19248 : alias_pairs->truncate (last + 1);
1984 :
1985 : /* Try to restore the original dr_with_seg_len order within each
1986 : dr_with_seg_len_pair_t. If we ended up combining swapped and
1987 : unswapped pairs into the same check, we have to invalidate any
1988 : RAW, WAR and WAW information for it. */
1989 19248 : if (dump_enabled_p ())
1990 777 : dump_printf (MSG_NOTE, "merged alias checks:\n");
1991 56984 : FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1992 : {
1993 37736 : unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1994 37736 : unsigned int swapped = (alias_pair->flags & swap_mask);
1995 37736 : if (swapped == DR_ALIAS_SWAPPED)
1996 11834 : std::swap (alias_pair->first, alias_pair->second);
1997 25902 : else if (swapped != DR_ALIAS_UNSWAPPED)
1998 1914 : alias_pair->flags |= DR_ALIAS_ARBITRARY;
1999 37736 : alias_pair->flags &= ~swap_mask;
2000 37736 : if (dump_enabled_p ())
2001 997 : dump_alias_pair (alias_pair, " ");
2002 : }
2003 : }
2004 :
2005 : /* A subroutine of create_intersect_range_checks, with a subset of the
2006 : same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
2007 : to optimize cases in which the references form a simple RAW, WAR or
2008 : WAR dependence. */
2009 :
2010 : static bool
2011 4849 : create_ifn_alias_checks (tree *cond_expr,
2012 : const dr_with_seg_len_pair_t &alias_pair)
2013 : {
2014 4849 : const dr_with_seg_len& dr_a = alias_pair.first;
2015 4849 : const dr_with_seg_len& dr_b = alias_pair.second;
2016 :
2017 : /* Check for cases in which:
2018 :
2019 : (a) we have a known RAW, WAR or WAR dependence
2020 : (b) the accesses are well-ordered in both the original and new code
2021 : (see the comment above the DR_ALIAS_* flags for details); and
2022 : (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2023 4849 : if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2024 : return false;
2025 :
2026 : /* Make sure that both DRs access the same pattern of bytes,
2027 : with a constant length and step. */
2028 3204 : poly_uint64 seg_len;
2029 3204 : if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2030 2800 : || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2031 2689 : || maybe_ne (dr_a.access_size, dr_b.access_size)
2032 2648 : || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2033 5852 : || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2034 571 : return false;
2035 :
2036 2633 : unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2037 2633 : tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2038 2633 : tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2039 :
2040 : /* See whether the target suports what we want to do. WAW checks are
2041 : equivalent to WAR checks here. */
2042 2597 : internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2043 2633 : ? IFN_CHECK_RAW_PTRS
2044 : : IFN_CHECK_WAR_PTRS);
2045 2633 : unsigned int align = MIN (dr_a.align, dr_b.align);
2046 2633 : poly_uint64 full_length = seg_len + bytes;
2047 2633 : if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2048 : full_length, align))
2049 : {
2050 2633 : full_length = seg_len + dr_a.access_size;
2051 2633 : if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2052 : full_length, align))
2053 : return false;
2054 : }
2055 :
2056 : /* Commit to using this form of test. */
2057 0 : addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2058 0 : addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2059 :
2060 0 : addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2061 0 : addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2062 :
2063 0 : *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2064 : ifn, boolean_type_node,
2065 : 4, addr_a, addr_b,
2066 0 : size_int (full_length),
2067 0 : size_int (align));
2068 :
2069 0 : if (dump_enabled_p ())
2070 : {
2071 0 : if (ifn == IFN_CHECK_RAW_PTRS)
2072 0 : dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2073 : else
2074 0 : dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2075 : }
2076 : return true;
2077 : }
2078 :
2079 : /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2080 : free of aliases, using a condition based on index values instead
2081 : of a condition based on addresses. Return true on success,
2082 : storing the condition in *COND_EXPR.
2083 :
2084 : This can only be done if the two data references in ALIAS_PAIR access
2085 : the same array object and the index is the only difference. For example,
2086 : if the two data references are DR_A and DR_B:
2087 :
2088 : DR_A DR_B
2089 : data-ref arr[i] arr[j]
2090 : base_object arr arr
2091 : index {i_0, +, 1}_loop {j_0, +, 1}_loop
2092 :
2093 : The addresses and their index are like:
2094 :
2095 : |<- ADDR_A ->| |<- ADDR_B ->|
2096 : ------------------------------------------------------->
2097 : | | | | | | | | | |
2098 : ------------------------------------------------------->
2099 : i_0 ... i_0+4 j_0 ... j_0+4
2100 :
2101 : We can create expression based on index rather than address:
2102 :
2103 : (unsigned) (i_0 - j_0 + 3) <= 6
2104 :
2105 : i.e. the indices are less than 4 apart.
2106 :
2107 : Note evolution step of index needs to be considered in comparison. */
2108 :
2109 : static bool
2110 5000 : create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2111 : const dr_with_seg_len_pair_t &alias_pair)
2112 : {
2113 5000 : const dr_with_seg_len &dr_a = alias_pair.first;
2114 5000 : const dr_with_seg_len &dr_b = alias_pair.second;
2115 5000 : if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2116 5000 : || integer_zerop (DR_STEP (dr_a.dr))
2117 4755 : || integer_zerop (DR_STEP (dr_b.dr))
2118 19139 : || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2119 355 : return false;
2120 :
2121 4645 : poly_uint64 seg_len1, seg_len2;
2122 4645 : if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2123 4645 : || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2124 385 : return false;
2125 :
2126 4260 : if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2127 : return false;
2128 :
2129 4260 : if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2130 : return false;
2131 :
2132 154 : if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2133 : return false;
2134 :
2135 152 : gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2136 :
2137 152 : bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2138 152 : unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2139 152 : if (neg_step)
2140 : {
2141 30 : abs_step = -abs_step;
2142 30 : seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2143 30 : seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2144 : }
2145 :
2146 : /* Infer the number of iterations with which the memory segment is accessed
2147 : by DR. In other words, alias is checked if memory segment accessed by
2148 : DR_A in some iterations intersect with memory segment accessed by DR_B
2149 : in the same amount iterations.
2150 : Note segnment length is a linear function of number of iterations with
2151 : DR_STEP as the coefficient. */
2152 152 : poly_uint64 niter_len1, niter_len2;
2153 152 : if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2154 152 : || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2155 : return false;
2156 :
2157 : /* Divide each access size by the byte step, rounding up. */
2158 152 : poly_uint64 niter_access1, niter_access2;
2159 152 : if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2160 : abs_step, &niter_access1)
2161 152 : || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2162 : abs_step, &niter_access2))
2163 : return false;
2164 :
2165 152 : bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2166 :
2167 152 : int found = -1;
2168 311 : for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2169 : {
2170 160 : tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2171 160 : tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2172 : /* Two indices must be the same if they are not scev, or not scev wrto
2173 : current loop being vecorized. */
2174 160 : if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2175 152 : || TREE_CODE (access2) != POLYNOMIAL_CHREC
2176 152 : || CHREC_VARIABLE (access1) != (unsigned)loop->num
2177 312 : || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2178 : {
2179 8 : if (operand_equal_p (access1, access2, 0))
2180 7 : continue;
2181 :
2182 : return false;
2183 : }
2184 152 : if (found >= 0)
2185 : return false;
2186 152 : found = i;
2187 : }
2188 :
2189 : /* Ought not to happen in practice, since if all accesses are equal then the
2190 : alias should be decidable at compile time. */
2191 151 : if (found < 0)
2192 : return false;
2193 :
2194 : /* The two indices must have the same step. */
2195 151 : tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2196 151 : tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2197 151 : if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2198 : return false;
2199 :
2200 151 : tree idx_step = CHREC_RIGHT (access1);
2201 : /* Index must have const step, otherwise DR_STEP won't be constant. */
2202 151 : gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2203 : /* Index must evaluate in the same direction as DR. */
2204 151 : gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2205 :
2206 151 : tree min1 = CHREC_LEFT (access1);
2207 151 : tree min2 = CHREC_LEFT (access2);
2208 151 : if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2209 : return false;
2210 :
2211 : /* Ideally, alias can be checked against loop's control IV, but we
2212 : need to prove linear mapping between control IV and reference
2213 : index. Although that should be true, we check against (array)
2214 : index of data reference. Like segment length, index length is
2215 : linear function of the number of iterations with index_step as
2216 : the coefficient, i.e, niter_len * idx_step. */
2217 151 : offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2218 : SIGNED);
2219 151 : if (neg_step)
2220 30 : abs_idx_step = -abs_idx_step;
2221 302 : poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2222 302 : poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2223 151 : poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2224 151 : poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2225 :
2226 151 : gcc_assert (known_ge (idx_len1, 0)
2227 : && known_ge (idx_len2, 0)
2228 : && known_ge (idx_access1, 0)
2229 : && known_ge (idx_access2, 0));
2230 :
2231 : /* Each access has the following pattern, with lengths measured
2232 : in units of INDEX:
2233 :
2234 : <-- idx_len -->
2235 : <--- A: -ve step --->
2236 : +-----+-------+-----+-------+-----+
2237 : | n-1 | ..... | 0 | ..... | n-1 |
2238 : +-----+-------+-----+-------+-----+
2239 : <--- B: +ve step --->
2240 : <-- idx_len -->
2241 : |
2242 : min
2243 :
2244 : where "n" is the number of scalar iterations covered by the segment
2245 : and where each access spans idx_access units.
2246 :
2247 : A is the range of bytes accessed when the step is negative,
2248 : B is the range when the step is positive.
2249 :
2250 : When checking for general overlap, we need to test whether
2251 : the range:
2252 :
2253 : [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2254 :
2255 : overlaps:
2256 :
2257 : [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2258 :
2259 : where:
2260 :
2261 : low_offsetN = +ve step ? 0 : -idx_lenN;
2262 : high_offsetN = +ve step ? idx_lenN : 0;
2263 :
2264 : This is equivalent to testing whether:
2265 :
2266 : min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2267 : && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2268 :
2269 : Converting this into a single test, there is an overlap if:
2270 :
2271 : 0 <= min2 - min1 + bias <= limit
2272 :
2273 : where bias = high_offset2 + idx_access2 - 1 - low_offset1
2274 : limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2275 : + (high_offset2 - low_offset2 + idx_access2 - 1)
2276 : i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2277 :
2278 : Combining the tests requires limit to be computable in an unsigned
2279 : form of the index type; if it isn't, we fall back to the usual
2280 : pointer-based checks.
2281 :
2282 : We can do better if DR_B is a write and if DR_A and DR_B are
2283 : well-ordered in both the original and the new code (see the
2284 : comment above the DR_ALIAS_* flags for details). In this case
2285 : we know that for each i in [0, n-1], the write performed by
2286 : access i of DR_B occurs after access numbers j<=i of DR_A in
2287 : both the original and the new code. Any write or anti
2288 : dependencies wrt those DR_A accesses are therefore maintained.
2289 :
2290 : We just need to make sure that each individual write in DR_B does not
2291 : overlap any higher-indexed access in DR_A; such DR_A accesses happen
2292 : after the DR_B access in the original code but happen before it in
2293 : the new code.
2294 :
2295 : We know the steps for both accesses are equal, so by induction, we
2296 : just need to test whether the first write of DR_B overlaps a later
2297 : access of DR_A. In other words, we need to move min1 along by
2298 : one iteration:
2299 :
2300 : min1' = min1 + idx_step
2301 :
2302 : and use the ranges:
2303 :
2304 : [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2305 :
2306 : and:
2307 :
2308 : [min2, min2 + idx_access2 - 1]
2309 :
2310 : where:
2311 :
2312 : low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2313 : high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */
2314 151 : if (waw_or_war_p)
2315 120 : idx_len1 -= abs_idx_step;
2316 :
2317 151 : poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2318 151 : if (!waw_or_war_p)
2319 151 : limit += idx_len2;
2320 :
2321 151 : tree utype = unsigned_type_for (TREE_TYPE (min1));
2322 151 : if (!wi::fits_to_tree_p (limit, utype))
2323 : return false;
2324 :
2325 151 : poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2326 151 : poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2327 151 : poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2328 : /* Equivalent to adding IDX_STEP to MIN1. */
2329 151 : if (waw_or_war_p)
2330 120 : bias -= wi::to_offset (idx_step);
2331 :
2332 151 : tree subject = fold_build2 (MINUS_EXPR, utype,
2333 : fold_convert (utype, min2),
2334 : fold_convert (utype, min1));
2335 151 : subject = fold_build2 (PLUS_EXPR, utype, subject,
2336 : wide_int_to_tree (utype, bias));
2337 151 : tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2338 : wide_int_to_tree (utype, limit));
2339 151 : if (*cond_expr)
2340 0 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2341 : *cond_expr, part_cond_expr);
2342 : else
2343 151 : *cond_expr = part_cond_expr;
2344 151 : if (dump_enabled_p ())
2345 : {
2346 133 : if (waw_or_war_p)
2347 103 : dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2348 : else
2349 30 : dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2350 : }
2351 : return true;
2352 : }
2353 :
2354 : /* A subroutine of create_intersect_range_checks, with a subset of the
2355 : same arguments. Try to optimize cases in which the second access
2356 : is a write and in which some overlap is valid. */
2357 :
2358 : static bool
2359 4849 : create_waw_or_war_checks (tree *cond_expr,
2360 : const dr_with_seg_len_pair_t &alias_pair)
2361 : {
2362 4849 : const dr_with_seg_len& dr_a = alias_pair.first;
2363 4849 : const dr_with_seg_len& dr_b = alias_pair.second;
2364 :
2365 : /* Check for cases in which:
2366 :
2367 : (a) DR_B is always a write;
2368 : (b) the accesses are well-ordered in both the original and new code
2369 : (see the comment above the DR_ALIAS_* flags for details); and
2370 : (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2371 4849 : if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2372 : return false;
2373 :
2374 : /* Check for equal (but possibly variable) steps. */
2375 3161 : tree step = DR_STEP (dr_a.dr);
2376 3161 : if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2377 : return false;
2378 :
2379 : /* Make sure that we can operate on sizetype without loss of precision. */
2380 2764 : tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2381 2764 : if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2382 : return false;
2383 :
2384 : /* All addresses involved are known to have a common alignment ALIGN.
2385 : We can therefore subtract ALIGN from an exclusive endpoint to get
2386 : an inclusive endpoint. In the best (and common) case, ALIGN is the
2387 : same as the access sizes of both DRs, and so subtracting ALIGN
2388 : cancels out the addition of an access size. */
2389 2764 : unsigned int align = MIN (dr_a.align, dr_b.align);
2390 2764 : poly_uint64 last_chunk_a = dr_a.access_size - align;
2391 2764 : poly_uint64 last_chunk_b = dr_b.access_size - align;
2392 :
2393 : /* Get a boolean expression that is true when the step is negative. */
2394 2764 : tree indicator = dr_direction_indicator (dr_a.dr);
2395 2764 : tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2396 : fold_convert (ssizetype, indicator),
2397 : ssize_int (0));
2398 :
2399 : /* Get lengths in sizetype. */
2400 2764 : tree seg_len_a
2401 2764 : = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2402 2764 : step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2403 :
2404 : /* Each access has the following pattern:
2405 :
2406 : <- |seg_len| ->
2407 : <--- A: -ve step --->
2408 : +-----+-------+-----+-------+-----+
2409 : | n-1 | ..... | 0 | ..... | n-1 |
2410 : +-----+-------+-----+-------+-----+
2411 : <--- B: +ve step --->
2412 : <- |seg_len| ->
2413 : |
2414 : base address
2415 :
2416 : where "n" is the number of scalar iterations covered by the segment.
2417 :
2418 : A is the range of bytes accessed when the step is negative,
2419 : B is the range when the step is positive.
2420 :
2421 : We know that DR_B is a write. We also know (from checking that
2422 : DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2423 : the write performed by access i of DR_B occurs after access numbers
2424 : j<=i of DR_A in both the original and the new code. Any write or
2425 : anti dependencies wrt those DR_A accesses are therefore maintained.
2426 :
2427 : We just need to make sure that each individual write in DR_B does not
2428 : overlap any higher-indexed access in DR_A; such DR_A accesses happen
2429 : after the DR_B access in the original code but happen before it in
2430 : the new code.
2431 :
2432 : We know the steps for both accesses are equal, so by induction, we
2433 : just need to test whether the first write of DR_B overlaps a later
2434 : access of DR_A. In other words, we need to move addr_a along by
2435 : one iteration:
2436 :
2437 : addr_a' = addr_a + step
2438 :
2439 : and check whether:
2440 :
2441 : [addr_b, addr_b + last_chunk_b]
2442 :
2443 : overlaps:
2444 :
2445 : [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2446 :
2447 : where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2448 :
2449 : low_offset_a = +ve step ? 0 : seg_len_a - step
2450 : high_offset_a = +ve step ? seg_len_a - step : 0
2451 :
2452 : This is equivalent to testing whether:
2453 :
2454 : addr_a' + low_offset_a <= addr_b + last_chunk_b
2455 : && addr_b <= addr_a' + high_offset_a + last_chunk_a
2456 :
2457 : Converting this into a single test, there is an overlap if:
2458 :
2459 : 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2460 :
2461 : where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2462 :
2463 : If DR_A is performed, limit + |step| - last_chunk_b is known to be
2464 : less than the size of the object underlying DR_A. We also know
2465 : that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2466 : guaranteed at compile time. There can therefore be no overflow if
2467 : "limit" is calculated in an unsigned type with pointer precision. */
2468 2764 : tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2469 : DR_OFFSET (dr_a.dr));
2470 2764 : addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2471 :
2472 2764 : tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2473 : DR_OFFSET (dr_b.dr));
2474 2764 : addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2475 :
2476 : /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2477 2764 : addr_a = fold_build_pointer_plus (addr_a, step);
2478 2764 : tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2479 : seg_len_a, step);
2480 2764 : if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2481 3 : seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2482 :
2483 2764 : tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2484 : seg_len_a_minus_step, size_zero_node);
2485 2764 : if (!CONSTANT_CLASS_P (low_offset_a))
2486 3 : low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2487 :
2488 : /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2489 : but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2490 2764 : tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2491 : low_offset_a);
2492 :
2493 : /* The amount added to addr_b - addr_a'. */
2494 2764 : tree bias = fold_build2 (MINUS_EXPR, sizetype,
2495 : size_int (last_chunk_b), low_offset_a);
2496 :
2497 2764 : tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2498 2764 : limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2499 : size_int (last_chunk_a + last_chunk_b));
2500 :
2501 2764 : tree subject = fold_build2 (MINUS_EXPR, sizetype,
2502 : fold_convert (sizetype, addr_b),
2503 : fold_convert (sizetype, addr_a));
2504 2764 : subject = fold_build2 (PLUS_EXPR, sizetype, subject, bias);
2505 :
2506 2764 : *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2507 2764 : if (dump_enabled_p ())
2508 352 : dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2509 : return true;
2510 : }
2511 :
2512 : /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2513 : every address ADDR accessed by D:
2514 :
2515 : *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2516 :
2517 : In this case, every element accessed by D is aligned to at least
2518 : ALIGN bytes.
2519 :
2520 : If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2521 :
2522 : *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */
2523 :
2524 : static void
2525 4170 : get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2526 : tree *seg_max_out, HOST_WIDE_INT align)
2527 : {
2528 : /* Each access has the following pattern:
2529 :
2530 : <- |seg_len| ->
2531 : <--- A: -ve step --->
2532 : +-----+-------+-----+-------+-----+
2533 : | n-1 | ,.... | 0 | ..... | n-1 |
2534 : +-----+-------+-----+-------+-----+
2535 : <--- B: +ve step --->
2536 : <- |seg_len| ->
2537 : |
2538 : base address
2539 :
2540 : where "n" is the number of scalar iterations covered by the segment.
2541 : (This should be VF for a particular pair if we know that both steps
2542 : are the same, otherwise it will be the full number of scalar loop
2543 : iterations.)
2544 :
2545 : A is the range of bytes accessed when the step is negative,
2546 : B is the range when the step is positive.
2547 :
2548 : If the access size is "access_size" bytes, the lowest addressed byte is:
2549 :
2550 : base + (step < 0 ? seg_len : 0) [LB]
2551 :
2552 : and the highest addressed byte is always below:
2553 :
2554 : base + (step < 0 ? 0 : seg_len) + access_size [UB]
2555 :
2556 : Thus:
2557 :
2558 : LB <= ADDR < UB
2559 :
2560 : If ALIGN is nonzero, all three values are aligned to at least ALIGN
2561 : bytes, so:
2562 :
2563 : LB <= ADDR <= UB - ALIGN
2564 :
2565 : where "- ALIGN" folds naturally with the "+ access_size" and often
2566 : cancels it out.
2567 :
2568 : We don't try to simplify LB and UB beyond this (e.g. by using
2569 : MIN and MAX based on whether seg_len rather than the stride is
2570 : negative) because it is possible for the absolute size of the
2571 : segment to overflow the range of a ssize_t.
2572 :
2573 : Keeping the pointer_plus outside of the cond_expr should allow
2574 : the cond_exprs to be shared with other alias checks. */
2575 4170 : tree indicator = dr_direction_indicator (d.dr);
2576 4170 : tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2577 : fold_convert (ssizetype, indicator),
2578 : ssize_int (0));
2579 4170 : tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2580 : DR_OFFSET (d.dr));
2581 4170 : addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2582 4170 : tree seg_len
2583 4170 : = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2584 :
2585 4170 : tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2586 : seg_len, size_zero_node);
2587 4170 : tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2588 : size_zero_node, seg_len);
2589 4170 : max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2590 : size_int (d.access_size - align));
2591 :
2592 4170 : *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2593 4170 : *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2594 4170 : }
2595 :
2596 : /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2597 : storing the condition in *COND_EXPR. The fallback is to generate a
2598 : a test that the two accesses do not overlap:
2599 :
2600 : end_a <= start_b || end_b <= start_a. */
2601 :
2602 : static void
2603 5000 : create_intersect_range_checks (class loop *loop, tree *cond_expr,
2604 : const dr_with_seg_len_pair_t &alias_pair)
2605 : {
2606 5000 : const dr_with_seg_len& dr_a = alias_pair.first;
2607 5000 : const dr_with_seg_len& dr_b = alias_pair.second;
2608 5000 : *cond_expr = NULL_TREE;
2609 5000 : if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2610 2915 : return;
2611 :
2612 4849 : if (create_ifn_alias_checks (cond_expr, alias_pair))
2613 : return;
2614 :
2615 4849 : if (create_waw_or_war_checks (cond_expr, alias_pair))
2616 : return;
2617 :
2618 2085 : unsigned HOST_WIDE_INT min_align;
2619 2085 : tree_code cmp_code;
2620 : /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2621 : are equivalent. This is just an optimization heuristic. */
2622 2085 : if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2623 1980 : && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2624 : {
2625 : /* In this case adding access_size to seg_len is likely to give
2626 : a simple X * step, where X is either the number of scalar
2627 : iterations or the vectorization factor. We're better off
2628 : keeping that, rather than subtracting an alignment from it.
2629 :
2630 : In this case the maximum values are exclusive and so there is
2631 : no alias if the maximum of one segment equals the minimum
2632 : of another. */
2633 : min_align = 0;
2634 : cmp_code = LE_EXPR;
2635 : }
2636 : else
2637 : {
2638 : /* Calculate the minimum alignment shared by all four pointers,
2639 : then arrange for this alignment to be subtracted from the
2640 : exclusive maximum values to get inclusive maximum values.
2641 : This "- min_align" is cumulative with a "+ access_size"
2642 : in the calculation of the maximum values. In the best
2643 : (and common) case, the two cancel each other out, leaving
2644 : us with an inclusive bound based only on seg_len. In the
2645 : worst case we're simply adding a smaller number than before.
2646 :
2647 : Because the maximum values are inclusive, there is an alias
2648 : if the maximum value of one segment is equal to the minimum
2649 : value of the other. */
2650 210 : min_align = std::min (dr_a.align, dr_b.align);
2651 210 : cmp_code = LT_EXPR;
2652 : }
2653 :
2654 2085 : tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2655 2085 : get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2656 2085 : get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2657 :
2658 2085 : *cond_expr
2659 2085 : = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2660 : fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2661 : fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2662 2085 : if (dump_enabled_p ())
2663 285 : dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2664 : }
2665 :
2666 : /* Create a conditional expression that represents the run-time checks for
2667 : overlapping of address ranges represented by a list of data references
2668 : pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2669 : COND_EXPR is the conditional expression to be used in the if statement
2670 : that controls which version of the loop gets executed at runtime. */
2671 :
2672 : void
2673 3243 : create_runtime_alias_checks (class loop *loop,
2674 : const vec<dr_with_seg_len_pair_t> *alias_pairs,
2675 : tree * cond_expr)
2676 : {
2677 3243 : tree part_cond_expr;
2678 :
2679 3243 : fold_defer_overflow_warnings ();
2680 14729 : for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2681 : {
2682 5000 : gcc_assert (alias_pair.flags);
2683 5000 : if (dump_enabled_p ())
2684 770 : dump_printf (MSG_NOTE,
2685 : "create runtime check for data references %T and %T\n",
2686 770 : DR_REF (alias_pair.first.dr),
2687 770 : DR_REF (alias_pair.second.dr));
2688 :
2689 : /* Create condition expression for each pair data references. */
2690 5000 : create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2691 5000 : if (*cond_expr)
2692 4917 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2693 : *cond_expr, part_cond_expr);
2694 : else
2695 83 : *cond_expr = part_cond_expr;
2696 : }
2697 3243 : fold_undefer_and_ignore_overflow_warnings ();
2698 3243 : }
2699 :
2700 : /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2701 : expressions. */
2702 : static bool
2703 0 : dr_equal_offsets_p1 (tree offset1, tree offset2)
2704 : {
2705 0 : bool res;
2706 :
2707 0 : STRIP_NOPS (offset1);
2708 0 : STRIP_NOPS (offset2);
2709 :
2710 0 : if (offset1 == offset2)
2711 : return true;
2712 :
2713 0 : if (TREE_CODE (offset1) != TREE_CODE (offset2)
2714 0 : || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2715 : return false;
2716 :
2717 0 : res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2718 0 : TREE_OPERAND (offset2, 0));
2719 :
2720 0 : if (!res || !BINARY_CLASS_P (offset1))
2721 : return res;
2722 :
2723 0 : res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2724 0 : TREE_OPERAND (offset2, 1));
2725 :
2726 0 : return res;
2727 : }
2728 :
2729 : /* Check if DRA and DRB have equal offsets. */
2730 : bool
2731 0 : dr_equal_offsets_p (struct data_reference *dra,
2732 : struct data_reference *drb)
2733 : {
2734 0 : tree offset1, offset2;
2735 :
2736 0 : offset1 = DR_OFFSET (dra);
2737 0 : offset2 = DR_OFFSET (drb);
2738 :
2739 0 : return dr_equal_offsets_p1 (offset1, offset2);
2740 : }
2741 :
2742 : /* Returns true if FNA == FNB. */
2743 :
2744 : static bool
2745 0 : affine_function_equal_p (affine_fn fna, affine_fn fnb)
2746 : {
2747 0 : unsigned i, n = fna.length ();
2748 :
2749 0 : if (n != fnb.length ())
2750 : return false;
2751 :
2752 0 : for (i = 0; i < n; i++)
2753 0 : if (!operand_equal_p (fna[i], fnb[i], 0))
2754 : return false;
2755 :
2756 : return true;
2757 : }
2758 :
2759 : /* If all the functions in CF are the same, returns one of them,
2760 : otherwise returns NULL. */
2761 :
2762 : static affine_fn
2763 2272408 : common_affine_function (conflict_function *cf)
2764 : {
2765 2272408 : unsigned i;
2766 2272408 : affine_fn comm;
2767 :
2768 2272408 : if (!CF_NONTRIVIAL_P (cf))
2769 0 : return affine_fn ();
2770 :
2771 2272408 : comm = cf->fns[0];
2772 :
2773 2272408 : for (i = 1; i < cf->n; i++)
2774 0 : if (!affine_function_equal_p (comm, cf->fns[i]))
2775 0 : return affine_fn ();
2776 :
2777 2272408 : return comm;
2778 : }
2779 :
2780 : /* Returns the base of the affine function FN. */
2781 :
2782 : static tree
2783 1306527 : affine_function_base (affine_fn fn)
2784 : {
2785 0 : return fn[0];
2786 : }
2787 :
2788 : /* Returns true if FN is a constant. */
2789 :
2790 : static bool
2791 1306836 : affine_function_constant_p (affine_fn fn)
2792 : {
2793 1306836 : unsigned i;
2794 1306836 : tree coef;
2795 :
2796 1366555 : for (i = 1; fn.iterate (i, &coef); i++)
2797 60028 : if (!integer_zerop (coef))
2798 : return false;
2799 :
2800 : return true;
2801 : }
2802 :
2803 : /* Returns true if FN is the zero constant function. */
2804 :
2805 : static bool
2806 170632 : affine_function_zero_p (affine_fn fn)
2807 : {
2808 170632 : return (integer_zerop (affine_function_base (fn))
2809 170632 : && affine_function_constant_p (fn));
2810 : }
2811 :
2812 : /* Returns a signed integer type with the largest precision from TA
2813 : and TB. */
2814 :
2815 : static tree
2816 1718898 : signed_type_for_types (tree ta, tree tb)
2817 : {
2818 1718898 : if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2819 189 : return signed_type_for (ta);
2820 : else
2821 1718709 : return signed_type_for (tb);
2822 : }
2823 :
2824 : /* Applies operation OP on affine functions FNA and FNB, and returns the
2825 : result. */
2826 :
2827 : static affine_fn
2828 1136204 : affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2829 : {
2830 1136204 : unsigned i, n, m;
2831 1136204 : affine_fn ret;
2832 1136204 : tree coef;
2833 :
2834 3408612 : if (fnb.length () > fna.length ())
2835 : {
2836 0 : n = fna.length ();
2837 0 : m = fnb.length ();
2838 : }
2839 : else
2840 : {
2841 1136204 : n = fnb.length ();
2842 : m = fna.length ();
2843 : }
2844 :
2845 1136204 : ret.create (m);
2846 2332436 : for (i = 0; i < n; i++)
2847 : {
2848 2392464 : tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2849 1196232 : TREE_TYPE (fnb[i]));
2850 1196232 : ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2851 : }
2852 :
2853 1136204 : for (; fna.iterate (i, &coef); i++)
2854 0 : ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2855 : coef, integer_zero_node));
2856 1136204 : for (; fnb.iterate (i, &coef); i++)
2857 0 : ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2858 : integer_zero_node, coef));
2859 :
2860 1136204 : return ret;
2861 : }
2862 :
2863 : /* Returns the sum of affine functions FNA and FNB. */
2864 :
2865 : static affine_fn
2866 0 : affine_fn_plus (affine_fn fna, affine_fn fnb)
2867 : {
2868 0 : return affine_fn_op (PLUS_EXPR, fna, fnb);
2869 : }
2870 :
2871 : /* Returns the difference of affine functions FNA and FNB. */
2872 :
2873 : static affine_fn
2874 1136204 : affine_fn_minus (affine_fn fna, affine_fn fnb)
2875 : {
2876 0 : return affine_fn_op (MINUS_EXPR, fna, fnb);
2877 : }
2878 :
2879 : /* Frees affine function FN. */
2880 :
2881 : static void
2882 3606862 : affine_fn_free (affine_fn fn)
2883 : {
2884 0 : fn.release ();
2885 0 : }
2886 :
2887 : /* Determine for each subscript in the data dependence relation DDR
2888 : the distance. */
2889 :
2890 : static void
2891 3072451 : compute_subscript_distance (struct data_dependence_relation *ddr)
2892 : {
2893 3072451 : conflict_function *cf_a, *cf_b;
2894 3072451 : affine_fn fn_a, fn_b, diff;
2895 :
2896 3072451 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2897 : {
2898 : unsigned int i;
2899 :
2900 4208655 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2901 : {
2902 1136204 : struct subscript *subscript;
2903 :
2904 1136204 : subscript = DDR_SUBSCRIPT (ddr, i);
2905 1136204 : cf_a = SUB_CONFLICTS_IN_A (subscript);
2906 1136204 : cf_b = SUB_CONFLICTS_IN_B (subscript);
2907 :
2908 1136204 : fn_a = common_affine_function (cf_a);
2909 1136204 : fn_b = common_affine_function (cf_b);
2910 1136204 : if (!fn_a.exists () || !fn_b.exists ())
2911 : {
2912 0 : SUB_DISTANCE (subscript) = chrec_dont_know;
2913 0 : return;
2914 : }
2915 1136204 : diff = affine_fn_minus (fn_a, fn_b);
2916 :
2917 1136204 : if (affine_function_constant_p (diff))
2918 1135895 : SUB_DISTANCE (subscript) = affine_function_base (diff);
2919 : else
2920 309 : SUB_DISTANCE (subscript) = chrec_dont_know;
2921 :
2922 1136204 : affine_fn_free (diff);
2923 : }
2924 : }
2925 : }
2926 :
2927 : /* Returns the conflict function for "unknown". */
2928 :
2929 : static conflict_function *
2930 7965212 : conflict_fn_not_known (void)
2931 : {
2932 0 : conflict_function *fn = XCNEW (conflict_function);
2933 7965212 : fn->n = NOT_KNOWN;
2934 :
2935 7965212 : return fn;
2936 : }
2937 :
2938 : /* Returns the conflict function for "independent". */
2939 :
2940 : static conflict_function *
2941 4280476 : conflict_fn_no_dependence (void)
2942 : {
2943 0 : conflict_function *fn = XCNEW (conflict_function);
2944 4280476 : fn->n = NO_DEPENDENCE;
2945 :
2946 4280476 : return fn;
2947 : }
2948 :
2949 : /* Returns true if the address of OBJ is invariant in LOOP. */
2950 :
2951 : static bool
2952 3259789 : object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2953 : {
2954 3414303 : while (handled_component_p (obj))
2955 : {
2956 159819 : if (TREE_CODE (obj) == ARRAY_REF)
2957 : {
2958 9653 : for (int i = 1; i < 4; ++i)
2959 8566 : if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2960 8566 : loop->num))
2961 : return false;
2962 : }
2963 153427 : else if (TREE_CODE (obj) == COMPONENT_REF)
2964 : {
2965 132948 : if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2966 132948 : loop->num))
2967 : return false;
2968 : }
2969 154514 : obj = TREE_OPERAND (obj, 0);
2970 : }
2971 :
2972 3254484 : if (!INDIRECT_REF_P (obj)
2973 3254484 : && TREE_CODE (obj) != MEM_REF)
2974 : return true;
2975 :
2976 3230175 : return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2977 6460350 : loop->num);
2978 : }
2979 :
2980 : /* Helper for contains_ssa_ref_p. */
2981 :
2982 : static bool
2983 95816 : contains_ssa_ref_p_1 (tree, tree *idx, void *data)
2984 : {
2985 95816 : if (TREE_CODE (*idx) == SSA_NAME)
2986 : {
2987 90109 : *(bool *)data = true;
2988 90109 : return false;
2989 : }
2990 : return true;
2991 : }
2992 :
2993 : /* Returns true if the reference REF contains a SSA index. */
2994 :
2995 : static bool
2996 247362 : contains_ssa_ref_p (tree ref)
2997 : {
2998 247362 : bool res = false;
2999 0 : for_each_index (&ref, contains_ssa_ref_p_1, &res);
3000 247362 : return res;
3001 : }
3002 :
3003 : /* Returns false if we can prove that data references A and B do not alias,
3004 : true otherwise. If LOOP_NEST is false no cross-iteration aliases are
3005 : considered. */
3006 :
3007 : bool
3008 14468303 : dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
3009 : class loop *loop_nest)
3010 : {
3011 14468303 : tree addr_a = DR_BASE_OBJECT (a);
3012 14468303 : tree addr_b = DR_BASE_OBJECT (b);
3013 :
3014 : /* If we are not processing a loop nest but scalar code we
3015 : do not need to care about possible cross-iteration dependences
3016 : and thus can process the full original reference. Do so,
3017 : similar to how loop invariant motion applies extra offset-based
3018 : disambiguation. */
3019 14468303 : if (!loop_nest)
3020 : {
3021 8074212 : tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
3022 8074212 : tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
3023 :
3024 8074212 : if (DR_BASE_ADDRESS (a)
3025 8062434 : && DR_BASE_ADDRESS (b)
3026 8062101 : && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
3027 7258238 : && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
3028 7171155 : && tree_size_a
3029 7171155 : && tree_size_b
3030 7171146 : && poly_int_tree_p (tree_size_a)
3031 7171120 : && poly_int_tree_p (tree_size_b)
3032 15245332 : && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
3033 7171120 : wi::to_poly_widest (tree_size_a),
3034 7171120 : wi::to_poly_widest (DR_INIT (b)),
3035 7171120 : wi::to_poly_widest (tree_size_b)))
3036 : {
3037 5378574 : gcc_assert (integer_zerop (DR_STEP (a))
3038 : && integer_zerop (DR_STEP (b)));
3039 5378606 : return false;
3040 : }
3041 :
3042 10782552 : aff_tree off1, off2;
3043 : poly_widest_int size1, size2;
3044 2695638 : get_inner_reference_aff (DR_REF (a), &off1, &size1);
3045 2695638 : get_inner_reference_aff (DR_REF (b), &off2, &size2);
3046 2695638 : aff_combination_scale (&off1, -1);
3047 2695638 : aff_combination_add (&off2, &off1);
3048 2695638 : if (aff_comb_cannot_overlap_p (&off2, size1, size2))
3049 32 : return false;
3050 2695638 : }
3051 :
3052 9089697 : if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3053 6748346 : && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3054 : /* For cross-iteration dependences the cliques must be valid for the
3055 : whole loop, not just individual iterations. */
3056 6501792 : && (!loop_nest
3057 6190844 : || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3058 5308893 : || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3059 6316211 : && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3060 15208552 : && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3061 : return false;
3062 :
3063 : /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3064 : do not know the size of the base-object. So we cannot do any
3065 : offset/overlap based analysis but have to rely on points-to
3066 : information only. */
3067 8865016 : if (TREE_CODE (addr_a) == MEM_REF
3068 8865016 : && (DR_UNCONSTRAINED_BASE (a)
3069 4043411 : || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3070 : {
3071 : /* For true dependences we can apply TBAA. */
3072 4110730 : if (flag_strict_aliasing
3073 3932130 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3074 4281084 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3075 170354 : get_alias_set (DR_REF (b))))
3076 : return false;
3077 4083102 : if (TREE_CODE (addr_b) == MEM_REF)
3078 3981243 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3079 7962486 : TREE_OPERAND (addr_b, 0));
3080 : else
3081 101859 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3082 101859 : build_fold_addr_expr (addr_b));
3083 : }
3084 4754286 : else if (TREE_CODE (addr_b) == MEM_REF
3085 4754286 : && (DR_UNCONSTRAINED_BASE (b)
3086 2470595 : || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3087 : {
3088 : /* For true dependences we can apply TBAA. */
3089 323569 : if (flag_strict_aliasing
3090 265669 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3091 402123 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3092 78554 : get_alias_set (DR_REF (b))))
3093 : return false;
3094 308348 : if (TREE_CODE (addr_a) == MEM_REF)
3095 183241 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3096 366482 : TREE_OPERAND (addr_b, 0));
3097 : else
3098 125107 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3099 250214 : TREE_OPERAND (addr_b, 0));
3100 : }
3101 : /* If dr_analyze_innermost failed to handle a component we are
3102 : possibly left with a non-base in which case we didn't analyze
3103 : a possible evolution of the base when analyzing a loop. */
3104 4430717 : else if (loop_nest
3105 6534445 : && ((handled_component_p (addr_a) && contains_ssa_ref_p (addr_a))
3106 81903 : || (handled_component_p (addr_b) && contains_ssa_ref_p (addr_b))))
3107 : {
3108 : /* For true dependences we can apply TBAA. */
3109 90109 : if (flag_strict_aliasing
3110 89467 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3111 99596 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3112 9487 : get_alias_set (DR_REF (b))))
3113 : return false;
3114 85928 : if (TREE_CODE (addr_a) == MEM_REF)
3115 3400 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3116 3400 : build_fold_addr_expr (addr_b));
3117 82528 : else if (TREE_CODE (addr_b) == MEM_REF)
3118 6540 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3119 13080 : TREE_OPERAND (addr_b, 0));
3120 : else
3121 75988 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3122 75988 : build_fold_addr_expr (addr_b));
3123 : }
3124 :
3125 : /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3126 : that is being subsetted in the loop nest. */
3127 4340608 : if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3128 2890552 : return refs_output_dependent_p (addr_a, addr_b);
3129 1450056 : else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3130 397933 : return refs_anti_dependent_p (addr_a, addr_b);
3131 1052123 : return refs_may_alias_p (addr_a, addr_b);
3132 : }
3133 :
3134 : /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3135 : if it is meaningful to compare their associated access functions
3136 : when checking for dependencies. */
3137 :
3138 : static bool
3139 2849122 : access_fn_components_comparable_p (tree ref_a, tree ref_b)
3140 : {
3141 : /* Allow pairs of component refs from the following sets:
3142 :
3143 : { REALPART_EXPR, IMAGPART_EXPR }
3144 : { COMPONENT_REF }
3145 : { ARRAY_REF }. */
3146 2849122 : tree_code code_a = TREE_CODE (ref_a);
3147 2849122 : tree_code code_b = TREE_CODE (ref_b);
3148 2849122 : if (code_a == IMAGPART_EXPR)
3149 35484 : code_a = REALPART_EXPR;
3150 2849122 : if (code_b == IMAGPART_EXPR)
3151 41694 : code_b = REALPART_EXPR;
3152 2849122 : if (code_a != code_b)
3153 : return false;
3154 :
3155 2827416 : if (TREE_CODE (ref_a) == COMPONENT_REF)
3156 : /* ??? We cannot simply use the type of operand #0 of the refs here as
3157 : the Fortran compiler smuggles type punning into COMPONENT_REFs.
3158 : Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3159 947967 : return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3160 947967 : == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3161 :
3162 1879449 : return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3163 3758898 : TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3164 : }
3165 :
3166 : /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3167 : is true when the main indices of A and B were not comparable so we try again
3168 : with alternate indices computed on an indirect reference. */
3169 :
3170 : struct data_dependence_relation *
3171 6491106 : initialize_data_dependence_relation (struct data_dependence_relation *res,
3172 : vec<loop_p> loop_nest,
3173 : bool use_alt_indices)
3174 : {
3175 6491106 : struct data_reference *a = DDR_A (res);
3176 6491106 : struct data_reference *b = DDR_B (res);
3177 6491106 : unsigned int i;
3178 :
3179 6491106 : struct indices *indices_a = &a->indices;
3180 6491106 : struct indices *indices_b = &b->indices;
3181 6491106 : if (use_alt_indices)
3182 : {
3183 367529 : if (TREE_CODE (DR_REF (a)) != MEM_REF)
3184 228634 : indices_a = &a->alt_indices;
3185 367529 : if (TREE_CODE (DR_REF (b)) != MEM_REF)
3186 259496 : indices_b = &b->alt_indices;
3187 : }
3188 6491106 : unsigned int num_dimensions_a = indices_a->access_fns.length ();
3189 6491106 : unsigned int num_dimensions_b = indices_b->access_fns.length ();
3190 6491106 : if (num_dimensions_a == 0 || num_dimensions_b == 0)
3191 : {
3192 2177214 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3193 2177214 : return res;
3194 : }
3195 :
3196 : /* For unconstrained bases, the root (highest-indexed) subscript
3197 : describes a variation in the base of the original DR_REF rather
3198 : than a component access. We have no type that accurately describes
3199 : the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3200 : applying this subscript) so limit the search to the last real
3201 : component access.
3202 :
3203 : E.g. for:
3204 :
3205 : void
3206 : f (int a[][8], int b[][8])
3207 : {
3208 : for (int i = 0; i < 8; ++i)
3209 : a[i * 2][0] = b[i][0];
3210 : }
3211 :
3212 : the a and b accesses have a single ARRAY_REF component reference [0]
3213 : but have two subscripts. */
3214 4313892 : if (indices_a->unconstrained_base)
3215 2462826 : num_dimensions_a -= 1;
3216 4313892 : if (indices_b->unconstrained_base)
3217 2418304 : num_dimensions_b -= 1;
3218 :
3219 : /* These structures describe sequences of component references in
3220 : DR_REF (A) and DR_REF (B). Each component reference is tied to a
3221 : specific access function. */
3222 4313892 : struct {
3223 : /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3224 : DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3225 : indices. In C notation, these are the indices of the rightmost
3226 : component references; e.g. for a sequence .b.c.d, the start
3227 : index is for .d. */
3228 : unsigned int start_a;
3229 : unsigned int start_b;
3230 :
3231 : /* The sequence contains LENGTH consecutive access functions from
3232 : each DR. */
3233 : unsigned int length;
3234 :
3235 : /* The enclosing objects for the A and B sequences respectively,
3236 : i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3237 : and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3238 : tree object_a;
3239 : tree object_b;
3240 4313892 : } full_seq = {}, struct_seq = {};
3241 :
3242 : /* Before each iteration of the loop:
3243 :
3244 : - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3245 : - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3246 4313892 : unsigned int index_a = 0;
3247 4313892 : unsigned int index_b = 0;
3248 4313892 : tree ref_a = DR_REF (a);
3249 4313892 : tree ref_b = DR_REF (b);
3250 :
3251 : /* Now walk the component references from the final DR_REFs back up to
3252 : the enclosing base objects. Each component reference corresponds
3253 : to one access function in the DR, with access function 0 being for
3254 : the final DR_REF and the highest-indexed access function being the
3255 : one that is applied to the base of the DR.
3256 :
3257 : Look for a sequence of component references whose access functions
3258 : are comparable (see access_fn_components_comparable_p). If more
3259 : than one such sequence exists, pick the one nearest the base
3260 : (which is the leftmost sequence in C notation). Store this sequence
3261 : in FULL_SEQ.
3262 :
3263 : For example, if we have:
3264 :
3265 : struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3266 :
3267 : A: a[0][i].s.c.d
3268 : B: __real b[0][i].s.e[i].f
3269 :
3270 : (where d is the same type as the real component of f) then the access
3271 : functions would be:
3272 :
3273 : 0 1 2 3
3274 : A: .d .c .s [i]
3275 :
3276 : 0 1 2 3 4 5
3277 : B: __real .f [i] .e .s [i]
3278 :
3279 : The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3280 : and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3281 : COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3282 : the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3283 : so is comparable. The A3/B5 column contains two ARRAY_REFs that
3284 : index foo[10] arrays, so is again comparable. The sequence is
3285 : therefore:
3286 :
3287 : A: [1, 3] (i.e. [i].s.c)
3288 : B: [3, 5] (i.e. [i].s.e)
3289 :
3290 : Also look for sequences of component references whose access
3291 : functions are comparable and whose enclosing objects have the same
3292 : RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3293 : example, STRUCT_SEQ would be:
3294 :
3295 : A: [1, 2] (i.e. s.c)
3296 : B: [3, 4] (i.e. s.e) */
3297 7150039 : while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3298 : {
3299 : /* The alternate indices form always has a single dimension
3300 : with unconstrained base. */
3301 2849122 : gcc_assert (!use_alt_indices);
3302 :
3303 : /* REF_A and REF_B must be one of the component access types
3304 : allowed by dr_analyze_indices. */
3305 2849122 : gcc_checking_assert (access_fn_component_p (ref_a));
3306 2849122 : gcc_checking_assert (access_fn_component_p (ref_b));
3307 :
3308 : /* Get the immediately-enclosing objects for REF_A and REF_B,
3309 : i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3310 : and DR_ACCESS_FN (B, INDEX_B). */
3311 2849122 : tree object_a = TREE_OPERAND (ref_a, 0);
3312 2849122 : tree object_b = TREE_OPERAND (ref_b, 0);
3313 :
3314 2849122 : tree type_a = TREE_TYPE (object_a);
3315 2849122 : tree type_b = TREE_TYPE (object_b);
3316 2849122 : if (access_fn_components_comparable_p (ref_a, ref_b))
3317 : {
3318 : /* This pair of component accesses is comparable for dependence
3319 : analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3320 : DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3321 2610748 : if (full_seq.start_a + full_seq.length != index_a
3322 2558066 : || full_seq.start_b + full_seq.length != index_b)
3323 : {
3324 : /* The accesses don't extend the current sequence,
3325 : so start a new one here. */
3326 60528 : full_seq.start_a = index_a;
3327 60528 : full_seq.start_b = index_b;
3328 60528 : full_seq.length = 0;
3329 : }
3330 :
3331 : /* Add this pair of references to the sequence. */
3332 2610748 : full_seq.length += 1;
3333 2610748 : full_seq.object_a = object_a;
3334 2610748 : full_seq.object_b = object_b;
3335 :
3336 : /* If the enclosing objects are structures (and thus have the
3337 : same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3338 2610748 : if (TREE_CODE (type_a) == RECORD_TYPE)
3339 748073 : struct_seq = full_seq;
3340 :
3341 : /* Move to the next containing reference for both A and B. */
3342 2610748 : ref_a = object_a;
3343 2610748 : ref_b = object_b;
3344 2610748 : index_a += 1;
3345 2610748 : index_b += 1;
3346 2610748 : continue;
3347 : }
3348 :
3349 : /* Try to approach equal type sizes. */
3350 238374 : if (!COMPLETE_TYPE_P (type_a)
3351 235343 : || !COMPLETE_TYPE_P (type_b)
3352 227274 : || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3353 464052 : || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3354 : break;
3355 :
3356 225399 : unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3357 225399 : unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3358 225399 : if (size_a <= size_b)
3359 : {
3360 134376 : index_a += 1;
3361 134376 : ref_a = object_a;
3362 : }
3363 225399 : if (size_b <= size_a)
3364 : {
3365 105679 : index_b += 1;
3366 105679 : ref_b = object_b;
3367 : }
3368 : }
3369 :
3370 : /* See whether FULL_SEQ ends at the base and whether the two bases
3371 : are equal. We do not care about TBAA or alignment info so we can
3372 : use OEP_ADDRESS_OF to avoid false negatives. */
3373 4313892 : tree base_a = indices_a->base_object;
3374 4313892 : tree base_b = indices_b->base_object;
3375 4313892 : bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3376 4112736 : && full_seq.start_b + full_seq.length == num_dimensions_b
3377 3960919 : && (indices_a->unconstrained_base
3378 3960919 : == indices_b->unconstrained_base)
3379 3956129 : && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3380 3499454 : && (types_compatible_p (TREE_TYPE (base_a),
3381 3499454 : TREE_TYPE (base_b))
3382 857811 : || (!base_supports_access_fn_components_p (base_a)
3383 851772 : && !base_supports_access_fn_components_p (base_b)
3384 849393 : && operand_equal_p
3385 849393 : (TYPE_SIZE (TREE_TYPE (base_a)),
3386 849393 : TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3387 7363242 : && (!loop_nest.exists ()
3388 3049350 : || (object_address_invariant_in_loop_p
3389 3049350 : (loop_nest[0], base_a))));
3390 :
3391 : /* If the bases are the same, we can include the base variation too.
3392 : E.g. the b accesses in:
3393 :
3394 : for (int i = 0; i < n; ++i)
3395 : b[i + 4][0] = b[i][0];
3396 :
3397 : have a definite dependence distance of 4, while for:
3398 :
3399 : for (int i = 0; i < n; ++i)
3400 : a[i + 4][0] = b[i][0];
3401 :
3402 : the dependence distance depends on the gap between a and b.
3403 :
3404 : If the bases are different then we can only rely on the sequence
3405 : rooted at a structure access, since arrays are allowed to overlap
3406 : arbitrarily and change shape arbitrarily. E.g. we treat this as
3407 : valid code:
3408 :
3409 : int a[256];
3410 : ...
3411 : ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3412 :
3413 : where two lvalues with the same int[4][3] type overlap, and where
3414 : both lvalues are distinct from the object's declared type. */
3415 2933071 : if (same_base_p)
3416 : {
3417 2933071 : if (indices_a->unconstrained_base)
3418 1481794 : full_seq.length += 1;
3419 : }
3420 : else
3421 : full_seq = struct_seq;
3422 :
3423 : /* Punt if we didn't find a suitable sequence. */
3424 4313892 : if (full_seq.length == 0)
3425 : {
3426 1123601 : if (use_alt_indices
3427 1001815 : || (TREE_CODE (DR_REF (a)) == MEM_REF
3428 772739 : && TREE_CODE (DR_REF (b)) == MEM_REF)
3429 369351 : || may_be_nonaddressable_p (DR_REF (a))
3430 1492689 : || may_be_nonaddressable_p (DR_REF (b)))
3431 : {
3432 : /* Fully exhausted possibilities. */
3433 756072 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3434 756072 : return res;
3435 : }
3436 :
3437 : /* Try evaluating both DRs as dereferences of pointers. */
3438 367529 : if (!a->alt_indices.base_object
3439 173129 : && TREE_CODE (DR_REF (a)) != MEM_REF)
3440 : {
3441 34234 : tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3442 : build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3443 : build_int_cst
3444 : (reference_alias_ptr_type (DR_REF (a)), 0));
3445 102702 : dr_analyze_indices (&a->alt_indices, alt_ref,
3446 34234 : loop_preheader_edge (loop_nest[0]),
3447 : loop_containing_stmt (DR_STMT (a)));
3448 : }
3449 367529 : if (!b->alt_indices.base_object
3450 186319 : && TREE_CODE (DR_REF (b)) != MEM_REF)
3451 : {
3452 78286 : tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3453 : build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3454 : build_int_cst
3455 : (reference_alias_ptr_type (DR_REF (b)), 0));
3456 234858 : dr_analyze_indices (&b->alt_indices, alt_ref,
3457 78286 : loop_preheader_edge (loop_nest[0]),
3458 : loop_containing_stmt (DR_STMT (b)));
3459 : }
3460 367529 : return initialize_data_dependence_relation (res, loop_nest, true);
3461 : }
3462 :
3463 3190291 : if (!same_base_p)
3464 : {
3465 : /* Partial overlap is possible for different bases when strict aliasing
3466 : is not in effect. It's also possible if either base involves a union
3467 : access; e.g. for:
3468 :
3469 : struct s1 { int a[2]; };
3470 : struct s2 { struct s1 b; int c; };
3471 : struct s3 { int d; struct s1 e; };
3472 : union u { struct s2 f; struct s3 g; } *p, *q;
3473 :
3474 : the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3475 : "p->g.e" (base "p->g") and might partially overlap the s1 at
3476 : "q->g.e" (base "q->g"). */
3477 257220 : if (!flag_strict_aliasing
3478 245577 : || ref_contains_union_access_p (full_seq.object_a)
3479 449371 : || ref_contains_union_access_p (full_seq.object_b))
3480 : {
3481 65088 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3482 65088 : return res;
3483 : }
3484 :
3485 192132 : DDR_COULD_BE_INDEPENDENT_P (res) = true;
3486 192132 : if (!loop_nest.exists ()
3487 384264 : || (object_address_invariant_in_loop_p (loop_nest[0],
3488 192132 : full_seq.object_a)
3489 18307 : && object_address_invariant_in_loop_p (loop_nest[0],
3490 18307 : full_seq.object_b)))
3491 : {
3492 9361 : DDR_OBJECT_A (res) = full_seq.object_a;
3493 9361 : DDR_OBJECT_B (res) = full_seq.object_b;
3494 : }
3495 : }
3496 :
3497 3125203 : DDR_AFFINE_P (res) = true;
3498 3125203 : DDR_ARE_DEPENDENT (res) = NULL_TREE;
3499 3125203 : DDR_SUBSCRIPTS (res).create (full_seq.length);
3500 3125203 : DDR_LOOP_NEST (res) = loop_nest;
3501 3125203 : DDR_SELF_REFERENCE (res) = false;
3502 :
3503 7077131 : for (i = 0; i < full_seq.length; ++i)
3504 : {
3505 3951928 : struct subscript *subscript;
3506 :
3507 3951928 : subscript = XNEW (struct subscript);
3508 3951928 : SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3509 3951928 : SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3510 3951928 : SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3511 3951928 : SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3512 3951928 : SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3513 3951928 : SUB_DISTANCE (subscript) = chrec_dont_know;
3514 3951928 : DDR_SUBSCRIPTS (res).safe_push (subscript);
3515 : }
3516 :
3517 : return res;
3518 : }
3519 :
3520 : /* Initialize a data dependence relation between data accesses A and
3521 : B. NB_LOOPS is the number of loops surrounding the references: the
3522 : size of the classic distance/direction vectors. */
3523 :
3524 : struct data_dependence_relation *
3525 13258572 : initialize_data_dependence_relation (struct data_reference *a,
3526 : struct data_reference *b,
3527 : vec<loop_p> loop_nest)
3528 : {
3529 13258572 : data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3530 13258572 : DDR_A (res) = a;
3531 13258572 : DDR_B (res) = b;
3532 13258572 : DDR_LOOP_NEST (res).create (0);
3533 13258572 : DDR_SUBSCRIPTS (res).create (0);
3534 13258572 : DDR_DIR_VECTS (res).create (0);
3535 13258572 : DDR_DIST_VECTS (res).create (0);
3536 :
3537 13258572 : if (a == NULL || b == NULL)
3538 : {
3539 0 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3540 0 : return res;
3541 : }
3542 :
3543 : /* If the data references do not alias, then they are independent. */
3544 19634740 : if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3545 : {
3546 7134995 : DDR_ARE_DEPENDENT (res) = chrec_known;
3547 7134995 : return res;
3548 : }
3549 :
3550 6123577 : return initialize_data_dependence_relation (res, loop_nest, false);
3551 : }
3552 :
3553 :
3554 : /* Frees memory used by the conflict function F. */
3555 :
3556 : static void
3557 14716346 : free_conflict_function (conflict_function *f)
3558 : {
3559 14716346 : unsigned i;
3560 :
3561 14716346 : if (CF_NONTRIVIAL_P (f))
3562 : {
3563 4941316 : for (i = 0; i < f->n; i++)
3564 2470658 : affine_fn_free (f->fns[i]);
3565 : }
3566 14716346 : free (f);
3567 14716346 : }
3568 :
3569 : /* Frees memory used by SUBSCRIPTS. */
3570 :
3571 : static void
3572 3125203 : free_subscripts (vec<subscript_p> subscripts)
3573 : {
3574 13327537 : for (subscript_p s : subscripts)
3575 : {
3576 3951928 : free_conflict_function (s->conflicting_iterations_in_a);
3577 3951928 : free_conflict_function (s->conflicting_iterations_in_b);
3578 3951928 : free (s);
3579 : }
3580 3125203 : subscripts.release ();
3581 3125203 : }
3582 :
3583 : /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3584 : description. */
3585 :
3586 : static inline void
3587 2230993 : finalize_ddr_dependent (struct data_dependence_relation *ddr,
3588 : tree chrec)
3589 : {
3590 2230993 : DDR_ARE_DEPENDENT (ddr) = chrec;
3591 2230993 : free_subscripts (DDR_SUBSCRIPTS (ddr));
3592 2230993 : DDR_SUBSCRIPTS (ddr).create (0);
3593 61162 : }
3594 :
3595 : /* The dependence relation DDR cannot be represented by a distance
3596 : vector. */
3597 :
3598 : static inline void
3599 2088 : non_affine_dependence_relation (struct data_dependence_relation *ddr)
3600 : {
3601 2088 : if (dump_file && (dump_flags & TDF_DETAILS))
3602 92 : fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3603 :
3604 2088 : DDR_AFFINE_P (ddr) = false;
3605 2088 : }
3606 :
3607 :
3608 :
3609 : /* This section contains the classic Banerjee tests. */
3610 :
3611 : /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3612 : variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3613 :
3614 : static inline bool
3615 2230951 : ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3616 : {
3617 2230951 : return (evolution_function_is_constant_p (chrec_a)
3618 2725917 : && evolution_function_is_constant_p (chrec_b));
3619 : }
3620 :
3621 : /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3622 : variable, i.e., if the SIV (Single Index Variable) test is true. */
3623 :
3624 : static bool
3625 1737602 : siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3626 : {
3627 3473590 : if ((evolution_function_is_constant_p (chrec_a)
3628 1617 : && evolution_function_is_univariate_p (chrec_b))
3629 3473590 : || (evolution_function_is_constant_p (chrec_b)
3630 1266 : && evolution_function_is_univariate_p (chrec_a)))
3631 2877 : return true;
3632 :
3633 1734725 : if (evolution_function_is_univariate_p (chrec_a)
3634 1734725 : && evolution_function_is_univariate_p (chrec_b))
3635 : {
3636 1708367 : switch (TREE_CODE (chrec_a))
3637 : {
3638 1708367 : case POLYNOMIAL_CHREC:
3639 1708367 : switch (TREE_CODE (chrec_b))
3640 : {
3641 1708367 : case POLYNOMIAL_CHREC:
3642 1708367 : if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3643 : return false;
3644 : /* FALLTHRU */
3645 :
3646 : default:
3647 : return true;
3648 : }
3649 :
3650 : default:
3651 : return true;
3652 : }
3653 : }
3654 :
3655 : return false;
3656 : }
3657 :
3658 : /* Creates a conflict function with N dimensions. The affine functions
3659 : in each dimension follow. */
3660 :
3661 : static conflict_function *
3662 2470658 : conflict_fn (unsigned n, ...)
3663 : {
3664 2470658 : unsigned i;
3665 2470658 : conflict_function *ret = XCNEW (conflict_function);
3666 2470658 : va_list ap;
3667 :
3668 2470658 : gcc_assert (n > 0 && n <= MAX_DIM);
3669 2470658 : va_start (ap, n);
3670 :
3671 2470658 : ret->n = n;
3672 4941316 : for (i = 0; i < n; i++)
3673 2470658 : ret->fns[i] = va_arg (ap, affine_fn);
3674 2470658 : va_end (ap);
3675 :
3676 2470658 : return ret;
3677 : }
3678 :
3679 : /* Returns constant affine function with value CST. */
3680 :
3681 : static affine_fn
3682 2350150 : affine_fn_cst (tree cst)
3683 : {
3684 2350150 : affine_fn fn;
3685 2350150 : fn.create (1);
3686 2350150 : fn.quick_push (cst);
3687 2350150 : return fn;
3688 : }
3689 :
3690 : /* Returns affine function with single variable, CST + COEF * x_DIM. */
3691 :
3692 : static affine_fn
3693 120508 : affine_fn_univar (tree cst, unsigned dim, tree coef)
3694 : {
3695 120508 : affine_fn fn;
3696 120508 : fn.create (dim + 1);
3697 120508 : unsigned i;
3698 :
3699 120508 : gcc_assert (dim > 0);
3700 120508 : fn.quick_push (cst);
3701 241016 : for (i = 1; i < dim; i++)
3702 0 : fn.quick_push (integer_zero_node);
3703 120508 : fn.quick_push (coef);
3704 120508 : return fn;
3705 : }
3706 :
3707 : /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3708 : *OVERLAPS_B are initialized to the functions that describe the
3709 : relation between the elements accessed twice by CHREC_A and
3710 : CHREC_B. For k >= 0, the following property is verified:
3711 :
3712 : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3713 :
3714 : static void
3715 493349 : analyze_ziv_subscript (tree chrec_a,
3716 : tree chrec_b,
3717 : conflict_function **overlaps_a,
3718 : conflict_function **overlaps_b,
3719 : tree *last_conflicts)
3720 : {
3721 493349 : tree type, difference;
3722 493349 : dependence_stats.num_ziv++;
3723 :
3724 493349 : if (dump_file && (dump_flags & TDF_DETAILS))
3725 22420 : fprintf (dump_file, "(analyze_ziv_subscript \n");
3726 :
3727 493349 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3728 493349 : chrec_a = chrec_convert (type, chrec_a, NULL);
3729 493349 : chrec_b = chrec_convert (type, chrec_b, NULL);
3730 493349 : difference = chrec_fold_minus (type, chrec_a, chrec_b);
3731 :
3732 493349 : switch (TREE_CODE (difference))
3733 : {
3734 493349 : case INTEGER_CST:
3735 493349 : if (integer_zerop (difference))
3736 : {
3737 : /* The difference is equal to zero: the accessed index
3738 : overlaps for each iteration in the loop. */
3739 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3740 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3741 0 : *last_conflicts = chrec_dont_know;
3742 0 : dependence_stats.num_ziv_dependent++;
3743 : }
3744 : else
3745 : {
3746 : /* The accesses do not overlap. */
3747 493349 : *overlaps_a = conflict_fn_no_dependence ();
3748 493349 : *overlaps_b = conflict_fn_no_dependence ();
3749 493349 : *last_conflicts = integer_zero_node;
3750 493349 : dependence_stats.num_ziv_independent++;
3751 : }
3752 : break;
3753 :
3754 0 : default:
3755 : /* We're not sure whether the indexes overlap. For the moment,
3756 : conservatively answer "don't know". */
3757 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3758 0 : fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3759 :
3760 0 : *overlaps_a = conflict_fn_not_known ();
3761 0 : *overlaps_b = conflict_fn_not_known ();
3762 0 : *last_conflicts = chrec_dont_know;
3763 0 : dependence_stats.num_ziv_unimplemented++;
3764 0 : break;
3765 : }
3766 :
3767 493349 : if (dump_file && (dump_flags & TDF_DETAILS))
3768 22420 : fprintf (dump_file, ")\n");
3769 493349 : }
3770 :
3771 : /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3772 : and only if it fits to the int type. If this is not the case, or the
3773 : bound on the number of iterations of LOOP could not be derived, returns
3774 : chrec_dont_know. */
3775 :
3776 : static tree
3777 0 : max_stmt_executions_tree (class loop *loop)
3778 : {
3779 0 : widest_int nit;
3780 :
3781 0 : if (!max_stmt_executions (loop, &nit))
3782 0 : return chrec_dont_know;
3783 :
3784 0 : if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3785 0 : return chrec_dont_know;
3786 :
3787 0 : return wide_int_to_tree (unsigned_type_node, nit);
3788 0 : }
3789 :
3790 : /* Determine whether the CHREC is always positive/negative. If the expression
3791 : cannot be statically analyzed, return false, otherwise set the answer into
3792 : VALUE. */
3793 :
3794 : static bool
3795 4430 : chrec_is_positive (tree chrec, bool *value)
3796 : {
3797 4430 : bool value0, value1, value2;
3798 4430 : tree end_value, nb_iter;
3799 :
3800 4430 : switch (TREE_CODE (chrec))
3801 : {
3802 0 : case POLYNOMIAL_CHREC:
3803 0 : if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3804 0 : || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3805 0 : return false;
3806 :
3807 : /* FIXME -- overflows. */
3808 0 : if (value0 == value1)
3809 : {
3810 0 : *value = value0;
3811 0 : return true;
3812 : }
3813 :
3814 : /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3815 : and the proof consists in showing that the sign never
3816 : changes during the execution of the loop, from 0 to
3817 : loop->nb_iterations. */
3818 0 : if (!evolution_function_is_affine_p (chrec))
3819 : return false;
3820 :
3821 0 : nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3822 0 : if (chrec_contains_undetermined (nb_iter))
3823 : return false;
3824 :
3825 : #if 0
3826 : /* TODO -- If the test is after the exit, we may decrease the number of
3827 : iterations by one. */
3828 : if (after_exit)
3829 : nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3830 : #endif
3831 :
3832 0 : end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3833 :
3834 0 : if (!chrec_is_positive (end_value, &value2))
3835 : return false;
3836 :
3837 0 : *value = value0;
3838 0 : return value0 == value1;
3839 :
3840 4430 : case INTEGER_CST:
3841 4430 : switch (tree_int_cst_sgn (chrec))
3842 : {
3843 2034 : case -1:
3844 2034 : *value = false;
3845 2034 : break;
3846 2396 : case 1:
3847 2396 : *value = true;
3848 2396 : break;
3849 : default:
3850 : return false;
3851 : }
3852 : return true;
3853 :
3854 : default:
3855 : return false;
3856 : }
3857 : }
3858 :
3859 :
3860 : /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3861 : constant, and CHREC_B is an affine function. *OVERLAPS_A and
3862 : *OVERLAPS_B are initialized to the functions that describe the
3863 : relation between the elements accessed twice by CHREC_A and
3864 : CHREC_B. For k >= 0, the following property is verified:
3865 :
3866 : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3867 :
3868 : static void
3869 2877 : analyze_siv_subscript_cst_affine (tree chrec_a,
3870 : tree chrec_b,
3871 : conflict_function **overlaps_a,
3872 : conflict_function **overlaps_b,
3873 : tree *last_conflicts)
3874 : {
3875 2877 : bool value0, value1, value2;
3876 2877 : tree type, difference, tmp;
3877 :
3878 2877 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3879 2877 : chrec_a = chrec_convert (type, chrec_a, NULL);
3880 2877 : chrec_b = chrec_convert (type, chrec_b, NULL);
3881 2877 : difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3882 :
3883 : /* Special case overlap in the first iteration. */
3884 2877 : if (integer_zerop (difference))
3885 : {
3886 660 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3887 660 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3888 660 : *last_conflicts = integer_one_node;
3889 660 : return;
3890 : }
3891 :
3892 2217 : if (!chrec_is_positive (initial_condition (difference), &value0))
3893 : {
3894 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3895 0 : fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3896 :
3897 0 : dependence_stats.num_siv_unimplemented++;
3898 0 : *overlaps_a = conflict_fn_not_known ();
3899 0 : *overlaps_b = conflict_fn_not_known ();
3900 0 : *last_conflicts = chrec_dont_know;
3901 0 : return;
3902 : }
3903 : else
3904 : {
3905 2217 : if (value0 == false)
3906 : {
3907 1820 : if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3908 1820 : || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3909 : {
3910 4 : if (dump_file && (dump_flags & TDF_DETAILS))
3911 0 : fprintf (dump_file, "siv test failed: chrec not positive.\n");
3912 :
3913 4 : *overlaps_a = conflict_fn_not_known ();
3914 4 : *overlaps_b = conflict_fn_not_known ();
3915 4 : *last_conflicts = chrec_dont_know;
3916 4 : dependence_stats.num_siv_unimplemented++;
3917 4 : return;
3918 : }
3919 : else
3920 : {
3921 1816 : if (value1 == true)
3922 : {
3923 : /* Example:
3924 : chrec_a = 12
3925 : chrec_b = {10, +, 1}
3926 : */
3927 :
3928 1816 : if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3929 : {
3930 1535 : HOST_WIDE_INT numiter;
3931 1535 : class loop *loop = get_chrec_loop (chrec_b);
3932 :
3933 1535 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3934 1535 : tmp = fold_build2 (EXACT_DIV_EXPR, type,
3935 : fold_build1 (ABS_EXPR, type, difference),
3936 : CHREC_RIGHT (chrec_b));
3937 1535 : *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3938 1535 : *last_conflicts = integer_one_node;
3939 :
3940 :
3941 : /* Perform weak-zero siv test to see if overlap is
3942 : outside the loop bounds. */
3943 1535 : numiter = max_stmt_executions_int (loop);
3944 :
3945 1535 : if (numiter >= 0
3946 1535 : && compare_tree_int (tmp, numiter) > 0)
3947 : {
3948 0 : free_conflict_function (*overlaps_a);
3949 0 : free_conflict_function (*overlaps_b);
3950 0 : *overlaps_a = conflict_fn_no_dependence ();
3951 0 : *overlaps_b = conflict_fn_no_dependence ();
3952 0 : *last_conflicts = integer_zero_node;
3953 0 : dependence_stats.num_siv_independent++;
3954 0 : return;
3955 : }
3956 1535 : dependence_stats.num_siv_dependent++;
3957 1535 : return;
3958 : }
3959 :
3960 : /* When the step does not divide the difference, there are
3961 : no overlaps. */
3962 : else
3963 : {
3964 281 : *overlaps_a = conflict_fn_no_dependence ();
3965 281 : *overlaps_b = conflict_fn_no_dependence ();
3966 281 : *last_conflicts = integer_zero_node;
3967 281 : dependence_stats.num_siv_independent++;
3968 281 : return;
3969 : }
3970 : }
3971 :
3972 : else
3973 : {
3974 : /* Example:
3975 : chrec_a = 12
3976 : chrec_b = {10, +, -1}
3977 :
3978 : In this case, chrec_a will not overlap with chrec_b. */
3979 0 : *overlaps_a = conflict_fn_no_dependence ();
3980 0 : *overlaps_b = conflict_fn_no_dependence ();
3981 0 : *last_conflicts = integer_zero_node;
3982 0 : dependence_stats.num_siv_independent++;
3983 0 : return;
3984 : }
3985 : }
3986 : }
3987 : else
3988 : {
3989 397 : if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3990 397 : || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3991 : {
3992 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3993 0 : fprintf (dump_file, "siv test failed: chrec not positive.\n");
3994 :
3995 0 : *overlaps_a = conflict_fn_not_known ();
3996 0 : *overlaps_b = conflict_fn_not_known ();
3997 0 : *last_conflicts = chrec_dont_know;
3998 0 : dependence_stats.num_siv_unimplemented++;
3999 0 : return;
4000 : }
4001 : else
4002 : {
4003 397 : if (value2 == false)
4004 : {
4005 : /* Example:
4006 : chrec_a = 3
4007 : chrec_b = {10, +, -1}
4008 : */
4009 214 : if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
4010 : {
4011 109 : HOST_WIDE_INT numiter;
4012 109 : class loop *loop = get_chrec_loop (chrec_b);
4013 :
4014 109 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4015 109 : tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
4016 : CHREC_RIGHT (chrec_b));
4017 109 : *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
4018 109 : *last_conflicts = integer_one_node;
4019 :
4020 : /* Perform weak-zero siv test to see if overlap is
4021 : outside the loop bounds. */
4022 109 : numiter = max_stmt_executions_int (loop);
4023 :
4024 109 : if (numiter >= 0
4025 109 : && compare_tree_int (tmp, numiter) > 0)
4026 : {
4027 0 : free_conflict_function (*overlaps_a);
4028 0 : free_conflict_function (*overlaps_b);
4029 0 : *overlaps_a = conflict_fn_no_dependence ();
4030 0 : *overlaps_b = conflict_fn_no_dependence ();
4031 0 : *last_conflicts = integer_zero_node;
4032 0 : dependence_stats.num_siv_independent++;
4033 0 : return;
4034 : }
4035 109 : dependence_stats.num_siv_dependent++;
4036 109 : return;
4037 : }
4038 :
4039 : /* When the step does not divide the difference, there
4040 : are no overlaps. */
4041 : else
4042 : {
4043 105 : *overlaps_a = conflict_fn_no_dependence ();
4044 105 : *overlaps_b = conflict_fn_no_dependence ();
4045 105 : *last_conflicts = integer_zero_node;
4046 105 : dependence_stats.num_siv_independent++;
4047 105 : return;
4048 : }
4049 : }
4050 : else
4051 : {
4052 : /* Example:
4053 : chrec_a = 3
4054 : chrec_b = {4, +, 1}
4055 :
4056 : In this case, chrec_a will not overlap with chrec_b. */
4057 183 : *overlaps_a = conflict_fn_no_dependence ();
4058 183 : *overlaps_b = conflict_fn_no_dependence ();
4059 183 : *last_conflicts = integer_zero_node;
4060 183 : dependence_stats.num_siv_independent++;
4061 183 : return;
4062 : }
4063 : }
4064 : }
4065 : }
4066 : }
4067 :
4068 : /* Helper recursive function for initializing the matrix A. Returns
4069 : the initial value of CHREC. */
4070 :
4071 : static tree
4072 3374628 : initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4073 : {
4074 6749248 : gcc_assert (chrec);
4075 :
4076 6749248 : switch (TREE_CODE (chrec))
4077 : {
4078 3374628 : case POLYNOMIAL_CHREC:
4079 3374628 : HOST_WIDE_INT chrec_right;
4080 3374628 : if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4081 8 : return chrec_dont_know;
4082 3374620 : chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4083 : /* We want to be able to negate without overflow. */
4084 3374620 : if (chrec_right == HOST_WIDE_INT_MIN)
4085 0 : return chrec_dont_know;
4086 3374620 : A[index][0] = mult * chrec_right;
4087 3374620 : return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4088 :
4089 0 : case PLUS_EXPR:
4090 0 : case MULT_EXPR:
4091 0 : case MINUS_EXPR:
4092 0 : {
4093 0 : tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4094 0 : tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4095 :
4096 0 : return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4097 : }
4098 :
4099 0 : CASE_CONVERT:
4100 0 : {
4101 0 : tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4102 0 : return chrec_convert (chrec_type (chrec), op, NULL);
4103 : }
4104 :
4105 0 : case BIT_NOT_EXPR:
4106 0 : {
4107 : /* Handle ~X as -1 - X. */
4108 0 : tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4109 0 : return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4110 0 : build_int_cst (TREE_TYPE (chrec), -1), op);
4111 : }
4112 :
4113 3374620 : case INTEGER_CST:
4114 3374620 : return cst_and_fits_in_hwi (chrec) ? chrec : chrec_dont_know;
4115 :
4116 0 : default:
4117 0 : gcc_unreachable ();
4118 : return NULL_TREE;
4119 : }
4120 : }
4121 :
4122 : #define FLOOR_DIV(x,y) ((x) / (y))
4123 :
4124 : /* Solves the special case of the Diophantine equation:
4125 : | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4126 :
4127 : Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4128 : number of iterations that loops X and Y run. The overlaps will be
4129 : constructed as evolutions in dimension DIM. */
4130 :
4131 : static void
4132 64 : compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4133 : HOST_WIDE_INT step_a,
4134 : HOST_WIDE_INT step_b,
4135 : affine_fn *overlaps_a,
4136 : affine_fn *overlaps_b,
4137 : tree *last_conflicts, int dim)
4138 : {
4139 64 : if (((step_a > 0 && step_b > 0)
4140 8 : || (step_a < 0 && step_b < 0)))
4141 : {
4142 60 : HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4143 60 : HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4144 :
4145 60 : gcd_steps_a_b = gcd (step_a, step_b);
4146 60 : step_overlaps_a = step_b / gcd_steps_a_b;
4147 60 : step_overlaps_b = step_a / gcd_steps_a_b;
4148 :
4149 60 : if (niter > 0)
4150 : {
4151 60 : tau2 = FLOOR_DIV (niter, step_overlaps_a);
4152 60 : tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4153 60 : last_conflict = tau2;
4154 60 : *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4155 : }
4156 : else
4157 0 : *last_conflicts = chrec_dont_know;
4158 :
4159 60 : *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4160 : build_int_cst (NULL_TREE,
4161 60 : step_overlaps_a));
4162 60 : *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4163 : build_int_cst (NULL_TREE,
4164 60 : step_overlaps_b));
4165 60 : }
4166 :
4167 : else
4168 : {
4169 4 : *overlaps_a = affine_fn_cst (integer_zero_node);
4170 4 : *overlaps_b = affine_fn_cst (integer_zero_node);
4171 4 : *last_conflicts = integer_zero_node;
4172 : }
4173 64 : }
4174 :
4175 : /* Solves the special case of a Diophantine equation where CHREC_A is
4176 : an affine bivariate function, and CHREC_B is an affine univariate
4177 : function. For example,
4178 :
4179 : | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4180 :
4181 : has the following overlapping functions:
4182 :
4183 : | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4184 : | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4185 : | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4186 :
4187 : FORNOW: This is a specialized implementation for a case occurring in
4188 : a common benchmark. Implement the general algorithm. */
4189 :
4190 : static void
4191 0 : compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4192 : conflict_function **overlaps_a,
4193 : conflict_function **overlaps_b,
4194 : tree *last_conflicts)
4195 : {
4196 0 : bool xz_p, yz_p, xyz_p;
4197 0 : HOST_WIDE_INT step_x, step_y, step_z;
4198 0 : HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4199 0 : affine_fn overlaps_a_xz, overlaps_b_xz;
4200 0 : affine_fn overlaps_a_yz, overlaps_b_yz;
4201 0 : affine_fn overlaps_a_xyz, overlaps_b_xyz;
4202 0 : affine_fn ova1, ova2, ovb;
4203 0 : tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4204 :
4205 0 : step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4206 0 : step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4207 0 : step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4208 :
4209 0 : niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4210 0 : niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4211 0 : niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4212 :
4213 0 : if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4214 : {
4215 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4216 0 : fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4217 :
4218 0 : *overlaps_a = conflict_fn_not_known ();
4219 0 : *overlaps_b = conflict_fn_not_known ();
4220 0 : *last_conflicts = chrec_dont_know;
4221 0 : return;
4222 : }
4223 :
4224 0 : niter = MIN (niter_x, niter_z);
4225 0 : compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4226 : &overlaps_a_xz,
4227 : &overlaps_b_xz,
4228 : &last_conflicts_xz, 1);
4229 0 : niter = MIN (niter_y, niter_z);
4230 0 : compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4231 : &overlaps_a_yz,
4232 : &overlaps_b_yz,
4233 : &last_conflicts_yz, 2);
4234 0 : niter = MIN (niter_x, niter_z);
4235 0 : niter = MIN (niter_y, niter);
4236 0 : compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4237 : &overlaps_a_xyz,
4238 : &overlaps_b_xyz,
4239 : &last_conflicts_xyz, 3);
4240 :
4241 0 : xz_p = !integer_zerop (last_conflicts_xz);
4242 0 : yz_p = !integer_zerop (last_conflicts_yz);
4243 0 : xyz_p = !integer_zerop (last_conflicts_xyz);
4244 :
4245 0 : if (xz_p || yz_p || xyz_p)
4246 : {
4247 0 : ova1 = affine_fn_cst (integer_zero_node);
4248 0 : ova2 = affine_fn_cst (integer_zero_node);
4249 0 : ovb = affine_fn_cst (integer_zero_node);
4250 0 : if (xz_p)
4251 : {
4252 0 : affine_fn t0 = ova1;
4253 0 : affine_fn t2 = ovb;
4254 :
4255 0 : ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4256 0 : ovb = affine_fn_plus (ovb, overlaps_b_xz);
4257 0 : affine_fn_free (t0);
4258 0 : affine_fn_free (t2);
4259 0 : *last_conflicts = last_conflicts_xz;
4260 : }
4261 0 : if (yz_p)
4262 : {
4263 0 : affine_fn t0 = ova2;
4264 0 : affine_fn t2 = ovb;
4265 :
4266 0 : ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4267 0 : ovb = affine_fn_plus (ovb, overlaps_b_yz);
4268 0 : affine_fn_free (t0);
4269 0 : affine_fn_free (t2);
4270 0 : *last_conflicts = last_conflicts_yz;
4271 : }
4272 0 : if (xyz_p)
4273 : {
4274 0 : affine_fn t0 = ova1;
4275 0 : affine_fn t2 = ova2;
4276 0 : affine_fn t4 = ovb;
4277 :
4278 0 : ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4279 0 : ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4280 0 : ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4281 0 : affine_fn_free (t0);
4282 0 : affine_fn_free (t2);
4283 0 : affine_fn_free (t4);
4284 0 : *last_conflicts = last_conflicts_xyz;
4285 : }
4286 0 : *overlaps_a = conflict_fn (2, ova1, ova2);
4287 0 : *overlaps_b = conflict_fn (1, ovb);
4288 0 : }
4289 : else
4290 : {
4291 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4292 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4293 0 : *last_conflicts = integer_zero_node;
4294 : }
4295 :
4296 0 : affine_fn_free (overlaps_a_xz);
4297 0 : affine_fn_free (overlaps_b_xz);
4298 0 : affine_fn_free (overlaps_a_yz);
4299 0 : affine_fn_free (overlaps_b_yz);
4300 0 : affine_fn_free (overlaps_a_xyz);
4301 0 : affine_fn_free (overlaps_b_xyz);
4302 : }
4303 :
4304 : /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4305 :
4306 : static void
4307 3420569 : lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4308 : int size)
4309 : {
4310 3420569 : memcpy (vec2, vec1, size * sizeof (*vec1));
4311 0 : }
4312 :
4313 : /* Copy the elements of M x N matrix MAT1 to MAT2. */
4314 :
4315 : static void
4316 1687238 : lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4317 : int m, int n)
4318 : {
4319 1687238 : int i;
4320 :
4321 5061714 : for (i = 0; i < m; i++)
4322 3374476 : lambda_vector_copy (mat1[i], mat2[i], n);
4323 1687238 : }
4324 :
4325 : /* Store the N x N identity matrix in MAT. */
4326 :
4327 : static void
4328 1687238 : lambda_matrix_id (lambda_matrix mat, int size)
4329 : {
4330 1687238 : int i, j;
4331 :
4332 5061714 : for (i = 0; i < size; i++)
4333 10123428 : for (j = 0; j < size; j++)
4334 10123428 : mat[i][j] = (i == j) ? 1 : 0;
4335 1687238 : }
4336 :
4337 : /* Return the index of the first nonzero element of vector VEC1 between
4338 : START and N. We must have START <= N.
4339 : Returns N if VEC1 is the zero vector. */
4340 :
4341 : static int
4342 1687238 : lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4343 : {
4344 1687238 : int j = start;
4345 1687238 : while (j < n && vec1[j] == 0)
4346 0 : j++;
4347 1687238 : return j;
4348 : }
4349 :
4350 : /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4351 : R2 = R2 + CONST1 * R1. */
4352 :
4353 : static bool
4354 3374746 : lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4355 : lambda_int const1)
4356 : {
4357 3374746 : int i;
4358 :
4359 3374746 : if (const1 == 0)
4360 : return true;
4361 :
4362 8436280 : for (i = 0; i < n; i++)
4363 : {
4364 5061768 : bool ovf;
4365 5061768 : lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4366 5061768 : if (ovf)
4367 3374746 : return false;
4368 5061768 : lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4369 5061768 : if (ovf || tem2 == HOST_WIDE_INT_MIN)
4370 : return false;
4371 5061768 : mat[r2][i] = tem2;
4372 : }
4373 :
4374 : return true;
4375 : }
4376 :
4377 : /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4378 : and store the result in VEC2. */
4379 :
4380 : static void
4381 1674533 : lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4382 : int size, lambda_int const1)
4383 : {
4384 1674533 : int i;
4385 :
4386 1674533 : if (const1 == 0)
4387 0 : lambda_vector_clear (vec2, size);
4388 : else
4389 5023599 : for (i = 0; i < size; i++)
4390 3349066 : vec2[i] = const1 * vec1[i];
4391 1674533 : }
4392 :
4393 : /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4394 :
4395 : static void
4396 1674533 : lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4397 : int size)
4398 : {
4399 0 : lambda_vector_mult_const (vec1, vec2, size, -1);
4400 0 : }
4401 :
4402 : /* Negate row R1 of matrix MAT which has N columns. */
4403 :
4404 : static void
4405 1674533 : lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4406 : {
4407 0 : lambda_vector_negate (mat[r1], mat[r1], n);
4408 1674533 : }
4409 :
4410 : /* Return true if two vectors are equal. */
4411 :
4412 : static bool
4413 353040 : lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4414 : {
4415 353040 : int i;
4416 354163 : for (i = 0; i < size; i++)
4417 353896 : if (vec1[i] != vec2[i])
4418 : return false;
4419 : return true;
4420 : }
4421 :
4422 : /* Given an M x N integer matrix A, this function determines an M x
4423 : M unimodular matrix U, and an M x N echelon matrix S such that
4424 : "U.A = S". This decomposition is also known as "right Hermite".
4425 :
4426 : Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4427 : Restructuring Compilers" Utpal Banerjee. */
4428 :
4429 : static bool
4430 1687238 : lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4431 : lambda_matrix S, lambda_matrix U)
4432 : {
4433 1687238 : int i, j, i0 = 0;
4434 :
4435 1687238 : lambda_matrix_copy (A, S, m, n);
4436 1687238 : lambda_matrix_id (U, m);
4437 :
4438 3374476 : for (j = 0; j < n; j++)
4439 : {
4440 3374476 : if (lambda_vector_first_nz (S[j], m, i0) < m)
4441 : {
4442 1687238 : ++i0;
4443 3374476 : for (i = m - 1; i >= i0; i--)
4444 : {
4445 3374611 : while (S[i][j] != 0)
4446 : {
4447 1687373 : lambda_int factor, a, b;
4448 :
4449 1687373 : a = S[i-1][j];
4450 1687373 : b = S[i][j];
4451 1687373 : gcc_assert (a != HOST_WIDE_INT_MIN);
4452 1687373 : factor = a / b;
4453 :
4454 1687373 : if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4455 : return false;
4456 1687373 : std::swap (S[i], S[i-1]);
4457 :
4458 1687373 : if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4459 : return false;
4460 1687373 : std::swap (U[i], U[i-1]);
4461 : }
4462 : }
4463 : }
4464 : }
4465 :
4466 : return true;
4467 : }
4468 :
4469 : /* Determines the overlapping elements due to accesses CHREC_A and
4470 : CHREC_B, that are affine functions. This function cannot handle
4471 : symbolic evolution functions, ie. when initial conditions are
4472 : parameters, because it uses lambda matrices of integers. */
4473 :
4474 : static void
4475 1687314 : analyze_subscript_affine_affine (tree chrec_a,
4476 : tree chrec_b,
4477 : conflict_function **overlaps_a,
4478 : conflict_function **overlaps_b,
4479 : tree *last_conflicts)
4480 : {
4481 1687314 : unsigned nb_vars_a, nb_vars_b, dim;
4482 1687314 : lambda_int gamma, gcd_alpha_beta;
4483 1687314 : lambda_matrix A, U, S;
4484 1687314 : struct obstack scratch_obstack;
4485 :
4486 1687314 : if (eq_evolutions_p (chrec_a, chrec_b))
4487 : {
4488 : /* The accessed index overlaps for each iteration in the
4489 : loop. */
4490 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4491 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4492 0 : *last_conflicts = chrec_dont_know;
4493 0 : return;
4494 : }
4495 1687314 : if (dump_file && (dump_flags & TDF_DETAILS))
4496 19753 : fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4497 :
4498 : /* For determining the initial intersection, we have to solve a
4499 : Diophantine equation. This is the most time consuming part.
4500 :
4501 : For answering to the question: "Is there a dependence?" we have
4502 : to prove that there exists a solution to the Diophantine
4503 : equation, and that the solution is in the iteration domain,
4504 : i.e. the solution is positive or zero, and that the solution
4505 : happens before the upper bound loop.nb_iterations. Otherwise
4506 : there is no dependence. This function outputs a description of
4507 : the iterations that hold the intersections. */
4508 :
4509 1687314 : nb_vars_a = nb_vars_in_chrec (chrec_a);
4510 1687314 : nb_vars_b = nb_vars_in_chrec (chrec_b);
4511 :
4512 1687314 : gcc_obstack_init (&scratch_obstack);
4513 :
4514 1687314 : dim = nb_vars_a + nb_vars_b;
4515 1687314 : U = lambda_matrix_new (dim, dim, &scratch_obstack);
4516 1687314 : A = lambda_matrix_new (dim, 1, &scratch_obstack);
4517 1687314 : S = lambda_matrix_new (dim, 1, &scratch_obstack);
4518 :
4519 1687314 : tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4520 1687314 : tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4521 1687314 : if (init_a == chrec_dont_know
4522 1687302 : || init_b == chrec_dont_know)
4523 : {
4524 12 : if (dump_file && (dump_flags & TDF_DETAILS))
4525 0 : fprintf (dump_file, "affine-affine test failed: "
4526 : "representation issue.\n");
4527 12 : *overlaps_a = conflict_fn_not_known ();
4528 12 : *overlaps_b = conflict_fn_not_known ();
4529 12 : *last_conflicts = chrec_dont_know;
4530 12 : goto end_analyze_subs_aa;
4531 : }
4532 1687302 : gamma = int_cst_value (init_b) - int_cst_value (init_a);
4533 :
4534 : /* Don't do all the hard work of solving the Diophantine equation
4535 : when we already know the solution: for example,
4536 : | {3, +, 1}_1
4537 : | {3, +, 4}_2
4538 : | gamma = 3 - 3 = 0.
4539 : Then the first overlap occurs during the first iterations:
4540 : | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4541 : */
4542 1687302 : if (gamma == 0)
4543 : {
4544 64 : if (nb_vars_a == 1 && nb_vars_b == 1)
4545 : {
4546 64 : HOST_WIDE_INT step_a, step_b;
4547 64 : HOST_WIDE_INT niter, niter_a, niter_b;
4548 64 : affine_fn ova, ovb;
4549 :
4550 64 : niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4551 64 : niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4552 64 : niter = MIN (niter_a, niter_b);
4553 64 : step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4554 64 : step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4555 :
4556 64 : compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4557 : &ova, &ovb,
4558 : last_conflicts, 1);
4559 64 : *overlaps_a = conflict_fn (1, ova);
4560 64 : *overlaps_b = conflict_fn (1, ovb);
4561 : }
4562 :
4563 0 : else if (nb_vars_a == 2 && nb_vars_b == 1)
4564 0 : compute_overlap_steps_for_affine_1_2
4565 0 : (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4566 :
4567 0 : else if (nb_vars_a == 1 && nb_vars_b == 2)
4568 0 : compute_overlap_steps_for_affine_1_2
4569 0 : (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4570 :
4571 : else
4572 : {
4573 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4574 0 : fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4575 0 : *overlaps_a = conflict_fn_not_known ();
4576 0 : *overlaps_b = conflict_fn_not_known ();
4577 0 : *last_conflicts = chrec_dont_know;
4578 : }
4579 64 : goto end_analyze_subs_aa;
4580 : }
4581 :
4582 : /* U.A = S */
4583 1687238 : if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4584 : {
4585 0 : *overlaps_a = conflict_fn_not_known ();
4586 0 : *overlaps_b = conflict_fn_not_known ();
4587 0 : *last_conflicts = chrec_dont_know;
4588 0 : goto end_analyze_subs_aa;
4589 : }
4590 :
4591 1687238 : if (S[0][0] < 0)
4592 : {
4593 1674533 : S[0][0] *= -1;
4594 1674533 : lambda_matrix_row_negate (U, dim, 0);
4595 : }
4596 1687238 : gcd_alpha_beta = S[0][0];
4597 :
4598 : /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4599 : but that is a quite strange case. Instead of ICEing, answer
4600 : don't know. */
4601 1687238 : if (gcd_alpha_beta == 0)
4602 : {
4603 0 : *overlaps_a = conflict_fn_not_known ();
4604 0 : *overlaps_b = conflict_fn_not_known ();
4605 0 : *last_conflicts = chrec_dont_know;
4606 0 : goto end_analyze_subs_aa;
4607 : }
4608 :
4609 : /* The classic "gcd-test". */
4610 1687238 : if (!int_divides_p (gcd_alpha_beta, gamma))
4611 : {
4612 : /* The "gcd-test" has determined that there is no integer
4613 : solution, i.e. there is no dependence. */
4614 1571454 : *overlaps_a = conflict_fn_no_dependence ();
4615 1571454 : *overlaps_b = conflict_fn_no_dependence ();
4616 1571454 : *last_conflicts = integer_zero_node;
4617 : }
4618 :
4619 : /* Both access functions are univariate. This includes SIV and MIV cases. */
4620 115784 : else if (nb_vars_a == 1 && nb_vars_b == 1)
4621 : {
4622 : /* Both functions should have the same evolution sign. */
4623 115784 : if (((A[0][0] > 0 && -A[1][0] > 0)
4624 8677 : || (A[0][0] < 0 && -A[1][0] < 0)))
4625 : {
4626 : /* The solutions are given by:
4627 : |
4628 : | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
4629 : | [u21 u22] [y0]
4630 :
4631 : For a given integer t. Using the following variables,
4632 :
4633 : | i0 = u11 * gamma / gcd_alpha_beta
4634 : | j0 = u12 * gamma / gcd_alpha_beta
4635 : | i1 = u21
4636 : | j1 = u22
4637 :
4638 : the solutions are:
4639 :
4640 : | x0 = i0 + i1 * t,
4641 : | y0 = j0 + j1 * t. */
4642 115390 : HOST_WIDE_INT i0, j0, i1, j1;
4643 :
4644 115390 : i0 = U[0][0] * gamma / gcd_alpha_beta;
4645 115390 : j0 = U[0][1] * gamma / gcd_alpha_beta;
4646 115390 : i1 = U[1][0];
4647 115390 : j1 = U[1][1];
4648 :
4649 115390 : if ((i1 == 0 && i0 < 0)
4650 115390 : || (j1 == 0 && j0 < 0))
4651 : {
4652 : /* There is no solution.
4653 : FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4654 : falls in here, but for the moment we don't look at the
4655 : upper bound of the iteration domain. */
4656 0 : *overlaps_a = conflict_fn_no_dependence ();
4657 0 : *overlaps_b = conflict_fn_no_dependence ();
4658 0 : *last_conflicts = integer_zero_node;
4659 55196 : goto end_analyze_subs_aa;
4660 : }
4661 :
4662 115390 : if (i1 > 0 && j1 > 0)
4663 : {
4664 115390 : HOST_WIDE_INT niter_a
4665 115390 : = max_stmt_executions_int (get_chrec_loop (chrec_a));
4666 115390 : HOST_WIDE_INT niter_b
4667 115390 : = max_stmt_executions_int (get_chrec_loop (chrec_b));
4668 115390 : HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4669 :
4670 : /* (X0, Y0) is a solution of the Diophantine equation:
4671 : "chrec_a (X0) = chrec_b (Y0)". */
4672 115390 : HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4673 : CEIL (-j0, j1));
4674 115390 : HOST_WIDE_INT x0 = i1 * tau1 + i0;
4675 115390 : HOST_WIDE_INT y0 = j1 * tau1 + j0;
4676 :
4677 : /* (X1, Y1) is the smallest positive solution of the eq
4678 : "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4679 : first conflict occurs. */
4680 115390 : HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4681 115390 : HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4682 115390 : HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4683 :
4684 115390 : if (niter > 0)
4685 : {
4686 : /* If the overlap occurs outside of the bounds of the
4687 : loop, there is no dependence. */
4688 106003 : if (x1 >= niter_a || y1 >= niter_b)
4689 : {
4690 55196 : *overlaps_a = conflict_fn_no_dependence ();
4691 55196 : *overlaps_b = conflict_fn_no_dependence ();
4692 55196 : *last_conflicts = integer_zero_node;
4693 55196 : goto end_analyze_subs_aa;
4694 : }
4695 :
4696 : /* max stmt executions can get quite large, avoid
4697 : overflows by using wide ints here. */
4698 50807 : widest_int tau2
4699 101614 : = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4700 152421 : wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4701 50807 : widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4702 50807 : if (wi::min_precision (last_conflict, SIGNED)
4703 50807 : <= TYPE_PRECISION (integer_type_node))
4704 45910 : *last_conflicts
4705 45910 : = build_int_cst (integer_type_node,
4706 45910 : last_conflict.to_shwi ());
4707 : else
4708 4897 : *last_conflicts = chrec_dont_know;
4709 50807 : }
4710 : else
4711 9387 : *last_conflicts = chrec_dont_know;
4712 :
4713 60194 : *overlaps_a
4714 60194 : = conflict_fn (1,
4715 60194 : affine_fn_univar (build_int_cst (NULL_TREE, x1),
4716 : 1,
4717 60194 : build_int_cst (NULL_TREE, i1)));
4718 60194 : *overlaps_b
4719 60194 : = conflict_fn (1,
4720 60194 : affine_fn_univar (build_int_cst (NULL_TREE, y1),
4721 : 1,
4722 60194 : build_int_cst (NULL_TREE, j1)));
4723 60194 : }
4724 : else
4725 : {
4726 : /* FIXME: For the moment, the upper bound of the
4727 : iteration domain for i and j is not checked. */
4728 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4729 0 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4730 0 : *overlaps_a = conflict_fn_not_known ();
4731 0 : *overlaps_b = conflict_fn_not_known ();
4732 0 : *last_conflicts = chrec_dont_know;
4733 : }
4734 60194 : }
4735 : else
4736 : {
4737 394 : if (dump_file && (dump_flags & TDF_DETAILS))
4738 19 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4739 394 : *overlaps_a = conflict_fn_not_known ();
4740 394 : *overlaps_b = conflict_fn_not_known ();
4741 394 : *last_conflicts = chrec_dont_know;
4742 : }
4743 : }
4744 : else
4745 : {
4746 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4747 0 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4748 0 : *overlaps_a = conflict_fn_not_known ();
4749 0 : *overlaps_b = conflict_fn_not_known ();
4750 0 : *last_conflicts = chrec_dont_know;
4751 : }
4752 :
4753 1687314 : end_analyze_subs_aa:
4754 1687314 : obstack_free (&scratch_obstack, NULL);
4755 1687314 : if (dump_file && (dump_flags & TDF_DETAILS))
4756 : {
4757 19753 : fprintf (dump_file, " (overlaps_a = ");
4758 19753 : dump_conflict_function (dump_file, *overlaps_a);
4759 19753 : fprintf (dump_file, ")\n (overlaps_b = ");
4760 19753 : dump_conflict_function (dump_file, *overlaps_b);
4761 19753 : fprintf (dump_file, "))\n");
4762 : }
4763 : }
4764 :
4765 : /* Returns true when analyze_subscript_affine_affine can be used for
4766 : determining the dependence relation between chrec_a and chrec_b,
4767 : that contain symbols. This function modifies chrec_a and chrec_b
4768 : such that the analysis result is the same, and such that they don't
4769 : contain symbols, and then can safely be passed to the analyzer.
4770 :
4771 : Example: The analysis of the following tuples of evolutions produce
4772 : the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4773 : vs. {0, +, 1}_1
4774 :
4775 : {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4776 : {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4777 : */
4778 :
4779 : static bool
4780 45182 : can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4781 : {
4782 45182 : tree diff, type, left_a, left_b, right_b;
4783 :
4784 45182 : if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4785 45182 : || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4786 : /* FIXME: For the moment not handled. Might be refined later. */
4787 15683 : return false;
4788 :
4789 29499 : type = chrec_type (*chrec_a);
4790 29499 : left_a = CHREC_LEFT (*chrec_a);
4791 29499 : left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4792 29499 : diff = chrec_fold_minus (type, left_a, left_b);
4793 :
4794 58998 : if (!evolution_function_is_constant_p (diff))
4795 5368 : return false;
4796 :
4797 24131 : if (dump_file && (dump_flags & TDF_DETAILS))
4798 105 : fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4799 :
4800 24131 : *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4801 24131 : diff, CHREC_RIGHT (*chrec_a));
4802 24131 : right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4803 24131 : *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4804 : build_int_cst (type, 0),
4805 : right_b);
4806 24131 : return true;
4807 : }
4808 :
4809 : /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4810 : *OVERLAPS_B are initialized to the functions that describe the
4811 : relation between the elements accessed twice by CHREC_A and
4812 : CHREC_B. For k >= 0, the following property is verified:
4813 :
4814 : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4815 :
4816 : static void
4817 1711162 : analyze_siv_subscript (tree chrec_a,
4818 : tree chrec_b,
4819 : conflict_function **overlaps_a,
4820 : conflict_function **overlaps_b,
4821 : tree *last_conflicts,
4822 : int loop_nest_num)
4823 : {
4824 1711162 : dependence_stats.num_siv++;
4825 :
4826 1711162 : if (dump_file && (dump_flags & TDF_DETAILS))
4827 22884 : fprintf (dump_file, "(analyze_siv_subscript \n");
4828 :
4829 1711162 : if (evolution_function_is_constant_p (chrec_a)
4830 1711162 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4831 1614 : analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4832 : overlaps_a, overlaps_b, last_conflicts);
4833 :
4834 1709548 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4835 3419096 : && evolution_function_is_constant_p (chrec_b))
4836 1263 : analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4837 : overlaps_b, overlaps_a, last_conflicts);
4838 :
4839 1708285 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4840 1708285 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4841 : {
4842 1708285 : if (!chrec_contains_symbols (chrec_a)
4843 1708285 : && !chrec_contains_symbols (chrec_b))
4844 : {
4845 1663103 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4846 : overlaps_a, overlaps_b,
4847 : last_conflicts);
4848 :
4849 1663103 : if (CF_NOT_KNOWN_P (*overlaps_a)
4850 1662717 : || CF_NOT_KNOWN_P (*overlaps_b))
4851 386 : dependence_stats.num_siv_unimplemented++;
4852 1662717 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4853 59224 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4854 1603493 : dependence_stats.num_siv_independent++;
4855 : else
4856 59224 : dependence_stats.num_siv_dependent++;
4857 : }
4858 45182 : else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4859 : &chrec_b))
4860 : {
4861 24131 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4862 : overlaps_a, overlaps_b,
4863 : last_conflicts);
4864 :
4865 24131 : if (CF_NOT_KNOWN_P (*overlaps_a)
4866 24115 : || CF_NOT_KNOWN_P (*overlaps_b))
4867 16 : dependence_stats.num_siv_unimplemented++;
4868 24115 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4869 972 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4870 23143 : dependence_stats.num_siv_independent++;
4871 : else
4872 972 : dependence_stats.num_siv_dependent++;
4873 : }
4874 : else
4875 21051 : goto siv_subscript_dontknow;
4876 : }
4877 :
4878 : else
4879 : {
4880 21051 : siv_subscript_dontknow:;
4881 21051 : if (dump_file && (dump_flags & TDF_DETAILS))
4882 2946 : fprintf (dump_file, " siv test failed: unimplemented");
4883 21051 : *overlaps_a = conflict_fn_not_known ();
4884 21051 : *overlaps_b = conflict_fn_not_known ();
4885 21051 : *last_conflicts = chrec_dont_know;
4886 21051 : dependence_stats.num_siv_unimplemented++;
4887 : }
4888 :
4889 1711162 : if (dump_file && (dump_flags & TDF_DETAILS))
4890 22884 : fprintf (dump_file, ")\n");
4891 1711162 : }
4892 :
4893 : /* Returns false if we can prove that the greatest common divisor of the steps
4894 : of CHREC does not divide CST, false otherwise. */
4895 :
4896 : static bool
4897 20662 : gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4898 : {
4899 20662 : HOST_WIDE_INT cd = 0, val;
4900 20662 : tree step;
4901 :
4902 20662 : if (!tree_fits_shwi_p (cst))
4903 : return true;
4904 20662 : val = tree_to_shwi (cst);
4905 :
4906 61838 : while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4907 : {
4908 41322 : step = CHREC_RIGHT (chrec);
4909 41322 : if (!tree_fits_shwi_p (step))
4910 : return true;
4911 41176 : cd = gcd (cd, tree_to_shwi (step));
4912 41176 : chrec = CHREC_LEFT (chrec);
4913 : }
4914 :
4915 20516 : return val % cd == 0;
4916 : }
4917 :
4918 : /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4919 : LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4920 : functions that describe the relation between the elements accessed
4921 : twice by CHREC_A and CHREC_B. For k >= 0, the following property
4922 : is verified:
4923 :
4924 : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4925 :
4926 : static void
4927 26440 : analyze_miv_subscript (tree chrec_a,
4928 : tree chrec_b,
4929 : conflict_function **overlaps_a,
4930 : conflict_function **overlaps_b,
4931 : tree *last_conflicts,
4932 : class loop *loop_nest)
4933 : {
4934 26440 : tree type, difference;
4935 :
4936 26440 : dependence_stats.num_miv++;
4937 26440 : if (dump_file && (dump_flags & TDF_DETAILS))
4938 27 : fprintf (dump_file, "(analyze_miv_subscript \n");
4939 :
4940 26440 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4941 26440 : chrec_a = chrec_convert (type, chrec_a, NULL);
4942 26440 : chrec_b = chrec_convert (type, chrec_b, NULL);
4943 26440 : difference = chrec_fold_minus (type, chrec_a, chrec_b);
4944 :
4945 26440 : if (eq_evolutions_p (chrec_a, chrec_b))
4946 : {
4947 : /* Access functions are the same: all the elements are accessed
4948 : in the same order. */
4949 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4950 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4951 0 : *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4952 0 : dependence_stats.num_miv_dependent++;
4953 : }
4954 :
4955 26440 : else if (evolution_function_is_constant_p (difference)
4956 20692 : && evolution_function_is_affine_multivariate_p (chrec_a,
4957 : loop_nest->num)
4958 47102 : && !gcd_of_steps_may_divide_p (chrec_a, difference))
4959 : {
4960 : /* testsuite/.../ssa-chrec-33.c
4961 : {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4962 :
4963 : The difference is 1, and all the evolution steps are multiples
4964 : of 2, consequently there are no overlapping elements. */
4965 19670 : *overlaps_a = conflict_fn_no_dependence ();
4966 19670 : *overlaps_b = conflict_fn_no_dependence ();
4967 19670 : *last_conflicts = integer_zero_node;
4968 19670 : dependence_stats.num_miv_independent++;
4969 : }
4970 :
4971 6770 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4972 122 : && !chrec_contains_symbols (chrec_a, loop_nest)
4973 110 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4974 6850 : && !chrec_contains_symbols (chrec_b, loop_nest))
4975 : {
4976 : /* testsuite/.../ssa-chrec-35.c
4977 : {0, +, 1}_2 vs. {0, +, 1}_3
4978 : the overlapping elements are respectively located at iterations:
4979 : {0, +, 1}_x and {0, +, 1}_x,
4980 : in other words, we have the equality:
4981 : {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4982 :
4983 : Other examples:
4984 : {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4985 : {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4986 :
4987 : {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4988 : {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4989 : */
4990 80 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4991 : overlaps_a, overlaps_b, last_conflicts);
4992 :
4993 80 : if (CF_NOT_KNOWN_P (*overlaps_a)
4994 76 : || CF_NOT_KNOWN_P (*overlaps_b))
4995 4 : dependence_stats.num_miv_unimplemented++;
4996 76 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4997 62 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4998 14 : dependence_stats.num_miv_independent++;
4999 : else
5000 62 : dependence_stats.num_miv_dependent++;
5001 : }
5002 :
5003 : else
5004 : {
5005 : /* When the analysis is too difficult, answer "don't know". */
5006 6690 : if (dump_file && (dump_flags & TDF_DETAILS))
5007 23 : fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
5008 :
5009 6690 : *overlaps_a = conflict_fn_not_known ();
5010 6690 : *overlaps_b = conflict_fn_not_known ();
5011 6690 : *last_conflicts = chrec_dont_know;
5012 6690 : dependence_stats.num_miv_unimplemented++;
5013 : }
5014 :
5015 26440 : if (dump_file && (dump_flags & TDF_DETAILS))
5016 27 : fprintf (dump_file, ")\n");
5017 26440 : }
5018 :
5019 : /* Determines the iterations for which CHREC_A is equal to CHREC_B in
5020 : with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
5021 : OVERLAP_ITERATIONS_B are initialized with two functions that
5022 : describe the iterations that contain conflicting elements.
5023 :
5024 : Remark: For an integer k >= 0, the following equality is true:
5025 :
5026 : CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
5027 : */
5028 :
5029 : static void
5030 3406245 : analyze_overlapping_iterations (tree chrec_a,
5031 : tree chrec_b,
5032 : conflict_function **overlap_iterations_a,
5033 : conflict_function **overlap_iterations_b,
5034 : tree *last_conflicts, class loop *loop_nest)
5035 : {
5036 3406245 : unsigned int lnn = loop_nest->num;
5037 :
5038 3406245 : dependence_stats.num_subscript_tests++;
5039 :
5040 3406245 : if (dump_file && (dump_flags & TDF_DETAILS))
5041 : {
5042 58959 : fprintf (dump_file, "(analyze_overlapping_iterations \n");
5043 58959 : fprintf (dump_file, " (chrec_a = ");
5044 58959 : print_generic_expr (dump_file, chrec_a);
5045 58959 : fprintf (dump_file, ")\n (chrec_b = ");
5046 58959 : print_generic_expr (dump_file, chrec_b);
5047 58959 : fprintf (dump_file, ")\n");
5048 : }
5049 :
5050 3406245 : if (chrec_a == NULL_TREE
5051 3406245 : || chrec_b == NULL_TREE
5052 3406245 : || chrec_contains_undetermined (chrec_a)
5053 6812490 : || chrec_contains_undetermined (chrec_b))
5054 : {
5055 0 : dependence_stats.num_subscript_undetermined++;
5056 :
5057 0 : *overlap_iterations_a = conflict_fn_not_known ();
5058 0 : *overlap_iterations_b = conflict_fn_not_known ();
5059 : }
5060 :
5061 : /* If they are the same chrec, and are affine, they overlap
5062 : on every iteration. */
5063 3406245 : else if (eq_evolutions_p (chrec_a, chrec_b)
5064 3406245 : && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5065 483745 : || operand_equal_p (chrec_a, chrec_b, 0)))
5066 : {
5067 1172767 : dependence_stats.num_same_subscript_function++;
5068 1172767 : *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
5069 1172767 : *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
5070 1172767 : *last_conflicts = chrec_dont_know;
5071 : }
5072 :
5073 : /* If they aren't the same, and aren't affine, we can't do anything
5074 : yet. */
5075 2233478 : else if ((chrec_contains_symbols (chrec_a)
5076 2180584 : || chrec_contains_symbols (chrec_b))
5077 2234347 : && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5078 51536 : || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5079 : {
5080 2527 : dependence_stats.num_subscript_undetermined++;
5081 2527 : *overlap_iterations_a = conflict_fn_not_known ();
5082 2527 : *overlap_iterations_b = conflict_fn_not_known ();
5083 : }
5084 :
5085 2230951 : else if (ziv_subscript_p (chrec_a, chrec_b))
5086 493349 : analyze_ziv_subscript (chrec_a, chrec_b,
5087 : overlap_iterations_a, overlap_iterations_b,
5088 : last_conflicts);
5089 :
5090 1737602 : else if (siv_subscript_p (chrec_a, chrec_b))
5091 1711162 : analyze_siv_subscript (chrec_a, chrec_b,
5092 : overlap_iterations_a, overlap_iterations_b,
5093 : last_conflicts, lnn);
5094 :
5095 : else
5096 26440 : analyze_miv_subscript (chrec_a, chrec_b,
5097 : overlap_iterations_a, overlap_iterations_b,
5098 : last_conflicts, loop_nest);
5099 :
5100 3406245 : if (dump_file && (dump_flags & TDF_DETAILS))
5101 : {
5102 58959 : fprintf (dump_file, " (overlap_iterations_a = ");
5103 58959 : dump_conflict_function (dump_file, *overlap_iterations_a);
5104 58959 : fprintf (dump_file, ")\n (overlap_iterations_b = ");
5105 58959 : dump_conflict_function (dump_file, *overlap_iterations_b);
5106 58959 : fprintf (dump_file, "))\n");
5107 : }
5108 3406245 : }
5109 :
5110 : /* Helper function for uniquely inserting distance vectors. */
5111 :
5112 : static void
5113 1068349 : save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5114 : {
5115 1597323 : for (lambda_vector v : DDR_DIST_VECTS (ddr))
5116 530361 : if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5117 : return;
5118 :
5119 1068082 : DDR_DIST_VECTS (ddr).safe_push (dist_v);
5120 : }
5121 :
5122 : /* Helper function for uniquely inserting direction vectors. */
5123 :
5124 : static void
5125 1068082 : save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5126 : {
5127 1596255 : for (lambda_vector v : DDR_DIR_VECTS (ddr))
5128 528759 : if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5129 : return;
5130 :
5131 1068082 : DDR_DIR_VECTS (ddr).safe_push (dir_v);
5132 : }
5133 :
5134 : /* Add a distance of 1 on all the loops outer than INDEX. If we
5135 : haven't yet determined a distance for this outer loop, push a new
5136 : distance vector composed of the previous distance, and a distance
5137 : of 1 for this outer loop. Example:
5138 :
5139 : | loop_1
5140 : | loop_2
5141 : | A[10]
5142 : | endloop_2
5143 : | endloop_1
5144 :
5145 : Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5146 : save (0, 1), then we have to save (1, 0). */
5147 :
5148 : static void
5149 16690 : add_outer_distances (struct data_dependence_relation *ddr,
5150 : lambda_vector dist_v, int index)
5151 : {
5152 : /* For each outer loop where init_v is not set, the accesses are
5153 : in dependence of distance 1 in the loop. */
5154 19888 : while (--index >= 0)
5155 : {
5156 6396 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5157 3198 : lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5158 3198 : save_v[index] = 1;
5159 3198 : save_dist_v (ddr, save_v);
5160 : }
5161 16690 : }
5162 :
5163 : /* Return false when fail to represent the data dependence as a
5164 : distance vector. A_INDEX is the index of the first reference
5165 : (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5166 : second reference. INIT_B is set to true when a component has been
5167 : added to the distance vector DIST_V. INDEX_CARRY is then set to
5168 : the index in DIST_V that carries the dependence. */
5169 :
5170 : static bool
5171 61698 : build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5172 : unsigned int a_index, unsigned int b_index,
5173 : lambda_vector dist_v, bool *init_b,
5174 : int *index_carry)
5175 : {
5176 61698 : unsigned i;
5177 123396 : lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5178 61698 : class loop *loop = DDR_LOOP_NEST (ddr)[0];
5179 :
5180 139331 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5181 : {
5182 79721 : tree access_fn_a, access_fn_b;
5183 79721 : struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5184 :
5185 79721 : if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5186 : {
5187 309 : non_affine_dependence_relation (ddr);
5188 309 : return false;
5189 : }
5190 :
5191 79412 : access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5192 79412 : access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5193 :
5194 79412 : if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5195 60434 : && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5196 : {
5197 59788 : HOST_WIDE_INT dist;
5198 59788 : int index;
5199 59788 : int var_a = CHREC_VARIABLE (access_fn_a);
5200 59788 : int var_b = CHREC_VARIABLE (access_fn_b);
5201 :
5202 59788 : if (var_a != var_b
5203 59788 : || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5204 : {
5205 34 : non_affine_dependence_relation (ddr);
5206 34 : return false;
5207 : }
5208 :
5209 : /* When data references are collected in a loop while data
5210 : dependences are analyzed in loop nest nested in the loop, we
5211 : would have more number of access functions than number of
5212 : loops. Skip access functions of loops not in the loop nest.
5213 :
5214 : See PR89725 for more information. */
5215 59754 : if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5216 2 : continue;
5217 :
5218 59752 : dist = int_cst_value (SUB_DISTANCE (subscript));
5219 59752 : index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5220 59752 : *index_carry = MIN (index, *index_carry);
5221 :
5222 : /* This is the subscript coupling test. If we have already
5223 : recorded a distance for this loop (a distance coming from
5224 : another subscript), it should be the same. For example,
5225 : in the following code, there is no dependence:
5226 :
5227 : | loop i = 0, N, 1
5228 : | T[i+1][i] = ...
5229 : | ... = T[i][i]
5230 : | endloop
5231 : */
5232 59752 : if (init_v[index] != 0 && dist_v[index] != dist)
5233 : {
5234 0 : finalize_ddr_dependent (ddr, chrec_known);
5235 0 : return false;
5236 : }
5237 :
5238 59752 : dist_v[index] = dist;
5239 59752 : init_v[index] = 1;
5240 59752 : *init_b = true;
5241 59752 : }
5242 19624 : else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5243 : {
5244 : /* This can be for example an affine vs. constant dependence
5245 : (T[i] vs. T[3]) that is not an affine dependence and is
5246 : not representable as a distance vector. */
5247 1745 : non_affine_dependence_relation (ddr);
5248 1745 : return false;
5249 : }
5250 : }
5251 :
5252 : return true;
5253 : }
5254 :
5255 : /* Return true when the DDR contains only invariant access functions wrto. loop
5256 : number LNUM. */
5257 :
5258 : static bool
5259 840926 : invariant_access_functions (const struct data_dependence_relation *ddr,
5260 : int lnum)
5261 : {
5262 2840220 : for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5263 987736 : if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5264 987736 : || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5265 670294 : return false;
5266 :
5267 : return true;
5268 : }
5269 :
5270 : /* Helper function for the case where DDR_A and DDR_B are the same
5271 : multivariate access function with a constant step. For an example
5272 : see pr34635-1.c. */
5273 :
5274 : static void
5275 4560 : add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5276 : {
5277 4560 : int x_1, x_2;
5278 4560 : tree c_1 = CHREC_LEFT (c_2);
5279 4560 : tree c_0 = CHREC_LEFT (c_1);
5280 4560 : lambda_vector dist_v;
5281 4560 : HOST_WIDE_INT v1, v2, cd;
5282 :
5283 : /* Polynomials with more than 2 variables are not handled yet. When
5284 : the evolution steps are parameters, it is not possible to
5285 : represent the dependence using classical distance vectors. */
5286 4560 : if (TREE_CODE (c_0) != INTEGER_CST
5287 3036 : || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5288 6965 : || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5289 : {
5290 2163 : DDR_AFFINE_P (ddr) = false;
5291 2163 : return;
5292 : }
5293 :
5294 2397 : x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5295 2397 : x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5296 :
5297 : /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
5298 4794 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5299 2397 : v1 = int_cst_value (CHREC_RIGHT (c_1));
5300 2397 : v2 = int_cst_value (CHREC_RIGHT (c_2));
5301 2397 : cd = gcd (v1, v2);
5302 2397 : v1 /= cd;
5303 2397 : v2 /= cd;
5304 :
5305 2397 : if (v2 < 0)
5306 : {
5307 10 : v2 = -v2;
5308 10 : v1 = -v1;
5309 : }
5310 :
5311 2397 : dist_v[x_1] = v2;
5312 2397 : dist_v[x_2] = -v1;
5313 2397 : save_dist_v (ddr, dist_v);
5314 :
5315 2397 : add_outer_distances (ddr, dist_v, x_1);
5316 : }
5317 :
5318 : /* Helper function for the case where DDR_A and DDR_B are the same
5319 : access functions. */
5320 :
5321 : static void
5322 18996 : add_other_self_distances (struct data_dependence_relation *ddr)
5323 : {
5324 18996 : lambda_vector dist_v;
5325 18996 : unsigned i;
5326 18996 : int index_carry = DDR_NB_LOOPS (ddr);
5327 18996 : subscript *sub;
5328 18996 : class loop *loop = DDR_LOOP_NEST (ddr)[0];
5329 :
5330 40354 : FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5331 : {
5332 26423 : tree access_fun = SUB_ACCESS_FN (sub, 0);
5333 :
5334 26423 : if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5335 : {
5336 19117 : if (!evolution_function_is_univariate_p (access_fun, loop->num))
5337 : {
5338 5065 : if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5339 : {
5340 505 : DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5341 505 : return;
5342 : }
5343 :
5344 4560 : access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5345 :
5346 4560 : if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5347 4560 : add_multivariate_self_dist (ddr, access_fun);
5348 : else
5349 : /* The evolution step is not constant: it varies in
5350 : the outer loop, so this cannot be represented by a
5351 : distance vector. For example in pr34635.c the
5352 : evolution is {0, +, {0, +, 4}_1}_2. */
5353 0 : DDR_AFFINE_P (ddr) = false;
5354 :
5355 4560 : return;
5356 : }
5357 :
5358 : /* When data references are collected in a loop while data
5359 : dependences are analyzed in loop nest nested in the loop, we
5360 : would have more number of access functions than number of
5361 : loops. Skip access functions of loops not in the loop nest.
5362 :
5363 : See PR89725 for more information. */
5364 14052 : if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5365 : loop))
5366 0 : continue;
5367 :
5368 21515 : index_carry = MIN (index_carry,
5369 : index_in_loop_nest (CHREC_VARIABLE (access_fun),
5370 : DDR_LOOP_NEST (ddr)));
5371 : }
5372 : }
5373 :
5374 27862 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5375 13931 : add_outer_distances (ddr, dist_v, index_carry);
5376 : }
5377 :
5378 : static void
5379 170632 : insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5380 : {
5381 341264 : lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5382 :
5383 170632 : dist_v[0] = 1;
5384 170632 : save_dist_v (ddr, dist_v);
5385 170632 : }
5386 :
5387 : /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5388 : is the case for example when access functions are the same and
5389 : equal to a constant, as in:
5390 :
5391 : | loop_1
5392 : | A[3] = ...
5393 : | ... = A[3]
5394 : | endloop_1
5395 :
5396 : in which case the distance vectors are (0) and (1). */
5397 :
5398 : static void
5399 170632 : add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5400 : {
5401 170632 : unsigned i, j;
5402 :
5403 170632 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5404 : {
5405 170632 : subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5406 170632 : conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5407 170632 : conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5408 :
5409 170632 : for (j = 0; j < ca->n; j++)
5410 170632 : if (affine_function_zero_p (ca->fns[j]))
5411 : {
5412 170632 : insert_innermost_unit_dist_vector (ddr);
5413 170632 : return;
5414 : }
5415 :
5416 0 : for (j = 0; j < cb->n; j++)
5417 0 : if (affine_function_zero_p (cb->fns[j]))
5418 : {
5419 0 : insert_innermost_unit_dist_vector (ddr);
5420 0 : return;
5421 : }
5422 : }
5423 : }
5424 :
5425 : /* Return true when the DDR contains two data references that have the
5426 : same access functions. */
5427 :
5428 : static inline bool
5429 894214 : same_access_functions (const struct data_dependence_relation *ddr)
5430 : {
5431 3745928 : for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5432 1116574 : if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5433 1116574 : SUB_ACCESS_FN (sub, 1)))
5434 : return false;
5435 :
5436 : return true;
5437 : }
5438 :
5439 : /* Compute the classic per loop distance vector. DDR is the data
5440 : dependence relation to build a vector from. Return false when fail
5441 : to represent the data dependence as a distance vector. */
5442 :
5443 : static bool
5444 3064041 : build_classic_dist_vector (struct data_dependence_relation *ddr,
5445 : class loop *loop_nest)
5446 : {
5447 3064041 : bool init_b = false;
5448 3064041 : int index_carry = DDR_NB_LOOPS (ddr);
5449 3064041 : lambda_vector dist_v;
5450 :
5451 3064041 : if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5452 : return false;
5453 :
5454 894214 : if (same_access_functions (ddr))
5455 : {
5456 : /* Save the 0 vector. */
5457 1681852 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5458 840926 : save_dist_v (ddr, dist_v);
5459 :
5460 840926 : if (invariant_access_functions (ddr, loop_nest->num))
5461 170632 : add_distance_for_zero_overlaps (ddr);
5462 :
5463 840926 : if (DDR_NB_LOOPS (ddr) > 1)
5464 18996 : add_other_self_distances (ddr);
5465 :
5466 840926 : return true;
5467 : }
5468 :
5469 106576 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5470 53288 : if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5471 : return false;
5472 :
5473 : /* Save the distance vector if we initialized one. */
5474 51200 : if (init_b)
5475 : {
5476 : /* Verify a basic constraint: classic distance vectors should
5477 : always be lexicographically positive.
5478 :
5479 : Data references are collected in the order of execution of
5480 : the program, thus for the following loop
5481 :
5482 : | for (i = 1; i < 100; i++)
5483 : | for (j = 1; j < 100; j++)
5484 : | {
5485 : | t = T[j+1][i-1]; // A
5486 : | T[j][i] = t + 2; // B
5487 : | }
5488 :
5489 : references are collected following the direction of the wind:
5490 : A then B. The data dependence tests are performed also
5491 : following this order, such that we're looking at the distance
5492 : separating the elements accessed by A from the elements later
5493 : accessed by B. But in this example, the distance returned by
5494 : test_dep (A, B) is lexicographically negative (-1, 1), that
5495 : means that the access A occurs later than B with respect to
5496 : the outer loop, ie. we're actually looking upwind. In this
5497 : case we solve test_dep (B, A) looking downwind to the
5498 : lexicographically positive solution, that returns the
5499 : distance vector (1, -1). */
5500 102400 : if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5501 : {
5502 8305 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5503 8305 : if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5504 : return false;
5505 8301 : compute_subscript_distance (ddr);
5506 8301 : if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5507 : &index_carry))
5508 : return false;
5509 8301 : save_dist_v (ddr, save_v);
5510 8301 : DDR_REVERSED_P (ddr) = true;
5511 :
5512 : /* In this case there is a dependence forward for all the
5513 : outer loops:
5514 :
5515 : | for (k = 1; k < 100; k++)
5516 : | for (i = 1; i < 100; i++)
5517 : | for (j = 1; j < 100; j++)
5518 : | {
5519 : | t = T[j+1][i-1]; // A
5520 : | T[j][i] = t + 2; // B
5521 : | }
5522 :
5523 : the vectors are:
5524 : (0, 1, -1)
5525 : (1, 1, -1)
5526 : (1, -1, 1)
5527 : */
5528 8301 : if (DDR_NB_LOOPS (ddr) > 1)
5529 : {
5530 72 : add_outer_distances (ddr, save_v, index_carry);
5531 72 : add_outer_distances (ddr, dist_v, index_carry);
5532 : }
5533 : }
5534 : else
5535 : {
5536 42895 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5537 42895 : lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5538 :
5539 42895 : if (DDR_NB_LOOPS (ddr) > 1)
5540 : {
5541 109 : lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5542 :
5543 109 : if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5544 : return false;
5545 109 : compute_subscript_distance (ddr);
5546 109 : if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5547 : &index_carry))
5548 : return false;
5549 :
5550 109 : save_dist_v (ddr, save_v);
5551 109 : add_outer_distances (ddr, dist_v, index_carry);
5552 109 : add_outer_distances (ddr, opposite_v, index_carry);
5553 : }
5554 : else
5555 42786 : save_dist_v (ddr, save_v);
5556 : }
5557 : }
5558 : else
5559 : {
5560 : /* There is a distance of 1 on all the outer loops: Example:
5561 : there is a dependence of distance 1 on loop_1 for the array A.
5562 :
5563 : | loop_1
5564 : | A[5] = ...
5565 : | endloop
5566 : */
5567 0 : add_outer_distances (ddr, dist_v,
5568 : lambda_vector_first_nz (dist_v,
5569 0 : DDR_NB_LOOPS (ddr), 0));
5570 : }
5571 :
5572 : return true;
5573 : }
5574 :
5575 : /* Return the direction for a given distance.
5576 : FIXME: Computing dir this way is suboptimal, since dir can catch
5577 : cases that dist is unable to represent. */
5578 :
5579 : static inline enum data_dependence_direction
5580 1092952 : dir_from_dist (int dist)
5581 : {
5582 1092952 : if (dist > 0)
5583 : return dir_positive;
5584 865767 : else if (dist < 0)
5585 : return dir_negative;
5586 : else
5587 863334 : return dir_equal;
5588 : }
5589 :
5590 : /* Compute the classic per loop direction vector. DDR is the data
5591 : dependence relation to build a vector from. */
5592 :
5593 : static void
5594 892122 : build_classic_dir_vector (struct data_dependence_relation *ddr)
5595 : {
5596 892122 : unsigned i, j;
5597 892122 : lambda_vector dist_v;
5598 :
5599 1960204 : FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5600 : {
5601 2136164 : lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5602 :
5603 3229116 : for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5604 1958719 : dir_v[j] = dir_from_dist (dist_v[j]);
5605 :
5606 1068082 : save_dir_v (ddr, dir_v);
5607 : }
5608 892122 : }
5609 :
5610 : /* Helper function. Returns true when there is a dependence between the
5611 : data references. A_INDEX is the index of the first reference (0 for
5612 : DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
5613 :
5614 : static bool
5615 3072455 : subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5616 : unsigned int a_index, unsigned int b_index,
5617 : class loop *loop_nest)
5618 : {
5619 3072455 : unsigned int i;
5620 3072455 : tree last_conflicts;
5621 3072455 : struct subscript *subscript;
5622 3072455 : tree res = NULL_TREE;
5623 :
5624 4338462 : for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5625 : {
5626 3406245 : conflict_function *overlaps_a, *overlaps_b;
5627 :
5628 3406245 : analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5629 : SUB_ACCESS_FN (subscript, b_index),
5630 : &overlaps_a, &overlaps_b,
5631 : &last_conflicts, loop_nest);
5632 :
5633 3406245 : if (SUB_CONFLICTS_IN_A (subscript))
5634 3406245 : free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5635 3406245 : if (SUB_CONFLICTS_IN_B (subscript))
5636 3406245 : free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5637 :
5638 3406245 : SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5639 3406245 : SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5640 3406245 : SUB_LAST_CONFLICT (subscript) = last_conflicts;
5641 :
5642 : /* If there is any undetermined conflict function we have to
5643 : give a conservative answer in case we cannot prove that
5644 : no dependence exists when analyzing another subscript. */
5645 3406245 : if (CF_NOT_KNOWN_P (overlaps_a)
5646 3375567 : || CF_NOT_KNOWN_P (overlaps_b))
5647 : {
5648 30678 : res = chrec_dont_know;
5649 30678 : continue;
5650 : }
5651 :
5652 : /* When there is a subscript with no dependence we can stop. */
5653 3375567 : else if (CF_NO_DEPENDENCE_P (overlaps_a)
5654 1235329 : || CF_NO_DEPENDENCE_P (overlaps_b))
5655 : {
5656 2140238 : res = chrec_known;
5657 2140238 : break;
5658 : }
5659 : }
5660 :
5661 3072455 : if (res == NULL_TREE)
5662 : return true;
5663 :
5664 2169831 : if (res == chrec_known)
5665 2140238 : dependence_stats.num_dependence_independent++;
5666 : else
5667 29593 : dependence_stats.num_dependence_undetermined++;
5668 2169831 : finalize_ddr_dependent (ddr, res);
5669 2169831 : return false;
5670 : }
5671 :
5672 : /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
5673 :
5674 : static void
5675 3064041 : subscript_dependence_tester (struct data_dependence_relation *ddr,
5676 : class loop *loop_nest)
5677 : {
5678 3064041 : if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5679 894214 : dependence_stats.num_dependence_dependent++;
5680 :
5681 3064041 : compute_subscript_distance (ddr);
5682 3064041 : if (build_classic_dist_vector (ddr, loop_nest))
5683 : {
5684 892122 : if (dump_file && (dump_flags & TDF_DETAILS))
5685 : {
5686 3994 : unsigned i;
5687 :
5688 3994 : fprintf (dump_file, "(build_classic_dist_vector\n");
5689 12051 : for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5690 : {
5691 4063 : fprintf (dump_file, " dist_vector = (");
5692 4063 : print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5693 8126 : DDR_NB_LOOPS (ddr));
5694 4063 : fprintf (dump_file, " )\n");
5695 : }
5696 3994 : fprintf (dump_file, ")\n");
5697 : }
5698 :
5699 892122 : build_classic_dir_vector (ddr);
5700 : }
5701 3064041 : }
5702 :
5703 : /* Returns true when all the access functions of A are affine or
5704 : constant with respect to LOOP_NEST. */
5705 :
5706 : static bool
5707 6192041 : access_functions_are_affine_or_constant_p (const struct data_reference *a,
5708 : const class loop *loop_nest)
5709 : {
5710 6192041 : vec<tree> fns = DR_ACCESS_FNS (a);
5711 26905178 : for (tree t : fns)
5712 8390217 : if (!evolution_function_is_invariant_p (t, loop_nest->num)
5713 8390217 : && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5714 : return false;
5715 :
5716 : return true;
5717 : }
5718 :
5719 : /* This computes the affine dependence relation between A and B with
5720 : respect to LOOP_NEST. CHREC_KNOWN is used for representing the
5721 : independence between two accesses, while CHREC_DONT_KNOW is used
5722 : for representing the unknown relation.
5723 :
5724 : Note that it is possible to stop the computation of the dependence
5725 : relation the first time we detect a CHREC_KNOWN element for a given
5726 : subscript. */
5727 :
5728 : void
5729 6376168 : compute_affine_dependence (struct data_dependence_relation *ddr,
5730 : class loop *loop_nest)
5731 : {
5732 6376168 : struct data_reference *dra = DDR_A (ddr);
5733 6376168 : struct data_reference *drb = DDR_B (ddr);
5734 :
5735 6376168 : if (dump_file && (dump_flags & TDF_DETAILS))
5736 : {
5737 133864 : fprintf (dump_file, "(compute_affine_dependence\n");
5738 133864 : fprintf (dump_file, " ref_a: ");
5739 133864 : print_generic_expr (dump_file, DR_REF (dra));
5740 133864 : fprintf (dump_file, ", stmt_a: ");
5741 133864 : print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5742 133864 : fprintf (dump_file, " ref_b: ");
5743 133864 : print_generic_expr (dump_file, DR_REF (drb));
5744 133864 : fprintf (dump_file, ", stmt_b: ");
5745 133864 : print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5746 : }
5747 :
5748 : /* Analyze only when the dependence relation is not yet known. */
5749 6376168 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5750 : {
5751 3125203 : dependence_stats.num_dependence_tests++;
5752 :
5753 3125203 : if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5754 3125203 : && access_functions_are_affine_or_constant_p (drb, loop_nest))
5755 3064041 : subscript_dependence_tester (ddr, loop_nest);
5756 :
5757 : /* As a last case, if the dependence cannot be determined, or if
5758 : the dependence is considered too difficult to determine, answer
5759 : "don't know". */
5760 : else
5761 : {
5762 61162 : dependence_stats.num_dependence_undetermined++;
5763 :
5764 61162 : if (dump_file && (dump_flags & TDF_DETAILS))
5765 : {
5766 158 : fprintf (dump_file, "Data ref a:\n");
5767 158 : dump_data_reference (dump_file, dra);
5768 158 : fprintf (dump_file, "Data ref b:\n");
5769 158 : dump_data_reference (dump_file, drb);
5770 158 : fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5771 : }
5772 61162 : finalize_ddr_dependent (ddr, chrec_dont_know);
5773 : }
5774 : }
5775 :
5776 6376168 : if (dump_file && (dump_flags & TDF_DETAILS))
5777 : {
5778 133864 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5779 118476 : fprintf (dump_file, ") -> no dependence\n");
5780 15388 : else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5781 11304 : fprintf (dump_file, ") -> dependence analysis failed\n");
5782 : else
5783 4084 : fprintf (dump_file, ")\n");
5784 : }
5785 6376168 : }
5786 :
5787 : /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5788 : the data references in DATAREFS, in the LOOP_NEST. When
5789 : COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5790 : relations. Return true when successful, i.e. data references number
5791 : is small enough to be handled. */
5792 :
5793 : bool
5794 424084 : compute_all_dependences (const vec<data_reference_p> &datarefs,
5795 : vec<ddr_p> *dependence_relations,
5796 : const vec<loop_p> &loop_nest,
5797 : bool compute_self_and_rr)
5798 : {
5799 424084 : struct data_dependence_relation *ddr;
5800 424084 : struct data_reference *a, *b;
5801 424084 : unsigned int i, j;
5802 :
5803 424084 : if ((int) datarefs.length ()
5804 424084 : > param_loop_max_datarefs_for_datadeps)
5805 : {
5806 0 : struct data_dependence_relation *ddr;
5807 :
5808 : /* Insert a single relation into dependence_relations:
5809 : chrec_dont_know. */
5810 0 : ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5811 0 : dependence_relations->safe_push (ddr);
5812 0 : return false;
5813 : }
5814 :
5815 3150620 : FOR_EACH_VEC_ELT (datarefs, i, a)
5816 7562648 : for (j = i + 1; datarefs.iterate (j, &b); j++)
5817 4836112 : if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5818 : {
5819 4469982 : ddr = initialize_data_dependence_relation (a, b, loop_nest);
5820 4469982 : dependence_relations->safe_push (ddr);
5821 4469982 : if (loop_nest.exists ())
5822 4447361 : compute_affine_dependence (ddr, loop_nest[0]);
5823 : }
5824 :
5825 424084 : if (compute_self_and_rr)
5826 1010240 : FOR_EACH_VEC_ELT (datarefs, i, a)
5827 : {
5828 751006 : ddr = initialize_data_dependence_relation (a, a, loop_nest);
5829 751006 : dependence_relations->safe_push (ddr);
5830 751006 : if (loop_nest.exists ())
5831 751006 : compute_affine_dependence (ddr, loop_nest[0]);
5832 : }
5833 :
5834 : return true;
5835 : }
5836 :
5837 : /* Describes a location of a memory reference. */
5838 :
5839 : struct data_ref_loc
5840 : {
5841 : /* The memory reference. */
5842 : tree ref;
5843 :
5844 : /* True if the memory reference is read. */
5845 : bool is_read;
5846 :
5847 : /* True if the data reference is conditional within the containing
5848 : statement, i.e. if it might not occur even when the statement
5849 : is executed and runs to completion. */
5850 : bool is_conditional_in_stmt;
5851 : };
5852 :
5853 :
5854 : /* Stores the locations of memory references in STMT to REFERENCES. Returns
5855 : true if STMT clobbers memory, false otherwise. */
5856 :
5857 : static bool
5858 48789998 : get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5859 : {
5860 48789998 : bool clobbers_memory = false;
5861 48789998 : data_ref_loc ref;
5862 48789998 : tree op0, op1;
5863 48789998 : enum gimple_code stmt_code = gimple_code (stmt);
5864 :
5865 : /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5866 : As we cannot model data-references to not spelled out
5867 : accesses give up if they may occur. */
5868 48789998 : if (stmt_code == GIMPLE_CALL
5869 48789998 : && !(gimple_call_flags (stmt) & ECF_CONST))
5870 : {
5871 : /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
5872 4102245 : if (gimple_call_internal_p (stmt))
5873 55053 : switch (gimple_call_internal_fn (stmt))
5874 : {
5875 5613 : case IFN_GOMP_SIMD_LANE:
5876 5613 : {
5877 5613 : class loop *loop = gimple_bb (stmt)->loop_father;
5878 5613 : tree uid = gimple_call_arg (stmt, 0);
5879 5613 : gcc_assert (TREE_CODE (uid) == SSA_NAME);
5880 5613 : if (loop == NULL
5881 5613 : || loop->simduid != SSA_NAME_VAR (uid))
5882 : clobbers_memory = true;
5883 : break;
5884 : }
5885 : case IFN_MASK_LOAD:
5886 : case IFN_MASK_STORE:
5887 : break;
5888 999 : case IFN_MASK_CALL:
5889 999 : {
5890 999 : tree orig_fndecl
5891 999 : = gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
5892 999 : if (!orig_fndecl
5893 999 : || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
5894 : clobbers_memory = true;
5895 : }
5896 : break;
5897 : default:
5898 4143194 : clobbers_memory = true;
5899 : break;
5900 : }
5901 4047192 : else if (gimple_call_builtin_p (stmt, BUILT_IN_PREFETCH))
5902 : clobbers_memory = false;
5903 : else
5904 4143194 : clobbers_memory = true;
5905 : }
5906 44687753 : else if (stmt_code == GIMPLE_ASM
5907 44687753 : && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5908 8536 : || gimple_vuse (stmt)))
5909 : clobbers_memory = true;
5910 :
5911 102071816 : if (!gimple_vuse (stmt))
5912 : return clobbers_memory;
5913 :
5914 19004216 : if (stmt_code == GIMPLE_ASSIGN)
5915 : {
5916 13990075 : tree base;
5917 13990075 : op0 = gimple_assign_lhs (stmt);
5918 13990075 : op1 = gimple_assign_rhs1 (stmt);
5919 :
5920 13990075 : if (DECL_P (op1)
5921 13990075 : || (REFERENCE_CLASS_P (op1)
5922 6730413 : && (base = get_base_address (op1))
5923 6730413 : && TREE_CODE (base) != SSA_NAME
5924 6730348 : && !is_gimple_min_invariant (base)))
5925 : {
5926 7589808 : ref.ref = op1;
5927 7589808 : ref.is_read = true;
5928 7589808 : ref.is_conditional_in_stmt = false;
5929 7589808 : references->safe_push (ref);
5930 : }
5931 : }
5932 5014141 : else if (stmt_code == GIMPLE_CALL)
5933 : {
5934 4110999 : unsigned i = 0, n;
5935 4110999 : tree ptr, type;
5936 4110999 : unsigned int align;
5937 :
5938 4110999 : ref.is_read = false;
5939 4110999 : if (gimple_call_internal_p (stmt))
5940 62854 : switch (gimple_call_internal_fn (stmt))
5941 : {
5942 2359 : case IFN_MASK_LOAD:
5943 2359 : if (gimple_call_lhs (stmt) == NULL_TREE)
5944 : break;
5945 2359 : ref.is_read = true;
5946 : /* FALLTHRU */
5947 4306 : case IFN_MASK_STORE:
5948 4306 : ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5949 4306 : align = tree_to_shwi (gimple_call_arg (stmt, 1));
5950 4306 : if (ref.is_read)
5951 2359 : type = TREE_TYPE (gimple_call_lhs (stmt));
5952 : else
5953 1947 : type = TREE_TYPE (gimple_call_arg (stmt, 3));
5954 4306 : if (TYPE_ALIGN (type) != align)
5955 1641 : type = build_aligned_type (type, align);
5956 4306 : ref.is_conditional_in_stmt = true;
5957 4306 : ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5958 : ptr);
5959 4306 : references->safe_push (ref);
5960 4306 : return false;
5961 : case IFN_MASK_CALL:
5962 4106693 : i = 1;
5963 : gcc_fallthrough ();
5964 : default:
5965 : break;
5966 : }
5967 :
5968 4106693 : op0 = gimple_call_lhs (stmt);
5969 4106693 : n = gimple_call_num_args (stmt);
5970 16613216 : for (; i < n; i++)
5971 : {
5972 8399830 : op1 = gimple_call_arg (stmt, i);
5973 :
5974 8399830 : if (DECL_P (op1)
5975 8399830 : || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5976 : {
5977 488788 : ref.ref = op1;
5978 488788 : ref.is_read = true;
5979 488788 : ref.is_conditional_in_stmt = false;
5980 488788 : references->safe_push (ref);
5981 : }
5982 : }
5983 : }
5984 : else
5985 : return clobbers_memory;
5986 :
5987 18096768 : if (op0
5988 18096768 : && (DECL_P (op0)
5989 14712876 : || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5990 : {
5991 7233258 : ref.ref = op0;
5992 7233258 : ref.is_read = false;
5993 7233258 : ref.is_conditional_in_stmt = false;
5994 7233258 : references->safe_push (ref);
5995 : }
5996 : return clobbers_memory;
5997 : }
5998 :
5999 :
6000 : /* Returns true if the loop-nest has any data reference. */
6001 :
6002 : bool
6003 752 : loop_nest_has_data_refs (loop_p loop)
6004 : {
6005 752 : basic_block *bbs = get_loop_body (loop);
6006 752 : auto_vec<data_ref_loc, 3> references;
6007 :
6008 1001 : for (unsigned i = 0; i < loop->num_nodes; i++)
6009 : {
6010 931 : basic_block bb = bbs[i];
6011 931 : gimple_stmt_iterator bsi;
6012 :
6013 3217 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6014 : {
6015 2037 : gimple *stmt = gsi_stmt (bsi);
6016 2037 : get_references_in_stmt (stmt, &references);
6017 2037 : if (references.length ())
6018 : {
6019 682 : free (bbs);
6020 682 : return true;
6021 : }
6022 : }
6023 : }
6024 70 : free (bbs);
6025 70 : return false;
6026 752 : }
6027 :
6028 : /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
6029 : reference, returns false, otherwise returns true. NEST is the outermost
6030 : loop of the loop nest in which the references should be analyzed. */
6031 :
6032 : opt_result
6033 48773628 : find_data_references_in_stmt (class loop *nest, gimple *stmt,
6034 : vec<data_reference_p> *datarefs)
6035 : {
6036 48773628 : auto_vec<data_ref_loc, 2> references;
6037 48773628 : data_reference_p dr;
6038 :
6039 48773628 : if (get_references_in_stmt (stmt, &references))
6040 4143190 : return opt_result::failure_at (stmt, "statement clobbers memory: %G",
6041 : stmt);
6042 :
6043 148454140 : for (const data_ref_loc &ref : references)
6044 : {
6045 14562826 : dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
6046 14562826 : loop_containing_stmt (stmt), ref.ref,
6047 14562826 : stmt, ref.is_read, ref.is_conditional_in_stmt);
6048 14562826 : gcc_assert (dr != NULL);
6049 14562826 : datarefs->safe_push (dr);
6050 : }
6051 :
6052 44630438 : return opt_result::success ();
6053 48773628 : }
6054 :
6055 : /* Stores the data references in STMT to DATAREFS. If there is an
6056 : unanalyzable reference, returns false, otherwise returns true.
6057 : NEST is the outermost loop of the loop nest in which the references
6058 : should be instantiated, LOOP is the loop in which the references
6059 : should be analyzed. */
6060 :
6061 : bool
6062 14333 : graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
6063 : vec<data_reference_p> *datarefs)
6064 : {
6065 14333 : auto_vec<data_ref_loc, 2> references;
6066 14333 : bool ret = true;
6067 14333 : data_reference_p dr;
6068 :
6069 14333 : if (get_references_in_stmt (stmt, &references))
6070 : return false;
6071 :
6072 45905 : for (const data_ref_loc &ref : references)
6073 : {
6074 5836 : dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
6075 2918 : ref.is_conditional_in_stmt);
6076 2918 : gcc_assert (dr != NULL);
6077 2918 : datarefs->safe_push (dr);
6078 : }
6079 :
6080 : return ret;
6081 14333 : }
6082 :
6083 : /* Search the data references in LOOP, and record the information into
6084 : DATAREFS. Returns chrec_dont_know when failing to analyze a
6085 : difficult case, returns NULL_TREE otherwise. */
6086 :
6087 : tree
6088 2662919 : find_data_references_in_bb (class loop *loop, basic_block bb,
6089 : vec<data_reference_p> *datarefs)
6090 : {
6091 2662919 : gimple_stmt_iterator bsi;
6092 :
6093 21714682 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6094 : {
6095 16874427 : gimple *stmt = gsi_stmt (bsi);
6096 :
6097 16874427 : if (!find_data_references_in_stmt (loop, stmt, datarefs))
6098 : {
6099 485583 : struct data_reference *res;
6100 485583 : res = XCNEW (struct data_reference);
6101 485583 : datarefs->safe_push (res);
6102 :
6103 485583 : return chrec_dont_know;
6104 : }
6105 : }
6106 :
6107 : return NULL_TREE;
6108 : }
6109 :
6110 : /* Search the data references in LOOP, and record the information into
6111 : DATAREFS. Returns chrec_dont_know when failing to analyze a
6112 : difficult case, returns NULL_TREE otherwise.
6113 :
6114 : TODO: This function should be made smarter so that it can handle address
6115 : arithmetic as if they were array accesses, etc. */
6116 :
6117 : tree
6118 808189 : find_data_references_in_loop (class loop *loop,
6119 : vec<data_reference_p> *datarefs)
6120 : {
6121 808189 : basic_block bb, *bbs;
6122 808189 : unsigned int i;
6123 :
6124 808189 : bbs = get_loop_body_in_dom_order (loop);
6125 :
6126 3584471 : for (i = 0; i < loop->num_nodes; i++)
6127 : {
6128 2266366 : bb = bbs[i];
6129 :
6130 2266366 : if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6131 : {
6132 298273 : free (bbs);
6133 298273 : return chrec_dont_know;
6134 : }
6135 : }
6136 509916 : free (bbs);
6137 :
6138 509916 : return NULL_TREE;
6139 : }
6140 :
6141 : /* Return the alignment in bytes that DRB is guaranteed to have at all
6142 : times. */
6143 :
6144 : unsigned int
6145 447992 : dr_alignment (innermost_loop_behavior *drb)
6146 : {
6147 : /* Get the alignment of BASE_ADDRESS + INIT. */
6148 447992 : unsigned int alignment = drb->base_alignment;
6149 447992 : unsigned int misalignment = (drb->base_misalignment
6150 447992 : + TREE_INT_CST_LOW (drb->init));
6151 447992 : if (misalignment != 0)
6152 196641 : alignment = MIN (alignment, misalignment & -misalignment);
6153 :
6154 : /* Cap it to the alignment of OFFSET. */
6155 447992 : if (!integer_zerop (drb->offset))
6156 34942 : alignment = MIN (alignment, drb->offset_alignment);
6157 :
6158 : /* Cap it to the alignment of STEP. */
6159 447992 : if (!integer_zerop (drb->step))
6160 261711 : alignment = MIN (alignment, drb->step_alignment);
6161 :
6162 447992 : return alignment;
6163 : }
6164 :
6165 : /* If BASE is a pointer-typed SSA name, try to find the object that it
6166 : is based on. Return this object X on success and store the alignment
6167 : in bytes of BASE - &X in *ALIGNMENT_OUT. */
6168 :
6169 : static tree
6170 682580 : get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6171 : {
6172 682580 : if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6173 : return NULL_TREE;
6174 :
6175 329535 : gimple *def = SSA_NAME_DEF_STMT (base);
6176 329535 : base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6177 :
6178 : /* Peel chrecs and record the minimum alignment preserved by
6179 : all steps. */
6180 329535 : unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6181 669301 : while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6182 : {
6183 10231 : unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6184 10231 : alignment = MIN (alignment, step_alignment);
6185 10231 : base = CHREC_LEFT (base);
6186 : }
6187 :
6188 : /* Punt if the expression is too complicated to handle. */
6189 329535 : if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6190 : return NULL_TREE;
6191 :
6192 : /* The only useful cases are those for which a dereference folds to something
6193 : other than an INDIRECT_REF. */
6194 329503 : tree ref_type = TREE_TYPE (TREE_TYPE (base));
6195 329503 : tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6196 329503 : if (!ref)
6197 : return NULL_TREE;
6198 :
6199 : /* Analyze the base to which the steps we peeled were applied. */
6200 2443 : poly_int64 bitsize, bitpos, bytepos;
6201 2443 : machine_mode mode;
6202 2443 : int unsignedp, reversep, volatilep;
6203 2443 : tree offset;
6204 2443 : base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6205 : &unsignedp, &reversep, &volatilep);
6206 682580 : if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6207 : return NULL_TREE;
6208 :
6209 : /* Restrict the alignment to that guaranteed by the offsets. */
6210 2443 : unsigned int bytepos_alignment = known_alignment (bytepos);
6211 2443 : if (bytepos_alignment != 0)
6212 2290 : alignment = MIN (alignment, bytepos_alignment);
6213 2443 : if (offset)
6214 : {
6215 0 : unsigned int offset_alignment = highest_pow2_factor (offset);
6216 0 : alignment = MIN (alignment, offset_alignment);
6217 : }
6218 :
6219 2443 : *alignment_out = alignment;
6220 2443 : return base;
6221 : }
6222 :
6223 : /* Return the object whose alignment would need to be changed in order
6224 : to increase the alignment of ADDR. Store the maximum achievable
6225 : alignment in *MAX_ALIGNMENT. */
6226 :
6227 : tree
6228 682580 : get_base_for_alignment (tree addr, unsigned int *max_alignment)
6229 : {
6230 682580 : tree base = get_base_for_alignment_1 (addr, max_alignment);
6231 682580 : if (base)
6232 : return base;
6233 :
6234 680137 : if (TREE_CODE (addr) == ADDR_EXPR)
6235 271237 : addr = TREE_OPERAND (addr, 0);
6236 680137 : *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6237 680137 : return addr;
6238 : }
6239 :
6240 : /* Recursive helper function. */
6241 :
6242 : static bool
6243 139364 : find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6244 : {
6245 : /* Inner loops of the nest should not contain siblings. Example:
6246 : when there are two consecutive loops,
6247 :
6248 : | loop_0
6249 : | loop_1
6250 : | A[{0, +, 1}_1]
6251 : | endloop_1
6252 : | loop_2
6253 : | A[{0, +, 1}_2]
6254 : | endloop_2
6255 : | endloop_0
6256 :
6257 : the dependence relation cannot be captured by the distance
6258 : abstraction. */
6259 139364 : if (loop->next)
6260 : return false;
6261 :
6262 116880 : loop_nest->safe_push (loop);
6263 116880 : if (loop->inner)
6264 41507 : return find_loop_nest_1 (loop->inner, loop_nest);
6265 : return true;
6266 : }
6267 :
6268 : /* Return false when the LOOP is not well nested. Otherwise return
6269 : true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
6270 : contain the loops from the outermost to the innermost, as they will
6271 : appear in the classic distance vector. */
6272 :
6273 : bool
6274 1014362 : find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6275 : {
6276 1014362 : loop_nest->safe_push (loop);
6277 1014362 : if (loop->inner)
6278 97857 : return find_loop_nest_1 (loop->inner, loop_nest);
6279 : return true;
6280 : }
6281 :
6282 : /* Returns true when the data dependences have been computed, false otherwise.
6283 : Given a loop nest LOOP, the following vectors are returned:
6284 : DATAREFS is initialized to all the array elements contained in this loop,
6285 : DEPENDENCE_RELATIONS contains the relations between the data references.
6286 : Compute read-read and self relations if
6287 : COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
6288 :
6289 : bool
6290 405806 : compute_data_dependences_for_loop (class loop *loop,
6291 : bool compute_self_and_read_read_dependences,
6292 : vec<loop_p> *loop_nest,
6293 : vec<data_reference_p> *datarefs,
6294 : vec<ddr_p> *dependence_relations)
6295 : {
6296 405806 : bool res = true;
6297 :
6298 405806 : memset (&dependence_stats, 0, sizeof (dependence_stats));
6299 :
6300 : /* If the loop nest is not well formed, or one of the data references
6301 : is not computable, give up without spending time to compute other
6302 : dependences. */
6303 405806 : if (!loop
6304 405806 : || !find_loop_nest (loop, loop_nest)
6305 405804 : || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6306 664974 : || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6307 : compute_self_and_read_read_dependences))
6308 : res = false;
6309 :
6310 405806 : if (dump_file && (dump_flags & TDF_STATS))
6311 : {
6312 157 : fprintf (dump_file, "Dependence tester statistics:\n");
6313 :
6314 157 : fprintf (dump_file, "Number of dependence tests: %d\n",
6315 : dependence_stats.num_dependence_tests);
6316 157 : fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6317 : dependence_stats.num_dependence_dependent);
6318 157 : fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6319 : dependence_stats.num_dependence_independent);
6320 157 : fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6321 : dependence_stats.num_dependence_undetermined);
6322 :
6323 157 : fprintf (dump_file, "Number of subscript tests: %d\n",
6324 : dependence_stats.num_subscript_tests);
6325 157 : fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6326 : dependence_stats.num_subscript_undetermined);
6327 157 : fprintf (dump_file, "Number of same subscript function: %d\n",
6328 : dependence_stats.num_same_subscript_function);
6329 :
6330 157 : fprintf (dump_file, "Number of ziv tests: %d\n",
6331 : dependence_stats.num_ziv);
6332 157 : fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6333 : dependence_stats.num_ziv_dependent);
6334 157 : fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6335 : dependence_stats.num_ziv_independent);
6336 157 : fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6337 : dependence_stats.num_ziv_unimplemented);
6338 :
6339 157 : fprintf (dump_file, "Number of siv tests: %d\n",
6340 : dependence_stats.num_siv);
6341 157 : fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6342 : dependence_stats.num_siv_dependent);
6343 157 : fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6344 : dependence_stats.num_siv_independent);
6345 157 : fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6346 : dependence_stats.num_siv_unimplemented);
6347 :
6348 157 : fprintf (dump_file, "Number of miv tests: %d\n",
6349 : dependence_stats.num_miv);
6350 157 : fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6351 : dependence_stats.num_miv_dependent);
6352 157 : fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6353 : dependence_stats.num_miv_independent);
6354 157 : fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6355 : dependence_stats.num_miv_unimplemented);
6356 : }
6357 :
6358 405806 : return res;
6359 : }
6360 :
6361 : /* Free the memory used by a data dependence relation DDR. */
6362 :
6363 : void
6364 13258572 : free_dependence_relation (struct data_dependence_relation *ddr)
6365 : {
6366 13258572 : if (ddr == NULL)
6367 : return;
6368 :
6369 13258572 : if (DDR_SUBSCRIPTS (ddr).exists ())
6370 894210 : free_subscripts (DDR_SUBSCRIPTS (ddr));
6371 13258572 : DDR_DIST_VECTS (ddr).release ();
6372 13258572 : DDR_DIR_VECTS (ddr).release ();
6373 :
6374 13258572 : free (ddr);
6375 : }
6376 :
6377 : /* Free the memory used by the data dependence relations from
6378 : DEPENDENCE_RELATIONS. */
6379 :
6380 : void
6381 2838893 : free_dependence_relations (vec<ddr_p>& dependence_relations)
6382 : {
6383 9222654 : for (data_dependence_relation *ddr : dependence_relations)
6384 5224247 : if (ddr)
6385 5224247 : free_dependence_relation (ddr);
6386 :
6387 2838893 : dependence_relations.release ();
6388 2838893 : }
6389 :
6390 : /* Free the memory used by the data references from DATAREFS. */
6391 :
6392 : void
6393 3478606 : free_data_refs (vec<data_reference_p>& datarefs)
6394 : {
6395 20794358 : for (data_reference *dr : datarefs)
6396 12988450 : free_data_ref (dr);
6397 3478606 : datarefs.release ();
6398 3478606 : }
6399 :
6400 : /* Common routine implementing both dr_direction_indicator and
6401 : dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
6402 : to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6403 : Return the step as the indicator otherwise. */
6404 :
6405 : static tree
6406 44122 : dr_step_indicator (struct data_reference *dr, int useful_min)
6407 : {
6408 44122 : tree step = DR_STEP (dr);
6409 44122 : if (!step)
6410 : return NULL_TREE;
6411 44122 : STRIP_NOPS (step);
6412 : /* Look for cases where the step is scaled by a positive constant
6413 : integer, which will often be the access size. If the multiplication
6414 : doesn't change the sign (due to overflow effects) then we can
6415 : test the unscaled value instead. */
6416 44122 : if (TREE_CODE (step) == MULT_EXPR
6417 5391 : && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6418 49482 : && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6419 : {
6420 5360 : tree factor = TREE_OPERAND (step, 1);
6421 5360 : step = TREE_OPERAND (step, 0);
6422 :
6423 : /* Strip widening and truncating conversions as well as nops. */
6424 1147 : if (CONVERT_EXPR_P (step)
6425 5360 : && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6426 4213 : step = TREE_OPERAND (step, 0);
6427 5360 : tree type = TREE_TYPE (step);
6428 :
6429 : /* Get the range of step values that would not cause overflow. */
6430 10720 : widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6431 5360 : / wi::to_widest (factor));
6432 10720 : widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6433 5360 : / wi::to_widest (factor));
6434 :
6435 : /* Get the range of values that the unconverted step actually has. */
6436 5360 : wide_int step_min, step_max;
6437 5360 : int_range_max vr;
6438 5360 : if (TREE_CODE (step) != SSA_NAME
6439 10648 : || !get_range_query (cfun)->range_of_expr (vr, step)
6440 10684 : || vr.undefined_p ())
6441 : {
6442 36 : step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6443 36 : step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6444 : }
6445 : else
6446 : {
6447 5324 : step_min = vr.lower_bound ();
6448 5324 : step_max = vr.upper_bound ();
6449 : }
6450 :
6451 : /* Check whether the unconverted step has an acceptable range. */
6452 5360 : signop sgn = TYPE_SIGN (type);
6453 10720 : if (wi::les_p (minv, widest_int::from (step_min, sgn))
6454 13806 : && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6455 : {
6456 1534 : if (wi::ge_p (step_min, useful_min, sgn))
6457 436 : return ssize_int (useful_min);
6458 1098 : else if (wi::lt_p (step_max, 0, sgn))
6459 0 : return ssize_int (-1);
6460 : else
6461 1098 : return fold_convert (ssizetype, step);
6462 : }
6463 5360 : }
6464 42588 : return DR_STEP (dr);
6465 : }
6466 :
6467 : /* Return a value that is negative iff DR has a negative step. */
6468 :
6469 : tree
6470 11848 : dr_direction_indicator (struct data_reference *dr)
6471 : {
6472 11848 : return dr_step_indicator (dr, 0);
6473 : }
6474 :
6475 : /* Return a value that is zero iff DR has a zero step. */
6476 :
6477 : tree
6478 32274 : dr_zero_step_indicator (struct data_reference *dr)
6479 : {
6480 32274 : return dr_step_indicator (dr, 1);
6481 : }
6482 :
6483 : /* Return true if DR is known to have a nonnegative (but possibly zero)
6484 : step. */
6485 :
6486 : bool
6487 4914 : dr_known_forward_stride_p (struct data_reference *dr)
6488 : {
6489 4914 : tree indicator = dr_direction_indicator (dr);
6490 4914 : tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6491 : fold_convert (ssizetype, indicator),
6492 : ssize_int (0));
6493 4914 : return neg_step_val && integer_zerop (neg_step_val);
6494 : }
|