Line data Source code
1 : /* Data references and dependences detectors.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 : Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : /* This pass walks a given loop structure searching for array
22 : references. The information about the array accesses is recorded
23 : in DATA_REFERENCE structures.
24 :
25 : The basic test for determining the dependences is:
26 : given two access functions chrec1 and chrec2 to the same array, and
27 : x and y two vectors from the iteration domain, the same element of
28 : the array is accessed twice at iterations x and y if and only if:
29 : | chrec1 (x) == chrec2 (y).
30 :
31 : The goals of this analysis are:
32 :
33 : - to determine the independence: the relation between two
34 : independent accesses is qualified with the chrec_known (this
35 : information allows a loop parallelization),
36 :
37 : - when two data references access the same data, to qualify the
38 : dependence relation with classic dependence representations:
39 :
40 : - distance vectors
41 : - direction vectors
42 : - loop carried level dependence
43 : - polyhedron dependence
44 : or with the chains of recurrences based representation,
45 :
46 : - to define a knowledge base for storing the data dependence
47 : information,
48 :
49 : - to define an interface to access this data.
50 :
51 :
52 : Definitions:
53 :
54 : - subscript: given two array accesses a subscript is the tuple
55 : composed of the access functions for a given dimension. Example:
56 : Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 : (f1, g1), (f2, g2), (f3, g3).
58 :
59 : - Diophantine equation: an equation whose coefficients and
60 : solutions are integer constants, for example the equation
61 : | 3*x + 2*y = 1
62 : has an integer solution x = 1 and y = -1.
63 :
64 : References:
65 :
66 : - "Advanced Compilation for High Performance Computing" by Randy
67 : Allen and Ken Kennedy.
68 : http://citeseer.ist.psu.edu/goff91practical.html
69 :
70 : - "Loop Transformations for Restructuring Compilers - The Foundations"
71 : by Utpal Banerjee.
72 :
73 :
74 : */
75 :
76 : #define INCLUDE_ALGORITHM
77 : #include "config.h"
78 : #include "system.h"
79 : #include "coretypes.h"
80 : #include "backend.h"
81 : #include "rtl.h"
82 : #include "tree.h"
83 : #include "gimple.h"
84 : #include "gimple-pretty-print.h"
85 : #include "alias.h"
86 : #include "fold-const.h"
87 : #include "expr.h"
88 : #include "gimple-iterator.h"
89 : #include "tree-ssa-loop-niter.h"
90 : #include "tree-ssa-loop.h"
91 : #include "tree-ssa.h"
92 : #include "cfgloop.h"
93 : #include "tree-data-ref.h"
94 : #include "tree-scalar-evolution.h"
95 : #include "dumpfile.h"
96 : #include "tree-affine.h"
97 : #include "builtins.h"
98 : #include "tree-eh.h"
99 : #include "ssa.h"
100 : #include "internal-fn.h"
101 : #include "vr-values.h"
102 : #include "range-op.h"
103 : #include "tree-ssa-loop-ivopts.h"
104 : #include "calls.h"
105 :
/* File-local counters for the dependence analysis, grouped by the kind
   of test they belong to.  The object has static storage duration and no
   initializer, so every counter starts at zero.  */

static struct datadep_stats
{
  /* Whole dependence relations.  */
  int num_dependence_tests,
      num_dependence_dependent,
      num_dependence_independent,
      num_dependence_undetermined;

  /* Individual subscripts.  */
  int num_subscript_tests,
      num_subscript_undetermined,
      num_same_subscript_function;

  /* ZIV subscript tests.  */
  int num_ziv,
      num_ziv_independent,
      num_ziv_dependent,
      num_ziv_unimplemented;

  /* SIV subscript tests.  */
  int num_siv,
      num_siv_independent,
      num_siv_dependent,
      num_siv_unimplemented;

  /* MIV subscript tests.  */
  int num_miv,
      num_miv_independent,
      num_miv_dependent,
      num_miv_unimplemented;
} dependence_stats;
132 :
133 : static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
134 : unsigned int, unsigned int,
135 : class loop *);
136 : /* Returns true iff A divides B. */
137 :
138 : static inline bool
139 2010 : tree_fold_divides_p (const_tree a, const_tree b)
140 : {
141 2010 : gcc_assert (TREE_CODE (a) == INTEGER_CST);
142 2010 : gcc_assert (TREE_CODE (b) == INTEGER_CST);
143 2010 : return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
144 : }
145 :
146 : /* Returns true iff A divides B. */
147 :
148 : static inline bool
149 1683328 : int_divides_p (lambda_int a, lambda_int b)
150 : {
151 1683328 : return ((b % a) == 0);
152 : }
153 :
154 : /* Return true if reference REF contains a union access. */
155 :
156 : static bool
157 453033 : ref_contains_union_access_p (tree ref)
158 : {
159 499085 : while (handled_component_p (ref))
160 : {
161 99106 : ref = TREE_OPERAND (ref, 0);
162 198212 : if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
163 99106 : || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
164 : return true;
165 : }
166 : return false;
167 : }
168 :
169 :
170 :
171 : /* Dump into FILE all the data references from DATAREFS. */
172 :
173 : static void
174 0 : dump_data_references (FILE *file, vec<data_reference_p> datarefs)
175 : {
176 0 : for (data_reference *dr : datarefs)
177 0 : dump_data_reference (file, dr);
178 0 : }
179 :
180 : /* Unified dump into FILE all the data references from DATAREFS. */
181 :
182 : DEBUG_FUNCTION void
183 0 : debug (vec<data_reference_p> &ref)
184 : {
185 0 : dump_data_references (stderr, ref);
186 0 : }
187 :
188 : DEBUG_FUNCTION void
189 0 : debug (vec<data_reference_p> *ptr)
190 : {
191 0 : if (ptr)
192 0 : debug (*ptr);
193 : else
194 0 : fprintf (stderr, "<nil>\n");
195 0 : }
196 :
197 :
198 : /* Dump into STDERR all the data references from DATAREFS. */
199 :
200 : DEBUG_FUNCTION void
201 0 : debug_data_references (vec<data_reference_p> datarefs)
202 : {
203 0 : dump_data_references (stderr, datarefs);
204 0 : }
205 :
206 : /* Print to STDERR the data_reference DR. */
207 :
208 : DEBUG_FUNCTION void
209 0 : debug_data_reference (struct data_reference *dr)
210 : {
211 0 : dump_data_reference (stderr, dr);
212 0 : }
213 :
214 : /* Dump function for a DATA_REFERENCE structure. */
215 :
216 : void
217 3480 : dump_data_reference (FILE *outf,
218 : struct data_reference *dr)
219 : {
220 3480 : unsigned int i;
221 :
222 3480 : fprintf (outf, "#(Data Ref: \n");
223 3480 : fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
224 3480 : fprintf (outf, "# stmt: ");
225 3480 : print_gimple_stmt (outf, DR_STMT (dr), 0);
226 3480 : fprintf (outf, "# ref: ");
227 3480 : print_generic_stmt (outf, DR_REF (dr));
228 3480 : fprintf (outf, "# base_object: ");
229 3480 : print_generic_stmt (outf, DR_BASE_OBJECT (dr));
230 :
231 10786 : for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
232 : {
233 3826 : fprintf (outf, "# Access function %d: ", i);
234 3826 : print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
235 : }
236 3480 : fprintf (outf, "#)\n");
237 3480 : }
238 :
239 : /* Unified dump function for a DATA_REFERENCE structure. */
240 :
241 : DEBUG_FUNCTION void
242 0 : debug (data_reference &ref)
243 : {
244 0 : dump_data_reference (stderr, &ref);
245 0 : }
246 :
247 : DEBUG_FUNCTION void
248 0 : debug (data_reference *ptr)
249 : {
250 0 : if (ptr)
251 0 : debug (*ptr);
252 : else
253 0 : fprintf (stderr, "<nil>\n");
254 0 : }
255 :
256 :
257 : /* Dumps the affine function described by FN to the file OUTF. */
258 :
259 : DEBUG_FUNCTION void
260 32646 : dump_affine_function (FILE *outf, affine_fn fn)
261 : {
262 32646 : unsigned i;
263 32646 : tree coef;
264 :
265 32646 : print_generic_expr (outf, fn[0], TDF_SLIM);
266 68986 : for (i = 1; fn.iterate (i, &coef); i++)
267 : {
268 3694 : fprintf (outf, " + ");
269 3694 : print_generic_expr (outf, coef, TDF_SLIM);
270 3694 : fprintf (outf, " * x_%u", i);
271 : }
272 32646 : }
273 :
274 : /* Dumps the conflict function CF to the file OUTF. */
275 :
276 : DEBUG_FUNCTION void
277 160102 : dump_conflict_function (FILE *outf, conflict_function *cf)
278 : {
279 160102 : unsigned i;
280 :
281 160102 : if (cf->n == NO_DEPENDENCE)
282 121318 : fprintf (outf, "no dependence");
283 38784 : else if (cf->n == NOT_KNOWN)
284 6138 : fprintf (outf, "not known");
285 : else
286 : {
287 65292 : for (i = 0; i < cf->n; i++)
288 : {
289 32646 : if (i != 0)
290 0 : fprintf (outf, " ");
291 32646 : fprintf (outf, "[");
292 32646 : dump_affine_function (outf, cf->fns[i]);
293 32646 : fprintf (outf, "]");
294 : }
295 : }
296 160102 : }
297 :
298 : /* Dump function for a SUBSCRIPT structure. */
299 :
300 : DEBUG_FUNCTION void
301 838 : dump_subscript (FILE *outf, struct subscript *subscript)
302 : {
303 838 : conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
304 :
305 838 : fprintf (outf, "\n (subscript \n");
306 838 : fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
307 838 : dump_conflict_function (outf, cf);
308 838 : if (CF_NONTRIVIAL_P (cf))
309 : {
310 838 : tree last_iteration = SUB_LAST_CONFLICT (subscript);
311 838 : fprintf (outf, "\n last_conflict: ");
312 838 : print_generic_expr (outf, last_iteration);
313 : }
314 :
315 838 : cf = SUB_CONFLICTS_IN_B (subscript);
316 838 : fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
317 838 : dump_conflict_function (outf, cf);
318 838 : if (CF_NONTRIVIAL_P (cf))
319 : {
320 838 : tree last_iteration = SUB_LAST_CONFLICT (subscript);
321 838 : fprintf (outf, "\n last_conflict: ");
322 838 : print_generic_expr (outf, last_iteration);
323 : }
324 :
325 838 : fprintf (outf, "\n (Subscript distance: ");
326 838 : print_generic_expr (outf, SUB_DISTANCE (subscript));
327 838 : fprintf (outf, " ))\n");
328 838 : }
329 :
330 : /* Print the classic direction vector DIRV to OUTF. */
331 :
332 : DEBUG_FUNCTION void
333 777 : print_direction_vector (FILE *outf,
334 : lambda_vector dirv,
335 : int length)
336 : {
337 777 : int eq;
338 :
339 1683 : for (eq = 0; eq < length; eq++)
340 : {
341 906 : enum data_dependence_direction dir = ((enum data_dependence_direction)
342 906 : dirv[eq]);
343 :
344 906 : switch (dir)
345 : {
346 139 : case dir_positive:
347 139 : fprintf (outf, " +");
348 139 : break;
349 6 : case dir_negative:
350 6 : fprintf (outf, " -");
351 6 : break;
352 761 : case dir_equal:
353 761 : fprintf (outf, " =");
354 761 : break;
355 0 : case dir_positive_or_equal:
356 0 : fprintf (outf, " +=");
357 0 : break;
358 0 : case dir_positive_or_negative:
359 0 : fprintf (outf, " +-");
360 0 : break;
361 0 : case dir_negative_or_equal:
362 0 : fprintf (outf, " -=");
363 0 : break;
364 0 : case dir_star:
365 0 : fprintf (outf, " *");
366 0 : break;
367 0 : default:
368 0 : fprintf (outf, "indep");
369 0 : break;
370 : }
371 : }
372 777 : fprintf (outf, "\n");
373 777 : }
374 :
375 : /* Print a vector of direction vectors. */
376 :
377 : DEBUG_FUNCTION void
378 0 : print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
379 : int length)
380 : {
381 0 : for (lambda_vector v : dir_vects)
382 0 : print_direction_vector (outf, v, length);
383 0 : }
384 :
385 : /* Print out a vector VEC of length N to OUTFILE. */
386 :
387 : DEBUG_FUNCTION void
388 4838 : print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 : {
390 4838 : int i;
391 :
392 10088 : for (i = 0; i < n; i++)
393 5250 : fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
394 4838 : fprintf (outfile, "\n");
395 4838 : }
396 :
397 : /* Print a vector of distance vectors. */
398 :
399 : DEBUG_FUNCTION void
400 0 : print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
401 : int length)
402 : {
403 0 : for (lambda_vector v : dist_vects)
404 0 : print_lambda_vector (outf, v, length);
405 0 : }
406 :
407 : /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
408 :
409 : DEBUG_FUNCTION void
410 1582 : dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
411 : {
412 1582 : struct data_reference *dra, *drb;
413 :
414 1582 : fprintf (outf, "(Data Dep: \n");
415 :
416 1582 : if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
417 : {
418 399 : if (ddr)
419 : {
420 399 : dra = DDR_A (ddr);
421 399 : drb = DDR_B (ddr);
422 399 : if (dra)
423 399 : dump_data_reference (outf, dra);
424 : else
425 0 : fprintf (outf, " (nil)\n");
426 399 : if (drb)
427 399 : dump_data_reference (outf, drb);
428 : else
429 0 : fprintf (outf, " (nil)\n");
430 : }
431 399 : fprintf (outf, " (don't know)\n)\n");
432 399 : return;
433 : }
434 :
435 1183 : dra = DDR_A (ddr);
436 1183 : drb = DDR_B (ddr);
437 1183 : dump_data_reference (outf, dra);
438 1183 : dump_data_reference (outf, drb);
439 :
440 1183 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
441 426 : fprintf (outf, " (no dependence)\n");
442 :
443 757 : else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
444 : {
445 : unsigned int i;
446 : class loop *loopi;
447 :
448 : subscript *sub;
449 1595 : FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
450 : {
451 838 : fprintf (outf, " access_fn_A: ");
452 838 : print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
453 838 : fprintf (outf, " access_fn_B: ");
454 838 : print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
455 838 : dump_subscript (outf, sub);
456 : }
457 :
458 757 : fprintf (outf, " loop nest: (");
459 2374 : FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
460 860 : fprintf (outf, "%d ", loopi->num);
461 757 : fprintf (outf, ")\n");
462 :
463 3820 : for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
464 : {
465 777 : fprintf (outf, " distance_vector: ");
466 777 : print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
467 1554 : DDR_NB_LOOPS (ddr));
468 : }
469 :
470 1534 : for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
471 : {
472 777 : fprintf (outf, " direction_vector: ");
473 777 : print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
474 1554 : DDR_NB_LOOPS (ddr));
475 : }
476 : }
477 :
478 1183 : fprintf (outf, ")\n");
479 : }
480 :
481 : /* Debug version. */
482 :
483 : DEBUG_FUNCTION void
484 0 : debug_data_dependence_relation (const struct data_dependence_relation *ddr)
485 : {
486 0 : dump_data_dependence_relation (stderr, ddr);
487 0 : }
488 :
489 : /* Dump into FILE all the dependence relations from DDRS. */
490 :
491 : DEBUG_FUNCTION void
492 307 : dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
493 : {
494 2473 : for (auto ddr : ddrs)
495 1582 : dump_data_dependence_relation (file, ddr);
496 307 : }
497 :
498 : DEBUG_FUNCTION void
499 0 : debug (vec<ddr_p> &ref)
500 : {
501 0 : dump_data_dependence_relations (stderr, ref);
502 0 : }
503 :
504 : DEBUG_FUNCTION void
505 0 : debug (vec<ddr_p> *ptr)
506 : {
507 0 : if (ptr)
508 0 : debug (*ptr);
509 : else
510 0 : fprintf (stderr, "<nil>\n");
511 0 : }
512 :
513 :
514 : /* Dump to STDERR all the dependence relations from DDRS. */
515 :
516 : DEBUG_FUNCTION void
517 0 : debug_data_dependence_relations (vec<ddr_p> ddrs)
518 : {
519 0 : dump_data_dependence_relations (stderr, ddrs);
520 0 : }
521 :
522 : /* Dumps the distance and direction vectors in FILE. DDRS contains
523 : the dependence relations, and VECT_SIZE is the size of the
524 : dependence vectors, or in other words the number of loops in the
525 : considered nest. */
526 :
527 : DEBUG_FUNCTION void
528 0 : dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
529 : {
530 0 : for (data_dependence_relation *ddr : ddrs)
531 0 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
532 : {
533 0 : for (lambda_vector v : DDR_DIST_VECTS (ddr))
534 : {
535 0 : fprintf (file, "DISTANCE_V (");
536 0 : print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
537 0 : fprintf (file, ")\n");
538 : }
539 :
540 0 : for (lambda_vector v : DDR_DIR_VECTS (ddr))
541 : {
542 0 : fprintf (file, "DIRECTION_V (");
543 0 : print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
544 0 : fprintf (file, ")\n");
545 : }
546 : }
547 :
548 0 : fprintf (file, "\n\n");
549 0 : }
550 :
551 : /* Dumps the data dependence relations DDRS in FILE. */
552 :
553 : DEBUG_FUNCTION void
554 0 : dump_ddrs (FILE *file, vec<ddr_p> ddrs)
555 : {
556 0 : for (data_dependence_relation *ddr : ddrs)
557 0 : dump_data_dependence_relation (file, ddr);
558 :
559 0 : fprintf (file, "\n\n");
560 0 : }
561 :
562 : DEBUG_FUNCTION void
563 0 : debug_ddrs (vec<ddr_p> ddrs)
564 : {
565 0 : dump_ddrs (stderr, ddrs);
566 0 : }
567 :
568 : /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
569 : OP0 CODE OP1, where:
570 :
571 : - OP0 CODE OP1 has integral type TYPE
572 : - the range of OP0 is given by OP0_RANGE and
573 : - the range of OP1 is given by OP1_RANGE.
574 :
575 : Independently of RESULT_RANGE, try to compute:
576 :
577 : DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
578 : - (sizetype) (OP0 CODE OP1)
579 :
580 : as a constant and subtract DELTA from the ssizetype constant in *OFF.
581 : Return true on success, or false if DELTA is not known at compile time.
582 :
583 : Truncation and sign changes are known to distribute over CODE, i.e.
584 :
585 : (itype) (A CODE B) == (itype) A CODE (itype) B
586 :
587 : for any integral type ITYPE whose precision is no greater than the
588 : precision of A and B. */
589 :
590 : static bool
591 4369415 : compute_distributive_range (tree type, irange &op0_range,
592 : tree_code code, irange &op1_range,
593 : tree *off, irange *result_range)
594 : {
595 4369415 : gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
596 4369415 : if (result_range)
597 : {
598 1025394 : range_op_handler op (code);
599 1025394 : if (!op.fold_range (*result_range, type, op0_range, op1_range))
600 0 : result_range->set_varying (type);
601 : }
602 :
603 : /* The distributive property guarantees that if TYPE is no narrower
604 : than SIZETYPE,
605 :
606 : (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
607 :
608 : and so we can treat DELTA as zero. */
609 4369415 : if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
610 : return true;
611 :
612 : /* If overflow is undefined, we can assume that:
613 :
614 : X == (ssizetype) OP0 CODE (ssizetype) OP1
615 :
616 : is within the range of TYPE, i.e.:
617 :
618 : X == (ssizetype) (TYPE) X
619 :
620 : Distributing the (TYPE) truncation over X gives:
621 :
622 : X == (ssizetype) (OP0 CODE OP1)
623 :
624 : Casting both sides to sizetype and distributing the sizetype cast
625 : over X gives:
626 :
627 : (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
628 :
629 : and so we can treat DELTA as zero. */
630 270787 : if (TYPE_OVERFLOW_UNDEFINED (type))
631 : return true;
632 :
633 : /* Compute the range of:
634 :
635 : (ssizetype) OP0 CODE (ssizetype) OP1
636 :
637 : The distributive property guarantees that this has the same bitpattern as:
638 :
639 : (sizetype) OP0 CODE (sizetype) OP1
640 :
641 : but its range is more conducive to analysis. */
642 103616 : range_cast (op0_range, ssizetype);
643 103616 : range_cast (op1_range, ssizetype);
644 103616 : int_range_max wide_range;
645 103616 : range_op_handler op (code);
646 103616 : bool saved_flag_wrapv = flag_wrapv;
647 103616 : flag_wrapv = 1;
648 103616 : if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
649 0 : wide_range.set_varying (ssizetype);;
650 103616 : flag_wrapv = saved_flag_wrapv;
651 103616 : if (wide_range.num_pairs () != 1
652 103616 : || wide_range.varying_p () || wide_range.undefined_p ())
653 : return false;
654 :
655 83726 : wide_int lb = wide_range.lower_bound ();
656 83726 : wide_int ub = wide_range.upper_bound ();
657 :
658 : /* Calculate the number of times that each end of the range overflows or
659 : underflows TYPE. We can only calculate DELTA if the numbers match. */
660 83726 : unsigned int precision = TYPE_PRECISION (type);
661 83726 : if (!TYPE_UNSIGNED (type))
662 : {
663 215 : wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
664 215 : lb -= type_min;
665 215 : ub -= type_min;
666 215 : }
667 83726 : wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
668 83726 : lb &= upper_bits;
669 83726 : ub &= upper_bits;
670 83726 : if (lb != ub)
671 : return false;
672 :
673 : /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
674 : negative values indicating underflow. The low PRECISION bits of LB
675 : are clear, so DELTA is therefore LB (== UB). */
676 24531 : *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
677 24531 : return true;
678 103616 : }
679 :
680 : /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
681 : given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and
682 : FROM_TYPE are integral types. */
683 :
684 : static bool
685 2795209 : nop_conversion_for_offset_p (tree to_type, tree from_type, irange &range)
686 : {
687 2795209 : gcc_assert (INTEGRAL_TYPE_P (to_type)
688 : && INTEGRAL_TYPE_P (from_type)
689 : && !TYPE_OVERFLOW_TRAPS (to_type)
690 : && !TYPE_OVERFLOW_TRAPS (from_type));
691 :
692 : /* Converting to something no narrower than sizetype and then to sizetype
693 : is equivalent to converting directly to sizetype. */
694 2795209 : if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
695 : return true;
696 :
697 : /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */
698 85832 : if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
699 85832 : && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
700 : return true;
701 :
702 : /* For narrowing conversions, we could in principle test whether
703 : the bits in FROM_TYPE but not in TO_TYPE have a fixed value
704 : and apply a constant adjustment.
705 :
706 : For other conversions (which involve a sign change) we could
707 : check that the signs are always equal, and apply a constant
708 : adjustment if the signs are negative.
709 :
710 : However, both cases should be rare. */
711 70765 : return range_fits_type_p (&range, TYPE_PRECISION (to_type),
712 141530 : TYPE_SIGN (to_type));
713 : }
714 :
715 : static void
716 : split_constant_offset (tree type, tree *var, tree *off,
717 : irange *result_range,
718 : hash_map<tree, std::pair<tree, tree> > &cache,
719 : unsigned *limit);
720 :
721 : /* Helper function for split_constant_offset. If TYPE is a pointer type,
722 : try to express OP0 CODE OP1 as:
723 :
724 : POINTER_PLUS <*VAR, (sizetype) *OFF>
725 :
726 : where:
727 :
728 : - *VAR has type TYPE
729 : - *OFF is a constant of type ssizetype.
730 :
731 : If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
732 :
733 : *VAR + (sizetype) *OFF
734 :
735 : where:
736 :
737 : - *VAR has type sizetype
738 : - *OFF is a constant of type ssizetype.
739 :
740 : In both cases, OP0 CODE OP1 has type TYPE.
741 :
742 : Return true on success. A false return value indicates that we can't
743 : do better than set *OFF to zero.
744 :
745 : When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
746 : if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
747 :
748 : CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
749 : visited. LIMIT counts down the number of SSA names that we are
750 : allowed to process before giving up. */
751 :
752 : static bool
753 57740581 : split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
754 : tree *var, tree *off, irange *result_range,
755 : hash_map<tree, std::pair<tree, tree> > &cache,
756 : unsigned *limit)
757 : {
758 57740581 : tree var0, var1;
759 57740581 : tree off0, off1;
760 57740581 : int_range_max op0_range, op1_range;
761 :
762 57740581 : *var = NULL_TREE;
763 57740581 : *off = NULL_TREE;
764 :
765 57740581 : if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
766 : return false;
767 :
768 57739959 : if (TREE_CODE (op0) == SSA_NAME
769 57739959 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
770 : return false;
771 57739544 : if (op1
772 7794536 : && TREE_CODE (op1) == SSA_NAME
773 60124824 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
774 : return false;
775 :
776 57739544 : switch (code)
777 : {
778 17499877 : case INTEGER_CST:
779 17499877 : *var = size_int (0);
780 17499877 : *off = fold_convert (ssizetype, op0);
781 17499877 : if (result_range)
782 : {
783 1174023 : wide_int w = wi::to_wide (op0);
784 1174023 : result_range->set (TREE_TYPE (op0), w, w);
785 1174023 : }
786 : return true;
787 :
788 2587383 : case POINTER_PLUS_EXPR:
789 2587383 : split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
790 2587383 : split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
791 2587383 : *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
792 2587383 : *off = size_binop (PLUS_EXPR, off0, off1);
793 2587383 : return true;
794 :
795 2107419 : case PLUS_EXPR:
796 2107419 : case MINUS_EXPR:
797 2107419 : split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
798 2107419 : split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
799 2107419 : *off = size_binop (code, off0, off1);
800 2107419 : if (!compute_distributive_range (type, op0_range, code, op1_range,
801 : off, result_range))
802 : return false;
803 2048104 : *var = fold_build2 (code, sizetype, var0, var1);
804 2048104 : return true;
805 :
806 2749796 : case MULT_EXPR:
807 2749796 : if (TREE_CODE (op1) != INTEGER_CST)
808 : return false;
809 :
810 2261996 : split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
811 2261996 : op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
812 2261996 : *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
813 2261996 : if (!compute_distributive_range (type, op0_range, code, op1_range,
814 : off, result_range))
815 : return false;
816 2242226 : *var = fold_build2 (MULT_EXPR, sizetype, var0,
817 : fold_convert (sizetype, op1));
818 2242226 : return true;
819 :
820 10177240 : case ADDR_EXPR:
821 10177240 : {
822 10177240 : tree base, poffset;
823 10177240 : poly_int64 pbitsize, pbitpos, pbytepos;
824 10177240 : machine_mode pmode;
825 10177240 : int punsignedp, preversep, pvolatilep;
826 :
827 10177240 : op0 = TREE_OPERAND (op0, 0);
828 10177240 : base
829 10177240 : = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
830 : &punsignedp, &preversep, &pvolatilep);
831 :
832 10203262 : if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
833 : return false;
834 10177240 : base = build_fold_addr_expr (base);
835 10177240 : off0 = ssize_int (pbytepos);
836 :
837 10177240 : if (poffset)
838 : {
839 1605 : split_constant_offset (poffset, &poffset, &off1, nullptr,
840 : cache, limit);
841 1605 : off0 = size_binop (PLUS_EXPR, off0, off1);
842 1605 : base = fold_build_pointer_plus (base, poffset);
843 : }
844 :
845 10177240 : var0 = fold_convert (type, base);
846 :
847 : /* If variable length types are involved, punt, otherwise casts
848 : might be converted into ARRAY_REFs in gimplify_conversion.
849 : To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
850 : possibly no longer appears in current GIMPLE, might resurface.
851 : This perhaps could run
852 : if (CONVERT_EXPR_P (var0))
853 : {
854 : gimplify_conversion (&var0);
855 : // Attempt to fill in any within var0 found ARRAY_REF's
856 : // element size from corresponding op embedded ARRAY_REF,
857 : // if unsuccessful, just punt.
858 : } */
859 20760278 : while (POINTER_TYPE_P (type))
860 10583038 : type = TREE_TYPE (type);
861 10177240 : if (int_size_in_bytes (type) < 0)
862 : return false;
863 :
864 10151218 : *var = var0;
865 10151218 : *off = off0;
866 10151218 : return true;
867 : }
868 :
869 15919365 : case SSA_NAME:
870 15919365 : {
871 15919365 : gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
872 15919365 : enum tree_code subcode;
873 :
874 15919365 : if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
875 : return false;
876 :
877 8713169 : subcode = gimple_assign_rhs_code (def_stmt);
878 :
879 : /* We are using a cache to avoid un-CSEing large amounts of code. */
880 8713169 : bool use_cache = false;
881 8713169 : if (!has_single_use (op0)
882 8713169 : && (subcode == POINTER_PLUS_EXPR
883 4496854 : || subcode == PLUS_EXPR
884 : || subcode == MINUS_EXPR
885 : || subcode == MULT_EXPR
886 : || subcode == ADDR_EXPR
887 : || CONVERT_EXPR_CODE_P (subcode)))
888 : {
889 2174281 : use_cache = true;
890 2174281 : bool existed;
891 2174281 : std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
892 2174281 : if (existed)
893 : {
894 32067 : if (integer_zerop (e.second))
895 32067 : return false;
896 1194 : *var = e.first;
897 1194 : *off = e.second;
898 : /* The caller sets the range in this case. */
899 1194 : return true;
900 : }
901 2142214 : e = std::make_pair (op0, ssize_int (0));
902 : }
903 :
904 8681102 : if (*limit == 0)
905 : return false;
906 8680066 : --*limit;
907 :
908 8680066 : var0 = gimple_assign_rhs1 (def_stmt);
909 8680066 : var1 = gimple_assign_rhs2 (def_stmt);
910 :
911 8680066 : bool res = split_constant_offset_1 (type, var0, subcode, var1,
912 : var, off, nullptr, cache, limit);
913 8680066 : if (res && use_cache)
914 1912516 : *cache.get (op0) = std::make_pair (*var, *off);
915 : /* The caller sets the range in this case. */
916 : return res;
917 : }
918 3256340 : CASE_CONVERT:
919 3256340 : {
920 : /* We can only handle the following conversions:
921 :
922 : - Conversions from one pointer type to another pointer type.
923 :
924 : - Conversions from one non-trapping integral type to another
925 : non-trapping integral type. In this case, the recursive
926 : call makes sure that:
927 :
928 : (sizetype) OP0
929 :
930 : can be expressed as a sizetype operation involving VAR and OFF,
931 : and all we need to do is check whether:
932 :
933 : (sizetype) OP0 == (sizetype) (TYPE) OP0
934 :
935 : - Conversions from a non-trapping sizetype-size integral type to
936 : a like-sized pointer type. In this case, the recursive call
937 : makes sure that:
938 :
939 : (sizetype) OP0 == *VAR + (sizetype) *OFF
940 :
941 : and we can convert that to:
942 :
943 : POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
944 :
945 : - Conversions from a sizetype-sized pointer type to a like-sized
946 : non-trapping integral type. In this case, the recursive call
947 : makes sure that:
948 :
949 : OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
950 :
951 : where the POINTER_PLUS and *VAR have the same precision as
952 : TYPE (and the same precision as sizetype). Then:
953 :
954 : (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF. */
955 3256340 : tree itype = TREE_TYPE (op0);
956 3256340 : if ((POINTER_TYPE_P (itype)
957 2898374 : || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
958 3255901 : && (POINTER_TYPE_P (type)
959 2844070 : || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
960 6512241 : && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
961 151587 : || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
962 151587 : && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
963 : {
964 3255894 : if (POINTER_TYPE_P (type))
965 : {
966 411824 : split_constant_offset (op0, var, off, nullptr, cache, limit);
967 411824 : *var = fold_convert (type, *var);
968 : }
969 2844070 : else if (POINTER_TYPE_P (itype))
970 : {
971 48861 : split_constant_offset (op0, var, off, nullptr, cache, limit);
972 48861 : *var = fold_convert (sizetype, *var);
973 : }
974 : else
975 : {
976 2795209 : split_constant_offset (op0, var, off, &op0_range,
977 : cache, limit);
978 2795209 : if (!nop_conversion_for_offset_p (type, itype, op0_range))
979 : return false;
980 2738398 : if (result_range)
981 : {
982 1491496 : *result_range = op0_range;
983 1491496 : range_cast (*result_range, type);
984 : }
985 : }
986 3199083 : return true;
987 : }
988 : return false;
989 : }
990 :
991 : default:
992 : return false;
993 : }
994 57740581 : }
995 :
996 : /* If EXP has pointer type, try to express it as:
997 :
998 : POINTER_PLUS <*VAR, (sizetype) *OFF>
999 :
1000 : where:
1001 :
1002 : - *VAR has the same type as EXP
1003 : - *OFF is a constant of type ssizetype.
1004 :
1005 : If EXP has an integral type, try to express (sizetype) EXP as:
1006 :
1007 : *VAR + (sizetype) *OFF
1008 :
1009 : where:
1010 :
1011 : - *VAR has type sizetype
1012 : - *OFF is a constant of type ssizetype.
1013 :
1014 : If EXP_RANGE is nonnull, set it to the range of EXP.
1015 :
1016 : CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1017 : visited. LIMIT counts down the number of SSA names that we are
1018 : allowed to process before giving up. */
1019 :
1020 : static void
1021 49060531 : split_constant_offset (tree exp, tree *var, tree *off, irange *exp_range,
1022 : hash_map<tree, std::pair<tree, tree> > &cache,
1023 : unsigned *limit)
1024 : {
1025 49060531 : tree type = TREE_TYPE (exp), op0, op1;
1026 49060531 : enum tree_code code;
1027 :
1028 49060531 : code = TREE_CODE (exp);
1029 49060531 : if (exp_range)
1030 : {
1031 9272043 : exp_range->set_varying (type);
1032 9272043 : if (code == SSA_NAME)
1033 : {
1034 5519583 : int_range_max vr;
1035 11039166 : get_range_query (cfun)->range_of_expr (vr, exp);
1036 5519583 : if (vr.undefined_p ())
1037 5134 : vr.set_varying (TREE_TYPE (exp));
1038 5519583 : tree vr_min, vr_max;
1039 5519583 : value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
1040 5519583 : wide_int var_min = wi::to_wide (vr_min);
1041 5519583 : wide_int var_max = wi::to_wide (vr_max);
1042 5519583 : wide_int var_nonzero = get_nonzero_bits (exp);
1043 16558749 : vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1044 : &var_min, &var_max,
1045 : var_nonzero,
1046 5519583 : TYPE_SIGN (type));
1047 : /* This check for VR_VARYING is here because the old code
1048 : using get_range_info would return VR_RANGE for the entire
1049 : domain, instead of VR_VARYING. The new code normalizes
1050 : full-domain ranges to VR_VARYING. */
1051 5519583 : if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1052 5399464 : exp_range->set (type, var_min, var_max);
1053 5519583 : }
1054 : }
1055 :
1056 49060531 : if (!tree_is_chrec (exp)
1057 49060525 : && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1058 : {
1059 49060515 : extract_ops_from_tree (exp, &code, &op0, &op1);
1060 49060515 : if (split_constant_offset_1 (type, op0, code, op1, var, off,
1061 : exp_range, cache, limit))
1062 37729085 : return;
1063 : }
1064 :
1065 11331446 : *var = exp;
1066 11331446 : if (INTEGRAL_TYPE_P (type))
1067 3625292 : *var = fold_convert (sizetype, *var);
1068 11331446 : *off = ssize_int (0);
1069 :
1070 11331446 : int_range_max r;
1071 3316543 : if (exp_range && code != SSA_NAME
1072 131374 : && get_range_query (cfun)->range_of_expr (r, exp)
1073 11397133 : && !r.undefined_p ())
1074 65687 : *exp_range = r;
1075 11331446 : }
1076 :
1077 : /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same
1078 : type as EXP while OFF has type ssizetype. */
1079 :
1080 : void
1081 34151432 : split_constant_offset (tree exp, tree *var, tree *off)
1082 : {
1083 34151432 : unsigned limit = param_ssa_name_def_chain_limit;
1084 34151432 : static hash_map<tree, std::pair<tree, tree> > *cache;
1085 34151432 : if (!cache)
1086 80010 : cache = new hash_map<tree, std::pair<tree, tree> > (37);
1087 34151432 : split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1088 34151432 : *var = fold_convert (TREE_TYPE (exp), *var);
1089 34151432 : cache->empty ();
1090 34151432 : }
1091 :
1092 : /* Returns the address ADDR of an object in a canonical shape (without nop
1093 : casts, and with type of pointer to the object). */
1094 :
1095 : static tree
1096 15911730 : canonicalize_base_object_address (tree addr)
1097 : {
1098 15911730 : tree orig = addr;
1099 :
1100 15911730 : STRIP_NOPS (addr);
1101 :
1102 : /* The base address may be obtained by casting from integer, in that case
1103 : keep the cast. */
1104 15911730 : if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1105 : return orig;
1106 :
1107 15839689 : if (TREE_CODE (addr) != ADDR_EXPR)
1108 : return addr;
1109 :
1110 9475444 : return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1111 : }
1112 :
1113 : /* Analyze the behavior of memory reference REF within STMT.
1114 : There are two modes:
1115 :
1116 : - BB analysis. In this case we simply split the address into base,
1117 : init and offset components, without reference to any containing loop.
1118 : The resulting base and offset are general expressions and they can
1119 : vary arbitrarily from one iteration of the containing loop to the next.
1120 : The step is always zero.
1121 :
1122 : - loop analysis. In this case we analyze the reference both wrt LOOP
1123 : and on the basis that the reference occurs (is "used") in LOOP;
1124 : see the comment above analyze_scalar_evolution_in_loop for more
1125 : information about this distinction. The base, init, offset and
1126 : step fields are all invariant in LOOP.
1127 :
1128 : Perform BB analysis if LOOP is null, or if LOOP is the function's
1129 : dummy outermost loop. In other cases perform loop analysis.
1130 :
1131 : Return true if the analysis succeeded and store the results in DRB if so.
1132 : BB analysis can only fail for bitfield or reversed-storage accesses.

 On success the following DRB fields are written: base_address, offset,
 init, step, base_alignment, base_misalignment, offset_alignment and
 step_alignment. Every failure return happens before the first store
 to DRB, so DRB is not modified by a failed analysis. Failures are
 reported through opt_result::failure_at with STMT as the location.

 NOTE(review): the may_be_nonaddressable_p check below is not guarded
 by the analysis mode, so it looks like BB analysis can also fail with
 "base not addressable" -- confirm and update the sentence above. */
1133 :
1134 : opt_result
1135 16457116 : dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1136 : class loop *loop, const gimple *stmt)
1137 : {
1138 16457116 : poly_int64 pbitsize, pbitpos;
1139 16457116 : tree base, poffset;
1140 16457116 : machine_mode pmode;
1141 16457116 : int punsignedp, preversep, pvolatilep;
1142 16457116 : affine_iv base_iv, offset_iv;
1143 16457116 : tree init, dinit, step;
       /* Loop analysis is selected only for a non-null LOOP whose NUM is
          nonzero; everything else gets BB analysis. */
1144 16457116 : bool in_loop = (loop && loop->num);
1145 :
1146 16457116 : if (dump_file && (dump_flags & TDF_DETAILS))
1147 68190 : fprintf (dump_file, "analyze_innermost: ");
1148 :
1149 16457116 : base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1150 : &punsignedp, &preversep, &pvolatilep);
1151 16457116 : gcc_assert (base != NULL_TREE);
1152 :
       /* The constant bit position must be a whole number of bytes;
          PBYTEPOS receives that byte count. */
1153 16457116 : poly_int64 pbytepos;
1154 16457116 : if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1155 36623 : return opt_result::failure_at (stmt,
1156 : "failed: bit offset alignment.\n");
1157 :
1158 16420493 : if (preversep)
1159 653 : return opt_result::failure_at (stmt,
1160 : "failed: reverse storage order.\n");
1161 :
1162 : /* Calculate the alignment and misalignment for the inner reference.
 Both values are obtained in bits and converted to bytes below. */
1163 16419840 : unsigned int HOST_WIDE_INT bit_base_misalignment;
1164 16419840 : unsigned int bit_base_alignment;
1165 16419840 : get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1166 :
1167 : /* There are no bitfield references remaining in BASE, so the values
1168 : we got back must be whole bytes. */
1169 16419840 : gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1170 : && bit_base_misalignment % BITS_PER_UNIT == 0);
1171 16419840 : unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1172 16419840 : poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1173 :
1174 16419840 : if (TREE_CODE (base) == MEM_REF)
1175 : {
1176 7062619 : if (!integer_zerop (TREE_OPERAND (base, 1)))
1177 : {
1178 : /* Subtract MOFF from the base and add it to POFFSET instead.
1179 : Adjust the misalignment to reflect the amount we subtracted. */
1180 1255299 : poly_offset_int moff = mem_ref_offset (base);
1181 1255299 : base_misalignment -= moff.force_shwi ();
1182 1255299 : tree mofft = wide_int_to_tree (sizetype, moff);
1183 1255299 : if (!poffset)
1184 1245283 : poffset = mofft;
1185 : else
1186 10016 : poffset = size_binop (PLUS_EXPR, poffset, mofft);
1187 : }
       /* From here on BASE is the address operand of the MEM_REF. */
1188 7062619 : base = TREE_OPERAND (base, 0);
1189 : }
1190 : else
1191 : {
1192 9357221 : if (may_be_nonaddressable_p (base))
1193 2072 : return opt_result::failure_at (stmt,
1194 : "failed: base not addressable.\n");
       /* From here on BASE is the address of the object. */
1195 9355149 : base = build_fold_addr_expr (base);
1196 : }
1197 :
1198 16417768 : if (in_loop)
1199 : {
1200 3175987 : if (!simple_iv (loop, loop, base, &base_iv, true))
1201 424876 : return opt_result::failure_at
1202 424876 : (stmt, "failed: evolution of base is not affine.\n");
1203 : }
1204 : else
1205 : {
       /* BB analysis: use the address as is, with a zero step. */
1206 13241781 : base_iv.base = base;
1207 13241781 : base_iv.step = ssize_int (0);
1208 13241781 : base_iv.no_overflow = true;
1209 : }
1210 :
1211 15992892 : if (!poffset)
1212 : {
1213 13209398 : offset_iv.base = ssize_int (0);
1214 13209398 : offset_iv.step = ssize_int (0);
1215 : }
1216 : else
1217 : {
1218 2783494 : if (!in_loop)
1219 : {
1220 1488341 : offset_iv.base = poffset;
1221 1488341 : offset_iv.step = ssize_int (0);
1222 : }
1223 1295153 : else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1224 81162 : return opt_result::failure_at
1225 81162 : (stmt, "failed: evolution of offset is not affine.\n");
1226 : }
1227 :
       /* INIT starts out as the constant byte position computed above. */
1228 15911730 : init = ssize_int (pbytepos);
1229 :
1230 : /* Subtract any constant component from the base and add it to INIT instead.
1231 : Adjust the misalignment to reflect the amount we subtracted. */
1232 15911730 : split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1233 15911730 : init = size_binop (PLUS_EXPR, init, dinit);
1234 15911730 : base_misalignment -= TREE_INT_CST_LOW (dinit);
1235 :
       /* Likewise move the constant component of the offset into INIT.
          The base misalignment is not adjusted for this one. */
1236 15911730 : split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1237 15911730 : init = size_binop (PLUS_EXPR, init, dinit);
1238 :
       /* The overall step is the sum of the base and offset steps, both
          converted to ssizetype. */
1239 15911730 : step = size_binop (PLUS_EXPR,
1240 : fold_convert (ssizetype, base_iv.step),
1241 : fold_convert (ssizetype, offset_iv.step));
1242 :
1243 15911730 : base = canonicalize_base_object_address (base_iv.base);
1244 :
1245 : /* See if get_pointer_alignment can guarantee a higher alignment than
1246 : the one we calculated above. */
1247 15911730 : unsigned int HOST_WIDE_INT alt_misalignment;
1248 15911730 : unsigned int alt_alignment;
1249 15911730 : get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1250 :
1251 : /* As above, these values must be whole bytes. */
1252 15911730 : gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1253 : && alt_misalignment % BITS_PER_UNIT == 0);
1254 15911730 : alt_alignment /= BITS_PER_UNIT;
1255 15911730 : alt_misalignment /= BITS_PER_UNIT;
1256 :
1257 15911730 : if (base_alignment < alt_alignment)
1258 : {
1259 143503 : base_alignment = alt_alignment;
1260 143503 : base_misalignment = alt_misalignment;
1261 : }
1262 :
       /* All failure exits are behind us; publish the results. */
1263 15911730 : drb->base_address = base;
1264 15911730 : drb->offset = fold_convert (ssizetype, offset_iv.base);
1265 15911730 : drb->init = init;
1266 15911730 : drb->step = step;
       /* If known_misalignment succeeds it has already stored the
          misalignment in DRB. Otherwise record the alignment that
          known_alignment derives from BASE_MISALIGNMENT, together with
          a misalignment of zero. */
1267 15911730 : if (known_misalignment (base_misalignment, base_alignment,
1268 : &drb->base_misalignment))
1269 15911730 : drb->base_alignment = base_alignment;
1270 : else
1271 : {
1272 : drb->base_alignment = known_alignment (base_misalignment);
1273 : drb->base_misalignment = 0;
1274 : }
1275 15911730 : drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1276 15911730 : drb->step_alignment = highest_pow2_factor (step);
1277 :
1278 15911730 : if (dump_file && (dump_flags & TDF_DETAILS))
1279 64722 : fprintf (dump_file, "success.\n");
1280 :
1281 15911730 : return opt_result::success ();
1282 : }
1283 :
1284 : /* Return true if OP is a valid component reference for a DR access
1285 : function. This accepts a subset of what handled_component_p accepts. */
1286 :
1287 : static bool
1288 5704610 : access_fn_component_p (tree op)
1289 : {
1290 5704610 : switch (TREE_CODE (op))
1291 : {
1292 : case REALPART_EXPR:
1293 : case IMAGPART_EXPR:
1294 : case ARRAY_REF:
1295 : return true;
1296 :
1297 1931091 : case COMPONENT_REF:
1298 1931091 : return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1299 :
1300 0 : default:
1301 0 : return false;
1302 : }
1303 : }
1304 :
1305 : /* Returns whether BASE can have a access_fn_component_p with BASE
1306 : as base. */
1307 :
1308 : static bool
1309 1707913 : base_supports_access_fn_components_p (tree base)
1310 : {
1311 1707913 : switch (TREE_CODE (TREE_TYPE (base)))
1312 : {
1313 : case COMPLEX_TYPE:
1314 : case ARRAY_TYPE:
1315 : case RECORD_TYPE:
1316 : return true;
1317 1700990 : default:
1318 1700990 : return false;
1319 : }
1320 : }
1321 :
1322 : /* Determines the base object and the list of access functions of memory
 reference REF, analyzed in LOOP and instantiated before NEST, and
 stores them in DRI->base_object and DRI->access_fns.

 If NEST is null, REF itself becomes the base object and the list of
 access functions is empty.

 Otherwise components are peeled off REF from the outside in, and one
 access function is pushed for each of them, in that order. The walk
 stops at the first component that cannot be turned into an access
 function. A MEM_REF base whose address evolves in the nest adds one
 further access function and sets DRI->unconstrained_base; a DECL base
 is wrapped in a MEM_REF.

 NOTE(review): DRI->unconstrained_base is only ever assigned (to true)
 on the MEM_REF path, so this presumably relies on the caller passing
 a zero-initialised DRI -- confirm against all callers. */
1324 :
1325 : static void
1326 16555979 : dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1327 : {
1328 : /* If analyzing a basic-block there are no indices to analyze
1329 : and thus no access functions. */
1330 16555979 : if (!nest)
1331 : {
1332 13280437 : dri->base_object = ref;
1333 13280437 : dri->access_fns.create (0);
1334 13280437 : return;
1335 : }
1336 :
1337 3275542 : vec<tree> access_fns = vNULL;
1338 :
1339 : /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1340 : into a two element array with a constant index. The base is
1341 : then just the immediate underlying object. The index pushed is
 zero for the real part and one for the imaginary part. */
1342 3275542 : if (TREE_CODE (ref) == REALPART_EXPR)
1343 : {
1344 41705 : ref = TREE_OPERAND (ref, 0);
1345 41705 : access_fns.safe_push (integer_zero_node);
1346 : }
1347 3233837 : else if (TREE_CODE (ref) == IMAGPART_EXPR)
1348 : {
1349 39874 : ref = TREE_OPERAND (ref, 0);
1350 39874 : access_fns.safe_push (integer_one_node);
1351 : }
1352 :
1353 : /* Analyze access functions of dimensions we know to be independent.
1354 : The list of component references handled here should be kept in
1355 : sync with access_fn_component_p. */
1356 5784563 : while (handled_component_p (ref))
1357 : {
1358 2660691 : if (TREE_CODE (ref) == ARRAY_REF)
1359 : {
       /* The access function of an array dimension is the scalar
          evolution of its index, instantiated before NEST. */
1360 1303776 : tree op = TREE_OPERAND (ref, 1);
1361 1303776 : tree access_fn = analyze_scalar_evolution (loop, op);
1362 1303776 : access_fn = instantiate_scev (nest, loop, access_fn);
1363 1303776 : access_fns.safe_push (access_fn);
1364 : }
1365 1356915 : else if (TREE_CODE (ref) == COMPONENT_REF
1366 1356915 : && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1367 : {
1368 : /* For COMPONENT_REFs of records (but not unions!) use the
1369 : FIELD_DECL offset as constant access function so we can
1370 : disambiguate a[i].f1 and a[i].f2. The value pushed is
 component_ref_field_offset scaled by BITS_PER_UNIT plus
 the field's DECL_FIELD_BIT_OFFSET, computed in bitsizetype. */
1371 1205245 : tree off = component_ref_field_offset (ref);
1372 1205245 : off = size_binop (PLUS_EXPR,
1373 : size_binop (MULT_EXPR,
1374 : fold_convert (bitsizetype, off),
1375 : bitsize_int (BITS_PER_UNIT)),
1376 : DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1377 1205245 : access_fns.safe_push (off);
1378 : }
1379 : else
1380 : /* If we have an unhandled component we could not translate
1381 : to an access function stop analyzing. We have determined
1382 : our base object in this case. */
1383 : break;
1384 :
1385 2509021 : ref = TREE_OPERAND (ref, 0);
1386 : }
1387 :
1388 : /* If the address operand of a MEM_REF base has an evolution in the
1389 : analyzed nest, add it as an additional independent access-function. */
1390 3275542 : if (TREE_CODE (ref) == MEM_REF)
1391 : {
1392 2302777 : tree op = TREE_OPERAND (ref, 0);
1393 2302777 : tree access_fn = analyze_scalar_evolution (loop, op);
1394 2302777 : access_fn = instantiate_scev (nest, loop, access_fn);
1395 2302777 : STRIP_NOPS (access_fn);
1396 2302777 : if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1397 : {
1398 1130225 : tree memoff = TREE_OPERAND (ref, 1);
1399 1130225 : tree base = initial_condition (access_fn);
1400 1130225 : tree orig_type = TREE_TYPE (base);
1401 1130225 : STRIP_USELESS_TYPE_CONVERSION (base);
1402 1130225 : tree off;
1403 1130225 : split_constant_offset (base, &base, &off);
1404 1130225 : STRIP_USELESS_TYPE_CONVERSION (base);
1405 : /* Fold the MEM_REF offset into the evolutions initial
1406 : value to make more bases comparable. */
1407 1130225 : if (!integer_zerop (memoff))
1408 : {
1409 120596 : off = size_binop (PLUS_EXPR, off,
1410 : fold_convert (ssizetype, memoff));
1411 120596 : memoff = build_int_cst (TREE_TYPE (memoff), 0);
1412 : }
1413 : /* Adjust the offset so it is a multiple of the access type
1414 : size and thus we separate bases that can possibly be used
1415 : to produce partial overlaps (which the access_fn machinery
1416 : cannot handle). */
1417 1130225 : wide_int rem;
1418 1130225 : if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1419 1130089 : && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1420 2259997 : && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1421 1129772 : rem = wi::mod_trunc
1422 1129772 : (wi::to_wide (off),
1423 2259544 : wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1424 1129772 : SIGNED);
1425 : else
1426 : /* If we can't compute the remainder simply force the initial
1427 : condition to zero. */
1428 453 : rem = wi::to_wide (off);
       /* OFF keeps the part that is a multiple of the access size; the
          remainder REM becomes the offset of the rebuilt MEM_REF. */
1429 1130225 : off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1430 1130225 : memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1431 : /* And finally replace the initial condition. */
1432 2260450 : access_fn = chrec_replace_initial_condition
1433 1130225 : (access_fn, fold_convert (orig_type, off));
1434 : /* ??? This is still not a suitable base object for
1435 : dr_may_alias_p - the base object needs to be an
1436 : access that covers the object as whole. With
1437 : an evolution in the pointer this cannot be
1438 : guaranteed.
1439 : As a band-aid, mark the access so we can special-case
1440 : it in dr_may_alias_p. The dependence clique and base of
 the old MEM_REF are copied to the rebuilt one. */
1441 1130225 : tree old = ref;
1442 1130225 : ref = fold_build2_loc (EXPR_LOCATION (ref),
1443 1130225 : MEM_REF, TREE_TYPE (ref),
1444 : base, memoff);
1445 1130225 : MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1446 1130225 : MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1447 1130225 : dri->unconstrained_base = true;
1448 1130225 : access_fns.safe_push (access_fn);
1449 1130225 : }
1450 : }
1451 972765 : else if (DECL_P (ref))
1452 : {
1453 : /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1454 821095 : ref = build2 (MEM_REF, TREE_TYPE (ref),
1455 : build_fold_addr_expr (ref),
1456 : build_int_cst (reference_alias_ptr_type (ref), 0));
1457 : }
1458 :
1459 3275542 : dri->base_object = ref;
1460 3275542 : dri->access_fns = access_fns;
1461 : }
1462 :
1463 : /* Extracts the alias analysis information from the memory reference DR. */
1464 :
1465 : static void
1466 16445208 : dr_analyze_alias (struct data_reference *dr)
1467 : {
1468 16445208 : tree ref = DR_REF (dr);
1469 16445208 : tree base = get_base_address (ref), addr;
1470 :
1471 16445208 : if (INDIRECT_REF_P (base)
1472 16445208 : || TREE_CODE (base) == MEM_REF)
1473 : {
1474 7069233 : addr = TREE_OPERAND (base, 0);
1475 7069233 : if (TREE_CODE (addr) == SSA_NAME)
1476 7067863 : DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1477 : }
1478 16445208 : }
1479 :
1480 : /* Frees data reference DR. */
1481 :
1482 : void
1483 16922809 : free_data_ref (data_reference_p dr)
1484 : {
1485 16922809 : DR_ACCESS_FNS (dr).release ();
1486 16922809 : if (dr->alt_indices.base_object)
1487 110771 : dr->alt_indices.access_fns.release ();
1488 16922809 : free (dr);
1489 16922809 : }
1490 :
1491 : /* Analyze memory reference MEMREF, which is accessed in STMT.
1492 : The reference is a read if IS_READ is true, otherwise it is a write.
1493 : IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1494 : within STMT, i.e. that it might not occur even if STMT is executed
1495 : and runs to completion.
1496 :
1497 : Return the data_reference description of MEMREF. NEST is the outermost
1498 : loop in which the reference should be instantiated, LOOP is the loop
1499 : in which the data reference should be analyzed. */
1500 :
1501 : struct data_reference *
1502 16445208 : create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1503 : bool is_read, bool is_conditional_in_stmt)
1504 : {
1505 16445208 : struct data_reference *dr;
1506 :
1507 16445208 : if (dump_file && (dump_flags & TDF_DETAILS))
1508 : {
1509 66965 : fprintf (dump_file, "Creating dr for ");
1510 66965 : print_generic_expr (dump_file, memref, TDF_SLIM);
1511 66965 : fprintf (dump_file, "\n");
1512 : }
1513 :
1514 16445208 : dr = XCNEW (struct data_reference);
1515 16445208 : DR_STMT (dr) = stmt;
1516 16445208 : DR_REF (dr) = memref;
1517 16445208 : DR_IS_READ (dr) = is_read;
1518 16445208 : DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1519 :
1520 29725645 : dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1521 : nest != NULL ? loop : NULL, stmt);
1522 16445208 : dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1523 16445208 : dr_analyze_alias (dr);
1524 :
1525 16445208 : if (dump_file && (dump_flags & TDF_DETAILS))
1526 : {
1527 66965 : unsigned i;
1528 66965 : fprintf (dump_file, "\tbase_address: ");
1529 66965 : print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1530 66965 : fprintf (dump_file, "\n\toffset from base address: ");
1531 66965 : print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1532 66965 : fprintf (dump_file, "\n\tconstant offset from base address: ");
1533 66965 : print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1534 66965 : fprintf (dump_file, "\n\tstep: ");
1535 66965 : print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1536 66965 : fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1537 66965 : fprintf (dump_file, "\n\tbase misalignment: %d",
1538 : DR_BASE_MISALIGNMENT (dr));
1539 66965 : fprintf (dump_file, "\n\toffset alignment: %d",
1540 : DR_OFFSET_ALIGNMENT (dr));
1541 66965 : fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1542 66965 : fprintf (dump_file, "\n\tbase_object: ");
1543 66965 : print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1544 66965 : fprintf (dump_file, "\n");
1545 192319 : for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1546 : {
1547 58389 : fprintf (dump_file, "\tAccess function %d: ", i);
1548 58389 : print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1549 : }
1550 : }
1551 :
1552 16445208 : return dr;
1553 : }
1554 :
1555 : /* A helper function computes order between two tree expressions T1 and T2.
1556 : This is used in comparator functions sorting objects based on the order
1557 : of tree expressions. The function returns -1, 0, or 1. */
1558 :
1559 : int
1560 420105075 : data_ref_compare_tree (tree t1, tree t2)
1561 : {
1562 420105075 : int i, cmp;
1563 420105075 : enum tree_code code;
1564 420105075 : char tclass;
1565 :
1566 420105075 : if (t1 == t2)
1567 : return 0;
1568 191836559 : if (t1 == NULL)
1569 : return -1;
1570 191708003 : if (t2 == NULL)
1571 : return 1;
1572 :
1573 191629872 : STRIP_USELESS_TYPE_CONVERSION (t1);
1574 191629872 : STRIP_USELESS_TYPE_CONVERSION (t2);
1575 191629872 : if (t1 == t2)
1576 : return 0;
1577 :
1578 191057223 : if (TREE_CODE (t1) != TREE_CODE (t2)
1579 13642604 : && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1580 19328586 : return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1581 :
1582 177414619 : code = TREE_CODE (t1);
1583 177414619 : switch (code)
1584 : {
1585 51766045 : case INTEGER_CST:
1586 51766045 : return tree_int_cst_compare (t1, t2);
1587 :
1588 16 : case STRING_CST:
1589 16 : if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1590 16 : return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1591 0 : return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1592 0 : TREE_STRING_LENGTH (t1));
1593 :
1594 14930642 : case SSA_NAME:
1595 14930642 : if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1596 14930642 : return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1597 : break;
1598 :
1599 110717916 : default:
1600 110717916 : if (POLY_INT_CST_P (t1))
1601 : return compare_sizes_for_sort (wi::to_poly_widest (t1),
1602 : wi::to_poly_widest (t2));
1603 :
1604 110717916 : tclass = TREE_CODE_CLASS (code);
1605 :
1606 : /* For decls, compare their UIDs. */
1607 110717916 : if (tclass == tcc_declaration)
1608 : {
1609 20960378 : if (DECL_UID (t1) != DECL_UID (t2))
1610 20959851 : return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1611 : break;
1612 : }
1613 : /* For expressions, compare their operands recursively. */
1614 89757538 : else if (IS_EXPR_CODE_CLASS (tclass))
1615 : {
1616 160206340 : for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1617 : {
1618 104006098 : cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1619 104006098 : TREE_OPERAND (t2, i));
1620 104006098 : if (cmp != 0)
1621 : return cmp;
1622 : }
1623 : }
1624 : else
1625 0 : gcc_unreachable ();
1626 : }
1627 :
1628 : return 0;
1629 : }
1630 :
1631 : /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1632 : check. */
1633 :
1634 : opt_result
1635 220036 : runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1636 : {
1637 220036 : if (dump_enabled_p ())
1638 7815 : dump_printf (MSG_NOTE,
1639 : "consider run-time aliasing test between %T and %T\n",
1640 7815 : DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1641 :
1642 220036 : if (!speed_p)
1643 0 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1644 : "runtime alias check not supported when"
1645 : " optimizing for size.\n");
1646 :
1647 : /* FORNOW: We don't support versioning with outer-loop in either
1648 : vectorization or loop distribution. */
1649 220036 : if (loop != NULL && loop->inner != NULL)
1650 143 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1651 : "runtime alias check not supported for"
1652 : " outer loop.\n");
1653 :
1654 : /* FORNOW: We don't support handling different address spaces. */
1655 219893 : if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
1656 219893 : != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
1657 1 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1658 : "runtime alias check between different "
1659 : "address spaces not supported.\n");
1660 :
1661 219892 : return opt_result::success ();
1662 : }
1663 :
1664 : /* Operator == between two dr_with_seg_len objects.
1665 :
1666 : This equality operator is used to make sure two data refs
1667 : are the same one so that we will consider to combine the
1668 : aliasing checks of those two pairs of data dependent data
1669 : refs. */
1670 :
1671 : static bool
1672 141919 : operator == (const dr_with_seg_len& d1,
1673 : const dr_with_seg_len& d2)
1674 : {
1675 141919 : return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1676 141919 : DR_BASE_ADDRESS (d2.dr), 0)
1677 107872 : && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1678 106936 : && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1679 97976 : && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1680 97204 : && known_eq (d1.access_size, d2.access_size)
1681 235918 : && d1.align == d2.align);
1682 : }
1683 :
1684 : /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1685 : so that we can combine aliasing checks in one scan. */
1686 :
1687 : static int
1688 1160388 : comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1689 : {
1690 1160388 : const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1691 1160388 : const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1692 1160388 : const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1693 1160388 : const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1694 :
1695 : /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1696 : if a and c have the same basic address snd step, and b and d have the same
1697 : address and step. Therefore, if any a&c or b&d don't have the same address
1698 : and step, we don't care the order of those two pairs after sorting. */
1699 1160388 : int comp_res;
1700 :
1701 1160388 : if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1702 1160388 : DR_BASE_ADDRESS (b1.dr))) != 0)
1703 : return comp_res;
1704 601598 : if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1705 601598 : DR_BASE_ADDRESS (b2.dr))) != 0)
1706 : return comp_res;
1707 407832 : if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1708 407832 : DR_STEP (b1.dr))) != 0)
1709 : return comp_res;
1710 407212 : if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1711 407212 : DR_STEP (b2.dr))) != 0)
1712 : return comp_res;
1713 399649 : if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1714 399649 : DR_OFFSET (b1.dr))) != 0)
1715 : return comp_res;
1716 383388 : if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1717 383388 : DR_INIT (b1.dr))) != 0)
1718 : return comp_res;
1719 283024 : if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1720 283024 : DR_OFFSET (b2.dr))) != 0)
1721 : return comp_res;
1722 267899 : if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1723 267899 : DR_INIT (b2.dr))) != 0)
1724 : return comp_res;
1725 :
1726 : return 0;
1727 : }
1728 :
1729 : /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1730 :
1731 : static void
1732 1004 : dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1733 : {
1734 2008 : dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
1735 1004 : DR_REF (alias_pair->first.dr),
1736 1004 : DR_REF (alias_pair->second.dr));
1737 :
1738 1004 : dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1739 : alias_pair->first.seg_len);
1740 1004 : if (!operand_equal_p (alias_pair->first.seg_len,
1741 1004 : alias_pair->second.seg_len, 0))
1742 251 : dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1743 :
1744 1004 : dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
1745 1004 : dump_dec (MSG_NOTE, alias_pair->first.access_size);
1746 1004 : if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1747 : {
1748 231 : dump_printf (MSG_NOTE, " vs. ");
1749 231 : dump_dec (MSG_NOTE, alias_pair->second.access_size);
1750 : }
1751 :
1752 1004 : dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
1753 : alias_pair->first.align);
1754 1004 : if (alias_pair->first.align != alias_pair->second.align)
1755 73 : dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1756 :
1757 1004 : dump_printf (MSG_NOTE, "\n%sflags: ", indent);
1758 1004 : if (alias_pair->flags & DR_ALIAS_RAW)
1759 153 : dump_printf (MSG_NOTE, " RAW");
1760 1004 : if (alias_pair->flags & DR_ALIAS_WAR)
1761 795 : dump_printf (MSG_NOTE, " WAR");
1762 1004 : if (alias_pair->flags & DR_ALIAS_WAW)
1763 174 : dump_printf (MSG_NOTE, " WAW");
1764 1004 : if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1765 209 : dump_printf (MSG_NOTE, " ARBITRARY");
1766 1004 : if (alias_pair->flags & DR_ALIAS_SWAPPED)
1767 0 : dump_printf (MSG_NOTE, " SWAPPED");
1768 1004 : if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1769 0 : dump_printf (MSG_NOTE, " UNSWAPPED");
1770 1004 : if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1771 0 : dump_printf (MSG_NOTE, " MIXED_STEPS");
1772 1004 : if (alias_pair->flags == 0)
1773 0 : dump_printf (MSG_NOTE, " <none>");
1774 1004 : dump_printf (MSG_NOTE, "\n");
1775 1004 : }
1776 :
1777 : /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1778 : FACTOR is number of iterations that each data reference is accessed.
1779 :
1780 : Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1781 : we create an expression:
1782 :
1783 : ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1784 : || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0)
1785 :
1786 : for aliasing checks. However, in some cases we can decrease the number
1787 : of checks by combining two checks into one. For example, suppose we have
1788 : another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1789 : condition is satisfied:
1790 :
1791 : load_ptr_0 < load_ptr_1 &&
1792 : load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1793 :
1794 : (this condition means, in each iteration of vectorized loop, the accessed
1795 : memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1796 : load_ptr_1.)
1797 :
1798 : we then can use only the following expression to finish the aliasing checks
1799 : between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1800 :
1801 : ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1802 : || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1803 :
1804 : Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1805 : basic address. */
1806 :
1807 : void
1808 22642 : prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1809 : poly_uint64)
1810 : {
1811 22642 : if (alias_pairs->is_empty ())
1812 22642 : return;
1813 :
1814 : /* Canonicalize each pair so that the base components are ordered wrt
1815 : data_ref_compare_tree. This allows the loop below to merge more
1816 : cases. */
1817 : unsigned int i;
1818 : dr_with_seg_len_pair_t *alias_pair;
1819 90951 : FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1820 : {
1821 69182 : data_reference_p dr_a = alias_pair->first.dr;
1822 69182 : data_reference_p dr_b = alias_pair->second.dr;
1823 69182 : int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1824 : DR_BASE_ADDRESS (dr_b));
1825 69182 : if (comp_res == 0)
1826 1828 : comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1827 1828 : if (comp_res == 0)
1828 136 : comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1829 69182 : if (comp_res > 0)
1830 : {
1831 24572 : std::swap (alias_pair->first, alias_pair->second);
1832 24572 : alias_pair->flags |= DR_ALIAS_SWAPPED;
1833 : }
1834 : else
1835 44610 : alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1836 : }
1837 :
1838 : /* Sort the collected data ref pairs so that we can scan them once to
1839 : combine all possible aliasing checks. */
1840 21769 : alias_pairs->qsort (comp_dr_with_seg_len_pair);
1841 :
1842 : /* Scan the sorted dr pairs and check if we can combine alias checks
1843 : of two neighboring dr pairs. */
1844 : unsigned int last = 0;
1845 69182 : for (i = 1; i < alias_pairs->length (); ++i)
1846 : {
1847 : /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1848 47413 : dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1849 47413 : dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1850 :
1851 47413 : dr_with_seg_len *dr_a1 = &alias_pair1->first;
1852 47413 : dr_with_seg_len *dr_b1 = &alias_pair1->second;
1853 47413 : dr_with_seg_len *dr_a2 = &alias_pair2->first;
1854 47413 : dr_with_seg_len *dr_b2 = &alias_pair2->second;
1855 :
1856 : /* Remove duplicate data ref pairs. */
1857 47413 : if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1858 : {
1859 21875 : if (dump_enabled_p ())
1860 1655 : dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1861 1655 : DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1862 1655 : DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1863 21875 : alias_pair1->flags |= alias_pair2->flags;
1864 69288 : continue;
1865 : }
1866 :
1867 : /* Assume that we won't be able to merge the pairs, then correct
1868 : if we do. */
1869 25538 : last += 1;
1870 25538 : if (last != i)
1871 6714 : (*alias_pairs)[last] = (*alias_pairs)[i];
1872 :
1873 25538 : if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1874 : {
1875 : /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1876 : and DR_A1 and DR_A2 are two consecutive memrefs. */
1877 21555 : if (*dr_a1 == *dr_a2)
1878 : {
1879 14347 : std::swap (dr_a1, dr_b1);
1880 14347 : std::swap (dr_a2, dr_b2);
1881 : }
1882 :
1883 21555 : poly_int64 init_a1, init_a2;
1884 : /* Only consider cases in which the distance between the initial
1885 : DR_A1 and the initial DR_A2 is known at compile time. */
1886 39208 : if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1887 21555 : DR_BASE_ADDRESS (dr_a2->dr), 0)
1888 4411 : || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1889 4411 : DR_OFFSET (dr_a2->dr), 0)
1890 3902 : || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1891 25457 : || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1892 17660 : continue;
1893 :
1894 : /* Don't combine if we can't tell which one comes first. */
1895 3902 : if (!ordered_p (init_a1, init_a2))
1896 : continue;
1897 :
1898 : /* Work out what the segment length would be if we did combine
1899 : DR_A1 and DR_A2:
1900 :
1901 : - If DR_A1 and DR_A2 have equal lengths, that length is
1902 : also the combined length.
1903 :
1904 : - If DR_A1 and DR_A2 both have negative "lengths", the combined
1905 : length is the lower bound on those lengths.
1906 :
1907 : - If DR_A1 and DR_A2 both have positive lengths, the combined
1908 : length is the upper bound on those lengths.
1909 :
1910 : Other cases are unlikely to give a useful combination.
1911 :
1912 : The lengths both have sizetype, so the sign is taken from
1913 : the step instead. */
1914 3902 : poly_uint64 new_seg_len = 0;
1915 3902 : bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1916 3902 : dr_a2->seg_len, 0);
1917 3902 : if (new_seg_len_p)
1918 : {
1919 7 : poly_uint64 seg_len_a1, seg_len_a2;
1920 7 : if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1921 7 : || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1922 7 : continue;
1923 :
1924 0 : tree indicator_a = dr_direction_indicator (dr_a1->dr);
1925 0 : if (TREE_CODE (indicator_a) != INTEGER_CST)
1926 0 : continue;
1927 :
1928 0 : tree indicator_b = dr_direction_indicator (dr_a2->dr);
1929 0 : if (TREE_CODE (indicator_b) != INTEGER_CST)
1930 0 : continue;
1931 :
1932 0 : int sign_a = tree_int_cst_sgn (indicator_a);
1933 0 : int sign_b = tree_int_cst_sgn (indicator_b);
1934 :
1935 0 : if (sign_a <= 0 && sign_b <= 0)
1936 0 : new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1937 0 : else if (sign_a >= 0 && sign_b >= 0)
1938 0 : new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1939 : else
1940 0 : continue;
1941 : }
1942 : /* At this point we're committed to merging the refs. */
1943 :
1944 : /* Make sure dr_a1 starts left of dr_a2. */
1945 3895 : if (maybe_gt (init_a1, init_a2))
1946 : {
1947 0 : std::swap (*dr_a1, *dr_a2);
1948 0 : std::swap (init_a1, init_a2);
1949 : }
1950 :
1951 : /* The DR_Bs are equal, so only the DR_As can introduce
1952 : mixed steps. */
1953 3895 : if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1954 0 : alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1955 :
1956 3895 : if (new_seg_len_p)
1957 : {
1958 0 : dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1959 0 : new_seg_len);
1960 0 : dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1961 : }
1962 :
1963 : /* This is always positive due to the swap above. */
1964 3895 : poly_uint64 diff = init_a2 - init_a1;
1965 :
1966 : /* The new check will start at DR_A1. Make sure that its access
1967 : size encompasses the initial DR_A2. */
1968 3895 : if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1969 : {
1970 1385 : dr_a1->access_size = upper_bound (dr_a1->access_size,
1971 : diff + dr_a2->access_size);
1972 1385 : unsigned int new_align = known_alignment (dr_a1->access_size);
1973 1385 : dr_a1->align = MIN (dr_a1->align, new_align);
1974 : }
1975 3895 : if (dump_enabled_p ())
1976 1020 : dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1977 1020 : DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1978 1020 : DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1979 3895 : alias_pair1->flags |= alias_pair2->flags;
1980 3895 : last -= 1;
1981 : }
1982 : }
1983 21769 : alias_pairs->truncate (last + 1);
1984 :
1985 : /* Try to restore the original dr_with_seg_len order within each
1986 : dr_with_seg_len_pair_t. If we ended up combining swapped and
1987 : unswapped pairs into the same check, we have to invalidate any
1988 : RAW, WAR and WAW information for it. */
1989 21769 : if (dump_enabled_p ())
1990 801 : dump_printf (MSG_NOTE, "merged alias checks:\n");
1991 65181 : FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1992 : {
1993 43412 : unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1994 43412 : unsigned int swapped = (alias_pair->flags & swap_mask);
1995 43412 : if (swapped == DR_ALIAS_SWAPPED)
1996 13145 : std::swap (alias_pair->first, alias_pair->second);
1997 30267 : else if (swapped != DR_ALIAS_UNSWAPPED)
1998 3147 : alias_pair->flags |= DR_ALIAS_ARBITRARY;
1999 43412 : alias_pair->flags &= ~swap_mask;
2000 43412 : if (dump_enabled_p ())
2001 1004 : dump_alias_pair (alias_pair, " ");
2002 : }
2003 : }
2004 :
2005 : /* A subroutine of create_intersect_range_checks, with a subset of the
2006 : same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
2007 : to optimize cases in which the references form a simple RAW, WAR or
2008 : WAW dependence. Return true on success, storing the check in *COND_EXPR; return false without modifying *COND_EXPR otherwise. */
2009 :
2010 : static bool
2011 4731 : create_ifn_alias_checks (tree *cond_expr,
2012 : const dr_with_seg_len_pair_t &alias_pair)
2013 : {
2014 4731 : const dr_with_seg_len& dr_a = alias_pair.first;
2015 4731 : const dr_with_seg_len& dr_b = alias_pair.second;
2016 :
2017 : /* Check for cases in which:
2018 :
2019 : (a) we have a known RAW, WAR or WAW dependence
2020 : (b) the accesses are well-ordered in both the original and new code
2021 : (see the comment above the DR_ALIAS_* flags for details); and
2022 : (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2023 4731 : if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2024 : return false;
2025 :
2026 : /* Make sure that both DRs access the same pattern of bytes,
2027 : with a constant length and step. */
2028 3075 : poly_uint64 seg_len;
2029 3075 : if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2030 2676 : || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2031 2669 : || maybe_ne (dr_a.access_size, dr_b.access_size)
2032 2628 : || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2033 5703 : || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2034 462 : return false;
2035 :
2036 2613 : unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2037 2613 : tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2038 2613 : tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2039 :
2040 : /* See whether the target supports what we want to do. WAW checks are
2041 : equivalent to WAR checks here. */
2042 2577 : internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2043 2613 : ? IFN_CHECK_RAW_PTRS
2044 : : IFN_CHECK_WAR_PTRS);
2045 2613 : unsigned int align = MIN (dr_a.align, dr_b.align);
2046 2613 : poly_uint64 full_length = seg_len + bytes;
2047 2613 : if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2048 : full_length, align))
2049 : {
2050 2613 : full_length = seg_len + dr_a.access_size;
2051 2613 : if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2052 : full_length, align))
2053 : return false;
2054 : }
2055 :
2056 : /* Commit to using this form of test. */
2057 0 : addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2058 0 : addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2059 :
2060 0 : addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2061 0 : addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2062 :
2063 0 : *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2064 : ifn, boolean_type_node,
2065 : 4, addr_a, addr_b,
2066 0 : size_int (full_length),
2067 0 : size_int (align));
2068 :
2069 0 : if (dump_enabled_p ())
2070 : {
2071 0 : if (ifn == IFN_CHECK_RAW_PTRS)
2072 0 : dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2073 : else
2074 0 : dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2075 : }
2076 : return true;
2077 : }
2078 :
2079 : /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2080 : free of aliases, using a condition based on index values instead
2081 : of a condition based on addresses. Return true on success,
2082 : storing the condition in *COND_EXPR.
2083 :
2084 : This can only be done if the two data references in ALIAS_PAIR access
2085 : the same array object and the index is the only difference. For example,
2086 : if the two data references are DR_A and DR_B:
2087 :
2088 : DR_A DR_B
2089 : data-ref arr[i] arr[j]
2090 : base_object arr arr
2091 : index {i_0, +, 1}_loop {j_0, +, 1}_loop
2092 :
2093 : The addresses and their index are like:
2094 :
2095 : |<- ADDR_A ->| |<- ADDR_B ->|
2096 : ------------------------------------------------------->
2097 : | | | | | | | | | |
2098 : ------------------------------------------------------->
2099 : i_0 ... i_0+4 j_0 ... j_0+4
2100 :
2101 : We can create expression based on index rather than address:
2102 :
2103 : (unsigned) (i_0 - j_0 + 3) <= 6
2104 :
2105 : i.e. the indices are less than 4 apart.
2106 :
2107 : Note that the evolution step of the index needs to be considered in the comparison. */
2108 :
2109 : static bool
2110 4882 : create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2111 : const dr_with_seg_len_pair_t &alias_pair)
2112 : {
2113 4882 : const dr_with_seg_len &dr_a = alias_pair.first;
2114 4882 : const dr_with_seg_len &dr_b = alias_pair.second;
2115 4882 : if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2116 4882 : || integer_zerop (DR_STEP (dr_a.dr))
2117 4628 : || integer_zerop (DR_STEP (dr_b.dr))
2118 18640 : || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2119 364 : return false;
2120 :
2121 4518 : poly_uint64 seg_len1, seg_len2;
2122 4518 : if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2123 4518 : || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2124 269 : return false;
2125 :
2126 4249 : if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2127 : return false;
2128 :
2129 4249 : if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2130 : return false;
2131 :
2132 154 : if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2133 : return false;
2134 :
2135 152 : gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2136 :
2137 152 : bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2138 152 : unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2139 152 : if (neg_step)
2140 : {
2141 30 : abs_step = -abs_step;
2142 30 : seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2143 30 : seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2144 : }
2145 :
2146 : /* Infer the number of iterations with which the memory segment is accessed
2147 : by DR. In other words, alias is checked if the memory segment accessed by
2148 : DR_A in some iterations intersects with the memory segment accessed by DR_B
2149 : in the same number of iterations.
2150 : Note that segment length is a linear function of the number of iterations with
2151 : DR_STEP as the coefficient. */
2152 152 : poly_uint64 niter_len1, niter_len2;
2153 152 : if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2154 152 : || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2155 : return false;
2156 :
2157 : /* Divide each access size by the byte step, rounding up. */
2158 152 : poly_uint64 niter_access1, niter_access2;
2159 152 : if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2160 : abs_step, &niter_access1)
2161 152 : || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2162 : abs_step, &niter_access2))
2163 : return false;
2164 :
2165 152 : bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2166 :
2167 152 : int found = -1;
2168 311 : for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2169 : {
2170 160 : tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2171 160 : tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2172 : /* Two indices must be the same if they are not scev, or not scev wrt
2173 : the current loop being vectorized. */
2174 160 : if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2175 152 : || TREE_CODE (access2) != POLYNOMIAL_CHREC
2176 152 : || CHREC_VARIABLE (access1) != (unsigned)loop->num
2177 312 : || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2178 : {
2179 8 : if (operand_equal_p (access1, access2, 0))
2180 7 : continue;
2181 :
2182 : return false;
2183 : }
2184 152 : if (found >= 0)
2185 : return false;
2186 152 : found = i;
2187 : }
2188 :
2189 : /* Ought not to happen in practice, since if all accesses are equal then the
2190 : alias should be decidable at compile time. */
2191 151 : if (found < 0)
2192 : return false;
2193 :
2194 : /* The two indices must have the same step. */
2195 151 : tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2196 151 : tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2197 151 : if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2198 : return false;
2199 :
2200 151 : tree idx_step = CHREC_RIGHT (access1);
2201 : /* Index must have const step, otherwise DR_STEP won't be constant. */
2202 151 : gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2203 : /* Index must evaluate in the same direction as DR. */
2204 151 : gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2205 :
2206 151 : tree min1 = CHREC_LEFT (access1);
2207 151 : tree min2 = CHREC_LEFT (access2);
2208 151 : if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2209 : return false;
2210 :
2211 : /* Ideally, alias can be checked against loop's control IV, but we
2212 : need to prove linear mapping between control IV and reference
2213 : index. Although that should be true, we check against (array)
2214 : index of data reference. Like segment length, index length is
2215 : a linear function of the number of iterations with index_step as
2216 : the coefficient, i.e, niter_len * idx_step. */
2217 151 : offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2218 : SIGNED);
2219 151 : if (neg_step)
2220 30 : abs_idx_step = -abs_idx_step;
2221 302 : poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2222 302 : poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2223 151 : poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2224 151 : poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2225 :
2226 151 : gcc_assert (known_ge (idx_len1, 0)
2227 : && known_ge (idx_len2, 0)
2228 : && known_ge (idx_access1, 0)
2229 : && known_ge (idx_access2, 0));
2230 :
2231 : /* Each access has the following pattern, with lengths measured
2232 : in units of INDEX:
2233 :
2234 : <-- idx_len -->
2235 : <--- A: -ve step --->
2236 : +-----+-------+-----+-------+-----+
2237 : | n-1 | ..... | 0 | ..... | n-1 |
2238 : +-----+-------+-----+-------+-----+
2239 : <--- B: +ve step --->
2240 : <-- idx_len -->
2241 : |
2242 : min
2243 :
2244 : where "n" is the number of scalar iterations covered by the segment
2245 : and where each access spans idx_access units.
2246 :
2247 : A is the range of bytes accessed when the step is negative,
2248 : B is the range when the step is positive.
2249 :
2250 : When checking for general overlap, we need to test whether
2251 : the range:
2252 :
2253 : [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2254 :
2255 : overlaps:
2256 :
2257 : [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2258 :
2259 : where:
2260 :
2261 : low_offsetN = +ve step ? 0 : -idx_lenN;
2262 : high_offsetN = +ve step ? idx_lenN : 0;
2263 :
2264 : This is equivalent to testing whether:
2265 :
2266 : min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2267 : && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2268 :
2269 : Converting this into a single test, there is an overlap if:
2270 :
2271 : 0 <= min2 - min1 + bias <= limit
2272 :
2273 : where bias = high_offset2 + idx_access2 - 1 - low_offset1
2274 : limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2275 : + (high_offset2 - low_offset2 + idx_access2 - 1)
2276 : i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2277 :
2278 : Combining the tests requires limit to be computable in an unsigned
2279 : form of the index type; if it isn't, we fall back to the usual
2280 : pointer-based checks.
2281 :
2282 : We can do better if DR_B is a write and if DR_A and DR_B are
2283 : well-ordered in both the original and the new code (see the
2284 : comment above the DR_ALIAS_* flags for details). In this case
2285 : we know that for each i in [0, n-1], the write performed by
2286 : access i of DR_B occurs after access numbers j<=i of DR_A in
2287 : both the original and the new code. Any write or anti
2288 : dependencies wrt those DR_A accesses are therefore maintained.
2289 :
2290 : We just need to make sure that each individual write in DR_B does not
2291 : overlap any higher-indexed access in DR_A; such DR_A accesses happen
2292 : after the DR_B access in the original code but happen before it in
2293 : the new code.
2294 :
2295 : We know the steps for both accesses are equal, so by induction, we
2296 : just need to test whether the first write of DR_B overlaps a later
2297 : access of DR_A. In other words, we need to move min1 along by
2298 : one iteration:
2299 :
2300 : min1' = min1 + idx_step
2301 :
2302 : and use the ranges:
2303 :
2304 : [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2305 :
2306 : and:
2307 :
2308 : [min2, min2 + idx_access2 - 1]
2309 :
2310 : where:
2311 :
2312 : low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2313 : high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */
2314 151 : if (waw_or_war_p)
2315 120 : idx_len1 -= abs_idx_step;
2316 :
2317 151 : poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2318 151 : if (!waw_or_war_p)
2319 151 : limit += idx_len2;
2320 :
2321 151 : tree utype = unsigned_type_for (TREE_TYPE (min1));
2322 151 : if (!wi::fits_to_tree_p (limit, utype))
2323 : return false;
2324 :
2325 151 : poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2326 151 : poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2327 151 : poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2328 : /* Equivalent to adding IDX_STEP to MIN1. */
2329 151 : if (waw_or_war_p)
2330 120 : bias -= wi::to_offset (idx_step);
2331 :
2332 151 : tree subject = fold_build2 (MINUS_EXPR, utype,
2333 : fold_convert (utype, min2),
2334 : fold_convert (utype, min1));
2335 151 : subject = fold_build2 (PLUS_EXPR, utype, subject,
2336 : wide_int_to_tree (utype, bias));
2337 151 : tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2338 : wide_int_to_tree (utype, limit));
2339 151 : if (*cond_expr)
2340 0 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2341 : *cond_expr, part_cond_expr);
2342 : else
2343 151 : *cond_expr = part_cond_expr;
2344 151 : if (dump_enabled_p ())
2345 : {
2346 133 : if (waw_or_war_p)
2347 103 : dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2348 : else
2349 30 : dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2350 : }
2351 : return true;
2352 : }
2353 :
2354 : /* A subroutine of create_intersect_range_checks, with a subset of the
2355 : same arguments. Try to optimize cases in which the second access
2356 : is a write and in which some overlap is valid. Return true on success, storing the condition in *COND_EXPR; return false without modifying *COND_EXPR otherwise. */
2357 :
2358 : static bool
2359 4731 : create_waw_or_war_checks (tree *cond_expr,
2360 : const dr_with_seg_len_pair_t &alias_pair)
2361 : {
2362 4731 : const dr_with_seg_len& dr_a = alias_pair.first;
2363 4731 : const dr_with_seg_len& dr_b = alias_pair.second;
2364 :
2365 : /* Check for cases in which:
2366 :
2367 : (a) DR_B is always a write;
2368 : (b) the accesses are well-ordered in both the original and new code
2369 : (see the comment above the DR_ALIAS_* flags for details); and
2370 : (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2371 4731 : if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2372 : return false;
2373 :
2374 : /* Check for equal (but possibly variable) steps. */
2375 3032 : tree step = DR_STEP (dr_a.dr);
2376 3032 : if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2377 : return false;
2378 :
2379 : /* Make sure that we can operate on sizetype without loss of precision. */
2380 2640 : tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2381 2640 : if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2382 : return false;
2383 :
2384 : /* All addresses involved are known to have a common alignment ALIGN.
2385 : We can therefore subtract ALIGN from an exclusive endpoint to get
2386 : an inclusive endpoint. In the best (and common) case, ALIGN is the
2387 : same as the access sizes of both DRs, and so subtracting ALIGN
2388 : cancels out the addition of an access size. */
2389 2640 : unsigned int align = MIN (dr_a.align, dr_b.align);
2390 2640 : poly_uint64 last_chunk_a = dr_a.access_size - align;
2391 2640 : poly_uint64 last_chunk_b = dr_b.access_size - align;
2392 :
2393 : /* Get a boolean expression that is true when the step is negative. */
2394 2640 : tree indicator = dr_direction_indicator (dr_a.dr);
2395 2640 : tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2396 : fold_convert (ssizetype, indicator),
2397 : ssize_int (0));
2398 :
2399 : /* Get lengths in sizetype. */
2400 2640 : tree seg_len_a
2401 2640 : = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2402 2640 : step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2403 :
2404 : /* Each access has the following pattern:
2405 :
2406 : <- |seg_len| ->
2407 : <--- A: -ve step --->
2408 : +-----+-------+-----+-------+-----+
2409 : | n-1 | ..... | 0 | ..... | n-1 |
2410 : +-----+-------+-----+-------+-----+
2411 : <--- B: +ve step --->
2412 : <- |seg_len| ->
2413 : |
2414 : base address
2415 :
2416 : where "n" is the number of scalar iterations covered by the segment.
2417 :
2418 : A is the range of bytes accessed when the step is negative,
2419 : B is the range when the step is positive.
2420 :
2421 : We know that DR_B is a write. We also know (from checking that
2422 : DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2423 : the write performed by access i of DR_B occurs after access numbers
2424 : j<=i of DR_A in both the original and the new code. Any write or
2425 : anti dependencies wrt those DR_A accesses are therefore maintained.
2426 :
2427 : We just need to make sure that each individual write in DR_B does not
2428 : overlap any higher-indexed access in DR_A; such DR_A accesses happen
2429 : after the DR_B access in the original code but happen before it in
2430 : the new code.
2431 :
2432 : We know the steps for both accesses are equal, so by induction, we
2433 : just need to test whether the first write of DR_B overlaps a later
2434 : access of DR_A. In other words, we need to move addr_a along by
2435 : one iteration:
2436 :
2437 : addr_a' = addr_a + step
2438 :
2439 : and check whether:
2440 :
2441 : [addr_b, addr_b + last_chunk_b]
2442 :
2443 : overlaps:
2444 :
2445 : [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2446 :
2447 : where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2448 :
2449 : low_offset_a = +ve step ? 0 : seg_len_a - step
2450 : high_offset_a = +ve step ? seg_len_a - step : 0
2451 :
2452 : This is equivalent to testing whether:
2453 :
2454 : addr_a' + low_offset_a <= addr_b + last_chunk_b
2455 : && addr_b <= addr_a' + high_offset_a + last_chunk_a
2456 :
2457 : Converting this into a single test, there is an overlap if:
2458 :
2459 : 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2460 :
2461 : where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2462 :
2463 : If DR_A is performed, limit + |step| - last_chunk_b is known to be
2464 : less than the size of the object underlying DR_A. We also know
2465 : that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2466 : guaranteed at compile time. There can therefore be no overflow if
2467 : "limit" is calculated in an unsigned type with pointer precision. */
2468 2640 : tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2469 : DR_OFFSET (dr_a.dr));
2470 2640 : addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2471 :
2472 2640 : tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2473 : DR_OFFSET (dr_b.dr));
2474 2640 : addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2475 :
2476 : /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2477 2640 : addr_a = fold_build_pointer_plus (addr_a, step);
2478 2640 : tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2479 : seg_len_a, step);
2480 2640 : if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2481 3 : seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2482 :
2483 2640 : tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2484 : seg_len_a_minus_step, size_zero_node);
2485 2640 : if (!CONSTANT_CLASS_P (low_offset_a))
2486 3 : low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2487 :
2488 : /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2489 : but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2490 2640 : tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2491 : low_offset_a);
2492 :
2493 : /* The amount added to addr_b - addr_a'. */
2494 2640 : tree bias = fold_build2 (MINUS_EXPR, sizetype,
2495 : size_int (last_chunk_b), low_offset_a);
2496 :
2497 2640 : tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2498 2640 : limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2499 : size_int (last_chunk_a + last_chunk_b));
2500 :
2501 2640 : tree subject = fold_build2 (MINUS_EXPR, sizetype,
2502 : fold_convert (sizetype, addr_b),
2503 : fold_convert (sizetype, addr_a));
2504 2640 : subject = fold_build2 (PLUS_EXPR, sizetype, subject, bias);
2505 :
2506 2640 : *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2507 2640 : if (dump_enabled_p ())
2508 322 : dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2509 : return true;
2510 : }
2511 :
2512 : /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2513 : every address ADDR accessed by D:
2514 :
2515 : *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2516 :
2517 : In this case, every element accessed by D is aligned to at least
2518 : ALIGN bytes.
2519 :
2520 : If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2521 :
2522 : *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT. */
2523 :
2524 : static void
2525 4182 : get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2526 : tree *seg_max_out, HOST_WIDE_INT align)
2527 : {
2528 : /* Each access has the following pattern:
2529 :
2530 : <- |seg_len| ->
2531 : <--- A: -ve step --->
2532 : +-----+-------+-----+-------+-----+
2533 : | n-1 | ..... | 0 | ..... | n-1 |
2534 : +-----+-------+-----+-------+-----+
2535 : <--- B: +ve step --->
2536 : <- |seg_len| ->
2537 : |
2538 : base address
2539 :
2540 : where "n" is the number of scalar iterations covered by the segment.
2541 : (This should be VF for a particular pair if we know that both steps
2542 : are the same, otherwise it will be the full number of scalar loop
2543 : iterations.)
2544 :
2545 : A is the range of bytes accessed when the step is negative,
2546 : B is the range when the step is positive.
2547 :
2548 : If the access size is "access_size" bytes, the lowest addressed byte is:
2549 :
2550 : base + (step < 0 ? seg_len : 0) [LB]
2551 :
2552 : and the highest addressed byte is always below:
2553 :
2554 : base + (step < 0 ? 0 : seg_len) + access_size [UB]
2555 :
2556 : Thus:
2557 :
2558 : LB <= ADDR < UB
2559 :
2560 : If ALIGN is nonzero, all three values are aligned to at least ALIGN
2561 : bytes, so:
2562 :
2563 : LB <= ADDR <= UB - ALIGN
2564 :
2565 : where "- ALIGN" folds naturally with the "+ access_size" and often
2566 : cancels it out.
2567 :
2568 : We don't try to simplify LB and UB beyond this (e.g. by using
2569 : MIN and MAX based on whether seg_len rather than the stride is
2570 : negative) because it is possible for the absolute size of the
2571 : segment to overflow the range of a ssize_t.
2572 :
2573 : Keeping the pointer_plus outside of the cond_expr should allow
2574 : the cond_exprs to be shared with other alias checks. */
2575 4182 : tree indicator = dr_direction_indicator (d.dr);
2576 4182 : tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2577 : fold_convert (ssizetype, indicator),
2578 : ssize_int (0));
2579 4182 : tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2580 : DR_OFFSET (d.dr));
2581 4182 : addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2582 4182 : tree seg_len
2583 4182 : = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2584 :
2585 4182 : tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2586 : seg_len, size_zero_node);
2587 4182 : tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2588 : size_zero_node, seg_len);
2589 4182 : max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2590 : size_int (d.access_size - align));
2591 :
2592 4182 : *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2593 4182 : *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2594 4182 : }
2595 :
2596 : /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2597 : storing the condition in *COND_EXPR. The fallback is to generate a
2598 : a test that the two accesses do not overlap:
2599 :
2600 : end_a <= start_b || end_b <= start_a. */
2601 :
2602 : static void
2603 4882 : create_intersect_range_checks (class loop *loop, tree *cond_expr,
2604 : const dr_with_seg_len_pair_t &alias_pair)
2605 : {
2606 4882 : const dr_with_seg_len& dr_a = alias_pair.first;
2607 4882 : const dr_with_seg_len& dr_b = alias_pair.second;
2608 4882 : *cond_expr = NULL_TREE;
2609 4882 : if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2610 2791 : return;
2611 :
2612 4731 : if (create_ifn_alias_checks (cond_expr, alias_pair))
2613 : return;
2614 :
2615 4731 : if (create_waw_or_war_checks (cond_expr, alias_pair))
2616 : return;
2617 :
2618 2091 : unsigned HOST_WIDE_INT min_align;
2619 2091 : tree_code cmp_code;
2620 : /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2621 : are equivalent. This is just an optimization heuristic. */
2622 2091 : if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2623 1998 : && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2624 : {
2625 : /* In this case adding access_size to seg_len is likely to give
2626 : a simple X * step, where X is either the number of scalar
2627 : iterations or the vectorization factor. We're better off
2628 : keeping that, rather than subtracting an alignment from it.
2629 :
2630 : In this case the maximum values are exclusive and so there is
2631 : no alias if the maximum of one segment equals the minimum
2632 : of another. */
2633 : min_align = 0;
2634 : cmp_code = LE_EXPR;
2635 : }
2636 : else
2637 : {
2638 : /* Calculate the minimum alignment shared by all four pointers,
2639 : then arrange for this alignment to be subtracted from the
2640 : exclusive maximum values to get inclusive maximum values.
2641 : This "- min_align" is cumulative with a "+ access_size"
2642 : in the calculation of the maximum values. In the best
2643 : (and common) case, the two cancel each other out, leaving
2644 : us with an inclusive bound based only on seg_len. In the
2645 : worst case we're simply adding a smaller number than before.
2646 :
2647 : Because the maximum values are inclusive, there is an alias
2648 : if the maximum value of one segment is equal to the minimum
2649 : value of the other. */
2650 200 : min_align = std::min (dr_a.align, dr_b.align);
2651 200 : cmp_code = LT_EXPR;
2652 : }
2653 :
2654 2091 : tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2655 2091 : get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2656 2091 : get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2657 :
2658 2091 : *cond_expr
2659 2091 : = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2660 : fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2661 : fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2662 2091 : if (dump_enabled_p ())
2663 282 : dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2664 : }
2665 :
2666 : /* Create a conditional expression that represents the run-time checks for
2667 : overlapping of address ranges represented by a list of data references
2668 : pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2669 : COND_EXPR is the conditional expression to be used in the if statement
2670 : that controls which version of the loop gets executed at runtime. */
2671 :
2672 : void
2673 3257 : create_runtime_alias_checks (class loop *loop,
2674 : const vec<dr_with_seg_len_pair_t> *alias_pairs,
2675 : tree * cond_expr)
2676 : {
2677 3257 : tree part_cond_expr;
2678 :
2679 14653 : for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2680 : {
2681 4882 : gcc_assert (alias_pair.flags);
2682 4882 : if (dump_enabled_p ())
2683 737 : dump_printf (MSG_NOTE,
2684 : "create runtime check for data references %T and %T\n",
2685 737 : DR_REF (alias_pair.first.dr),
2686 737 : DR_REF (alias_pair.second.dr));
2687 :
2688 : /* Create condition expression for each pair data references. */
2689 4882 : create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2690 4882 : if (*cond_expr)
2691 4796 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2692 : *cond_expr, part_cond_expr);
2693 : else
2694 86 : *cond_expr = part_cond_expr;
2695 : }
2696 3257 : }
2697 :
2698 : /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2699 : expressions. */
2700 : static bool
2701 0 : dr_equal_offsets_p1 (tree offset1, tree offset2)
2702 : {
2703 0 : bool res;
2704 :
2705 0 : STRIP_NOPS (offset1);
2706 0 : STRIP_NOPS (offset2);
2707 :
2708 0 : if (offset1 == offset2)
2709 : return true;
2710 :
2711 0 : if (TREE_CODE (offset1) != TREE_CODE (offset2)
2712 0 : || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2713 : return false;
2714 :
2715 0 : res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2716 0 : TREE_OPERAND (offset2, 0));
2717 :
2718 0 : if (!res || !BINARY_CLASS_P (offset1))
2719 : return res;
2720 :
2721 0 : res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2722 0 : TREE_OPERAND (offset2, 1));
2723 :
2724 0 : return res;
2725 : }
2726 :
2727 : /* Check if DRA and DRB have equal offsets. */
2728 : bool
2729 0 : dr_equal_offsets_p (struct data_reference *dra,
2730 : struct data_reference *drb)
2731 : {
2732 0 : tree offset1, offset2;
2733 :
2734 0 : offset1 = DR_OFFSET (dra);
2735 0 : offset2 = DR_OFFSET (drb);
2736 :
2737 0 : return dr_equal_offsets_p1 (offset1, offset2);
2738 : }
2739 :
2740 : /* Returns true if FNA == FNB. */
2741 :
2742 : static bool
2743 0 : affine_function_equal_p (affine_fn fna, affine_fn fnb)
2744 : {
2745 0 : unsigned i, n = fna.length ();
2746 :
2747 0 : if (n != fnb.length ())
2748 : return false;
2749 :
2750 0 : for (i = 0; i < n; i++)
2751 0 : if (!operand_equal_p (fna[i], fnb[i], 0))
2752 : return false;
2753 :
2754 : return true;
2755 : }
2756 :
2757 : /* If all the functions in CF are the same, returns one of them,
2758 : otherwise returns NULL. */
2759 :
2760 : static affine_fn
2761 2281142 : common_affine_function (conflict_function *cf)
2762 : {
2763 2281142 : unsigned i;
2764 2281142 : affine_fn comm;
2765 :
2766 2281142 : if (!CF_NONTRIVIAL_P (cf))
2767 0 : return affine_fn ();
2768 :
2769 2281142 : comm = cf->fns[0];
2770 :
2771 2281142 : for (i = 1; i < cf->n; i++)
2772 0 : if (!affine_function_equal_p (comm, cf->fns[i]))
2773 0 : return affine_fn ();
2774 :
2775 2281142 : return comm;
2776 : }
2777 :
2778 : /* Returns the base of the affine function FN. */
2779 :
2780 : static tree
2781 1312256 : affine_function_base (affine_fn fn)
2782 : {
2783 0 : return fn[0];
2784 : }
2785 :
2786 : /* Returns true if FN is a constant. */
2787 :
2788 : static bool
2789 1312565 : affine_function_constant_p (affine_fn fn)
2790 : {
2791 1312565 : unsigned i;
2792 1312565 : tree coef;
2793 :
2794 1372690 : for (i = 1; fn.iterate (i, &coef); i++)
2795 60434 : if (!integer_zerop (coef))
2796 : return false;
2797 :
2798 : return true;
2799 : }
2800 :
2801 : /* Returns true if FN is the zero constant function. */
2802 :
2803 : static bool
2804 171994 : affine_function_zero_p (affine_fn fn)
2805 : {
2806 171994 : return (integer_zerop (affine_function_base (fn))
2807 171994 : && affine_function_constant_p (fn));
2808 : }
2809 :
2810 : /* Returns a signed integer type with the largest precision from TA
2811 : and TB. */
2812 :
2813 : static tree
2814 1729504 : signed_type_for_types (tree ta, tree tb)
2815 : {
2816 1729504 : if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2817 201 : return signed_type_for (ta);
2818 : else
2819 1729303 : return signed_type_for (tb);
2820 : }
2821 :
2822 : /* Applies operation OP on affine functions FNA and FNB, and returns the
2823 : result. */
2824 :
2825 : static affine_fn
2826 1140571 : affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2827 : {
2828 1140571 : unsigned i, n, m;
2829 1140571 : affine_fn ret;
2830 1140571 : tree coef;
2831 :
2832 3421713 : if (fnb.length () > fna.length ())
2833 : {
2834 0 : n = fna.length ();
2835 0 : m = fnb.length ();
2836 : }
2837 : else
2838 : {
2839 1140571 : n = fnb.length ();
2840 : m = fna.length ();
2841 : }
2842 :
2843 1140571 : ret.create (m);
2844 2341576 : for (i = 0; i < n; i++)
2845 : {
2846 2402010 : tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2847 1201005 : TREE_TYPE (fnb[i]));
2848 1201005 : ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2849 : }
2850 :
2851 1140571 : for (; fna.iterate (i, &coef); i++)
2852 0 : ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2853 : coef, integer_zero_node));
2854 1140571 : for (; fnb.iterate (i, &coef); i++)
2855 0 : ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2856 : integer_zero_node, coef));
2857 :
2858 1140571 : return ret;
2859 : }
2860 :
2861 : /* Returns the sum of affine functions FNA and FNB. */
2862 :
2863 : static affine_fn
2864 0 : affine_fn_plus (affine_fn fna, affine_fn fnb)
2865 : {
2866 0 : return affine_fn_op (PLUS_EXPR, fna, fnb);
2867 : }
2868 :
2869 : /* Returns the difference of affine functions FNA and FNB. */
2870 :
2871 : static affine_fn
2872 1140571 : affine_fn_minus (affine_fn fna, affine_fn fnb)
2873 : {
2874 0 : return affine_fn_op (MINUS_EXPR, fna, fnb);
2875 : }
2876 :
2877 : /* Frees affine function FN. */
2878 :
2879 : static void
2880 3618003 : affine_fn_free (affine_fn fn)
2881 : {
2882 0 : fn.release ();
2883 0 : }
2884 :
2885 : /* Determine for each subscript in the data dependence relation DDR
2886 : the distance. */
2887 :
2888 : static void
2889 3079843 : compute_subscript_distance (struct data_dependence_relation *ddr)
2890 : {
2891 3079843 : conflict_function *cf_a, *cf_b;
2892 3079843 : affine_fn fn_a, fn_b, diff;
2893 :
2894 3079843 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2895 : {
2896 : unsigned int i;
2897 :
2898 4220414 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2899 : {
2900 1140571 : struct subscript *subscript;
2901 :
2902 1140571 : subscript = DDR_SUBSCRIPT (ddr, i);
2903 1140571 : cf_a = SUB_CONFLICTS_IN_A (subscript);
2904 1140571 : cf_b = SUB_CONFLICTS_IN_B (subscript);
2905 :
2906 1140571 : fn_a = common_affine_function (cf_a);
2907 1140571 : fn_b = common_affine_function (cf_b);
2908 1140571 : if (!fn_a.exists () || !fn_b.exists ())
2909 : {
2910 0 : SUB_DISTANCE (subscript) = chrec_dont_know;
2911 0 : return;
2912 : }
2913 1140571 : diff = affine_fn_minus (fn_a, fn_b);
2914 :
2915 1140571 : if (affine_function_constant_p (diff))
2916 1140262 : SUB_DISTANCE (subscript) = affine_function_base (diff);
2917 : else
2918 309 : SUB_DISTANCE (subscript) = chrec_dont_know;
2919 :
2920 1140571 : affine_fn_free (diff);
2921 : }
2922 : }
2923 : }
2924 :
2925 : /* Returns the conflict function for "unknown". */
2926 :
2927 : static conflict_function *
2928 7971784 : conflict_fn_not_known (void)
2929 : {
2930 0 : conflict_function *fn = XCNEW (conflict_function);
2931 7971784 : fn->n = NOT_KNOWN;
2932 :
2933 7971784 : return fn;
2934 : }
2935 :
2936 : /* Returns the conflict function for "independent". */
2937 :
2938 : static conflict_function *
2939 4283574 : conflict_fn_no_dependence (void)
2940 : {
2941 0 : conflict_function *fn = XCNEW (conflict_function);
2942 4283574 : fn->n = NO_DEPENDENCE;
2943 :
2944 4283574 : return fn;
2945 : }
2946 :
2947 : /* Returns true if the address of OBJ is invariant in LOOP. */
2948 :
2949 : static bool
2950 3266213 : object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2951 : {
2952 3420018 : while (handled_component_p (obj))
2953 : {
2954 159174 : if (TREE_CODE (obj) == ARRAY_REF)
2955 : {
2956 9733 : for (int i = 1; i < 4; ++i)
2957 8642 : if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2958 8642 : loop->num))
2959 : return false;
2960 : }
2961 152714 : else if (TREE_CODE (obj) == COMPONENT_REF)
2962 : {
2963 131651 : if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2964 131651 : loop->num))
2965 : return false;
2966 : }
2967 153805 : obj = TREE_OPERAND (obj, 0);
2968 : }
2969 :
2970 3260844 : if (!INDIRECT_REF_P (obj)
2971 3260844 : && TREE_CODE (obj) != MEM_REF)
2972 : return true;
2973 :
2974 3236366 : return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2975 6472732 : loop->num);
2976 : }
2977 :
2978 : /* Helper for contains_ssa_ref_p. */
2979 :
2980 : static bool
2981 95755 : contains_ssa_ref_p_1 (tree, tree *idx, void *data)
2982 : {
2983 95755 : if (TREE_CODE (*idx) == SSA_NAME)
2984 : {
2985 89752 : *(bool *)data = true;
2986 89752 : return false;
2987 : }
2988 : return true;
2989 : }
2990 :
2991 : /* Returns true if the reference REF contains a SSA index. */
2992 :
2993 : static bool
2994 249757 : contains_ssa_ref_p (tree ref)
2995 : {
2996 249757 : bool res = false;
2997 0 : for_each_index (&ref, contains_ssa_ref_p_1, &res);
2998 249757 : return res;
2999 : }
3000 :
3001 : /* Returns false if we can prove that data references A and B do not alias,
3002 : true otherwise. If LOOP_NEST is false no cross-iteration aliases are
3003 : considered. */
3004 :
3005 : bool
3006 14500529 : dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
3007 : class loop *loop_nest)
3008 : {
3009 14500529 : tree addr_a = DR_BASE_OBJECT (a);
3010 14500529 : tree addr_b = DR_BASE_OBJECT (b);
3011 :
3012 : /* If we are not processing a loop nest but scalar code we
3013 : do not need to care about possible cross-iteration dependences
3014 : and thus can process the full original reference. Do so,
3015 : similar to how loop invariant motion applies extra offset-based
3016 : disambiguation. */
3017 14500529 : if (!loop_nest)
3018 : {
3019 8092727 : tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
3020 8092727 : tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
3021 :
3022 8092727 : if (DR_BASE_ADDRESS (a)
3023 8084896 : && DR_BASE_ADDRESS (b)
3024 8084565 : && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
3025 7252737 : && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
3026 7169723 : && tree_size_a
3027 7169723 : && tree_size_b
3028 7169714 : && poly_int_tree_p (tree_size_a)
3029 7169688 : && poly_int_tree_p (tree_size_b)
3030 15262415 : && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
3031 7169688 : wi::to_poly_widest (tree_size_a),
3032 7169688 : wi::to_poly_widest (DR_INIT (b)),
3033 7169688 : wi::to_poly_widest (tree_size_b)))
3034 : {
3035 5371019 : gcc_assert (integer_zerop (DR_STEP (a))
3036 : && integer_zerop (DR_STEP (b)));
3037 5371051 : return false;
3038 : }
3039 :
3040 10886832 : aff_tree off1, off2;
3041 : poly_widest_int size1, size2;
3042 2721708 : get_inner_reference_aff (DR_REF (a), &off1, &size1);
3043 2721708 : get_inner_reference_aff (DR_REF (b), &off2, &size2);
3044 2721708 : aff_combination_scale (&off1, -1);
3045 2721708 : aff_combination_add (&off2, &off1);
3046 2721708 : if (aff_comb_cannot_overlap_p (&off2, size1, size2))
3047 32 : return false;
3048 2721708 : }
3049 :
3050 9129478 : if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3051 6776645 : && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3052 : /* For cross-iteration dependences the cliques must be valid for the
3053 : whole loop, not just individual iterations. */
3054 6533564 : && (!loop_nest
3055 6202923 : || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3056 5309584 : || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3057 6345715 : && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3058 15279850 : && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3059 : return false;
3060 :
3061 : /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3062 : do not know the size of the base-object. So we cannot do any
3063 : offset/overlap based analysis but have to rely on points-to
3064 : information only. */
3065 8903499 : if (TREE_CODE (addr_a) == MEM_REF
3066 8903499 : && (DR_UNCONSTRAINED_BASE (a)
3067 4076837 : || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3068 : {
3069 : /* For true dependences we can apply TBAA. */
3070 4123667 : if (flag_strict_aliasing
3071 3944782 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3072 4297604 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3073 173937 : get_alias_set (DR_REF (b))))
3074 : return false;
3075 4093034 : if (TREE_CODE (addr_b) == MEM_REF)
3076 3993467 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3077 7986934 : TREE_OPERAND (addr_b, 0));
3078 : else
3079 99567 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3080 99567 : build_fold_addr_expr (addr_b));
3081 : }
3082 4779832 : else if (TREE_CODE (addr_b) == MEM_REF
3083 4779832 : && (DR_UNCONSTRAINED_BASE (b)
3084 2485736 : || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3085 : {
3086 : /* For true dependences we can apply TBAA. */
3087 325104 : if (flag_strict_aliasing
3088 267076 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3089 404155 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3090 79051 : get_alias_set (DR_REF (b))))
3091 : return false;
3092 309795 : if (TREE_CODE (addr_a) == MEM_REF)
3093 185789 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3094 371578 : TREE_OPERAND (addr_b, 0));
3095 : else
3096 124006 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3097 248012 : TREE_OPERAND (addr_b, 0));
3098 : }
3099 : /* If dr_analyze_innermost failed to handle a component we are
3100 : possibly left with a non-base in which case we didn't analyze
3101 : a possible evolution of the base when analyzing a loop. */
3102 4454728 : else if (loop_nest
3103 6564339 : && ((handled_component_p (addr_a) && contains_ssa_ref_p (addr_a))
3104 83145 : || (handled_component_p (addr_b) && contains_ssa_ref_p (addr_b))))
3105 : {
3106 : /* For true dependences we can apply TBAA. */
3107 89752 : if (flag_strict_aliasing
3108 89110 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3109 99234 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3110 9482 : get_alias_set (DR_REF (b))))
3111 : return false;
3112 85612 : if (TREE_CODE (addr_a) == MEM_REF)
3113 3444 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3114 3444 : build_fold_addr_expr (addr_b));
3115 82168 : else if (TREE_CODE (addr_b) == MEM_REF)
3116 6547 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3117 13094 : TREE_OPERAND (addr_b, 0));
3118 : else
3119 75621 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3120 75621 : build_fold_addr_expr (addr_b));
3121 : }
3122 :
3123 : /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3124 : that is being subsetted in the loop nest. */
3125 4364976 : if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3126 2900711 : return refs_output_dependent_p (addr_a, addr_b);
3127 1464265 : else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3128 401196 : return refs_anti_dependent_p (addr_a, addr_b);
3129 1063069 : return refs_may_alias_p (addr_a, addr_b);
3130 : }
3131 :
3132 : /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3133 : if it is meaningful to compare their associated access functions
3134 : when checking for dependencies. */
3135 :
3136 : static bool
3137 2852305 : access_fn_components_comparable_p (tree ref_a, tree ref_b)
3138 : {
3139 : /* Allow pairs of component refs from the following sets:
3140 :
3141 : { REALPART_EXPR, IMAGPART_EXPR }
3142 : { COMPONENT_REF }
3143 : { ARRAY_REF }. */
3144 2852305 : tree_code code_a = TREE_CODE (ref_a);
3145 2852305 : tree_code code_b = TREE_CODE (ref_b);
3146 2852305 : if (code_a == IMAGPART_EXPR)
3147 34706 : code_a = REALPART_EXPR;
3148 2852305 : if (code_b == IMAGPART_EXPR)
3149 40919 : code_b = REALPART_EXPR;
3150 2852305 : if (code_a != code_b)
3151 : return false;
3152 :
3153 2829846 : if (TREE_CODE (ref_a) == COMPONENT_REF)
3154 : /* ??? We cannot simply use the type of operand #0 of the refs here as
3155 : the Fortran compiler smuggles type punning into COMPONENT_REFs.
3156 : Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3157 954358 : return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3158 954358 : == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3159 :
3160 1875488 : return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3161 3750976 : TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3162 : }
3163 :
3164 : /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3165 : is true when the main indices of A and B were not comparable so we try again
3166 : with alternate indices computed on an indirect reference. */
3167 :
3168 : struct data_dependence_relation *
3169 6524462 : initialize_data_dependence_relation (struct data_dependence_relation *res,
3170 : vec<loop_p> loop_nest,
3171 : bool use_alt_indices)
3172 : {
3173 6524462 : struct data_reference *a = DDR_A (res);
3174 6524462 : struct data_reference *b = DDR_B (res);
3175 6524462 : unsigned int i;
3176 :
3177 6524462 : struct indices *indices_a = &a->indices;
3178 6524462 : struct indices *indices_b = &b->indices;
3179 6524462 : if (use_alt_indices)
3180 : {
3181 365536 : if (TREE_CODE (DR_REF (a)) != MEM_REF)
3182 227590 : indices_a = &a->alt_indices;
3183 365536 : if (TREE_CODE (DR_REF (b)) != MEM_REF)
3184 258281 : indices_b = &b->alt_indices;
3185 : }
3186 6524462 : unsigned int num_dimensions_a = indices_a->access_fns.length ();
3187 6524462 : unsigned int num_dimensions_b = indices_b->access_fns.length ();
3188 6524462 : if (num_dimensions_a == 0 || num_dimensions_b == 0)
3189 : {
3190 2199431 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3191 2199431 : return res;
3192 : }
3193 :
3194 : /* For unconstrained bases, the root (highest-indexed) subscript
3195 : describes a variation in the base of the original DR_REF rather
3196 : than a component access. We have no type that accurately describes
3197 : the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3198 : applying this subscript) so limit the search to the last real
3199 : component access.
3200 :
3201 : E.g. for:
3202 :
3203 : void
3204 : f (int a[][8], int b[][8])
3205 : {
3206 : for (int i = 0; i < 8; ++i)
3207 : a[i * 2][0] = b[i][0];
3208 : }
3209 :
3210 : the a and b accesses have a single ARRAY_REF component reference [0]
3211 : but have two subscripts. */
3212 4325031 : if (indices_a->unconstrained_base)
3213 2468607 : num_dimensions_a -= 1;
3214 4325031 : if (indices_b->unconstrained_base)
3215 2424484 : num_dimensions_b -= 1;
3216 :
3217 : /* These structures describe sequences of component references in
3218 : DR_REF (A) and DR_REF (B). Each component reference is tied to a
3219 : specific access function. */
3220 4325031 : struct {
3221 : /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3222 : DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3223 : indices. In C notation, these are the indices of the rightmost
3224 : component references; e.g. for a sequence .b.c.d, the start
3225 : index is for .d. */
3226 : unsigned int start_a;
3227 : unsigned int start_b;
3228 :
3229 : /* The sequence contains LENGTH consecutive access functions from
3230 : each DR. */
3231 : unsigned int length;
3232 :
3233 : /* The enclosing objects for the A and B sequences respectively,
3234 : i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3235 : and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3236 : tree object_a;
3237 : tree object_b;
3238 4325031 : } full_seq = {}, struct_seq = {};
3239 :
3240 : /* Before each iteration of the loop:
3241 :
3242 : - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3243 : - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3244 4325031 : unsigned int index_a = 0;
3245 4325031 : unsigned int index_b = 0;
3246 4325031 : tree ref_a = DR_REF (a);
3247 4325031 : tree ref_b = DR_REF (b);
3248 :
3249 : /* Now walk the component references from the final DR_REFs back up to
3250 : the enclosing base objects. Each component reference corresponds
3251 : to one access function in the DR, with access function 0 being for
3252 : the final DR_REF and the highest-indexed access function being the
3253 : one that is applied to the base of the DR.
3254 :
3255 : Look for a sequence of component references whose access functions
3256 : are comparable (see access_fn_components_comparable_p). If more
3257 : than one such sequence exists, pick the one nearest the base
3258 : (which is the leftmost sequence in C notation). Store this sequence
3259 : in FULL_SEQ.
3260 :
3261 : For example, if we have:
3262 :
3263 : struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3264 :
3265 : A: a[0][i].s.c.d
3266 : B: __real b[0][i].s.e[i].f
3267 :
3268 : (where d is the same type as the real component of f) then the access
3269 : functions would be:
3270 :
3271 : 0 1 2 3
3272 : A: .d .c .s [i]
3273 :
3274 : 0 1 2 3 4 5
3275 : B: __real .f [i] .e .s [i]
3276 :
3277 : The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3278 : and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3279 : COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3280 : the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3281 : so is comparable. The A3/B5 column contains two ARRAY_REFs that
3282 : index foo[10] arrays, so is again comparable. The sequence is
3283 : therefore:
3284 :
3285 : A: [1, 3] (i.e. [i].s.c)
3286 : B: [3, 5] (i.e. [i].s.e)
3287 :
3288 : Also look for sequences of component references whose access
3289 : functions are comparable and whose enclosing objects have the same
3290 : RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3291 : example, STRUCT_SEQ would be:
3292 :
3293 : A: [1, 2] (i.e. s.c)
3294 : B: [3, 4] (i.e. s.e) */
3295 7164383 : while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3296 : {
3297 : /* The alternate indices form always has a single dimension
3298 : with unconstrained base. */
3299 2852305 : gcc_assert (!use_alt_indices);
3300 :
3301 : /* REF_A and REF_B must be one of the component access types
3302 : allowed by dr_analyze_indices. */
3303 2852305 : gcc_checking_assert (access_fn_component_p (ref_a));
3304 2852305 : gcc_checking_assert (access_fn_component_p (ref_b));
3305 :
3306 : /* Get the immediately-enclosing objects for REF_A and REF_B,
3307 : i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3308 : and DR_ACCESS_FN (B, INDEX_B). */
3309 2852305 : tree object_a = TREE_OPERAND (ref_a, 0);
3310 2852305 : tree object_b = TREE_OPERAND (ref_b, 0);
3311 :
3312 2852305 : tree type_a = TREE_TYPE (object_a);
3313 2852305 : tree type_b = TREE_TYPE (object_b);
3314 2852305 : if (access_fn_components_comparable_p (ref_a, ref_b))
3315 : {
3316 : /* This pair of component accesses is comparable for dependence
3317 : analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3318 : DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3319 2614157 : if (full_seq.start_a + full_seq.length != index_a
3320 2561287 : || full_seq.start_b + full_seq.length != index_b)
3321 : {
3322 : /* The accesses don't extend the current sequence,
3323 : so start a new one here. */
3324 60008 : full_seq.start_a = index_a;
3325 60008 : full_seq.start_b = index_b;
3326 60008 : full_seq.length = 0;
3327 : }
3328 :
3329 : /* Add this pair of references to the sequence. */
3330 2614157 : full_seq.length += 1;
3331 2614157 : full_seq.object_a = object_a;
3332 2614157 : full_seq.object_b = object_b;
3333 :
3334 : /* If the enclosing objects are structures (and thus have the
3335 : same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3336 2614157 : if (TREE_CODE (type_a) == RECORD_TYPE)
3337 755377 : struct_seq = full_seq;
3338 :
3339 : /* Move to the next containing reference for both A and B. */
3340 2614157 : ref_a = object_a;
3341 2614157 : ref_b = object_b;
3342 2614157 : index_a += 1;
3343 2614157 : index_b += 1;
3344 2614157 : continue;
3345 : }
3346 :
3347 : /* Try to approach equal type sizes. */
3348 238148 : if (!COMPLETE_TYPE_P (type_a)
3349 235123 : || !COMPLETE_TYPE_P (type_b)
3350 227070 : || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3351 463622 : || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3352 : break;
3353 :
3354 225195 : unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3355 225195 : unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3356 225195 : if (size_a <= size_b)
3357 : {
3358 134848 : index_a += 1;
3359 134848 : ref_a = object_a;
3360 : }
3361 225195 : if (size_b <= size_a)
3362 : {
3363 104873 : index_b += 1;
3364 104873 : ref_b = object_b;
3365 : }
3366 : }
3367 :
3368 : /* See whether FULL_SEQ ends at the base and whether the two bases
3369 : are equal. We do not care about TBAA or alignment info so we can
3370 : use OEP_ADDRESS_OF to avoid false negatives. */
3371 4325031 : tree base_a = indices_a->base_object;
3372 4325031 : tree base_b = indices_b->base_object;
3373 4325031 : bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3374 4125145 : && full_seq.start_b + full_seq.length == num_dimensions_b
3375 3974345 : && (indices_a->unconstrained_base
3376 3974345 : == indices_b->unconstrained_base)
3377 3969607 : && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3378 3496399 : && (types_compatible_p (TREE_TYPE (base_a),
3379 3496399 : TREE_TYPE (base_b))
3380 856608 : || (!base_supports_access_fn_components_p (base_a)
3381 851305 : && !base_supports_access_fn_components_p (base_b)
3382 849685 : && operand_equal_p
3383 849685 : (TYPE_SIZE (TREE_TYPE (base_a)),
3384 849685 : TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3385 7372467 : && (!loop_nest.exists ()
3386 3047436 : || (object_address_invariant_in_loop_p
3387 3047436 : (loop_nest[0], base_a))));
3388 :
3389 : /* If the bases are the same, we can include the base variation too.
3390 : E.g. the b accesses in:
3391 :
3392 : for (int i = 0; i < n; ++i)
3393 : b[i + 4][0] = b[i][0];
3394 :
3395 : have a definite dependence distance of 4, while for:
3396 :
3397 : for (int i = 0; i < n; ++i)
3398 : a[i + 4][0] = b[i][0];
3399 :
3400 : the dependence distance depends on the gap between a and b.
3401 :
3402 : If the bases are different then we can only rely on the sequence
3403 : rooted at a structure access, since arrays are allowed to overlap
3404 : arbitrarily and change shape arbitrarily. E.g. we treat this as
3405 : valid code:
3406 :
3407 : int a[256];
3408 : ...
3409 : ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3410 :
3411 : where two lvalues with the same int[4][3] type overlap, and where
3412 : both lvalues are distinct from the object's declared type. */
3413 2931208 : if (same_base_p)
3414 : {
3415 2931208 : if (indices_a->unconstrained_base)
3416 1481892 : full_seq.length += 1;
3417 : }
3418 : else
3419 : full_seq = struct_seq;
3420 :
3421 : /* Punt if we didn't find a suitable sequence. */
3422 4325031 : if (full_seq.length == 0)
3423 : {
3424 1129146 : if (use_alt_indices
3425 1009139 : || (TREE_CODE (DR_REF (a)) == MEM_REF
3426 781113 : && TREE_CODE (DR_REF (b)) == MEM_REF)
3427 367352 : || may_be_nonaddressable_p (DR_REF (a))
3428 1496241 : || may_be_nonaddressable_p (DR_REF (b)))
3429 : {
3430 : /* Fully exhausted possibilities. */
3431 763610 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3432 763610 : return res;
3433 : }
3434 :
3435 : /* Try evaluating both DRs as dereferences of pointers. */
3436 365536 : if (!a->alt_indices.base_object
3437 171949 : && TREE_CODE (DR_REF (a)) != MEM_REF)
3438 : {
3439 34003 : tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3440 : build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3441 : build_int_cst
3442 : (reference_alias_ptr_type (DR_REF (a)), 0));
3443 102009 : dr_analyze_indices (&a->alt_indices, alt_ref,
3444 34003 : loop_preheader_edge (loop_nest[0]),
3445 : loop_containing_stmt (DR_STMT (a)));
3446 : }
3447 365536 : if (!b->alt_indices.base_object
3448 184023 : && TREE_CODE (DR_REF (b)) != MEM_REF)
3449 : {
3450 76768 : tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3451 : build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3452 : build_int_cst
3453 : (reference_alias_ptr_type (DR_REF (b)), 0));
3454 230304 : dr_analyze_indices (&b->alt_indices, alt_ref,
3455 76768 : loop_preheader_edge (loop_nest[0]),
3456 : loop_containing_stmt (DR_STMT (b)));
3457 : }
3458 365536 : return initialize_data_dependence_relation (res, loop_nest, true);
3459 : }
3460 :
3461 3195885 : if (!same_base_p)
3462 : {
3463 : /* Partial overlap is possible for different bases when strict aliasing
3464 : is not in effect. It's also possible if either base involves a union
3465 : access; e.g. for:
3466 :
3467 : struct s1 { int a[2]; };
3468 : struct s2 { struct s1 b; int c; };
3469 : struct s3 { int d; struct s1 e; };
3470 : union u { struct s2 f; struct s3 g; } *p, *q;
3471 :
3472 : the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3473 : "p->g.e" (base "p->g") and might partially overlap the s1 at
3474 : "q->g.e" (base "q->g"). */
3475 264677 : if (!flag_strict_aliasing
3476 253034 : || ref_contains_union_access_p (full_seq.object_a)
3477 464676 : || ref_contains_union_access_p (full_seq.object_b))
3478 : {
3479 64697 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3480 64697 : return res;
3481 : }
3482 :
3483 199980 : DDR_COULD_BE_INDEPENDENT_P (res) = true;
3484 199980 : if (!loop_nest.exists ()
3485 399960 : || (object_address_invariant_in_loop_p (loop_nest[0],
3486 199980 : full_seq.object_a)
3487 18797 : && object_address_invariant_in_loop_p (loop_nest[0],
3488 18797 : full_seq.object_b)))
3489 : {
3490 9380 : DDR_OBJECT_A (res) = full_seq.object_a;
3491 9380 : DDR_OBJECT_B (res) = full_seq.object_b;
3492 : }
3493 : }
3494 :
3495 3131188 : DDR_AFFINE_P (res) = true;
3496 3131188 : DDR_ARE_DEPENDENT (res) = NULL_TREE;
3497 3131188 : DDR_SUBSCRIPTS (res).create (full_seq.length);
3498 3131188 : DDR_LOOP_NEST (res) = loop_nest;
3499 3131188 : DDR_SELF_REFERENCE (res) = false;
3500 :
3501 7087129 : for (i = 0; i < full_seq.length; ++i)
3502 : {
3503 3955941 : struct subscript *subscript;
3504 :
3505 3955941 : subscript = XNEW (struct subscript);
3506 3955941 : SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3507 3955941 : SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3508 3955941 : SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3509 3955941 : SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3510 3955941 : SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3511 3955941 : SUB_DISTANCE (subscript) = chrec_dont_know;
3512 3955941 : DDR_SUBSCRIPTS (res).safe_push (subscript);
3513 : }
3514 :
3515 : return res;
3516 : }
3517 :
3518 : /* Initialize a data dependence relation between data accesses A and
3519 : B. NB_LOOPS is the number of loops surrounding the references: the
3520 : size of the classic distance/direction vectors. */
3521 :
3522 : struct data_dependence_relation *
3523 13299404 : initialize_data_dependence_relation (struct data_reference *a,
3524 : struct data_reference *b,
3525 : vec<loop_p> loop_nest)
3526 : {
3527 13299404 : data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3528 13299404 : DDR_A (res) = a;
3529 13299404 : DDR_B (res) = b;
3530 13299404 : DDR_LOOP_NEST (res).create (0);
3531 13299404 : DDR_SUBSCRIPTS (res).create (0);
3532 13299404 : DDR_DIR_VECTS (res).create (0);
3533 13299404 : DDR_DIST_VECTS (res).create (0);
3534 :
3535 13299404 : if (a == NULL || b == NULL)
3536 : {
3537 0 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3538 0 : return res;
3539 : }
3540 :
3541 : /* If the data references do not alias, then they are independent. */
3542 19689212 : if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3543 : {
3544 7140478 : DDR_ARE_DEPENDENT (res) = chrec_known;
3545 7140478 : return res;
3546 : }
3547 :
3548 6158926 : return initialize_data_dependence_relation (res, loop_nest, false);
3549 : }
3550 :
3551 :
3552 : /* Frees memory used by the conflict function F. */
3553 :
3554 : static void
3555 14732790 : free_conflict_function (conflict_function *f)
3556 : {
3557 14732790 : unsigned i;
3558 :
3559 14732790 : if (CF_NONTRIVIAL_P (f))
3560 : {
3561 4954864 : for (i = 0; i < f->n; i++)
3562 2477432 : affine_fn_free (f->fns[i]);
3563 : }
3564 14732790 : free (f);
3565 14732790 : }
3566 :
3567 : /* Frees memory used by SUBSCRIPTS. */
3568 :
3569 : static void
3570 3131188 : free_subscripts (vec<subscript_p> subscripts)
3571 : {
3572 13349505 : for (subscript_p s : subscripts)
3573 : {
3574 3955941 : free_conflict_function (s->conflicting_iterations_in_a);
3575 3955941 : free_conflict_function (s->conflicting_iterations_in_b);
3576 3955941 : free (s);
3577 : }
3578 3131188 : subscripts.release ();
3579 3131188 : }
3580 :
3581 : /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3582 : description. */
3583 :
3584 : static inline void
3585 2230450 : finalize_ddr_dependent (struct data_dependence_relation *ddr,
3586 : tree chrec)
3587 : {
3588 2230450 : DDR_ARE_DEPENDENT (ddr) = chrec;
3589 2230450 : free_subscripts (DDR_SUBSCRIPTS (ddr));
3590 2230450 : DDR_SUBSCRIPTS (ddr).create (0);
3591 59797 : }
3592 :
3593 : /* The dependence relation DDR cannot be represented by a distance
3594 : vector. */
3595 :
3596 : static inline void
3597 2056 : non_affine_dependence_relation (struct data_dependence_relation *ddr)
3598 : {
3599 2056 : if (dump_file && (dump_flags & TDF_DETAILS))
3600 92 : fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3601 :
3602 2056 : DDR_AFFINE_P (ddr) = false;
3603 2056 : }
3604 :
3605 :
3606 :
3607 : /* This section contains the classic Banerjee tests. */
3608 :
3609 : /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3610 : variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3611 :
3612 : static inline bool
3613 2232157 : ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3614 : {
3615 2232157 : return (evolution_function_is_constant_p (chrec_a)
3616 2732954 : && evolution_function_is_constant_p (chrec_b));
3617 : }
3618 :
3619 : /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3620 : variable, i.e., if the SIV (Single Index Variable) test is true. */
3621 :
3622 : static bool
3623 1733019 : siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3624 : {
3625 3464382 : if ((evolution_function_is_constant_p (chrec_a)
3626 1659 : && evolution_function_is_univariate_p (chrec_b))
3627 3464382 : || (evolution_function_is_constant_p (chrec_b)
3628 1268 : && evolution_function_is_univariate_p (chrec_a)))
3629 2921 : return true;
3630 :
3631 1730098 : if (evolution_function_is_univariate_p (chrec_a)
3632 1730098 : && evolution_function_is_univariate_p (chrec_b))
3633 : {
3634 1703740 : switch (TREE_CODE (chrec_a))
3635 : {
3636 1703740 : case POLYNOMIAL_CHREC:
3637 1703740 : switch (TREE_CODE (chrec_b))
3638 : {
3639 1703740 : case POLYNOMIAL_CHREC:
3640 1703740 : if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3641 : return false;
3642 : /* FALLTHRU */
3643 :
3644 : default:
3645 : return true;
3646 : }
3647 :
3648 : default:
3649 : return true;
3650 : }
3651 : }
3652 :
3653 : return false;
3654 : }
3655 :
3656 : /* Creates a conflict function with N dimensions. The affine functions
3657 : in each dimension follow. */
3658 :
3659 : static conflict_function *
3660 2477432 : conflict_fn (unsigned n, ...)
3661 : {
3662 2477432 : unsigned i;
3663 2477432 : conflict_function *ret = XCNEW (conflict_function);
3664 2477432 : va_list ap;
3665 :
3666 2477432 : gcc_assert (n > 0 && n <= MAX_DIM);
3667 2477432 : va_start (ap, n);
3668 :
3669 2477432 : ret->n = n;
3670 4954864 : for (i = 0; i < n; i++)
3671 2477432 : ret->fns[i] = va_arg (ap, affine_fn);
3672 2477432 : va_end (ap);
3673 :
3674 2477432 : return ret;
3675 : }
3676 :
3677 : /* Returns constant affine function with value CST. */
3678 :
3679 : static affine_fn
3680 2356112 : affine_fn_cst (tree cst)
3681 : {
3682 2356112 : affine_fn fn;
3683 2356112 : fn.create (1);
3684 2356112 : fn.quick_push (cst);
3685 2356112 : return fn;
3686 : }
3687 :
3688 : /* Returns affine function with single variable, CST + COEF * x_DIM. */
3689 :
3690 : static affine_fn
3691 121320 : affine_fn_univar (tree cst, unsigned dim, tree coef)
3692 : {
3693 121320 : affine_fn fn;
3694 121320 : fn.create (dim + 1);
3695 121320 : unsigned i;
3696 :
3697 121320 : gcc_assert (dim > 0);
3698 121320 : fn.quick_push (cst);
3699 242640 : for (i = 1; i < dim; i++)
3700 0 : fn.quick_push (integer_zero_node);
3701 121320 : fn.quick_push (coef);
3702 121320 : return fn;
3703 : }
3704 :
3705 : /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3706 : *OVERLAPS_B are initialized to the functions that describe the
3707 : relation between the elements accessed twice by CHREC_A and
3708 : CHREC_B. For k >= 0, the following property is verified:
3709 :
3710 : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3711 :
3712 : static void
3713 499138 : analyze_ziv_subscript (tree chrec_a,
3714 : tree chrec_b,
3715 : conflict_function **overlaps_a,
3716 : conflict_function **overlaps_b,
3717 : tree *last_conflicts)
3718 : {
3719 499138 : tree type, difference;
3720 499138 : dependence_stats.num_ziv++;
3721 :
3722 499138 : if (dump_file && (dump_flags & TDF_DETAILS))
3723 22423 : fprintf (dump_file, "(analyze_ziv_subscript \n");
3724 :
3725 499138 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3726 499138 : chrec_a = chrec_convert (type, chrec_a, NULL);
3727 499138 : chrec_b = chrec_convert (type, chrec_b, NULL);
3728 499138 : difference = chrec_fold_minus (type, chrec_a, chrec_b);
3729 :
3730 499138 : switch (TREE_CODE (difference))
3731 : {
3732 499138 : case INTEGER_CST:
3733 499138 : if (integer_zerop (difference))
3734 : {
3735 : /* The difference is equal to zero: the accessed index
3736 : overlaps for each iteration in the loop. */
3737 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3738 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3739 0 : *last_conflicts = chrec_dont_know;
3740 0 : dependence_stats.num_ziv_dependent++;
3741 : }
3742 : else
3743 : {
3744 : /* The accesses do not overlap. */
3745 499138 : *overlaps_a = conflict_fn_no_dependence ();
3746 499138 : *overlaps_b = conflict_fn_no_dependence ();
3747 499138 : *last_conflicts = integer_zero_node;
3748 499138 : dependence_stats.num_ziv_independent++;
3749 : }
3750 : break;
3751 :
3752 0 : default:
3753 : /* We're not sure whether the indexes overlap. For the moment,
3754 : conservatively answer "don't know". */
3755 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3756 0 : fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3757 :
3758 0 : *overlaps_a = conflict_fn_not_known ();
3759 0 : *overlaps_b = conflict_fn_not_known ();
3760 0 : *last_conflicts = chrec_dont_know;
3761 0 : dependence_stats.num_ziv_unimplemented++;
3762 0 : break;
3763 : }
3764 :
3765 499138 : if (dump_file && (dump_flags & TDF_DETAILS))
3766 22423 : fprintf (dump_file, ")\n");
3767 499138 : }
3768 :
3769 : /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3770 : and only if it fits to the int type. If this is not the case, or the
3771 : bound on the number of iterations of LOOP could not be derived, returns
3772 : chrec_dont_know. */
3773 :
3774 : static tree
3775 0 : max_stmt_executions_tree (class loop *loop)
3776 : {
3777 0 : widest_int nit;
3778 :
3779 0 : if (!max_stmt_executions (loop, &nit))
3780 0 : return chrec_dont_know;
3781 :
3782 0 : if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3783 0 : return chrec_dont_know;
3784 :
3785 0 : return wide_int_to_tree (unsigned_type_node, nit);
3786 0 : }
3787 :
3788 : /* Determine whether the CHREC is always positive/negative. If the expression
3789 : cannot be statically analyzed, return false, otherwise set the answer into
3790 : VALUE. */
3791 :
3792 : static bool
3793 4510 : chrec_is_positive (tree chrec, bool *value)
3794 : {
3795 4510 : bool value0, value1, value2;
3796 4510 : tree end_value, nb_iter;
3797 :
3798 4510 : switch (TREE_CODE (chrec))
3799 : {
3800 0 : case POLYNOMIAL_CHREC:
3801 0 : if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3802 0 : || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3803 0 : return false;
3804 :
3805 : /* FIXME -- overflows. */
3806 0 : if (value0 == value1)
3807 : {
3808 0 : *value = value0;
3809 0 : return true;
3810 : }
3811 :
3812 : /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3813 : and the proof consists in showing that the sign never
3814 : changes during the execution of the loop, from 0 to
3815 : loop->nb_iterations. */
3816 0 : if (!evolution_function_is_affine_p (chrec))
3817 : return false;
3818 :
3819 0 : nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3820 0 : if (chrec_contains_undetermined (nb_iter))
3821 : return false;
3822 :
3823 : #if 0
3824 : /* TODO -- If the test is after the exit, we may decrease the number of
3825 : iterations by one. */
3826 : if (after_exit)
3827 : nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3828 : #endif
3829 :
3830 0 : end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3831 :
3832 0 : if (!chrec_is_positive (end_value, &value2))
3833 : return false;
3834 :
3835 0 : *value = value0;
3836 0 : return value0 == value1;
3837 :
3838 4510 : case INTEGER_CST:
3839 4510 : switch (tree_int_cst_sgn (chrec))
3840 : {
3841 2014 : case -1:
3842 2014 : *value = false;
3843 2014 : break;
3844 2496 : case 1:
3845 2496 : *value = true;
3846 2496 : break;
3847 : default:
3848 : return false;
3849 : }
3850 : return true;
3851 :
3852 : default:
3853 : return false;
3854 : }
3855 : }
3856 :
3857 :
3858 : /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3859 : constant, and CHREC_B is an affine function. *OVERLAPS_A and
3860 : *OVERLAPS_B are initialized to the functions that describe the
3861 : relation between the elements accessed twice by CHREC_A and
3862 : CHREC_B. For k >= 0, the following property is verified:
3863 :
3864 : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3865 :
3866 : static void
3867 2921 : analyze_siv_subscript_cst_affine (tree chrec_a,
3868 : tree chrec_b,
3869 : conflict_function **overlaps_a,
3870 : conflict_function **overlaps_b,
3871 : tree *last_conflicts)
3872 : {
3873 2921 : bool value0, value1, value2;
3874 2921 : tree type, difference, tmp;
3875 :
3876 2921 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3877 2921 : chrec_a = chrec_convert (type, chrec_a, NULL);
3878 2921 : chrec_b = chrec_convert (type, chrec_b, NULL);
3879 2921 : difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3880 :
3881 : /* Special case overlap in the first iteration. */
3882 2921 : if (integer_zerop (difference))
3883 : {
3884 664 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3885 664 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3886 664 : *last_conflicts = integer_one_node;
3887 664 : return;
3888 : }
3889 :
3890 2257 : if (!chrec_is_positive (initial_condition (difference), &value0))
3891 : {
3892 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3893 0 : fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3894 :
3895 0 : dependence_stats.num_siv_unimplemented++;
3896 0 : *overlaps_a = conflict_fn_not_known ();
3897 0 : *overlaps_b = conflict_fn_not_known ();
3898 0 : *last_conflicts = chrec_dont_know;
3899 0 : return;
3900 : }
3901 : else
3902 : {
3903 2257 : if (value0 == false)
3904 : {
3905 1800 : if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3906 1800 : || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3907 : {
3908 4 : if (dump_file && (dump_flags & TDF_DETAILS))
3909 0 : fprintf (dump_file, "siv test failed: chrec not positive.\n");
3910 :
3911 4 : *overlaps_a = conflict_fn_not_known ();
3912 4 : *overlaps_b = conflict_fn_not_known ();
3913 4 : *last_conflicts = chrec_dont_know;
3914 4 : dependence_stats.num_siv_unimplemented++;
3915 4 : return;
3916 : }
3917 : else
3918 : {
3919 1796 : if (value1 == true)
3920 : {
3921 : /* Example:
3922 : chrec_a = 12
3923 : chrec_b = {10, +, 1}
3924 : */
3925 :
3926 1796 : if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3927 : {
3928 1499 : HOST_WIDE_INT numiter;
3929 1499 : class loop *loop = get_chrec_loop (chrec_b);
3930 :
3931 1499 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3932 1499 : tmp = fold_build2 (EXACT_DIV_EXPR, type,
3933 : fold_build1 (ABS_EXPR, type, difference),
3934 : CHREC_RIGHT (chrec_b));
3935 1499 : *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3936 1499 : *last_conflicts = integer_one_node;
3937 :
3938 :
3939 : /* Perform weak-zero siv test to see if overlap is
3940 : outside the loop bounds. */
3941 1499 : numiter = max_stmt_executions_int (loop);
3942 :
3943 1499 : if (numiter >= 0
3944 1499 : && compare_tree_int (tmp, numiter) > 0)
3945 : {
3946 0 : free_conflict_function (*overlaps_a);
3947 0 : free_conflict_function (*overlaps_b);
3948 0 : *overlaps_a = conflict_fn_no_dependence ();
3949 0 : *overlaps_b = conflict_fn_no_dependence ();
3950 0 : *last_conflicts = integer_zero_node;
3951 0 : dependence_stats.num_siv_independent++;
3952 0 : return;
3953 : }
3954 1499 : dependence_stats.num_siv_dependent++;
3955 1499 : return;
3956 : }
3957 :
3958 : /* When the step does not divide the difference, there are
3959 : no overlaps. */
3960 : else
3961 : {
3962 297 : *overlaps_a = conflict_fn_no_dependence ();
3963 297 : *overlaps_b = conflict_fn_no_dependence ();
3964 297 : *last_conflicts = integer_zero_node;
3965 297 : dependence_stats.num_siv_independent++;
3966 297 : return;
3967 : }
3968 : }
3969 :
3970 : else
3971 : {
3972 : /* Example:
3973 : chrec_a = 12
3974 : chrec_b = {10, +, -1}
3975 :
3976 : In this case, chrec_a will not overlap with chrec_b. */
3977 0 : *overlaps_a = conflict_fn_no_dependence ();
3978 0 : *overlaps_b = conflict_fn_no_dependence ();
3979 0 : *last_conflicts = integer_zero_node;
3980 0 : dependence_stats.num_siv_independent++;
3981 0 : return;
3982 : }
3983 : }
3984 : }
3985 : else
3986 : {
3987 457 : if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3988 457 : || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3989 : {
3990 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3991 0 : fprintf (dump_file, "siv test failed: chrec not positive.\n");
3992 :
3993 0 : *overlaps_a = conflict_fn_not_known ();
3994 0 : *overlaps_b = conflict_fn_not_known ();
3995 0 : *last_conflicts = chrec_dont_know;
3996 0 : dependence_stats.num_siv_unimplemented++;
3997 0 : return;
3998 : }
3999 : else
4000 : {
4001 457 : if (value2 == false)
4002 : {
4003 : /* Example:
4004 : chrec_a = 3
4005 : chrec_b = {10, +, -1}
4006 : */
4007 214 : if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
4008 : {
4009 109 : HOST_WIDE_INT numiter;
4010 109 : class loop *loop = get_chrec_loop (chrec_b);
4011 :
4012 109 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4013 109 : tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
4014 : CHREC_RIGHT (chrec_b));
4015 109 : *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
4016 109 : *last_conflicts = integer_one_node;
4017 :
4018 : /* Perform weak-zero siv test to see if overlap is
4019 : outside the loop bounds. */
4020 109 : numiter = max_stmt_executions_int (loop);
4021 :
4022 109 : if (numiter >= 0
4023 109 : && compare_tree_int (tmp, numiter) > 0)
4024 : {
4025 0 : free_conflict_function (*overlaps_a);
4026 0 : free_conflict_function (*overlaps_b);
4027 0 : *overlaps_a = conflict_fn_no_dependence ();
4028 0 : *overlaps_b = conflict_fn_no_dependence ();
4029 0 : *last_conflicts = integer_zero_node;
4030 0 : dependence_stats.num_siv_independent++;
4031 0 : return;
4032 : }
4033 109 : dependence_stats.num_siv_dependent++;
4034 109 : return;
4035 : }
4036 :
4037 : /* When the step does not divide the difference, there
4038 : are no overlaps. */
4039 : else
4040 : {
4041 105 : *overlaps_a = conflict_fn_no_dependence ();
4042 105 : *overlaps_b = conflict_fn_no_dependence ();
4043 105 : *last_conflicts = integer_zero_node;
4044 105 : dependence_stats.num_siv_independent++;
4045 105 : return;
4046 : }
4047 : }
4048 : else
4049 : {
4050 : /* Example:
4051 : chrec_a = 3
4052 : chrec_b = {4, +, 1}
4053 :
4054 : In this case, chrec_a will not overlap with chrec_b. */
4055 243 : *overlaps_a = conflict_fn_no_dependence ();
4056 243 : *overlaps_b = conflict_fn_no_dependence ();
4057 243 : *last_conflicts = integer_zero_node;
4058 243 : dependence_stats.num_siv_independent++;
4059 243 : return;
4060 : }
4061 : }
4062 : }
4063 : }
4064 : }
4065 :
4066 : /* Helper recursive function for initializing the matrix A. Returns
4067 : the initial value of CHREC. */
4068 :
4069 : static tree
4070 3366808 : initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4071 : {
4072 6733608 : gcc_assert (chrec);
4073 :
4074 6733608 : switch (TREE_CODE (chrec))
4075 : {
4076 3366808 : case POLYNOMIAL_CHREC:
4077 3366808 : HOST_WIDE_INT chrec_right;
4078 3366808 : if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4079 8 : return chrec_dont_know;
4080 3366800 : chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4081 : /* We want to be able to negate without overflow. */
4082 3366800 : if (chrec_right == HOST_WIDE_INT_MIN)
4083 0 : return chrec_dont_know;
4084 3366800 : A[index][0] = mult * chrec_right;
4085 3366800 : return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4086 :
4087 0 : case PLUS_EXPR:
4088 0 : case MULT_EXPR:
4089 0 : case MINUS_EXPR:
4090 0 : {
4091 0 : tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4092 0 : tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4093 :
4094 0 : return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4095 : }
4096 :
4097 0 : CASE_CONVERT:
4098 0 : {
4099 0 : tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4100 0 : return chrec_convert (chrec_type (chrec), op, NULL);
4101 : }
4102 :
4103 0 : case BIT_NOT_EXPR:
4104 0 : {
4105 : /* Handle ~X as -1 - X. */
4106 0 : tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4107 0 : return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4108 0 : build_int_cst (TREE_TYPE (chrec), -1), op);
4109 : }
4110 :
4111 3366800 : case INTEGER_CST:
4112 3366800 : return cst_and_fits_in_hwi (chrec) ? chrec : chrec_dont_know;
4113 :
4114 0 : default:
4115 0 : gcc_unreachable ();
4116 : return NULL_TREE;
4117 : }
4118 : }
4119 :
4120 : #define FLOOR_DIV(x,y) ((x) / (y))
4121 :
4122 : /* Solves the special case of the Diophantine equation:
4123 : | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4124 :
4125 : Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4126 : number of iterations that loops X and Y run. The overlaps will be
4127 : constructed as evolutions in dimension DIM. */
4128 :
4129 : static void
4130 64 : compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4131 : HOST_WIDE_INT step_a,
4132 : HOST_WIDE_INT step_b,
4133 : affine_fn *overlaps_a,
4134 : affine_fn *overlaps_b,
4135 : tree *last_conflicts, int dim)
4136 : {
4137 64 : if (((step_a > 0 && step_b > 0)
4138 8 : || (step_a < 0 && step_b < 0)))
4139 : {
4140 60 : HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4141 60 : HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4142 :
4143 60 : gcd_steps_a_b = gcd (step_a, step_b);
4144 60 : step_overlaps_a = step_b / gcd_steps_a_b;
4145 60 : step_overlaps_b = step_a / gcd_steps_a_b;
4146 :
4147 60 : if (niter > 0)
4148 : {
4149 60 : tau2 = FLOOR_DIV (niter, step_overlaps_a);
4150 60 : tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4151 60 : last_conflict = tau2;
4152 60 : *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4153 : }
4154 : else
4155 0 : *last_conflicts = chrec_dont_know;
4156 :
4157 60 : *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4158 : build_int_cst (NULL_TREE,
4159 60 : step_overlaps_a));
4160 60 : *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4161 : build_int_cst (NULL_TREE,
4162 60 : step_overlaps_b));
4163 60 : }
4164 :
4165 : else
4166 : {
4167 4 : *overlaps_a = affine_fn_cst (integer_zero_node);
4168 4 : *overlaps_b = affine_fn_cst (integer_zero_node);
4169 4 : *last_conflicts = integer_zero_node;
4170 : }
4171 64 : }
4172 :
4173 : /* Solves the special case of a Diophantine equation where CHREC_A is
4174 : an affine bivariate function, and CHREC_B is an affine univariate
4175 : function. For example,
4176 :
4177 : | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4178 :
4179 : has the following overlapping functions:
4180 :
4181 : | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4182 : | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4183 : | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4184 :
4185 : FORNOW: This is a specialized implementation for a case occurring in
4186 : a common benchmark. Implement the general algorithm. */
4187 :
4188 : static void
4189 0 : compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4190 : conflict_function **overlaps_a,
4191 : conflict_function **overlaps_b,
4192 : tree *last_conflicts)
4193 : {
4194 0 : bool xz_p, yz_p, xyz_p;
4195 0 : HOST_WIDE_INT step_x, step_y, step_z;
4196 0 : HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4197 0 : affine_fn overlaps_a_xz, overlaps_b_xz;
4198 0 : affine_fn overlaps_a_yz, overlaps_b_yz;
4199 0 : affine_fn overlaps_a_xyz, overlaps_b_xyz;
4200 0 : affine_fn ova1, ova2, ovb;
4201 0 : tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4202 :
4203 0 : step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4204 0 : step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4205 0 : step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4206 :
4207 0 : niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4208 0 : niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4209 0 : niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4210 :
4211 0 : if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4212 : {
4213 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4214 0 : fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4215 :
4216 0 : *overlaps_a = conflict_fn_not_known ();
4217 0 : *overlaps_b = conflict_fn_not_known ();
4218 0 : *last_conflicts = chrec_dont_know;
4219 0 : return;
4220 : }
4221 :
4222 0 : niter = MIN (niter_x, niter_z);
4223 0 : compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4224 : &overlaps_a_xz,
4225 : &overlaps_b_xz,
4226 : &last_conflicts_xz, 1);
4227 0 : niter = MIN (niter_y, niter_z);
4228 0 : compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4229 : &overlaps_a_yz,
4230 : &overlaps_b_yz,
4231 : &last_conflicts_yz, 2);
4232 0 : niter = MIN (niter_x, niter_z);
4233 0 : niter = MIN (niter_y, niter);
4234 0 : compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4235 : &overlaps_a_xyz,
4236 : &overlaps_b_xyz,
4237 : &last_conflicts_xyz, 3);
4238 :
4239 0 : xz_p = !integer_zerop (last_conflicts_xz);
4240 0 : yz_p = !integer_zerop (last_conflicts_yz);
4241 0 : xyz_p = !integer_zerop (last_conflicts_xyz);
4242 :
4243 0 : if (xz_p || yz_p || xyz_p)
4244 : {
4245 0 : ova1 = affine_fn_cst (integer_zero_node);
4246 0 : ova2 = affine_fn_cst (integer_zero_node);
4247 0 : ovb = affine_fn_cst (integer_zero_node);
4248 0 : if (xz_p)
4249 : {
4250 0 : affine_fn t0 = ova1;
4251 0 : affine_fn t2 = ovb;
4252 :
4253 0 : ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4254 0 : ovb = affine_fn_plus (ovb, overlaps_b_xz);
4255 0 : affine_fn_free (t0);
4256 0 : affine_fn_free (t2);
4257 0 : *last_conflicts = last_conflicts_xz;
4258 : }
4259 0 : if (yz_p)
4260 : {
4261 0 : affine_fn t0 = ova2;
4262 0 : affine_fn t2 = ovb;
4263 :
4264 0 : ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4265 0 : ovb = affine_fn_plus (ovb, overlaps_b_yz);
4266 0 : affine_fn_free (t0);
4267 0 : affine_fn_free (t2);
4268 0 : *last_conflicts = last_conflicts_yz;
4269 : }
4270 0 : if (xyz_p)
4271 : {
4272 0 : affine_fn t0 = ova1;
4273 0 : affine_fn t2 = ova2;
4274 0 : affine_fn t4 = ovb;
4275 :
4276 0 : ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4277 0 : ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4278 0 : ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4279 0 : affine_fn_free (t0);
4280 0 : affine_fn_free (t2);
4281 0 : affine_fn_free (t4);
4282 0 : *last_conflicts = last_conflicts_xyz;
4283 : }
4284 0 : *overlaps_a = conflict_fn (2, ova1, ova2);
4285 0 : *overlaps_b = conflict_fn (1, ovb);
4286 0 : }
4287 : else
4288 : {
4289 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4290 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4291 0 : *last_conflicts = integer_zero_node;
4292 : }
4293 :
4294 0 : affine_fn_free (overlaps_a_xz);
4295 0 : affine_fn_free (overlaps_b_xz);
4296 0 : affine_fn_free (overlaps_a_yz);
4297 0 : affine_fn_free (overlaps_b_yz);
4298 0 : affine_fn_free (overlaps_a_xyz);
4299 0 : affine_fn_free (overlaps_b_xyz);
4300 : }
4301 :
4302 : /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4303 :
4304 : static void
4305 3413057 : lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4306 : int size)
4307 : {
4308 3413057 : memcpy (vec2, vec1, size * sizeof (*vec1));
4309 0 : }
4310 :
4311 : /* Copy the elements of M x N matrix MAT1 to MAT2. */
4312 :
4313 : static void
4314 1683328 : lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4315 : int m, int n)
4316 : {
4317 1683328 : int i;
4318 :
4319 5049984 : for (i = 0; i < m; i++)
4320 3366656 : lambda_vector_copy (mat1[i], mat2[i], n);
4321 1683328 : }
4322 :
4323 : /* Store the N x N identity matrix in MAT. */
4324 :
4325 : static void
4326 1683328 : lambda_matrix_id (lambda_matrix mat, int size)
4327 : {
4328 1683328 : int i, j;
4329 :
4330 5049984 : for (i = 0; i < size; i++)
4331 10099968 : for (j = 0; j < size; j++)
4332 10099968 : mat[i][j] = (i == j) ? 1 : 0;
4333 1683328 : }
4334 :
4335 : /* Return the index of the first nonzero element of vector VEC1 between
4336 : START and N. We must have START <= N.
4337 : Returns N if VEC1 is the zero vector. */
4338 :
4339 : static int
4340 1683328 : lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4341 : {
4342 1683328 : int j = start;
4343 1683328 : while (j < n && vec1[j] == 0)
4344 0 : j++;
4345 1683328 : return j;
4346 : }
4347 :
4348 : /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4349 : R2 = R2 + CONST1 * R1. */
4350 :
4351 : static bool
4352 3366926 : lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4353 : lambda_int const1)
4354 : {
4355 3366926 : int i;
4356 :
4357 3366926 : if (const1 == 0)
4358 : return true;
4359 :
4360 8416730 : for (i = 0; i < n; i++)
4361 : {
4362 5050038 : bool ovf;
4363 5050038 : lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4364 5050038 : if (ovf)
4365 3366926 : return false;
4366 5050038 : lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4367 5050038 : if (ovf || tem2 == HOST_WIDE_INT_MIN)
4368 : return false;
4369 5050038 : mat[r2][i] = tem2;
4370 : }
4371 :
4372 : return true;
4373 : }
4374 :
4375 : /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4376 : and store the result in VEC2. */
4377 :
4378 : static void
4379 1670553 : lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4380 : int size, lambda_int const1)
4381 : {
4382 1670553 : int i;
4383 :
4384 1670553 : if (const1 == 0)
4385 0 : lambda_vector_clear (vec2, size);
4386 : else
4387 5011659 : for (i = 0; i < size; i++)
4388 3341106 : vec2[i] = const1 * vec1[i];
4389 1670553 : }
4390 :
4391 : /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4392 :
4393 : static void
4394 1670553 : lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4395 : int size)
4396 : {
4397 0 : lambda_vector_mult_const (vec1, vec2, size, -1);
4398 0 : }
4399 :
4400 : /* Negate row R1 of matrix MAT which has N columns. */
4401 :
4402 : static void
4403 1670553 : lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4404 : {
4405 0 : lambda_vector_negate (mat[r1], mat[r1], n);
4406 1670553 : }
4407 :
4408 : /* Return true if two vectors are equal. */
4409 :
4410 : static bool
4411 355688 : lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4412 : {
4413 355688 : int i;
4414 356795 : for (i = 0; i < size; i++)
4415 356532 : if (vec1[i] != vec2[i])
4416 : return false;
4417 : return true;
4418 : }
4419 :
4420 : /* Given an M x N integer matrix A, this function determines an M x
4421 : M unimodular matrix U, and an M x N echelon matrix S such that
4422 : "U.A = S". This decomposition is also known as "right Hermite".
4423 :
4424 : Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4425 : Restructuring Compilers" Utpal Banerjee. */
4426 :
4427 : static bool
4428 1683328 : lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4429 : lambda_matrix S, lambda_matrix U)
4430 : {
4431 1683328 : int i, j, i0 = 0;
4432 :
4433 1683328 : lambda_matrix_copy (A, S, m, n);
4434 1683328 : lambda_matrix_id (U, m);
4435 :
4436 3366656 : for (j = 0; j < n; j++)
4437 : {
4438 3366656 : if (lambda_vector_first_nz (S[j], m, i0) < m)
4439 : {
4440 1683328 : ++i0;
4441 3366656 : for (i = m - 1; i >= i0; i--)
4442 : {
4443 3366791 : while (S[i][j] != 0)
4444 : {
4445 1683463 : lambda_int factor, a, b;
4446 :
4447 1683463 : a = S[i-1][j];
4448 1683463 : b = S[i][j];
4449 1683463 : gcc_assert (a != HOST_WIDE_INT_MIN);
4450 1683463 : factor = a / b;
4451 :
4452 1683463 : if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4453 : return false;
4454 1683463 : std::swap (S[i], S[i-1]);
4455 :
4456 1683463 : if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4457 : return false;
4458 1683463 : std::swap (U[i], U[i-1]);
4459 : }
4460 : }
4461 : }
4462 : }
4463 :
4464 : return true;
4465 : }
4466 :
4467 : /* Determines the overlapping elements due to accesses CHREC_A and
4468 : CHREC_B, that are affine functions. This function cannot handle
4469 : symbolic evolution functions, ie. when initial conditions are
4470 : parameters, because it uses lambda matrices of integers. */
4471 :
4472 : static void
4473 1683404 : analyze_subscript_affine_affine (tree chrec_a,
4474 : tree chrec_b,
4475 : conflict_function **overlaps_a,
4476 : conflict_function **overlaps_b,
4477 : tree *last_conflicts)
4478 : {
4479 1683404 : unsigned nb_vars_a, nb_vars_b, dim;
4480 1683404 : lambda_int gamma, gcd_alpha_beta;
4481 1683404 : lambda_matrix A, U, S;
4482 1683404 : struct obstack scratch_obstack;
4483 :
4484 1683404 : if (eq_evolutions_p (chrec_a, chrec_b))
4485 : {
4486 : /* The accessed index overlaps for each iteration in the
4487 : loop. */
4488 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4489 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4490 0 : *last_conflicts = chrec_dont_know;
4491 0 : return;
4492 : }
4493 1683404 : if (dump_file && (dump_flags & TDF_DETAILS))
4494 20002 : fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4495 :
4496 : /* For determining the initial intersection, we have to solve a
4497 : Diophantine equation. This is the most time consuming part.
4498 :
4499 : For answering to the question: "Is there a dependence?" we have
4500 : to prove that there exists a solution to the Diophantine
4501 : equation, and that the solution is in the iteration domain,
4502 : i.e. the solution is positive or zero, and that the solution
4503 : happens before the upper bound loop.nb_iterations. Otherwise
4504 : there is no dependence. This function outputs a description of
4505 : the iterations that hold the intersections. */
4506 :
4507 1683404 : nb_vars_a = nb_vars_in_chrec (chrec_a);
4508 1683404 : nb_vars_b = nb_vars_in_chrec (chrec_b);
4509 :
4510 1683404 : gcc_obstack_init (&scratch_obstack);
4511 :
4512 1683404 : dim = nb_vars_a + nb_vars_b;
4513 1683404 : U = lambda_matrix_new (dim, dim, &scratch_obstack);
4514 1683404 : A = lambda_matrix_new (dim, 1, &scratch_obstack);
4515 1683404 : S = lambda_matrix_new (dim, 1, &scratch_obstack);
4516 :
4517 1683404 : tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4518 1683404 : tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4519 1683404 : if (init_a == chrec_dont_know
4520 1683392 : || init_b == chrec_dont_know)
4521 : {
4522 12 : if (dump_file && (dump_flags & TDF_DETAILS))
4523 0 : fprintf (dump_file, "affine-affine test failed: "
4524 : "representation issue.\n");
4525 12 : *overlaps_a = conflict_fn_not_known ();
4526 12 : *overlaps_b = conflict_fn_not_known ();
4527 12 : *last_conflicts = chrec_dont_know;
4528 12 : goto end_analyze_subs_aa;
4529 : }
4530 1683392 : gamma = int_cst_value (init_b) - int_cst_value (init_a);
4531 :
4532 : /* Don't do all the hard work of solving the Diophantine equation
4533 : when we already know the solution: for example,
4534 : | {3, +, 1}_1
4535 : | {3, +, 4}_2
4536 : | gamma = 3 - 3 = 0.
4537 : Then the first overlap occurs during the first iterations:
4538 : | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4539 : */
4540 1683392 : if (gamma == 0)
4541 : {
4542 64 : if (nb_vars_a == 1 && nb_vars_b == 1)
4543 : {
4544 64 : HOST_WIDE_INT step_a, step_b;
4545 64 : HOST_WIDE_INT niter, niter_a, niter_b;
4546 64 : affine_fn ova, ovb;
4547 :
4548 64 : niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4549 64 : niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4550 64 : niter = MIN (niter_a, niter_b);
4551 64 : step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4552 64 : step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4553 :
4554 64 : compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4555 : &ova, &ovb,
4556 : last_conflicts, 1);
4557 64 : *overlaps_a = conflict_fn (1, ova);
4558 64 : *overlaps_b = conflict_fn (1, ovb);
4559 : }
4560 :
4561 0 : else if (nb_vars_a == 2 && nb_vars_b == 1)
4562 0 : compute_overlap_steps_for_affine_1_2
4563 0 : (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4564 :
4565 0 : else if (nb_vars_a == 1 && nb_vars_b == 2)
4566 0 : compute_overlap_steps_for_affine_1_2
4567 0 : (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4568 :
4569 : else
4570 : {
4571 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4572 0 : fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4573 0 : *overlaps_a = conflict_fn_not_known ();
4574 0 : *overlaps_b = conflict_fn_not_known ();
4575 0 : *last_conflicts = chrec_dont_know;
4576 : }
4577 64 : goto end_analyze_subs_aa;
4578 : }
4579 :
4580 : /* U.A = S */
4581 1683328 : if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4582 : {
4583 0 : *overlaps_a = conflict_fn_not_known ();
4584 0 : *overlaps_b = conflict_fn_not_known ();
4585 0 : *last_conflicts = chrec_dont_know;
4586 0 : goto end_analyze_subs_aa;
4587 : }
4588 :
4589 1683328 : if (S[0][0] < 0)
4590 : {
4591 1670553 : S[0][0] *= -1;
4592 1670553 : lambda_matrix_row_negate (U, dim, 0);
4593 : }
4594 1683328 : gcd_alpha_beta = S[0][0];
4595 :
4596 : /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4597 : but that is a quite strange case. Instead of ICEing, answer
4598 : don't know. */
4599 1683328 : if (gcd_alpha_beta == 0)
4600 : {
4601 0 : *overlaps_a = conflict_fn_not_known ();
4602 0 : *overlaps_b = conflict_fn_not_known ();
4603 0 : *last_conflicts = chrec_dont_know;
4604 0 : goto end_analyze_subs_aa;
4605 : }
4606 :
4607 : /* The classic "gcd-test". */
4608 1683328 : if (!int_divides_p (gcd_alpha_beta, gamma))
4609 : {
4610 : /* The "gcd-test" has determined that there is no integer
4611 : solution, i.e. there is no dependence. */
4612 1566991 : *overlaps_a = conflict_fn_no_dependence ();
4613 1566991 : *overlaps_b = conflict_fn_no_dependence ();
4614 1566991 : *last_conflicts = integer_zero_node;
4615 : }
4616 :
4617 : /* Both access functions are univariate. This includes SIV and MIV cases. */
4618 116337 : else if (nb_vars_a == 1 && nb_vars_b == 1)
4619 : {
4620 : /* Both functions should have the same evolution sign. */
4621 116337 : if (((A[0][0] > 0 && -A[1][0] > 0)
4622 8640 : || (A[0][0] < 0 && -A[1][0] < 0)))
4623 : {
4624 : /* The solutions are given by:
4625 : |
4626 : | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
4627 : | [u21 u22] [y0]
4628 :
4629 : For a given integer t. Using the following variables,
4630 :
4631 : | i0 = u11 * gamma / gcd_alpha_beta
4632 : | j0 = u12 * gamma / gcd_alpha_beta
4633 : | i1 = u21
4634 : | j1 = u22
4635 :
4636 : the solutions are:
4637 :
4638 : | x0 = i0 + i1 * t,
4639 : | y0 = j0 + j1 * t. */
4640 115943 : HOST_WIDE_INT i0, j0, i1, j1;
4641 :
4642 115943 : i0 = U[0][0] * gamma / gcd_alpha_beta;
4643 115943 : j0 = U[0][1] * gamma / gcd_alpha_beta;
4644 115943 : i1 = U[1][0];
4645 115943 : j1 = U[1][1];
4646 :
4647 115943 : if ((i1 == 0 && i0 < 0)
4648 115943 : || (j1 == 0 && j0 < 0))
4649 : {
4650 : /* There is no solution.
4651 : FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4652 : falls in here, but for the moment we don't look at the
4653 : upper bound of the iteration domain. */
4654 0 : *overlaps_a = conflict_fn_no_dependence ();
4655 0 : *overlaps_b = conflict_fn_no_dependence ();
4656 0 : *last_conflicts = integer_zero_node;
4657 55343 : goto end_analyze_subs_aa;
4658 : }
4659 :
4660 115943 : if (i1 > 0 && j1 > 0)
4661 : {
4662 115943 : HOST_WIDE_INT niter_a
4663 115943 : = max_stmt_executions_int (get_chrec_loop (chrec_a));
4664 115943 : HOST_WIDE_INT niter_b
4665 115943 : = max_stmt_executions_int (get_chrec_loop (chrec_b));
4666 115943 : HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4667 :
4668 : /* (X0, Y0) is a solution of the Diophantine equation:
4669 : "chrec_a (X0) = chrec_b (Y0)". */
4670 115943 : HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4671 : CEIL (-j0, j1));
4672 115943 : HOST_WIDE_INT x0 = i1 * tau1 + i0;
4673 115943 : HOST_WIDE_INT y0 = j1 * tau1 + j0;
4674 :
4675 : /* (X1, Y1) is the smallest positive solution of the eq
4676 : "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4677 : first conflict occurs. */
4678 115943 : HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4679 115943 : HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4680 115943 : HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4681 :
4682 115943 : if (niter > 0)
4683 : {
4684 : /* If the overlap occurs outside of the bounds of the
4685 : loop, there is no dependence. */
4686 106514 : if (x1 >= niter_a || y1 >= niter_b)
4687 : {
4688 55343 : *overlaps_a = conflict_fn_no_dependence ();
4689 55343 : *overlaps_b = conflict_fn_no_dependence ();
4690 55343 : *last_conflicts = integer_zero_node;
4691 55343 : goto end_analyze_subs_aa;
4692 : }
4693 :
4694 : /* max stmt executions can get quite large, avoid
4695 : overflows by using wide ints here. */
4696 51171 : widest_int tau2
4697 102342 : = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4698 153513 : wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4699 51171 : widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4700 51171 : if (wi::min_precision (last_conflict, SIGNED)
4701 51171 : <= TYPE_PRECISION (integer_type_node))
4702 46172 : *last_conflicts
4703 46172 : = build_int_cst (integer_type_node,
4704 46172 : last_conflict.to_shwi ());
4705 : else
4706 4999 : *last_conflicts = chrec_dont_know;
4707 51171 : }
4708 : else
4709 9429 : *last_conflicts = chrec_dont_know;
4710 :
4711 60600 : *overlaps_a
4712 60600 : = conflict_fn (1,
4713 60600 : affine_fn_univar (build_int_cst (NULL_TREE, x1),
4714 : 1,
4715 60600 : build_int_cst (NULL_TREE, i1)));
4716 60600 : *overlaps_b
4717 60600 : = conflict_fn (1,
4718 60600 : affine_fn_univar (build_int_cst (NULL_TREE, y1),
4719 : 1,
4720 60600 : build_int_cst (NULL_TREE, j1)));
4721 60600 : }
4722 : else
4723 : {
4724 : /* FIXME: For the moment, the upper bound of the
4725 : iteration domain for i and j is not checked. */
4726 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4727 0 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4728 0 : *overlaps_a = conflict_fn_not_known ();
4729 0 : *overlaps_b = conflict_fn_not_known ();
4730 0 : *last_conflicts = chrec_dont_know;
4731 : }
4732 60600 : }
4733 : else
4734 : {
4735 394 : if (dump_file && (dump_flags & TDF_DETAILS))
4736 19 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4737 394 : *overlaps_a = conflict_fn_not_known ();
4738 394 : *overlaps_b = conflict_fn_not_known ();
4739 394 : *last_conflicts = chrec_dont_know;
4740 : }
4741 : }
4742 : else
4743 : {
4744 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4745 0 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4746 0 : *overlaps_a = conflict_fn_not_known ();
4747 0 : *overlaps_b = conflict_fn_not_known ();
4748 0 : *last_conflicts = chrec_dont_know;
4749 : }
4750 :
4751 1683404 : end_analyze_subs_aa:
4752 1683404 : obstack_free (&scratch_obstack, NULL);
4753 1683404 : if (dump_file && (dump_flags & TDF_DETAILS))
4754 : {
4755 20002 : fprintf (dump_file, " (overlaps_a = ");
4756 20002 : dump_conflict_function (dump_file, *overlaps_a);
4757 20002 : fprintf (dump_file, ")\n (overlaps_b = ");
4758 20002 : dump_conflict_function (dump_file, *overlaps_b);
4759 20002 : fprintf (dump_file, "))\n");
4760 : }
4761 : }
4762 :
4763 : /* Returns true when analyze_subscript_affine_affine can be used for
4764 : determining the dependence relation between chrec_a and chrec_b,
4765 : that contain symbols. This function modifies chrec_a and chrec_b
4766 : such that the analysis result is the same, and such that they don't
4767 : contain symbols, and then can safely be passed to the analyzer.
4768 :
4769 : Example: The analysis of the following tuples of evolutions produce
4770 : the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4771 : vs. {0, +, 1}_1
4772 :
4773 : {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4774 : {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4775 : */
4776 :
4777 : static bool
4778 44134 : can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4779 : {
4780 44134 : tree diff, type, left_a, left_b, right_b;
4781 :
4782 44134 : if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4783 44134 : || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4784 : /* FIXME: For the moment not handled. Might be refined later. */
4785 14963 : return false;
4786 :
4787 29171 : type = chrec_type (*chrec_a);
4788 29171 : left_a = CHREC_LEFT (*chrec_a);
4789 29171 : left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4790 29171 : diff = chrec_fold_minus (type, left_a, left_b);
4791 :
4792 58342 : if (!evolution_function_is_constant_p (diff))
4793 5371 : return false;
4794 :
4795 23800 : if (dump_file && (dump_flags & TDF_DETAILS))
4796 105 : fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4797 :
4798 23800 : *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4799 23800 : diff, CHREC_RIGHT (*chrec_a));
4800 23800 : right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4801 23800 : *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4802 : build_int_cst (type, 0),
4803 : right_b);
4804 23800 : return true;
4805 : }
4806 :
4807 : /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4808 : *OVERLAPS_B are initialized to the functions that describe the
4809 : relation between the elements accessed twice by CHREC_A and
4810 : CHREC_B. For k >= 0, the following property is verified:
4811 :
4812 : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4813 :
4814 : static void
4815 1706579 : analyze_siv_subscript (tree chrec_a,
4816 : tree chrec_b,
4817 : conflict_function **overlaps_a,
4818 : conflict_function **overlaps_b,
4819 : tree *last_conflicts,
4820 : int loop_nest_num)
4821 : {
4822 1706579 : dependence_stats.num_siv++;
4823 :
4824 1706579 : if (dump_file && (dump_flags & TDF_DETAILS))
4825 23133 : fprintf (dump_file, "(analyze_siv_subscript \n");
4826 :
4827 1706579 : if (evolution_function_is_constant_p (chrec_a)
4828 1706579 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4829 1656 : analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4830 : overlaps_a, overlaps_b, last_conflicts);
4831 :
4832 1704923 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4833 3409846 : && evolution_function_is_constant_p (chrec_b))
4834 1265 : analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4835 : overlaps_b, overlaps_a, last_conflicts);
4836 :
4837 1703658 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4838 1703658 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4839 : {
4840 1703658 : if (!chrec_contains_symbols (chrec_a)
4841 1703658 : && !chrec_contains_symbols (chrec_b))
4842 : {
4843 1659524 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4844 : overlaps_a, overlaps_b,
4845 : last_conflicts);
4846 :
4847 1659524 : if (CF_NOT_KNOWN_P (*overlaps_a)
4848 1659138 : || CF_NOT_KNOWN_P (*overlaps_b))
4849 386 : dependence_stats.num_siv_unimplemented++;
4850 1659138 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4851 59630 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4852 1599508 : dependence_stats.num_siv_independent++;
4853 : else
4854 59630 : dependence_stats.num_siv_dependent++;
4855 : }
4856 44134 : else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4857 : &chrec_b))
4858 : {
4859 23800 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4860 : overlaps_a, overlaps_b,
4861 : last_conflicts);
4862 :
4863 23800 : if (CF_NOT_KNOWN_P (*overlaps_a)
4864 23784 : || CF_NOT_KNOWN_P (*overlaps_b))
4865 16 : dependence_stats.num_siv_unimplemented++;
4866 23784 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4867 972 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4868 22812 : dependence_stats.num_siv_independent++;
4869 : else
4870 972 : dependence_stats.num_siv_dependent++;
4871 : }
4872 : else
4873 20334 : goto siv_subscript_dontknow;
4874 : }
4875 :
4876 : else
4877 : {
4878 20334 : siv_subscript_dontknow:;
4879 20334 : if (dump_file && (dump_flags & TDF_DETAILS))
4880 2946 : fprintf (dump_file, " siv test failed: unimplemented");
4881 20334 : *overlaps_a = conflict_fn_not_known ();
4882 20334 : *overlaps_b = conflict_fn_not_known ();
4883 20334 : *last_conflicts = chrec_dont_know;
4884 20334 : dependence_stats.num_siv_unimplemented++;
4885 : }
4886 :
4887 1706579 : if (dump_file && (dump_flags & TDF_DETAILS))
4888 23133 : fprintf (dump_file, ")\n");
4889 1706579 : }
4890 :
4891 : /* Returns false if we can prove that the greatest common divisor of the steps
4892 : of CHREC does not divide CST, false otherwise. */
4893 :
4894 : static bool
4895 20662 : gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4896 : {
4897 20662 : HOST_WIDE_INT cd = 0, val;
4898 20662 : tree step;
4899 :
4900 20662 : if (!tree_fits_shwi_p (cst))
4901 : return true;
4902 20662 : val = tree_to_shwi (cst);
4903 :
4904 61838 : while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4905 : {
4906 41322 : step = CHREC_RIGHT (chrec);
4907 41322 : if (!tree_fits_shwi_p (step))
4908 : return true;
4909 41176 : cd = gcd (cd, tree_to_shwi (step));
4910 41176 : chrec = CHREC_LEFT (chrec);
4911 : }
4912 :
4913 20516 : return val % cd == 0;
4914 : }
4915 :
4916 : /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4917 : LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4918 : functions that describe the relation between the elements accessed
4919 : twice by CHREC_A and CHREC_B. For k >= 0, the following property
4920 : is verified:
4921 :
4922 : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4923 :
4924 : static void
4925 26440 : analyze_miv_subscript (tree chrec_a,
4926 : tree chrec_b,
4927 : conflict_function **overlaps_a,
4928 : conflict_function **overlaps_b,
4929 : tree *last_conflicts,
4930 : class loop *loop_nest)
4931 : {
4932 26440 : tree type, difference;
4933 :
4934 26440 : dependence_stats.num_miv++;
4935 26440 : if (dump_file && (dump_flags & TDF_DETAILS))
4936 27 : fprintf (dump_file, "(analyze_miv_subscript \n");
4937 :
4938 26440 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4939 26440 : chrec_a = chrec_convert (type, chrec_a, NULL);
4940 26440 : chrec_b = chrec_convert (type, chrec_b, NULL);
4941 26440 : difference = chrec_fold_minus (type, chrec_a, chrec_b);
4942 :
4943 26440 : if (eq_evolutions_p (chrec_a, chrec_b))
4944 : {
4945 : /* Access functions are the same: all the elements are accessed
4946 : in the same order. */
4947 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4948 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4949 0 : *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4950 0 : dependence_stats.num_miv_dependent++;
4951 : }
4952 :
4953 26440 : else if (evolution_function_is_constant_p (difference)
4954 20692 : && evolution_function_is_affine_multivariate_p (chrec_a,
4955 : loop_nest->num)
4956 47102 : && !gcd_of_steps_may_divide_p (chrec_a, difference))
4957 : {
4958 : /* testsuite/.../ssa-chrec-33.c
4959 : {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4960 :
4961 : The difference is 1, and all the evolution steps are multiples
4962 : of 2, consequently there are no overlapping elements. */
4963 19670 : *overlaps_a = conflict_fn_no_dependence ();
4964 19670 : *overlaps_b = conflict_fn_no_dependence ();
4965 19670 : *last_conflicts = integer_zero_node;
4966 19670 : dependence_stats.num_miv_independent++;
4967 : }
4968 :
4969 6770 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4970 122 : && !chrec_contains_symbols (chrec_a, loop_nest)
4971 110 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4972 6850 : && !chrec_contains_symbols (chrec_b, loop_nest))
4973 : {
4974 : /* testsuite/.../ssa-chrec-35.c
4975 : {0, +, 1}_2 vs. {0, +, 1}_3
4976 : the overlapping elements are respectively located at iterations:
4977 : {0, +, 1}_x and {0, +, 1}_x,
4978 : in other words, we have the equality:
4979 : {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4980 :
4981 : Other examples:
4982 : {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4983 : {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4984 :
4985 : {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4986 : {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4987 : */
4988 80 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4989 : overlaps_a, overlaps_b, last_conflicts);
4990 :
4991 80 : if (CF_NOT_KNOWN_P (*overlaps_a)
4992 76 : || CF_NOT_KNOWN_P (*overlaps_b))
4993 4 : dependence_stats.num_miv_unimplemented++;
4994 76 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4995 62 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4996 14 : dependence_stats.num_miv_independent++;
4997 : else
4998 62 : dependence_stats.num_miv_dependent++;
4999 : }
5000 :
5001 : else
5002 : {
5003 : /* When the analysis is too difficult, answer "don't know". */
5004 6690 : if (dump_file && (dump_flags & TDF_DETAILS))
5005 23 : fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
5006 :
5007 6690 : *overlaps_a = conflict_fn_not_known ();
5008 6690 : *overlaps_b = conflict_fn_not_known ();
5009 6690 : *last_conflicts = chrec_dont_know;
5010 6690 : dependence_stats.num_miv_unimplemented++;
5011 : }
5012 :
5013 26440 : if (dump_file && (dump_flags & TDF_DETAILS))
5014 27 : fprintf (dump_file, ")\n");
5015 26440 : }
5016 :
5017 : /* Determines the iterations for which CHREC_A is equal to CHREC_B in
5018 : with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
5019 : OVERLAP_ITERATIONS_B are initialized with two functions that
5020 : describe the iterations that contain conflicting elements.
5021 :
5022 : Remark: For an integer k >= 0, the following equality is true:
5023 :
5024 : CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
5025 : */
5026 :
5027 : static void
5028 3410454 : analyze_overlapping_iterations (tree chrec_a,
5029 : tree chrec_b,
5030 : conflict_function **overlap_iterations_a,
5031 : conflict_function **overlap_iterations_b,
5032 : tree *last_conflicts, class loop *loop_nest)
5033 : {
5034 3410454 : unsigned int lnn = loop_nest->num;
5035 :
5036 3410454 : dependence_stats.num_subscript_tests++;
5037 :
5038 3410454 : if (dump_file && (dump_flags & TDF_DETAILS))
5039 : {
5040 59211 : fprintf (dump_file, "(analyze_overlapping_iterations \n");
5041 59211 : fprintf (dump_file, " (chrec_a = ");
5042 59211 : print_generic_expr (dump_file, chrec_a);
5043 59211 : fprintf (dump_file, ")\n (chrec_b = ");
5044 59211 : print_generic_expr (dump_file, chrec_b);
5045 59211 : fprintf (dump_file, ")\n");
5046 : }
5047 :
5048 3410454 : if (chrec_a == NULL_TREE
5049 3410454 : || chrec_b == NULL_TREE
5050 3410454 : || chrec_contains_undetermined (chrec_a)
5051 6820908 : || chrec_contains_undetermined (chrec_b))
5052 : {
5053 0 : dependence_stats.num_subscript_undetermined++;
5054 :
5055 0 : *overlap_iterations_a = conflict_fn_not_known ();
5056 0 : *overlap_iterations_b = conflict_fn_not_known ();
5057 : }
5058 :
5059 : /* If they are the same chrec, and are affine, they overlap
5060 : on every iteration. */
5061 3410454 : else if (eq_evolutions_p (chrec_a, chrec_b)
5062 3410454 : && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5063 481924 : || operand_equal_p (chrec_a, chrec_b, 0)))
5064 : {
5065 1175780 : dependence_stats.num_same_subscript_function++;
5066 1175780 : *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
5067 1175780 : *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
5068 1175780 : *last_conflicts = chrec_dont_know;
5069 : }
5070 :
5071 : /* If they aren't the same, and aren't affine, we can't do anything
5072 : yet. */
5073 2234674 : else if ((chrec_contains_symbols (chrec_a)
5074 2182831 : || chrec_contains_symbols (chrec_b))
5075 2235536 : && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5076 50483 : || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5077 : {
5078 2517 : dependence_stats.num_subscript_undetermined++;
5079 2517 : *overlap_iterations_a = conflict_fn_not_known ();
5080 2517 : *overlap_iterations_b = conflict_fn_not_known ();
5081 : }
5082 :
5083 2232157 : else if (ziv_subscript_p (chrec_a, chrec_b))
5084 499138 : analyze_ziv_subscript (chrec_a, chrec_b,
5085 : overlap_iterations_a, overlap_iterations_b,
5086 : last_conflicts);
5087 :
5088 1733019 : else if (siv_subscript_p (chrec_a, chrec_b))
5089 1706579 : analyze_siv_subscript (chrec_a, chrec_b,
5090 : overlap_iterations_a, overlap_iterations_b,
5091 : last_conflicts, lnn);
5092 :
5093 : else
5094 26440 : analyze_miv_subscript (chrec_a, chrec_b,
5095 : overlap_iterations_a, overlap_iterations_b,
5096 : last_conflicts, loop_nest);
5097 :
5098 3410454 : if (dump_file && (dump_flags & TDF_DETAILS))
5099 : {
5100 59211 : fprintf (dump_file, " (overlap_iterations_a = ");
5101 59211 : dump_conflict_function (dump_file, *overlap_iterations_a);
5102 59211 : fprintf (dump_file, ")\n (overlap_iterations_b = ");
5103 59211 : dump_conflict_function (dump_file, *overlap_iterations_b);
5104 59211 : fprintf (dump_file, "))\n");
5105 : }
5106 3410454 : }
5107 :
5108 : /* Helper function for uniquely inserting distance vectors. */
5109 :
5110 : static void
5111 1076237 : save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5112 : {
5113 1609191 : for (lambda_vector v : DDR_DIST_VECTS (ddr))
5114 534321 : if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5115 : return;
5116 :
5117 1075974 : DDR_DIST_VECTS (ddr).safe_push (dist_v);
5118 : }
5119 :
5120 : /* Helper function for uniquely inserting direction vectors. */
5121 :
5122 : static void
5123 1075974 : save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5124 : {
5125 1608139 : for (lambda_vector v : DDR_DIR_VECTS (ddr))
5126 532743 : if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5127 : return;
5128 :
5129 1075974 : DDR_DIR_VECTS (ddr).safe_push (dir_v);
5130 : }
5131 :
5132 : /* Add a distance of 1 on all the loops outer than INDEX. If we
5133 : haven't yet determined a distance for this outer loop, push a new
5134 : distance vector composed of the previous distance, and a distance
5135 : of 1 for this outer loop. Example:
5136 :
5137 : | loop_1
5138 : | loop_2
5139 : | A[10]
5140 : | endloop_2
5141 : | endloop_1
5142 :
5143 : Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5144 : save (0, 1), then we have to save (1, 0). */
5145 :
5146 : static void
5147 16668 : add_outer_distances (struct data_dependence_relation *ddr,
5148 : lambda_vector dist_v, int index)
5149 : {
5150 : /* For each outer loop where init_v is not set, the accesses are
5151 : in dependence of distance 1 in the loop. */
5152 19852 : while (--index >= 0)
5153 : {
5154 6368 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5155 3184 : lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5156 3184 : save_v[index] = 1;
5157 3184 : save_dist_v (ddr, save_v);
5158 : }
5159 16668 : }
5160 :
5161 : /* Return false when fail to represent the data dependence as a
5162 : distance vector. A_INDEX is the index of the first reference
5163 : (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5164 : second reference. INIT_B is set to true when a component has been
5165 : added to the distance vector DIST_V. INDEX_CARRY is then set to
5166 : the index in DIST_V that carries the dependence. */
5167 :
5168 : static bool
5169 62072 : build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5170 : unsigned int a_index, unsigned int b_index,
5171 : lambda_vector dist_v, bool *init_b,
5172 : int *index_carry)
5173 : {
5174 62072 : unsigned i;
5175 124144 : lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5176 62072 : class loop *loop = DDR_LOOP_NEST (ddr)[0];
5177 :
5178 140257 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5179 : {
5180 80241 : tree access_fn_a, access_fn_b;
5181 80241 : struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5182 :
5183 80241 : if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5184 : {
5185 309 : non_affine_dependence_relation (ddr);
5186 309 : return false;
5187 : }
5188 :
5189 79932 : access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5190 79932 : access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5191 :
5192 79932 : if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5193 60820 : && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5194 : {
5195 60194 : HOST_WIDE_INT dist;
5196 60194 : int index;
5197 60194 : int var_a = CHREC_VARIABLE (access_fn_a);
5198 60194 : int var_b = CHREC_VARIABLE (access_fn_b);
5199 :
5200 60194 : if (var_a != var_b
5201 60194 : || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5202 : {
5203 34 : non_affine_dependence_relation (ddr);
5204 34 : return false;
5205 : }
5206 :
5207 : /* When data references are collected in a loop while data
5208 : dependences are analyzed in loop nest nested in the loop, we
5209 : would have more number of access functions than number of
5210 : loops. Skip access functions of loops not in the loop nest.
5211 :
5212 : See PR89725 for more information. */
5213 60160 : if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5214 2 : continue;
5215 :
5216 60158 : dist = int_cst_value (SUB_DISTANCE (subscript));
5217 60158 : index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5218 60158 : *index_carry = MIN (index, *index_carry);
5219 :
5220 : /* This is the subscript coupling test. If we have already
5221 : recorded a distance for this loop (a distance coming from
5222 : another subscript), it should be the same. For example,
5223 : in the following code, there is no dependence:
5224 :
5225 : | loop i = 0, N, 1
5226 : | T[i+1][i] = ...
5227 : | ... = T[i][i]
5228 : | endloop
5229 : */
5230 60158 : if (init_v[index] != 0 && dist_v[index] != dist)
5231 : {
5232 0 : finalize_ddr_dependent (ddr, chrec_known);
5233 0 : return false;
5234 : }
5235 :
5236 60158 : dist_v[index] = dist;
5237 60158 : init_v[index] = 1;
5238 60158 : *init_b = true;
5239 60158 : }
5240 19738 : else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5241 : {
5242 : /* This can be for example an affine vs. constant dependence
5243 : (T[i] vs. T[3]) that is not an affine dependence and is
5244 : not representable as a distance vector. */
5245 1713 : non_affine_dependence_relation (ddr);
5246 1713 : return false;
5247 : }
5248 : }
5249 :
5250 : return true;
5251 : }
5252 :
5253 : /* Return true when the DDR contains only invariant access functions wrto. loop
5254 : number LNUM. */
5255 :
5256 : static bool
5257 847122 : invariant_access_functions (const struct data_dependence_relation *ddr,
5258 : int lnum)
5259 : {
5260 2857706 : for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5261 991468 : if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5262 991468 : || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5263 675128 : return false;
5264 :
5265 : return true;
5266 : }
5267 :
5268 : /* Helper function for the case where DDR_A and DDR_B are the same
5269 : multivariate access function with a constant step. For an example
5270 : see pr34635-1.c. */
5271 :
5272 : static void
5273 4528 : add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5274 : {
5275 4528 : int x_1, x_2;
5276 4528 : tree c_1 = CHREC_LEFT (c_2);
5277 4528 : tree c_0 = CHREC_LEFT (c_1);
5278 4528 : lambda_vector dist_v;
5279 4528 : HOST_WIDE_INT v1, v2, cd;
5280 :
5281 : /* Polynomials with more than 2 variables are not handled yet. When
5282 : the evolution steps are parameters, it is not possible to
5283 : represent the dependence using classical distance vectors. */
5284 4528 : if (TREE_CODE (c_0) != INTEGER_CST
5285 3012 : || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5286 6913 : || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5287 : {
5288 2151 : DDR_AFFINE_P (ddr) = false;
5289 2151 : return;
5290 : }
5291 :
5292 2377 : x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5293 2377 : x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5294 :
5295 : /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
5296 4754 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5297 2377 : v1 = int_cst_value (CHREC_RIGHT (c_1));
5298 2377 : v2 = int_cst_value (CHREC_RIGHT (c_2));
5299 2377 : cd = gcd (v1, v2);
5300 2377 : v1 /= cd;
5301 2377 : v2 /= cd;
5302 :
5303 2377 : if (v2 < 0)
5304 : {
5305 2 : v2 = -v2;
5306 2 : v1 = -v1;
5307 : }
5308 :
5309 2377 : dist_v[x_1] = v2;
5310 2377 : dist_v[x_2] = -v1;
5311 2377 : save_dist_v (ddr, dist_v);
5312 :
5313 2377 : add_outer_distances (ddr, dist_v, x_1);
5314 : }
5315 :
5316 : /* Helper function for the case where DDR_A and DDR_B are the same
5317 : access functions. */
5318 :
5319 : static void
5320 18958 : add_other_self_distances (struct data_dependence_relation *ddr)
5321 : {
5322 18958 : lambda_vector dist_v;
5323 18958 : unsigned i;
5324 18958 : int index_carry = DDR_NB_LOOPS (ddr);
5325 18958 : subscript *sub;
5326 18958 : class loop *loop = DDR_LOOP_NEST (ddr)[0];
5327 :
5328 40334 : FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5329 : {
5330 26405 : tree access_fun = SUB_ACCESS_FN (sub, 0);
5331 :
5332 26405 : if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5333 : {
5334 19083 : if (!evolution_function_is_univariate_p (access_fun, loop->num))
5335 : {
5336 5029 : if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5337 : {
5338 501 : DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5339 501 : return;
5340 : }
5341 :
5342 4528 : access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5343 :
5344 4528 : if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5345 4528 : add_multivariate_self_dist (ddr, access_fun);
5346 : else
5347 : /* The evolution step is not constant: it varies in
5348 : the outer loop, so this cannot be represented by a
5349 : distance vector. For example in pr34635.c the
5350 : evolution is {0, +, {0, +, 4}_1}_2. */
5351 0 : DDR_AFFINE_P (ddr) = false;
5352 :
5353 4528 : return;
5354 : }
5355 :
5356 : /* When data references are collected in a loop while data
5357 : dependences are analyzed in loop nest nested in the loop, we
5358 : would have more number of access functions than number of
5359 : loops. Skip access functions of loops not in the loop nest.
5360 :
5361 : See PR89725 for more information. */
5362 14054 : if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5363 : loop))
5364 0 : continue;
5365 :
5366 21533 : index_carry = MIN (index_carry,
5367 : index_in_loop_nest (CHREC_VARIABLE (access_fun),
5368 : DDR_LOOP_NEST (ddr)));
5369 : }
5370 : }
5371 :
5372 27858 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5373 13929 : add_outer_distances (ddr, dist_v, index_carry);
5374 : }
5375 :
5376 : static void
5377 171994 : insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5378 : {
5379 343988 : lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5380 :
5381 171994 : dist_v[0] = 1;
5382 171994 : save_dist_v (ddr, dist_v);
5383 171994 : }
5384 :
5385 : /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5386 : is the case for example when access functions are the same and
5387 : equal to a constant, as in:
5388 :
5389 : | loop_1
5390 : | A[3] = ...
5391 : | ... = A[3]
5392 : | endloop_1
5393 :
5394 : in which case the distance vectors are (0) and (1). */
5395 :
5396 : static void
5397 171994 : add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5398 : {
5399 171994 : unsigned i, j;
5400 :
5401 171994 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5402 : {
5403 171994 : subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5404 171994 : conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5405 171994 : conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5406 :
5407 171994 : for (j = 0; j < ca->n; j++)
5408 171994 : if (affine_function_zero_p (ca->fns[j]))
5409 : {
5410 171994 : insert_innermost_unit_dist_vector (ddr);
5411 171994 : return;
5412 : }
5413 :
5414 0 : for (j = 0; j < cb->n; j++)
5415 0 : if (affine_function_zero_p (cb->fns[j]))
5416 : {
5417 0 : insert_innermost_unit_dist_vector (ddr);
5418 0 : return;
5419 : }
5420 : }
5421 : }
5422 :
5423 : /* Return true when the DDR contains two data references that have the
5424 : same access functions. */
5425 :
5426 : static inline bool
5427 900742 : same_access_functions (const struct data_dependence_relation *ddr)
5428 : {
5429 3769507 : for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5430 1120901 : if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5431 1120901 : SUB_ACCESS_FN (sub, 1)))
5432 : return false;
5433 :
5434 : return true;
5435 : }
5436 :
5437 : /* Compute the classic per loop distance vector. DDR is the data
5438 : dependence relation to build a vector from. Return false when fail
5439 : to represent the data dependence as a distance vector. */
5440 :
5441 : static bool
5442 3071391 : build_classic_dist_vector (struct data_dependence_relation *ddr,
5443 : class loop *loop_nest)
5444 : {
5445 3071391 : bool init_b = false;
5446 3071391 : int index_carry = DDR_NB_LOOPS (ddr);
5447 3071391 : lambda_vector dist_v;
5448 :
5449 3071391 : if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5450 : return false;
5451 :
5452 900742 : if (same_access_functions (ddr))
5453 : {
5454 : /* Save the 0 vector. */
5455 1694244 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5456 847122 : save_dist_v (ddr, dist_v);
5457 :
5458 847122 : if (invariant_access_functions (ddr, loop_nest->num))
5459 171994 : add_distance_for_zero_overlaps (ddr);
5460 :
5461 847122 : if (DDR_NB_LOOPS (ddr) > 1)
5462 18958 : add_other_self_distances (ddr);
5463 :
5464 847122 : return true;
5465 : }
5466 :
5467 107240 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5468 53620 : if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5469 : return false;
5470 :
5471 : /* Save the distance vector if we initialized one. */
5472 51564 : if (init_b)
5473 : {
5474 : /* Verify a basic constraint: classic distance vectors should
5475 : always be lexicographically positive.
5476 :
5477 : Data references are collected in the order of execution of
5478 : the program, thus for the following loop
5479 :
5480 : | for (i = 1; i < 100; i++)
5481 : | for (j = 1; j < 100; j++)
5482 : | {
5483 : | t = T[j+1][i-1]; // A
5484 : | T[j][i] = t + 2; // B
5485 : | }
5486 :
5487 : references are collected following the direction of the wind:
5488 : A then B. The data dependence tests are performed also
5489 : following this order, such that we're looking at the distance
5490 : separating the elements accessed by A from the elements later
5491 : accessed by B. But in this example, the distance returned by
5492 : test_dep (A, B) is lexicographically negative (-1, 1), that
5493 : means that the access A occurs later than B with respect to
5494 : the outer loop, ie. we're actually looking upwind. In this
5495 : case we solve test_dep (B, A) looking downwind to the
5496 : lexicographically positive solution, that returns the
5497 : distance vector (1, -1). */
5498 103128 : if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5499 : {
5500 8347 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5501 8347 : if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5502 : return false;
5503 8343 : compute_subscript_distance (ddr);
5504 8343 : if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5505 : &index_carry))
5506 : return false;
5507 8343 : save_dist_v (ddr, save_v);
5508 8343 : DDR_REVERSED_P (ddr) = true;
5509 :
5510 : /* In this case there is a dependence forward for all the
5511 : outer loops:
5512 :
5513 : | for (k = 1; k < 100; k++)
5514 : | for (i = 1; i < 100; i++)
5515 : | for (j = 1; j < 100; j++)
5516 : | {
5517 : | t = T[j+1][i-1]; // A
5518 : | T[j][i] = t + 2; // B
5519 : | }
5520 :
5521 : the vectors are:
5522 : (0, 1, -1)
5523 : (1, 1, -1)
5524 : (1, -1, 1)
5525 : */
5526 8343 : if (DDR_NB_LOOPS (ddr) > 1)
5527 : {
5528 72 : add_outer_distances (ddr, save_v, index_carry);
5529 72 : add_outer_distances (ddr, dist_v, index_carry);
5530 : }
5531 : }
5532 : else
5533 : {
5534 43217 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5535 43217 : lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5536 :
5537 43217 : if (DDR_NB_LOOPS (ddr) > 1)
5538 : {
5539 109 : lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5540 :
5541 109 : if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5542 : return false;
5543 109 : compute_subscript_distance (ddr);
5544 109 : if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5545 : &index_carry))
5546 : return false;
5547 :
5548 109 : save_dist_v (ddr, save_v);
5549 109 : add_outer_distances (ddr, dist_v, index_carry);
5550 109 : add_outer_distances (ddr, opposite_v, index_carry);
5551 : }
5552 : else
5553 43108 : save_dist_v (ddr, save_v);
5554 : }
5555 : }
5556 : else
5557 : {
5558 : /* There is a distance of 1 on all the outer loops: Example:
5559 : there is a dependence of distance 1 on loop_1 for the array A.
5560 :
5561 : | loop_1
5562 : | A[5] = ...
5563 : | endloop
5564 : */
5565 0 : add_outer_distances (ddr, dist_v,
5566 : lambda_vector_first_nz (dist_v,
5567 0 : DDR_NB_LOOPS (ddr), 0));
5568 : }
5569 :
5570 : return true;
5571 : }
5572 :
5573 : /* Return the direction for a given distance.
5574 : FIXME: Computing dir this way is suboptimal, since dir can catch
5575 : cases that dist is unable to represent. */
5576 :
5577 : static inline enum data_dependence_direction
5578 1100772 : dir_from_dist (int dist)
5579 : {
5580 1100772 : if (dist > 0)
5581 : return dir_positive;
5582 871887 : else if (dist < 0)
5583 : return dir_negative;
5584 : else
5585 869478 : return dir_equal;
5586 : }
5587 :
5588 : /* Compute the classic per loop direction vector. DDR is the data
5589 : dependence relation to build a vector from. */
5590 :
5591 : static void
5592 898682 : build_classic_dir_vector (struct data_dependence_relation *ddr)
5593 : {
5594 898682 : unsigned i, j;
5595 898682 : lambda_vector dist_v;
5596 :
5597 1974656 : FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5598 : {
5599 2151948 : lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5600 :
5601 3252720 : for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5602 1972659 : dir_v[j] = dir_from_dist (dist_v[j]);
5603 :
5604 1075974 : save_dir_v (ddr, dir_v);
5605 : }
5606 898682 : }
5607 :
5608 : /* Helper function. Returns true when there is a dependence between the
5609 : data references. A_INDEX is the index of the first reference (0 for
5610 : DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
5611 :
5612 : static bool
5613 3079847 : subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5614 : unsigned int a_index, unsigned int b_index,
5615 : class loop *loop_nest)
5616 : {
5617 3079847 : unsigned int i;
5618 3079847 : tree last_conflicts;
5619 3079847 : struct subscript *subscript;
5620 3079847 : tree res = NULL_TREE;
5621 :
5622 4348514 : for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5623 : {
5624 3410454 : conflict_function *overlaps_a, *overlaps_b;
5625 :
5626 3410454 : analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5627 : SUB_ACCESS_FN (subscript, b_index),
5628 : &overlaps_a, &overlaps_b,
5629 : &last_conflicts, loop_nest);
5630 :
5631 3410454 : if (SUB_CONFLICTS_IN_A (subscript))
5632 3410454 : free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5633 3410454 : if (SUB_CONFLICTS_IN_B (subscript))
5634 3410454 : free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5635 :
5636 3410454 : SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5637 3410454 : SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5638 3410454 : SUB_LAST_CONFLICT (subscript) = last_conflicts;
5639 :
5640 : /* If there is any undetermined conflict function we have to
5641 : give a conservative answer in case we cannot prove that
5642 : no dependence exists when analyzing another subscript. */
5643 3410454 : if (CF_NOT_KNOWN_P (overlaps_a)
5644 3380503 : || CF_NOT_KNOWN_P (overlaps_b))
5645 : {
5646 29951 : res = chrec_dont_know;
5647 29951 : continue;
5648 : }
5649 :
5650 : /* When there is a subscript with no dependence we can stop. */
5651 3380503 : else if (CF_NO_DEPENDENCE_P (overlaps_a)
5652 1238716 : || CF_NO_DEPENDENCE_P (overlaps_b))
5653 : {
5654 2141787 : res = chrec_known;
5655 2141787 : break;
5656 : }
5657 : }
5658 :
5659 3079847 : if (res == NULL_TREE)
5660 : return true;
5661 :
5662 2170653 : if (res == chrec_known)
5663 2141787 : dependence_stats.num_dependence_independent++;
5664 : else
5665 28866 : dependence_stats.num_dependence_undetermined++;
5666 2170653 : finalize_ddr_dependent (ddr, res);
5667 2170653 : return false;
5668 : }
5669 :
5670 : /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
5671 :
5672 : static void
5673 3071391 : subscript_dependence_tester (struct data_dependence_relation *ddr,
5674 : class loop *loop_nest)
5675 : {
5676 3071391 : if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5677 900742 : dependence_stats.num_dependence_dependent++;
5678 :
5679 3071391 : compute_subscript_distance (ddr);
5680 3071391 : if (build_classic_dist_vector (ddr, loop_nest))
5681 : {
5682 898682 : if (dump_file && (dump_flags & TDF_DETAILS))
5683 : {
5684 3992 : unsigned i;
5685 :
5686 3992 : fprintf (dump_file, "(build_classic_dist_vector\n");
5687 12045 : for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5688 : {
5689 4061 : fprintf (dump_file, " dist_vector = (");
5690 4061 : print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5691 8122 : DDR_NB_LOOPS (ddr));
5692 4061 : fprintf (dump_file, " )\n");
5693 : }
5694 3992 : fprintf (dump_file, ")\n");
5695 : }
5696 :
5697 898682 : build_classic_dir_vector (ddr);
5698 : }
5699 3071391 : }
5700 :
5701 : /* Returns true when all the access functions of A are affine or
5702 : constant with respect to LOOP_NEST. */
5703 :
5704 : static bool
5705 6205536 : access_functions_are_affine_or_constant_p (const struct data_reference *a,
5706 : const class loop *loop_nest)
5707 : {
5708 6205536 : vec<tree> fns = DR_ACCESS_FNS (a);
5709 26954206 : for (tree t : fns)
5710 8397395 : if (!evolution_function_is_invariant_p (t, loop_nest->num)
5711 8397395 : && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5712 : return false;
5713 :
5714 : return true;
5715 : }
5716 :
5717 : /* This computes the affine dependence relation between A and B with
5718 : respect to LOOP_NEST. CHREC_KNOWN is used for representing the
5719 : independence between two accesses, while CHREC_DONT_KNOW is used
5720 : for representing the unknown relation.
5721 :
5722 : Note that it is possible to stop the computation of the dependence
5723 : relation the first time we detect a CHREC_KNOWN element for a given
5724 : subscript. */
5725 :
5726 : void
5727 6389808 : compute_affine_dependence (struct data_dependence_relation *ddr,
5728 : class loop *loop_nest)
5729 : {
5730 6389808 : struct data_reference *dra = DDR_A (ddr);
5731 6389808 : struct data_reference *drb = DDR_B (ddr);
5732 :
5733 6389808 : if (dump_file && (dump_flags & TDF_DETAILS))
5734 : {
5735 134356 : fprintf (dump_file, "(compute_affine_dependence\n");
5736 134356 : fprintf (dump_file, " ref_a: ");
5737 134356 : print_generic_expr (dump_file, DR_REF (dra));
5738 134356 : fprintf (dump_file, ", stmt_a: ");
5739 134356 : print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5740 134356 : fprintf (dump_file, " ref_b: ");
5741 134356 : print_generic_expr (dump_file, DR_REF (drb));
5742 134356 : fprintf (dump_file, ", stmt_b: ");
5743 134356 : print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5744 : }
5745 :
5746 : /* Analyze only when the dependence relation is not yet known. */
5747 6389808 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5748 : {
5749 3131188 : dependence_stats.num_dependence_tests++;
5750 :
5751 3131188 : if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5752 3131188 : && access_functions_are_affine_or_constant_p (drb, loop_nest))
5753 3071391 : subscript_dependence_tester (ddr, loop_nest);
5754 :
5755 : /* As a last case, if the dependence cannot be determined, or if
5756 : the dependence is considered too difficult to determine, answer
5757 : "don't know". */
5758 : else
5759 : {
5760 59797 : dependence_stats.num_dependence_undetermined++;
5761 :
5762 59797 : if (dump_file && (dump_flags & TDF_DETAILS))
5763 : {
5764 158 : fprintf (dump_file, "Data ref a:\n");
5765 158 : dump_data_reference (dump_file, dra);
5766 158 : fprintf (dump_file, "Data ref b:\n");
5767 158 : dump_data_reference (dump_file, drb);
5768 158 : fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5769 : }
5770 59797 : finalize_ddr_dependent (ddr, chrec_dont_know);
5771 : }
5772 : }
5773 :
5774 6389808 : if (dump_file && (dump_flags & TDF_DETAILS))
5775 : {
5776 134356 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5777 119054 : fprintf (dump_file, ") -> no dependence\n");
5778 15302 : else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5779 11220 : fprintf (dump_file, ") -> dependence analysis failed\n");
5780 : else
5781 4082 : fprintf (dump_file, ")\n");
5782 : }
5783 6389808 : }
5784 :
5785 : /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5786 : the data references in DATAREFS, in the LOOP_NEST. When
5787 : COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5788 : relations. Return true when successful, i.e. data references number
5789 : is small enough to be handled. */
5790 :
5791 : bool
5792 423843 : compute_all_dependences (const vec<data_reference_p> &datarefs,
5793 : vec<ddr_p> *dependence_relations,
5794 : const vec<loop_p> &loop_nest,
5795 : bool compute_self_and_rr)
5796 : {
5797 423843 : struct data_dependence_relation *ddr;
5798 423843 : struct data_reference *a, *b;
5799 423843 : unsigned int i, j;
5800 :
5801 423843 : if ((int) datarefs.length ()
5802 423843 : > param_loop_max_datarefs_for_datadeps)
5803 : {
5804 0 : struct data_dependence_relation *ddr;
5805 :
5806 : /* Insert a single relation into dependence_relations:
5807 : chrec_dont_know. */
5808 0 : ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5809 0 : dependence_relations->safe_push (ddr);
5810 0 : return false;
5811 : }
5812 :
5813 3151894 : FOR_EACH_VEC_ELT (datarefs, i, a)
5814 7575132 : for (j = i + 1; datarefs.iterate (j, &b); j++)
5815 4847081 : if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5816 : {
5817 4479099 : ddr = initialize_data_dependence_relation (a, b, loop_nest);
5818 4479099 : dependence_relations->safe_push (ddr);
5819 4479099 : if (loop_nest.exists ())
5820 4457104 : compute_affine_dependence (ddr, loop_nest[0]);
5821 : }
5822 :
5823 423843 : if (compute_self_and_rr)
5824 1010007 : FOR_EACH_VEC_ELT (datarefs, i, a)
5825 : {
5826 751433 : ddr = initialize_data_dependence_relation (a, a, loop_nest);
5827 751433 : dependence_relations->safe_push (ddr);
5828 751433 : if (loop_nest.exists ())
5829 751433 : compute_affine_dependence (ddr, loop_nest[0]);
5830 : }
5831 :
5832 : return true;
5833 : }
5834 :
5835 : /* Describes a location of a memory reference. */
5836 :
5837 : struct data_ref_loc
5838 : {
5839 : /* The memory reference. */
5840 : tree ref;
5841 :
5842 : /* True if the memory reference is read. */
5843 : bool is_read;
5844 :
5845 : /* True if the data reference is conditional within the containing
5846 : statement, i.e. if it might not occur even when the statement
5847 : is executed and runs to completion. */
5848 : bool is_conditional_in_stmt;
5849 : };
5850 :
5851 :
5852 : /* Stores the locations of memory references in STMT to REFERENCES. Returns
5853 : true if STMT clobbers memory, false otherwise. */
5854 :
5855 : static bool
5856 48919833 : get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5857 : {
5858 48919833 : bool clobbers_memory = false;
5859 48919833 : data_ref_loc ref;
5860 48919833 : tree op0, op1;
5861 48919833 : enum gimple_code stmt_code = gimple_code (stmt);
5862 :
5863 : /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5864 : As we cannot model data-references to not spelled out
5865 : accesses give up if they may occur. */
5866 48919833 : if (stmt_code == GIMPLE_CALL
5867 48919833 : && !(gimple_call_flags (stmt) & ECF_CONST))
5868 : {
5869 : /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
5870 4078457 : if (gimple_call_internal_p (stmt))
5871 57453 : switch (gimple_call_internal_fn (stmt))
5872 : {
5873 5613 : case IFN_GOMP_SIMD_LANE:
5874 5613 : {
5875 5613 : class loop *loop = gimple_bb (stmt)->loop_father;
5876 5613 : tree uid = gimple_call_arg (stmt, 0);
5877 5613 : gcc_assert (TREE_CODE (uid) == SSA_NAME);
5878 5613 : if (loop == NULL
5879 5613 : || loop->simduid != SSA_NAME_VAR (uid))
5880 : clobbers_memory = true;
5881 : break;
5882 : }
5883 : case IFN_MASK_LOAD:
5884 : case IFN_MASK_STORE:
5885 : break;
5886 999 : case IFN_MASK_CALL:
5887 999 : {
5888 999 : tree orig_fndecl
5889 999 : = gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
5890 999 : if (!orig_fndecl
5891 999 : || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
5892 : clobbers_memory = true;
5893 : }
5894 : break;
5895 : default:
5896 4120188 : clobbers_memory = true;
5897 : break;
5898 : }
5899 4021004 : else if (gimple_call_builtin_p (stmt, BUILT_IN_PREFETCH))
5900 : clobbers_memory = false;
5901 : else
5902 4120188 : clobbers_memory = true;
5903 : }
5904 44841376 : else if (stmt_code == GIMPLE_ASM
5905 44841376 : && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5906 8538 : || gimple_vuse (stmt)))
5907 : clobbers_memory = true;
5908 :
5909 101981369 : if (!gimple_vuse (stmt))
5910 : return clobbers_memory;
5911 :
5912 18917435 : if (stmt_code == GIMPLE_ASSIGN)
5913 : {
5914 13927877 : tree base;
5915 13927877 : op0 = gimple_assign_lhs (stmt);
5916 13927877 : op1 = gimple_assign_rhs1 (stmt);
5917 :
5918 13927877 : if (DECL_P (op1)
5919 13927877 : || (REFERENCE_CLASS_P (op1)
5920 6655735 : && (base = get_base_address (op1))
5921 6655735 : && TREE_CODE (base) != SSA_NAME
5922 6655667 : && !is_gimple_min_invariant (base)))
5923 : {
5924 7521357 : ref.ref = op1;
5925 7521357 : ref.is_read = true;
5926 7521357 : ref.is_conditional_in_stmt = false;
5927 7521357 : references->safe_push (ref);
5928 : }
5929 : }
5930 4989558 : else if (stmt_code == GIMPLE_CALL)
5931 : {
5932 4088809 : unsigned i = 0, n;
5933 4088809 : tree ptr, type;
5934 4088809 : unsigned int align;
5935 :
5936 4088809 : ref.is_read = false;
5937 4088809 : if (gimple_call_internal_p (stmt))
5938 66865 : switch (gimple_call_internal_fn (stmt))
5939 : {
5940 2366 : case IFN_MASK_LOAD:
5941 2366 : if (gimple_call_lhs (stmt) == NULL_TREE)
5942 : break;
5943 2366 : ref.is_read = true;
5944 : /* FALLTHRU */
5945 4320 : case IFN_MASK_STORE:
5946 4320 : ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5947 4320 : align = tree_to_shwi (gimple_call_arg (stmt, 1));
5948 4320 : if (ref.is_read)
5949 2366 : type = TREE_TYPE (gimple_call_lhs (stmt));
5950 : else
5951 1954 : type = TREE_TYPE (gimple_call_arg (stmt, 3));
5952 4320 : if (TYPE_ALIGN (type) != align)
5953 1656 : type = build_aligned_type (type, align);
5954 4320 : ref.is_conditional_in_stmt = true;
5955 4320 : ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5956 : ptr);
5957 4320 : references->safe_push (ref);
5958 4320 : return false;
5959 : case IFN_MASK_CALL:
5960 4084489 : i = 1;
5961 : gcc_fallthrough ();
5962 : default:
5963 : break;
5964 : }
5965 :
5966 4084489 : op0 = gimple_call_lhs (stmt);
5967 4084489 : n = gimple_call_num_args (stmt);
5968 16538479 : for (; i < n; i++)
5969 : {
5970 8369501 : op1 = gimple_call_arg (stmt, i);
5971 :
5972 8369501 : if (DECL_P (op1)
5973 8369501 : || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5974 : {
5975 496775 : ref.ref = op1;
5976 496775 : ref.is_read = true;
5977 496775 : ref.is_conditional_in_stmt = false;
5978 496775 : references->safe_push (ref);
5979 : }
5980 : }
5981 : }
5982 : else
5983 : return clobbers_memory;
5984 :
5985 18012366 : if (op0
5986 18012366 : && (DECL_P (op0)
5987 14629636 : || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5988 : {
5989 7234200 : ref.ref = op0;
5990 7234200 : ref.is_read = false;
5991 7234200 : ref.is_conditional_in_stmt = false;
5992 7234200 : references->safe_push (ref);
5993 : }
5994 : return clobbers_memory;
5995 : }
5996 :
5997 :
5998 : /* Returns true if the loop-nest has any data reference. */
5999 :
6000 : bool
6001 752 : loop_nest_has_data_refs (loop_p loop)
6002 : {
6003 752 : basic_block *bbs = get_loop_body (loop);
6004 752 : auto_vec<data_ref_loc, 3> references;
6005 :
6006 1001 : for (unsigned i = 0; i < loop->num_nodes; i++)
6007 : {
6008 931 : basic_block bb = bbs[i];
6009 931 : gimple_stmt_iterator bsi;
6010 :
6011 3217 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6012 : {
6013 2037 : gimple *stmt = gsi_stmt (bsi);
6014 2037 : get_references_in_stmt (stmt, &references);
6015 2037 : if (references.length ())
6016 : {
6017 682 : free (bbs);
6018 682 : return true;
6019 : }
6020 : }
6021 : }
6022 70 : free (bbs);
6023 70 : return false;
6024 752 : }
6025 :
6026 : /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
6027 : reference, returns false, otherwise returns true. NEST is the outermost
6028 : loop of the loop nest in which the references should be analyzed. */
6029 :
6030 : opt_result
6031 48903472 : find_data_references_in_stmt (class loop *nest, gimple *stmt,
6032 : vec<data_reference_p> *datarefs)
6033 : {
6034 48903472 : auto_vec<data_ref_loc, 2> references;
6035 48903472 : data_reference_p dr;
6036 :
6037 48903472 : if (get_references_in_stmt (stmt, &references))
6038 4120184 : return opt_result::failure_at (stmt, "statement clobbers memory: %G",
6039 : stmt);
6040 :
6041 148849239 : for (const data_ref_loc &ref : references)
6042 : {
6043 14499375 : dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
6044 14499375 : loop_containing_stmt (stmt), ref.ref,
6045 14499375 : stmt, ref.is_read, ref.is_conditional_in_stmt);
6046 14499375 : gcc_assert (dr != NULL);
6047 14499375 : datarefs->safe_push (dr);
6048 : }
6049 :
6050 44783288 : return opt_result::success ();
6051 48903472 : }
6052 :
6053 : /* Stores the data references in STMT to DATAREFS. If there is an
6054 : unanalyzable reference, returns false, otherwise returns true.
6055 : NEST is the outermost loop of the loop nest in which the references
6056 : should be instantiated, LOOP is the loop in which the references
6057 : should be analyzed. */
6058 :
6059 : bool
6060 14324 : graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
6061 : vec<data_reference_p> *datarefs)
6062 : {
6063 14324 : auto_vec<data_ref_loc, 2> references;
6064 14324 : bool ret = true;
6065 14324 : data_reference_p dr;
6066 :
6067 14324 : if (get_references_in_stmt (stmt, &references))
6068 : return false;
6069 :
6070 45878 : for (const data_ref_loc &ref : references)
6071 : {
6072 5836 : dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
6073 2918 : ref.is_conditional_in_stmt);
6074 2918 : gcc_assert (dr != NULL);
6075 2918 : datarefs->safe_push (dr);
6076 : }
6077 :
6078 : return ret;
6079 14324 : }
6080 :
6081 : /* Search the data references in LOOP, and record the information into
6082 : DATAREFS. Returns chrec_dont_know when failing to analyze a
6083 : difficult case, returns NULL_TREE otherwise. */
6084 :
6085 : tree
6086 2654465 : find_data_references_in_bb (class loop *loop, basic_block bb,
6087 : vec<data_reference_p> *datarefs)
6088 : {
6089 2654465 : gimple_stmt_iterator bsi;
6090 :
6091 22038131 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6092 : {
6093 17208921 : gimple *stmt = gsi_stmt (bsi);
6094 :
6095 17208921 : if (!find_data_references_in_stmt (loop, stmt, datarefs))
6096 : {
6097 479720 : struct data_reference *res;
6098 479720 : res = XCNEW (struct data_reference);
6099 479720 : datarefs->safe_push (res);
6100 :
6101 479720 : return chrec_dont_know;
6102 : }
6103 : }
6104 :
6105 : return NULL_TREE;
6106 : }
6107 :
6108 : /* Search the data references in LOOP, and record the information into
6109 : DATAREFS. Returns chrec_dont_know when failing to analyze a
6110 : difficult case, returns NULL_TREE otherwise.
6111 :
6112 : TODO: This function should be made smarter so that it can handle address
6113 : arithmetic as if they were array accesses, etc. */
6114 :
6115 : tree
6116 798058 : find_data_references_in_loop (class loop *loop,
6117 : vec<data_reference_p> *datarefs)
6118 : {
6119 798058 : basic_block bb, *bbs;
6120 798058 : unsigned int i;
6121 :
6122 798058 : bbs = get_loop_body_in_dom_order (loop);
6123 :
6124 3562753 : for (i = 0; i < loop->num_nodes; i++)
6125 : {
6126 2256614 : bb = bbs[i];
6127 :
6128 2256614 : if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6129 : {
6130 289977 : free (bbs);
6131 289977 : return chrec_dont_know;
6132 : }
6133 : }
6134 508081 : free (bbs);
6135 :
6136 508081 : return NULL_TREE;
6137 : }
6138 :
6139 : /* Return the alignment in bytes that DRB is guaranteed to have at all
6140 : times. */
6141 :
6142 : unsigned int
6143 476043 : dr_alignment (innermost_loop_behavior *drb)
6144 : {
6145 : /* Get the alignment of BASE_ADDRESS + INIT. */
6146 476043 : unsigned int alignment = drb->base_alignment;
6147 476043 : unsigned int misalignment = (drb->base_misalignment
6148 476043 : + TREE_INT_CST_LOW (drb->init));
6149 476043 : if (misalignment != 0)
6150 207748 : alignment = MIN (alignment, misalignment & -misalignment);
6151 :
6152 : /* Cap it to the alignment of OFFSET. */
6153 476043 : if (!integer_zerop (drb->offset))
6154 36180 : alignment = MIN (alignment, drb->offset_alignment);
6155 :
6156 : /* Cap it to the alignment of STEP. */
6157 476043 : if (!integer_zerop (drb->step))
6158 284956 : alignment = MIN (alignment, drb->step_alignment);
6159 :
6160 476043 : return alignment;
6161 : }
6162 :
6163 : /* If BASE is a pointer-typed SSA name, try to find the object that it
6164 : is based on. Return this object X on success and store the alignment
6165 : in bytes of BASE - &X in *ALIGNMENT_OUT. */
6166 :
6167 : static tree
6168 732320 : get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6169 : {
6170 732320 : if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6171 : return NULL_TREE;
6172 :
6173 360019 : gimple *def = SSA_NAME_DEF_STMT (base);
6174 360019 : base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6175 :
6176 : /* Peel chrecs and record the minimum alignment preserved by
6177 : all steps. */
6178 360019 : unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6179 730395 : while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6180 : {
6181 10357 : unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6182 10357 : alignment = MIN (alignment, step_alignment);
6183 10357 : base = CHREC_LEFT (base);
6184 : }
6185 :
6186 : /* Punt if the expression is too complicated to handle. */
6187 360019 : if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6188 : return NULL_TREE;
6189 :
6190 : /* The only useful cases are those for which a dereference folds to something
6191 : other than an INDIRECT_REF. */
6192 359977 : tree ref_type = TREE_TYPE (TREE_TYPE (base));
6193 359977 : tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6194 359977 : if (!ref)
6195 : return NULL_TREE;
6196 :
6197 : /* Analyze the base to which the steps we peeled were applied. */
6198 2422 : poly_int64 bitsize, bitpos, bytepos;
6199 2422 : machine_mode mode;
6200 2422 : int unsignedp, reversep, volatilep;
6201 2422 : tree offset;
6202 2422 : base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6203 : &unsignedp, &reversep, &volatilep);
6204 732320 : if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6205 : return NULL_TREE;
6206 :
6207 : /* Restrict the alignment to that guaranteed by the offsets. */
6208 2422 : unsigned int bytepos_alignment = known_alignment (bytepos);
6209 2422 : if (bytepos_alignment != 0)
6210 2269 : alignment = MIN (alignment, bytepos_alignment);
6211 2422 : if (offset)
6212 : {
6213 0 : unsigned int offset_alignment = highest_pow2_factor (offset);
6214 0 : alignment = MIN (alignment, offset_alignment);
6215 : }
6216 :
6217 2422 : *alignment_out = alignment;
6218 2422 : return base;
6219 : }
6220 :
6221 : /* Return the object whose alignment would need to be changed in order
6222 : to increase the alignment of ADDR. Store the maximum achievable
6223 : alignment in *MAX_ALIGNMENT. */
6224 :
6225 : tree
6226 732320 : get_base_for_alignment (tree addr, unsigned int *max_alignment)
6227 : {
6228 732320 : tree base = get_base_for_alignment_1 (addr, max_alignment);
6229 732320 : if (base)
6230 : return base;
6231 :
6232 729898 : if (TREE_CODE (addr) == ADDR_EXPR)
6233 274417 : addr = TREE_OPERAND (addr, 0);
6234 729898 : *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6235 729898 : return addr;
6236 : }
6237 :
6238 : /* Recursive helper function. */
6239 :
6240 : static bool
6241 139041 : find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6242 : {
6243 : /* Inner loops of the nest should not contain siblings. Example:
6244 : when there are two consecutive loops,
6245 :
6246 : | loop_0
6247 : | loop_1
6248 : | A[{0, +, 1}_1]
6249 : | endloop_1
6250 : | loop_2
6251 : | A[{0, +, 1}_2]
6252 : | endloop_2
6253 : | endloop_0
6254 :
6255 : the dependence relation cannot be captured by the distance
6256 : abstraction. */
6257 139041 : if (loop->next)
6258 : return false;
6259 :
6260 116737 : loop_nest->safe_push (loop);
6261 116737 : if (loop->inner)
6262 41662 : return find_loop_nest_1 (loop->inner, loop_nest);
6263 : return true;
6264 : }
6265 :
6266 : /* Return false when the LOOP is not well nested. Otherwise return
6267 : true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
6268 : contain the loops from the outermost to the innermost, as they will
6269 : appear in the classic distance vector. */
6270 :
6271 : bool
6272 1004240 : find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6273 : {
6274 1004240 : loop_nest->safe_push (loop);
6275 1004240 : if (loop->inner)
6276 97379 : return find_loop_nest_1 (loop->inner, loop_nest);
6277 : return true;
6278 : }
6279 :
6280 : /* Returns true when the data dependences have been computed, false otherwise.
6281 : Given a loop nest LOOP, the following vectors are returned:
6282 : DATAREFS is initialized to all the array elements contained in this loop,
6283 : DEPENDENCE_RELATIONS contains the relations between the data references.
6284 : Compute read-read and self relations if
6285 : COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
6286 :
6287 : bool
6288 401100 : compute_data_dependences_for_loop (class loop *loop,
6289 : bool compute_self_and_read_read_dependences,
6290 : vec<loop_p> *loop_nest,
6291 : vec<data_reference_p> *datarefs,
6292 : vec<ddr_p> *dependence_relations)
6293 : {
6294 401100 : bool res = true;
6295 :
6296 401100 : memset (&dependence_stats, 0, sizeof (dependence_stats));
6297 :
6298 : /* If the loop nest is not well formed, or one of the data references
6299 : is not computable, give up without spending time to compute other
6300 : dependences. */
6301 401100 : if (!loop
6302 401100 : || !find_loop_nest (loop, loop_nest)
6303 401098 : || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6304 659608 : || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6305 : compute_self_and_read_read_dependences))
6306 : res = false;
6307 :
6308 401100 : if (dump_file && (dump_flags & TDF_STATS))
6309 : {
6310 157 : fprintf (dump_file, "Dependence tester statistics:\n");
6311 :
6312 157 : fprintf (dump_file, "Number of dependence tests: %d\n",
6313 : dependence_stats.num_dependence_tests);
6314 157 : fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6315 : dependence_stats.num_dependence_dependent);
6316 157 : fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6317 : dependence_stats.num_dependence_independent);
6318 157 : fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6319 : dependence_stats.num_dependence_undetermined);
6320 :
6321 157 : fprintf (dump_file, "Number of subscript tests: %d\n",
6322 : dependence_stats.num_subscript_tests);
6323 157 : fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6324 : dependence_stats.num_subscript_undetermined);
6325 157 : fprintf (dump_file, "Number of same subscript function: %d\n",
6326 : dependence_stats.num_same_subscript_function);
6327 :
6328 157 : fprintf (dump_file, "Number of ziv tests: %d\n",
6329 : dependence_stats.num_ziv);
6330 157 : fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6331 : dependence_stats.num_ziv_dependent);
6332 157 : fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6333 : dependence_stats.num_ziv_independent);
6334 157 : fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6335 : dependence_stats.num_ziv_unimplemented);
6336 :
6337 157 : fprintf (dump_file, "Number of siv tests: %d\n",
6338 : dependence_stats.num_siv);
6339 157 : fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6340 : dependence_stats.num_siv_dependent);
6341 157 : fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6342 : dependence_stats.num_siv_independent);
6343 157 : fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6344 : dependence_stats.num_siv_unimplemented);
6345 :
6346 157 : fprintf (dump_file, "Number of miv tests: %d\n",
6347 : dependence_stats.num_miv);
6348 157 : fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6349 : dependence_stats.num_miv_dependent);
6350 157 : fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6351 : dependence_stats.num_miv_independent);
6352 157 : fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6353 : dependence_stats.num_miv_unimplemented);
6354 : }
6355 :
6356 401100 : return res;
6357 : }
6358 :
6359 : /* Free the memory used by a data dependence relation DDR. */
6360 :
6361 : void
6362 13299404 : free_dependence_relation (struct data_dependence_relation *ddr)
6363 : {
6364 13299404 : if (ddr == NULL)
6365 : return;
6366 :
6367 13299404 : if (DDR_SUBSCRIPTS (ddr).exists ())
6368 900738 : free_subscripts (DDR_SUBSCRIPTS (ddr));
6369 13299404 : DDR_DIST_VECTS (ddr).release ();
6370 13299404 : DDR_DIR_VECTS (ddr).release ();
6371 :
6372 13299404 : free (ddr);
6373 : }
6374 :
6375 : /* Free the memory used by the data dependence relations from
6376 : DEPENDENCE_RELATIONS. */
6377 :
6378 : void
6379 2814630 : free_dependence_relations (vec<ddr_p>& dependence_relations)
6380 : {
6381 9198067 : for (data_dependence_relation *ddr : dependence_relations)
6382 5233791 : if (ddr)
6383 5233791 : free_dependence_relation (ddr);
6384 :
6385 2814630 : dependence_relations.release ();
6386 2814630 : }
6387 :
6388 : /* Free the memory used by the data references from DATAREFS. */
6389 :
6390 : void
6391 3453558 : free_data_refs (vec<data_reference_p>& datarefs)
6392 : {
6393 20689919 : for (data_reference *dr : datarefs)
6394 12935993 : free_data_ref (dr);
6395 3453558 : datarefs.release ();
6396 3453558 : }
6397 :
6398 : /* Common routine implementing both dr_direction_indicator and
6399 : dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
6400 : to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6401 : Return the step as the indicator otherwise. */
6402 :
6403 : static tree
6404 66071 : dr_step_indicator (struct data_reference *dr, int useful_min)
6405 : {
6406 66071 : tree step = DR_STEP (dr);
6407 66071 : if (!step)
6408 : return NULL_TREE;
6409 66071 : STRIP_NOPS (step);
6410 : /* Look for cases where the step is scaled by a positive constant
6411 : integer, which will often be the access size. If the multiplication
6412 : doesn't change the sign (due to overflow effects) then we can
6413 : test the unscaled value instead. */
6414 66071 : if (TREE_CODE (step) == MULT_EXPR
6415 5520 : && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6416 71535 : && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6417 : {
6418 5464 : tree factor = TREE_OPERAND (step, 1);
6419 5464 : step = TREE_OPERAND (step, 0);
6420 :
6421 : /* Strip widening and truncating conversions as well as nops. */
6422 1214 : if (CONVERT_EXPR_P (step)
6423 5464 : && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6424 4250 : step = TREE_OPERAND (step, 0);
6425 5464 : tree type = TREE_TYPE (step);
6426 :
6427 : /* Get the range of step values that would not cause overflow. */
6428 10928 : widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6429 5464 : / wi::to_widest (factor));
6430 10928 : widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6431 5464 : / wi::to_widest (factor));
6432 :
6433 : /* Get the range of values that the unconverted step actually has. */
6434 5464 : wide_int step_min, step_max;
6435 5464 : int_range_max vr;
6436 5464 : if (TREE_CODE (step) != SSA_NAME
6437 10820 : || !get_range_query (cfun)->range_of_expr (vr, step)
6438 10874 : || vr.undefined_p ())
6439 : {
6440 54 : step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6441 54 : step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6442 : }
6443 : else
6444 : {
6445 5410 : step_min = vr.lower_bound ();
6446 5410 : step_max = vr.upper_bound ();
6447 : }
6448 :
6449 : /* Check whether the unconverted step has an acceptable range. */
6450 5464 : signop sgn = TYPE_SIGN (type);
6451 10928 : if (wi::les_p (minv, widest_int::from (step_min, sgn))
6452 14100 : && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6453 : {
6454 1577 : if (wi::ge_p (step_min, useful_min, sgn))
6455 436 : return ssize_int (useful_min);
6456 1141 : else if (wi::lt_p (step_max, 0, sgn))
6457 0 : return ssize_int (-1);
6458 : else
6459 1141 : return fold_convert (ssizetype, step);
6460 : }
6461 5464 : }
6462 64494 : return DR_STEP (dr);
6463 : }
6464 :
6465 : /* Return a value that is negative iff DR has a negative step. */
6466 :
6467 : tree
6468 11813 : dr_direction_indicator (struct data_reference *dr)
6469 : {
6470 11813 : return dr_step_indicator (dr, 0);
6471 : }
6472 :
6473 : /* Return a value that is zero iff DR has a zero step. */
6474 :
6475 : tree
6476 54258 : dr_zero_step_indicator (struct data_reference *dr)
6477 : {
6478 54258 : return dr_step_indicator (dr, 1);
6479 : }
6480 :
6481 : /* Return true if DR is known to have a nonnegative (but possibly zero)
6482 : step. */
6483 :
6484 : bool
6485 4991 : dr_known_forward_stride_p (struct data_reference *dr)
6486 : {
6487 4991 : tree indicator = dr_direction_indicator (dr);
6488 4991 : tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6489 : fold_convert (ssizetype, indicator),
6490 : ssize_int (0));
6491 4991 : return neg_step_val && integer_zerop (neg_step_val);
6492 : }
|