Branch data Line data Source code
1 : : /* Data references and dependences detectors.
2 : : Copyright (C) 2003-2025 Free Software Foundation, Inc.
3 : : Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : /* This pass walks a given loop structure searching for array
22 : : references. The information about the array accesses is recorded
23 : : in DATA_REFERENCE structures.
24 : :
25 : : The basic test for determining the dependences is:
26 : :    given two access functions chrec1 and chrec2 to the same array, and
27 : :    two vectors x and y from the iteration domain, the same element of
28 : : the array is accessed twice at iterations x and y if and only if:
29 : : | chrec1 (x) == chrec2 (y).
30 : :
31 : : The goals of this analysis are:
32 : :
33 : :    - to determine independence: the relation between two
34 : :      independent accesses is qualified with chrec_known (this
35 : :      information allows loop parallelization),
36 : :
37 : : - when two data references access the same data, to qualify the
38 : : dependence relation with classic dependence representations:
39 : :
40 : : - distance vectors
41 : : - direction vectors
42 : : - loop carried level dependence
43 : : - polyhedron dependence
44 : : or with the chains of recurrences based representation,
45 : :
46 : : - to define a knowledge base for storing the data dependence
47 : : information,
48 : :
49 : : - to define an interface to access this data.
50 : :
51 : :
52 : : Definitions:
53 : :
54 : :    - subscript: given two array accesses, a subscript is the tuple
55 : : composed of the access functions for a given dimension. Example:
56 : : Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 : : (f1, g1), (f2, g2), (f3, g3).
58 : :
59 : : - Diophantine equation: an equation whose coefficients and
60 : : solutions are integer constants, for example the equation
61 : : | 3*x + 2*y = 1
62 : : has an integer solution x = 1 and y = -1.
63 : :
64 : : References:
65 : :
66 : : - "Advanced Compilation for High Performance Computing" by Randy
67 : : Allen and Ken Kennedy.
68 : : http://citeseer.ist.psu.edu/goff91practical.html
69 : :
70 : : - "Loop Transformations for Restructuring Compilers - The Foundations"
71 : : by Utpal Banerjee.
72 : :
73 : :
74 : : */
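      : :
      : : /* For illustration, a minimal worked example of the basic test above
      : :    (a sketch, not taken from these sources).  Consider:
      : :
      : :    | for (i = 0; i < N; i++)
      : :    |   A[i + 1] = A[i] + 1;
      : :
      : :    Both accesses are to the same array A, with access functions
      : :    chrec1 (x) = x + 1 for the write and chrec2 (y) = y for the read.
      : :    The dependence test asks for integer solutions of
      : :    | x + 1 == y
      : :    which exist for every y = x + 1: iteration x + 1 reads the element
      : :    written at iteration x.  The corresponding distance vector is (1)
      : :    and the direction vector is (+).  */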
75 : :
76 : : #define INCLUDE_ALGORITHM
77 : : #include "config.h"
78 : : #include "system.h"
79 : : #include "coretypes.h"
80 : : #include "backend.h"
81 : : #include "rtl.h"
82 : : #include "tree.h"
83 : : #include "gimple.h"
84 : : #include "gimple-pretty-print.h"
85 : : #include "alias.h"
86 : : #include "fold-const.h"
87 : : #include "expr.h"
88 : : #include "gimple-iterator.h"
89 : : #include "tree-ssa-loop-niter.h"
90 : : #include "tree-ssa-loop.h"
91 : : #include "tree-ssa.h"
92 : : #include "cfgloop.h"
93 : : #include "tree-data-ref.h"
94 : : #include "tree-scalar-evolution.h"
95 : : #include "dumpfile.h"
96 : : #include "tree-affine.h"
97 : : #include "builtins.h"
98 : : #include "tree-eh.h"
99 : : #include "ssa.h"
100 : : #include "internal-fn.h"
101 : : #include "vr-values.h"
102 : : #include "range-op.h"
103 : : #include "tree-ssa-loop-ivopts.h"
104 : : #include "calls.h"
105 : :
106 : : static struct datadep_stats
107 : : {
108 : : int num_dependence_tests;
109 : : int num_dependence_dependent;
110 : : int num_dependence_independent;
111 : : int num_dependence_undetermined;
112 : :
113 : : int num_subscript_tests;
114 : : int num_subscript_undetermined;
115 : : int num_same_subscript_function;
116 : :
117 : : int num_ziv;
118 : : int num_ziv_independent;
119 : : int num_ziv_dependent;
120 : : int num_ziv_unimplemented;
121 : :
122 : : int num_siv;
123 : : int num_siv_independent;
124 : : int num_siv_dependent;
125 : : int num_siv_unimplemented;
126 : :
127 : : int num_miv;
128 : : int num_miv_independent;
129 : : int num_miv_dependent;
130 : : int num_miv_unimplemented;
131 : : } dependence_stats;
132 : :
133 : : static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
134 : : unsigned int, unsigned int,
135 : : class loop *);
136 : : /* Returns true iff A divides B. */
137 : :
138 : : static inline bool
139 : 1900 : tree_fold_divides_p (const_tree a, const_tree b)
140 : : {
141 : 1900 : gcc_assert (TREE_CODE (a) == INTEGER_CST);
142 : 1900 : gcc_assert (TREE_CODE (b) == INTEGER_CST);
143 : 1900 : return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
144 : : }
145 : :
146 : : /* Returns true iff A divides B. */
147 : :
148 : : static inline bool
149 : 1665289 : int_divides_p (lambda_int a, lambda_int b)
150 : : {
151 : 1665289 : return ((b % a) == 0);
152 : : }
153 : :
154 : : /* Return true if reference REF contains a union access. */
155 : :
156 : : static bool
157 : 419936 : ref_contains_union_access_p (tree ref)
158 : : {
159 : 466308 : while (handled_component_p (ref))
160 : : {
161 : 88762 : ref = TREE_OPERAND (ref, 0);
162 : 177524 : if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
163 : 88762 : || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
164 : : return true;
165 : : }
166 : : return false;
167 : : }
168 : :
169 : :
170 : :
171 : : /* Dump into FILE all the data references from DATAREFS. */
172 : :
173 : : static void
174 : 0 : dump_data_references (FILE *file, vec<data_reference_p> datarefs)
175 : : {
176 : 0 : for (data_reference *dr : datarefs)
177 : 0 : dump_data_reference (file, dr);
178 : 0 : }
179 : :
180 : : /* Unified debug functions dumping a vector of data references to stderr.  */
181 : :
182 : : DEBUG_FUNCTION void
183 : 0 : debug (vec<data_reference_p> &ref)
184 : : {
185 : 0 : dump_data_references (stderr, ref);
186 : 0 : }
187 : :
188 : : DEBUG_FUNCTION void
189 : 0 : debug (vec<data_reference_p> *ptr)
190 : : {
191 : 0 : if (ptr)
192 : 0 : debug (*ptr);
193 : : else
194 : 0 : fprintf (stderr, "<nil>\n");
195 : 0 : }
196 : :
197 : :
198 : : /* Dump into STDERR all the data references from DATAREFS. */
199 : :
200 : : DEBUG_FUNCTION void
201 : 0 : debug_data_references (vec<data_reference_p> datarefs)
202 : : {
203 : 0 : dump_data_references (stderr, datarefs);
204 : 0 : }
205 : :
206 : : /* Print to STDERR the data_reference DR. */
207 : :
208 : : DEBUG_FUNCTION void
209 : 0 : debug_data_reference (struct data_reference *dr)
210 : : {
211 : 0 : dump_data_reference (stderr, dr);
212 : 0 : }
213 : :
214 : : /* Dump function for a DATA_REFERENCE structure. */
215 : :
216 : : void
217 : 3452 : dump_data_reference (FILE *outf,
218 : : struct data_reference *dr)
219 : : {
220 : 3452 : unsigned int i;
221 : :
222 : 3452 : fprintf (outf, "#(Data Ref: \n");
223 : 3452 : fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
224 : 3452 : fprintf (outf, "# stmt: ");
225 : 3452 : print_gimple_stmt (outf, DR_STMT (dr), 0);
226 : 3452 : fprintf (outf, "# ref: ");
227 : 3452 : print_generic_stmt (outf, DR_REF (dr));
228 : 3452 : fprintf (outf, "# base_object: ");
229 : 3452 : print_generic_stmt (outf, DR_BASE_OBJECT (dr));
230 : :
231 : 10702 : for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
232 : : {
233 : 3798 : fprintf (outf, "# Access function %d: ", i);
234 : 3798 : print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
235 : : }
236 : 3452 : fprintf (outf, "#)\n");
237 : 3452 : }
238 : :
239 : : /* Unified dump function for a DATA_REFERENCE structure. */
240 : :
241 : : DEBUG_FUNCTION void
242 : 0 : debug (data_reference &ref)
243 : : {
244 : 0 : dump_data_reference (stderr, &ref);
245 : 0 : }
246 : :
247 : : DEBUG_FUNCTION void
248 : 0 : debug (data_reference *ptr)
249 : : {
250 : 0 : if (ptr)
251 : 0 : debug (*ptr);
252 : : else
253 : 0 : fprintf (stderr, "<nil>\n");
254 : 0 : }
255 : :
256 : :
257 : : /* Dumps the affine function described by FN to the file OUTF. */
258 : :
259 : : DEBUG_FUNCTION void
260 : 30660 : dump_affine_function (FILE *outf, affine_fn fn)
261 : : {
262 : 30660 : unsigned i;
263 : 30660 : tree coef;
264 : :
265 : 30660 : print_generic_expr (outf, fn[0], TDF_SLIM);
266 : 64196 : for (i = 1; fn.iterate (i, &coef); i++)
267 : : {
268 : 2876 : fprintf (outf, " + ");
269 : 2876 : print_generic_expr (outf, coef, TDF_SLIM);
270 : 2876 : fprintf (outf, " * x_%u", i);
271 : : }
272 : 30660 : }
273 : :
274 : : /* Dumps the conflict function CF to the file OUTF. */
275 : :
276 : : DEBUG_FUNCTION void
277 : 145974 : dump_conflict_function (FILE *outf, conflict_function *cf)
278 : : {
279 : 145974 : unsigned i;
280 : :
281 : 145974 : if (cf->n == NO_DEPENDENCE)
282 : 109306 : fprintf (outf, "no dependence");
283 : 36668 : else if (cf->n == NOT_KNOWN)
284 : 6008 : fprintf (outf, "not known");
285 : : else
286 : : {
287 : 61320 : for (i = 0; i < cf->n; i++)
288 : : {
289 : 30660 : if (i != 0)
290 : 0 : fprintf (outf, " ");
291 : 30660 : fprintf (outf, "[");
292 : 30660 : dump_affine_function (outf, cf->fns[i]);
293 : 30660 : fprintf (outf, "]");
294 : : }
295 : : }
296 : 145974 : }
297 : :
298 : : /* Dump function for a SUBSCRIPT structure. */
299 : :
300 : : DEBUG_FUNCTION void
301 : 833 : dump_subscript (FILE *outf, struct subscript *subscript)
302 : : {
303 : 833 : conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
304 : :
305 : 833 : fprintf (outf, "\n (subscript \n");
306 : 833 : fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
307 : 833 : dump_conflict_function (outf, cf);
308 : 833 : if (CF_NONTRIVIAL_P (cf))
309 : : {
310 : 833 : tree last_iteration = SUB_LAST_CONFLICT (subscript);
311 : 833 : fprintf (outf, "\n last_conflict: ");
312 : 833 : print_generic_expr (outf, last_iteration);
313 : : }
314 : :
315 : 833 : cf = SUB_CONFLICTS_IN_B (subscript);
316 : 833 : fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
317 : 833 : dump_conflict_function (outf, cf);
318 : 833 : if (CF_NONTRIVIAL_P (cf))
319 : : {
320 : 833 : tree last_iteration = SUB_LAST_CONFLICT (subscript);
321 : 833 : fprintf (outf, "\n last_conflict: ");
322 : 833 : print_generic_expr (outf, last_iteration);
323 : : }
324 : :
325 : 833 : fprintf (outf, "\n (Subscript distance: ");
326 : 833 : print_generic_expr (outf, SUB_DISTANCE (subscript));
327 : 833 : fprintf (outf, " ))\n");
328 : 833 : }
329 : :
330 : : /* Print the classic direction vector DIRV to OUTF. */
331 : :
332 : : DEBUG_FUNCTION void
333 : 772 : print_direction_vector (FILE *outf,
334 : : lambda_vector dirv,
335 : : int length)
336 : : {
337 : 772 : int eq;
338 : :
339 : 1673 : for (eq = 0; eq < length; eq++)
340 : : {
341 : 901 : enum data_dependence_direction dir = ((enum data_dependence_direction)
342 : 901 : dirv[eq]);
343 : :
344 : 901 : switch (dir)
345 : : {
346 : 140 : case dir_positive:
347 : 140 : fprintf (outf, " +");
348 : 140 : break;
349 : 6 : case dir_negative:
350 : 6 : fprintf (outf, " -");
351 : 6 : break;
352 : 755 : case dir_equal:
353 : 755 : fprintf (outf, " =");
354 : 755 : break;
355 : 0 : case dir_positive_or_equal:
356 : 0 : fprintf (outf, " +=");
357 : 0 : break;
358 : 0 : case dir_positive_or_negative:
359 : 0 : fprintf (outf, " +-");
360 : 0 : break;
361 : 0 : case dir_negative_or_equal:
362 : 0 : fprintf (outf, " -=");
363 : 0 : break;
364 : 0 : case dir_star:
365 : 0 : fprintf (outf, " *");
366 : 0 : break;
367 : 0 : default:
368 : 0 : fprintf (outf, "indep");
369 : 0 : break;
370 : : }
371 : : }
372 : 772 : fprintf (outf, "\n");
373 : 772 : }
374 : :
375 : : /* Print a vector of direction vectors. */
376 : :
377 : : DEBUG_FUNCTION void
378 : 0 : print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
379 : : int length)
380 : : {
381 : 0 : for (lambda_vector v : dir_vects)
382 : 0 : print_direction_vector (outf, v, length);
383 : 0 : }
384 : :
385 : : /* Print out a vector VEC of length N to OUTFILE. */
386 : :
387 : : DEBUG_FUNCTION void
388 : 4435 : print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 : : {
390 : 4435 : int i;
391 : :
392 : 9276 : for (i = 0; i < n; i++)
393 : 4841 : fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
394 : 4435 : fprintf (outfile, "\n");
395 : 4435 : }
396 : :
397 : : /* Print a vector of distance vectors. */
398 : :
399 : : DEBUG_FUNCTION void
400 : 0 : print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
401 : : int length)
402 : : {
403 : 0 : for (lambda_vector v : dist_vects)
404 : 0 : print_lambda_vector (outf, v, length);
405 : 0 : }
406 : :
407 : : /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
408 : :
409 : : DEBUG_FUNCTION void
410 : 1570 : dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
411 : : {
412 : 1570 : struct data_reference *dra, *drb;
413 : :
414 : 1570 : fprintf (outf, "(Data Dep: \n");
415 : :
416 : 1570 : if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
417 : : {
418 : 399 : if (ddr)
419 : : {
420 : 399 : dra = DDR_A (ddr);
421 : 399 : drb = DDR_B (ddr);
422 : 399 : if (dra)
423 : 399 : dump_data_reference (outf, dra);
424 : : else
425 : 0 : fprintf (outf, " (nil)\n");
426 : 399 : if (drb)
427 : 399 : dump_data_reference (outf, drb);
428 : : else
429 : 0 : fprintf (outf, " (nil)\n");
430 : : }
431 : 399 : fprintf (outf, " (don't know)\n)\n");
432 : 399 : return;
433 : : }
434 : :
435 : 1171 : dra = DDR_A (ddr);
436 : 1171 : drb = DDR_B (ddr);
437 : 1171 : dump_data_reference (outf, dra);
438 : 1171 : dump_data_reference (outf, drb);
439 : :
440 : 1171 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
441 : 419 : fprintf (outf, " (no dependence)\n");
442 : :
443 : 752 : else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
444 : : {
445 : : unsigned int i;
446 : : class loop *loopi;
447 : :
448 : : subscript *sub;
449 : 1585 : FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
450 : : {
451 : 833 : fprintf (outf, " access_fn_A: ");
452 : 833 : print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
453 : 833 : fprintf (outf, " access_fn_B: ");
454 : 833 : print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
455 : 833 : dump_subscript (outf, sub);
456 : : }
457 : :
458 : 752 : fprintf (outf, " loop nest: (");
459 : 2359 : FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
460 : 855 : fprintf (outf, "%d ", loopi->num);
461 : 752 : fprintf (outf, ")\n");
462 : :
463 : 3795 : for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
464 : : {
465 : 772 : fprintf (outf, " distance_vector: ");
466 : 772 : print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
467 : 1544 : DDR_NB_LOOPS (ddr));
468 : : }
469 : :
470 : 1524 : for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
471 : : {
472 : 772 : fprintf (outf, " direction_vector: ");
473 : 772 : print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
474 : 1544 : DDR_NB_LOOPS (ddr));
475 : : }
476 : : }
477 : :
478 : 1171 : fprintf (outf, ")\n");
479 : : }
480 : :
481 : : /* Debug version. */
482 : :
483 : : DEBUG_FUNCTION void
484 : 0 : debug_data_dependence_relation (const struct data_dependence_relation *ddr)
485 : : {
486 : 0 : dump_data_dependence_relation (stderr, ddr);
487 : 0 : }
488 : :
489 : : /* Dump into FILE all the dependence relations from DDRS. */
490 : :
491 : : DEBUG_FUNCTION void
492 : 305 : dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
493 : : {
494 : 2455 : for (auto ddr : ddrs)
495 : 1570 : dump_data_dependence_relation (file, ddr);
496 : 305 : }
497 : :
498 : : DEBUG_FUNCTION void
499 : 0 : debug (vec<ddr_p> &ref)
500 : : {
501 : 0 : dump_data_dependence_relations (stderr, ref);
502 : 0 : }
503 : :
504 : : DEBUG_FUNCTION void
505 : 0 : debug (vec<ddr_p> *ptr)
506 : : {
507 : 0 : if (ptr)
508 : 0 : debug (*ptr);
509 : : else
510 : 0 : fprintf (stderr, "<nil>\n");
511 : 0 : }
512 : :
513 : :
514 : : /* Dump to STDERR all the dependence relations from DDRS. */
515 : :
516 : : DEBUG_FUNCTION void
517 : 0 : debug_data_dependence_relations (vec<ddr_p> ddrs)
518 : : {
519 : 0 : dump_data_dependence_relations (stderr, ddrs);
520 : 0 : }
521 : :
522 : : /* Dumps to FILE the distance and direction vectors of the dependence
523 : :    relations in DDRS.  The size of each vector, in other words the
524 : :    number of loops in the considered nest, is given by
525 : :    DDR_NB_LOOPS.  */
526 : :
527 : : DEBUG_FUNCTION void
528 : 0 : dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
529 : : {
530 : 0 : for (data_dependence_relation *ddr : ddrs)
531 : 0 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
532 : : {
533 : 0 : for (lambda_vector v : DDR_DIST_VECTS (ddr))
534 : : {
535 : 0 : fprintf (file, "DISTANCE_V (");
536 : 0 : print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
537 : 0 : fprintf (file, ")\n");
538 : : }
539 : :
540 : 0 : for (lambda_vector v : DDR_DIR_VECTS (ddr))
541 : : {
542 : 0 : fprintf (file, "DIRECTION_V (");
543 : 0 : print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
544 : 0 : fprintf (file, ")\n");
545 : : }
546 : : }
547 : :
548 : 0 : fprintf (file, "\n\n");
549 : 0 : }
550 : :
551 : : /* Dumps the data dependence relations DDRS in FILE. */
552 : :
553 : : DEBUG_FUNCTION void
554 : 0 : dump_ddrs (FILE *file, vec<ddr_p> ddrs)
555 : : {
556 : 0 : for (data_dependence_relation *ddr : ddrs)
557 : 0 : dump_data_dependence_relation (file, ddr);
558 : :
559 : 0 : fprintf (file, "\n\n");
560 : 0 : }
561 : :
562 : : DEBUG_FUNCTION void
563 : 0 : debug_ddrs (vec<ddr_p> ddrs)
564 : : {
565 : 0 : dump_ddrs (stderr, ddrs);
566 : 0 : }
567 : :
568 : : /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
569 : : OP0 CODE OP1, where:
570 : :
571 : : - OP0 CODE OP1 has integral type TYPE
572 : : - the range of OP0 is given by OP0_RANGE and
573 : : - the range of OP1 is given by OP1_RANGE.
574 : :
575 : : Independently of RESULT_RANGE, try to compute:
576 : :
577 : : DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
578 : : - (sizetype) (OP0 CODE OP1)
579 : :
580 : : as a constant and subtract DELTA from the ssizetype constant in *OFF.
581 : : Return true on success, or false if DELTA is not known at compile time.
582 : :
583 : : Truncation and sign changes are known to distribute over CODE, i.e.
584 : :
585 : : (itype) (A CODE B) == (itype) A CODE (itype) B
586 : :
587 : : for any integral type ITYPE whose precision is no greater than the
588 : : precision of A and B. */
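      : :
      : : /* A sketch of what DELTA captures (an illustrative example, not taken
      : :    from a particular test case): with TYPE = unsigned char (8-bit,
      : :    wrapping), CODE = PLUS_EXPR, OP0 = 200 and OP1 = 100 we have
      : :
      : :    | (sizetype) OP0 + (sizetype) OP1 == 300
      : :    | (sizetype) (OP0 + OP1)          == 44
      : :
      : :    so DELTA == 256, i.e. one wrap-around of TYPE, and 256 is
      : :    subtracted from the constant in *OFF.  */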
589 : :
590 : : static bool
591 : 4450319 : compute_distributive_range (tree type, irange &op0_range,
592 : : tree_code code, irange &op1_range,
593 : : tree *off, irange *result_range)
594 : : {
595 : 4450319 : gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
596 : 4450319 : if (result_range)
597 : : {
598 : 1042691 : range_op_handler op (code);
599 : 1042691 : if (!op.fold_range (*result_range, type, op0_range, op1_range))
600 : 0 : result_range->set_varying (type);
601 : : }
602 : :
603 : : /* The distributive property guarantees that if TYPE is no narrower
604 : : than SIZETYPE,
605 : :
606 : : (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
607 : :
608 : : and so we can treat DELTA as zero. */
609 : 4450319 : if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
610 : : return true;
611 : :
612 : : /* If overflow is undefined, we can assume that:
613 : :
614 : : X == (ssizetype) OP0 CODE (ssizetype) OP1
615 : :
616 : : is within the range of TYPE, i.e.:
617 : :
618 : : X == (ssizetype) (TYPE) X
619 : :
620 : : Distributing the (TYPE) truncation over X gives:
621 : :
622 : : X == (ssizetype) (OP0 CODE OP1)
623 : :
624 : : Casting both sides to sizetype and distributing the sizetype cast
625 : : over X gives:
626 : :
627 : : (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
628 : :
629 : : and so we can treat DELTA as zero. */
630 : 264835 : if (TYPE_OVERFLOW_UNDEFINED (type))
631 : : return true;
632 : :
633 : : /* Compute the range of:
634 : :
635 : : (ssizetype) OP0 CODE (ssizetype) OP1
636 : :
637 : : The distributive property guarantees that this has the same bitpattern as:
638 : :
639 : : (sizetype) OP0 CODE (sizetype) OP1
640 : :
641 : : but its range is more conducive to analysis. */
642 : 108602 : range_cast (op0_range, ssizetype);
643 : 108602 : range_cast (op1_range, ssizetype);
644 : 108602 : int_range_max wide_range;
645 : 108602 : range_op_handler op (code);
646 : 108602 : bool saved_flag_wrapv = flag_wrapv;
647 : 108602 : flag_wrapv = 1;
648 : 108602 : if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
649 : 0 :     wide_range.set_varying (ssizetype);
650 : 108602 : flag_wrapv = saved_flag_wrapv;
651 : 108602 : if (wide_range.num_pairs () != 1
652 : 108602 : || wide_range.varying_p () || wide_range.undefined_p ())
653 : : return false;
654 : :
655 : 88599 : wide_int lb = wide_range.lower_bound ();
656 : 88599 : wide_int ub = wide_range.upper_bound ();
657 : :
658 : : /* Calculate the number of times that each end of the range overflows or
659 : : underflows TYPE. We can only calculate DELTA if the numbers match. */
660 : 88599 : unsigned int precision = TYPE_PRECISION (type);
661 : 88599 : if (!TYPE_UNSIGNED (type))
662 : : {
663 : 195 : wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
664 : 195 : lb -= type_min;
665 : 195 : ub -= type_min;
666 : 195 : }
667 : 88599 : wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
668 : 88599 : lb &= upper_bits;
669 : 88599 : ub &= upper_bits;
670 : 88599 : if (lb != ub)
671 : : return false;
672 : :
673 : : /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
674 : : negative values indicating underflow. The low PRECISION bits of LB
675 : : are clear, so DELTA is therefore LB (== UB). */
676 : 30528 : *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
677 : 30528 : return true;
678 : 108602 : }
679 : :
680 : : /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
681 : : given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and
682 : : FROM_TYPE are integral types. */
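      : :
      : : /* For instance (a sketch): converting an unsigned char OP to unsigned
      : :    short is always such a nop, because the wider type represents every
      : :    value of the narrower one; converting an int OP to unsigned char is
      : :    a nop only if RANGE shows that OP already fits in unsigned char.  */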
683 : :
684 : : static bool
685 : 2554966 : nop_conversion_for_offset_p (tree to_type, tree from_type, irange &range)
686 : : {
687 : 2554966 : gcc_assert (INTEGRAL_TYPE_P (to_type)
688 : : && INTEGRAL_TYPE_P (from_type)
689 : : && !TYPE_OVERFLOW_TRAPS (to_type)
690 : : && !TYPE_OVERFLOW_TRAPS (from_type));
691 : :
692 : : /* Converting to something no narrower than sizetype and then to sizetype
693 : : is equivalent to converting directly to sizetype. */
694 : 2554966 : if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
695 : : return true;
696 : :
697 : : /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */
698 : 87907 : if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
699 : 87907 : && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
700 : : return true;
701 : :
702 : : /* For narrowing conversions, we could in principle test whether
703 : : the bits in FROM_TYPE but not in TO_TYPE have a fixed value
704 : : and apply a constant adjustment.
705 : :
706 : : For other conversions (which involve a sign change) we could
707 : : check that the signs are always equal, and apply a constant
708 : : adjustment if the signs are negative.
709 : :
710 : : However, both cases should be rare. */
711 : 70867 : return range_fits_type_p (&range, TYPE_PRECISION (to_type),
712 : 141734 : TYPE_SIGN (to_type));
713 : : }
714 : :
715 : : static void
716 : : split_constant_offset (tree type, tree *var, tree *off,
717 : : irange *result_range,
718 : : hash_map<tree, std::pair<tree, tree> > &cache,
719 : : unsigned *limit);
720 : :
721 : : /* Helper function for split_constant_offset. If TYPE is a pointer type,
722 : : try to express OP0 CODE OP1 as:
723 : :
724 : : POINTER_PLUS <*VAR, (sizetype) *OFF>
725 : :
726 : : where:
727 : :
728 : : - *VAR has type TYPE
729 : : - *OFF is a constant of type ssizetype.
730 : :
731 : : If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
732 : :
733 : : *VAR + (sizetype) *OFF
734 : :
735 : : where:
736 : :
737 : : - *VAR has type sizetype
738 : : - *OFF is a constant of type ssizetype.
739 : :
740 : : In both cases, OP0 CODE OP1 has type TYPE.
741 : :
742 : : Return true on success. A false return value indicates that we can't
743 : : do better than set *OFF to zero.
744 : :
745 : : When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
746 : : if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
747 : :
748 : : CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
749 : : visited. LIMIT counts down the number of SSA names that we are
750 : : allowed to process before giving up. */
751 : :
752 : : static bool
753 : 56968178 : split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
754 : : tree *var, tree *off, irange *result_range,
755 : : hash_map<tree, std::pair<tree, tree> > &cache,
756 : : unsigned *limit)
757 : : {
758 : 56968178 : tree var0, var1;
759 : 56968178 : tree off0, off1;
760 : 56968178 : int_range_max op0_range, op1_range;
761 : :
762 : 56968178 : *var = NULL_TREE;
763 : 56968178 : *off = NULL_TREE;
764 : :
765 : 56968178 : if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
766 : : return false;
767 : :
768 : 56967597 : if (TREE_CODE (op0) == SSA_NAME
769 : 56967597 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
770 : : return false;
771 : 56967129 : if (op1
772 : 7201187 : && TREE_CODE (op1) == SSA_NAME
773 : 59186093 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
774 : : return false;
775 : :
776 : 56967129 : switch (code)
777 : : {
778 : 17076167 : case INTEGER_CST:
779 : 17076167 : *var = size_int (0);
780 : 17076167 : *off = fold_convert (ssizetype, op0);
781 : 17076167 : if (result_range)
782 : : {
783 : 1387048 : wide_int w = wi::to_wide (op0);
784 : 1387048 : result_range->set (TREE_TYPE (op0), w, w);
785 : 1387048 : }
786 : : return true;
787 : :
788 : 1993709 : case POINTER_PLUS_EXPR:
789 : 1993709 : split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
790 : 1993709 : split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
791 : 1993709 : *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
792 : 1993709 : *off = size_binop (PLUS_EXPR, off0, off1);
793 : 1993709 : return true;
794 : :
795 : 2365272 : case PLUS_EXPR:
796 : 2365272 : case MINUS_EXPR:
797 : 2365272 : split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
798 : 2365272 : split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
799 : 2365272 : *off = size_binop (code, off0, off1);
800 : 2365272 : if (!compute_distributive_range (type, op0_range, code, op1_range,
801 : : off, result_range))
802 : : return false;
803 : 2306961 : *var = fold_build2 (code, sizetype, var0, var1);
804 : 2306961 : return true;
805 : :
806 : 2515590 : case MULT_EXPR:
807 : 2515590 : if (TREE_CODE (op1) != INTEGER_CST)
808 : : return false;
809 : :
810 : 2085047 : split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
811 : 2085047 : op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
812 : 2085047 : *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
813 : 2085047 : if (!compute_distributive_range (type, op0_range, code, op1_range,
814 : : off, result_range))
815 : : return false;
816 : 2065284 : *var = fold_build2 (MULT_EXPR, sizetype, var0,
817 : : fold_convert (sizetype, op1));
818 : 2065284 : return true;
819 : :
820 : 10318633 : case ADDR_EXPR:
821 : 10318633 : {
822 : 10318633 : tree base, poffset;
823 : 10318633 : poly_int64 pbitsize, pbitpos, pbytepos;
824 : 10318633 : machine_mode pmode;
825 : 10318633 : int punsignedp, preversep, pvolatilep;
826 : :
827 : 10318633 : op0 = TREE_OPERAND (op0, 0);
828 : 10318633 : base
829 : 10318633 : = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
830 : : &punsignedp, &preversep, &pvolatilep);
831 : :
832 : 10342798 : if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
833 : : return false;
834 : 10318633 : base = build_fold_addr_expr (base);
835 : 10318633 : off0 = ssize_int (pbytepos);
836 : :
837 : 10318633 : if (poffset)
838 : : {
839 : 99 : split_constant_offset (poffset, &poffset, &off1, nullptr,
840 : : cache, limit);
841 : 99 : off0 = size_binop (PLUS_EXPR, off0, off1);
842 : 99 : base = fold_build_pointer_plus (base, poffset);
843 : : }
844 : :
845 : 10318633 : var0 = fold_convert (type, base);
846 : :
847 : :             /* If variable-length types are involved, punt; otherwise casts
848 : :                might be converted into ARRAY_REFs in gimplify_conversion.
849 : :                Computing such an ARRAY_REF's element size would make
850 : :                TYPE_SIZE_UNIT, which possibly no longer appears in the
851 : :                current GIMPLE, resurface.  This could perhaps be handled as
852 : :                if (CONVERT_EXPR_P (var0))
853 : :                  {
854 : :                    gimplify_conversion (&var0);
855 : :                    // Attempt to fill in the element size of any ARRAY_REF
856 : :                    // found within var0 from the corresponding ARRAY_REF
857 : :                    // embedded in op; if unsuccessful, just punt.
858 : :                  } */
859 : 21026104 : while (POINTER_TYPE_P (type))
860 : 10707471 : type = TREE_TYPE (type);
861 : 10318633 : if (int_size_in_bytes (type) < 0)
862 : : return false;
863 : :
864 : 10294468 : *var = var0;
865 : 10294468 : *off = off0;
866 : 10294468 : return true;
867 : : }
868 : :
869 : 15160638 : case SSA_NAME:
870 : 15160638 : {
871 : 15160638 : gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
872 : 15160638 : enum tree_code subcode;
873 : :
874 : 15160638 : if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
875 : : return false;
876 : :
877 : 8295616 : subcode = gimple_assign_rhs_code (def_stmt);
878 : :
879 : : /* We are using a cache to avoid un-CSEing large amounts of code. */
880 : 8295616 : bool use_cache = false;
881 : 8295616 : if (!has_single_use (op0)
882 : 8295616 : && (subcode == POINTER_PLUS_EXPR
883 : 4181713 : || subcode == PLUS_EXPR
884 : : || subcode == MINUS_EXPR
885 : : || subcode == MULT_EXPR
886 : : || subcode == ADDR_EXPR
887 : : || CONVERT_EXPR_CODE_P (subcode)))
888 : : {
889 : 2026838 : use_cache = true;
890 : 2026838 : bool existed;
891 : 2026838 : std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
892 : 2026838 : if (existed)
893 : : {
894 : 31165 : if (integer_zerop (e.second))
895 : 31165 : return false;
896 : 1228 : *var = e.first;
897 : 1228 : *off = e.second;
898 : : /* The caller sets the range in this case. */
899 : 1228 : return true;
900 : : }
901 : 1995673 : e = std::make_pair (op0, ssize_int (0));
902 : : }
903 : :
904 : 8264451 : if (*limit == 0)
905 : : return false;
906 : 8263415 : --*limit;
907 : :
908 : 8263415 : var0 = gimple_assign_rhs1 (def_stmt);
909 : 8263415 : var1 = gimple_assign_rhs2 (def_stmt);
910 : :
911 : 8263415 : bool res = split_constant_offset_1 (type, var0, subcode, var1,
912 : : var, off, nullptr, cache, limit);
913 : 8263415 : if (res && use_cache)
914 : 1779676 : *cache.get (op0) = std::make_pair (*var, *off);
915 : : /* The caller sets the range in this case. */
916 : : return res;
917 : : }
918 : 4215298 : CASE_CONVERT:
919 : 4215298 : {
920 : : /* We can only handle the following conversions:
921 : :
922 : : - Conversions from one pointer type to another pointer type.
923 : :
924 : : - Conversions from one non-trapping integral type to another
925 : : non-trapping integral type. In this case, the recursive
926 : : call makes sure that:
927 : :
928 : : (sizetype) OP0
929 : :
930 : : can be expressed as a sizetype operation involving VAR and OFF,
931 : : and all we need to do is check whether:
932 : :
933 : : (sizetype) OP0 == (sizetype) (TYPE) OP0
934 : :
935 : : - Conversions from a non-trapping sizetype-size integral type to
936 : : a like-sized pointer type. In this case, the recursive call
937 : : makes sure that:
938 : :
939 : : (sizetype) OP0 == *VAR + (sizetype) *OFF
940 : :
941 : : and we can convert that to:
942 : :
943 : : POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
944 : :
945 : : - Conversions from a sizetype-sized pointer type to a like-sized
946 : : non-trapping integral type. In this case, the recursive call
947 : : makes sure that:
948 : :
949 : : OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
950 : :
951 : : where the POINTER_PLUS and *VAR have the same precision as
952 : : TYPE (and the same precision as sizetype). Then:
953 : :
954 : : (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF. */
955 : 4215298 : tree itype = TREE_TYPE (op0);
956 : 4215298 : if ((POINTER_TYPE_P (itype)
957 : 3088884 : || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
958 : 4215083 : && (POINTER_TYPE_P (type)
959 : 3032949 : || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
960 : 8430381 : && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
961 : 1011686 : || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
962 : 1011686 : && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
963 : : {
964 : 4215072 : if (POINTER_TYPE_P (type))
965 : : {
966 : 1182123 : split_constant_offset (op0, var, off, nullptr, cache, limit);
967 : 1182123 : *var = fold_convert (type, *var);
968 : : }
969 : 3032949 : else if (POINTER_TYPE_P (itype))
970 : : {
971 : 477983 : split_constant_offset (op0, var, off, nullptr, cache, limit);
972 : 477983 : *var = fold_convert (sizetype, *var);
973 : : }
974 : : else
975 : : {
976 : 2554966 : split_constant_offset (op0, var, off, &op0_range,
977 : : cache, limit);
978 : 2554966 : if (!nop_conversion_for_offset_p (type, itype, op0_range))
979 : : return false;
980 : 2503672 : if (result_range)
981 : : {
982 : 1316933 : *result_range = op0_range;
983 : 1316933 : range_cast (*result_range, type);
984 : : }
985 : : }
986 : 4163778 : return true;
987 : : }
988 : : return false;
989 : : }
990 : :
991 : : default:
992 : : return false;
993 : : }
994 : 56968178 : }
995 : :
996 : : /* If EXP has pointer type, try to express it as:
997 : :
998 : : POINTER_PLUS <*VAR, (sizetype) *OFF>
999 : :
1000 : : where:
1001 : :
1002 : : - *VAR has the same type as EXP
1003 : : - *OFF is a constant of type ssizetype.
1004 : :
1005 : : If EXP has an integral type, try to express (sizetype) EXP as:
1006 : :
1007 : : *VAR + (sizetype) *OFF
1008 : :
1009 : : where:
1010 : :
1011 : : - *VAR has type sizetype
1012 : : - *OFF is a constant of type ssizetype.
1013 : :
1014 : : If EXP_RANGE is nonnull, set it to the range of EXP.
1015 : :
1016 : : CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1017 : : visited. LIMIT counts down the number of SSA names that we are
1018 : : allowed to process before giving up. */
1019 : :
1020 : : static void
1021 : 48704836 : split_constant_offset (tree exp, tree *var, tree *off, irange *exp_range,
1022 : : hash_map<tree, std::pair<tree, tree> > &cache,
1023 : : unsigned *limit)
1024 : : {
1025 : 48704836 : tree type = TREE_TYPE (exp), op0, op1;
1026 : 48704836 : enum tree_code code;
1027 : :
1028 : 48704836 : code = TREE_CODE (exp);
1029 : 48704836 : if (exp_range)
1030 : : {
1031 : 9370557 : exp_range->set_varying (type);
1032 : 9370557 : if (code == SSA_NAME)
1033 : : {
1034 : 5143571 : int_range_max vr;
1035 : 10287142 : get_range_query (cfun)->range_of_expr (vr, exp);
1036 : 5143571 : if (vr.undefined_p ())
1037 : 4727 : vr.set_varying (TREE_TYPE (exp));
1038 : 5143571 : tree vr_min, vr_max;
1039 : 5143571 : value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
1040 : 5143571 : wide_int var_min = wi::to_wide (vr_min);
1041 : 5143571 : wide_int var_max = wi::to_wide (vr_max);
1042 : 5143571 : wide_int var_nonzero = get_nonzero_bits (exp);
1043 : 15430713 : vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1044 : : &var_min, &var_max,
1045 : : var_nonzero,
1046 : 5143571 : TYPE_SIGN (type));
1047 : : /* This check for VR_VARYING is here because the old code
1048 : : using get_range_info would return VR_RANGE for the entire
1049 : : domain, instead of VR_VARYING. The new code normalizes
1050 : : full-domain ranges to VR_VARYING. */
1051 : 5143571 : if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1052 : 5032022 : exp_range->set (type, var_min, var_max);
1053 : 5143571 : }
1054 : : }
1055 : :
1056 : 48704836 : if (!tree_is_chrec (exp)
1057 : 48704830 : && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1058 : : {
1059 : 48704763 : extract_ops_from_tree (exp, &code, &op0, &op1);
1060 : 48704763 : if (split_constant_offset_1 (type, op0, code, op1, var, off,
1061 : : exp_range, cache, limit))
1062 : 37901595 : return;
1063 : : }
1064 : :
1065 : 10803241 : *var = exp;
1066 : 10803241 : if (INTEGRAL_TYPE_P (type))
1067 : 3337890 : *var = fold_convert (sizetype, *var);
1068 : 10803241 : *off = ssize_int (0);
1069 : :
1070 : 10803241 : int_range_max r;
1071 : 3026934 : if (exp_range && code != SSA_NAME
1072 : 98654 : && get_range_query (cfun)->range_of_expr (r, exp)
1073 : 10852568 : && !r.undefined_p ())
1074 : 49327 : *exp_range = r;
1075 : 10803241 : }
1076 : :
1077 : : /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same
1078 : : type as EXP while OFF has type ssizetype. */
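      : :
      : : /* For example (with hypothetical SSA names): for an integral EXP such
      : :    as i_1 + 5 this yields OFF = 5 and a VAR equivalent to i_1, while
      : :    for a pointer EXP defined as p_2 + 8 (a POINTER_PLUS_EXPR) it
      : :    yields VAR = p_2 and OFF = 8.  */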
1079 : :
1080 : : void
1081 : 33686656 : split_constant_offset (tree exp, tree *var, tree *off)
1082 : : {
1083 : 33686656 : unsigned limit = param_ssa_name_def_chain_limit;
1084 : 33686656 : static hash_map<tree, std::pair<tree, tree> > *cache;
1085 : 33686656 : if (!cache)
1086 : 77177 : cache = new hash_map<tree, std::pair<tree, tree> > (37);
1087 : 33686656 : split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1088 : 33686656 : *var = fold_convert (TREE_TYPE (exp), *var);
1089 : 33686656 : cache->empty ();
1090 : 33686656 : }
1091 : :
1092 : : /* Returns the address ADDR of an object in a canonical shape (without nop
1093 : : casts, and with type of pointer to the object). */
1094 : :
1095 : : static tree
1096 : 15861750 : canonicalize_base_object_address (tree addr)
1097 : : {
1098 : 15861750 : tree orig = addr;
1099 : :
1100 : 15861750 : STRIP_NOPS (addr);
1101 : :
1102 : : /* The base address may be obtained by casting from integer, in that case
1103 : : keep the cast. */
1104 : 15861750 : if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1105 : : return orig;
1106 : :
1107 : 15793590 : if (TREE_CODE (addr) != ADDR_EXPR)
1108 : : return addr;
1109 : :
1110 : 9631830 : return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1111 : : }
1112 : :
1113 : : /* Analyze the behavior of memory reference REF within STMT.
1114 : : There are two modes:
1115 : :
1116 : : - BB analysis. In this case we simply split the address into base,
1117 : : init and offset components, without reference to any containing loop.
1118 : : The resulting base and offset are general expressions and they can
1119 : : vary arbitrarily from one iteration of the containing loop to the next.
1120 : : The step is always zero.
1121 : :
1122 : : - loop analysis. In this case we analyze the reference both wrt LOOP
1123 : : and on the basis that the reference occurs (is "used") in LOOP;
1124 : : see the comment above analyze_scalar_evolution_in_loop for more
1125 : : information about this distinction. The base, init, offset and
1126 : : step fields are all invariant in LOOP.
1127 : :
1128 : : Perform BB analysis if LOOP is null, or if LOOP is the function's
1129 : : dummy outermost loop. In other cases perform loop analysis.
1130 : :
1131 : : Return true if the analysis succeeded and store the results in DRB if so.
1132 : : BB analysis can only fail for bitfield or reversed-storage accesses. */
1133 : :
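      : :
      : : /* A rough sketch of typical loop-analysis results (assuming int a[N],
      : :    4-byte int and a loop counter i starting at 0): for REF = a[i] the
      : :    base address is &a, offset and init are 0 and the step is 4, i.e.
      : :    the reference advances by one element per iteration.  */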
1134 : : opt_result
1135 : 16376987 : dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1136 : : class loop *loop, const gimple *stmt)
1137 : : {
1138 : 16376987 : poly_int64 pbitsize, pbitpos;
1139 : 16376987 : tree base, poffset;
1140 : 16376987 : machine_mode pmode;
1141 : 16376987 : int punsignedp, preversep, pvolatilep;
1142 : 16376987 : affine_iv base_iv, offset_iv;
1143 : 16376987 : tree init, dinit, step;
1144 : 16376987 : bool in_loop = (loop && loop->num);
1145 : :
1146 : 16376987 : if (dump_file && (dump_flags & TDF_DETAILS))
1147 : 64868 : fprintf (dump_file, "analyze_innermost: ");
1148 : :
1149 : 16376987 : base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1150 : : &punsignedp, &preversep, &pvolatilep);
1151 : 16376987 : gcc_assert (base != NULL_TREE);
1152 : :
1153 : 16376987 : poly_int64 pbytepos;
1154 : 16376987 : if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1155 : 58085 : return opt_result::failure_at (stmt,
1156 : : "failed: bit offset alignment.\n");
1157 : :
1158 : 16318902 : if (preversep)
1159 : 653 : return opt_result::failure_at (stmt,
1160 : : "failed: reverse storage order.\n");
1161 : :
1162 : : /* Calculate the alignment and misalignment for the inner reference. */
1163 : 16318249 : unsigned int HOST_WIDE_INT bit_base_misalignment;
1164 : 16318249 : unsigned int bit_base_alignment;
1165 : 16318249 : get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1166 : :
1167 : : /* There are no bitfield references remaining in BASE, so the values
1168 : : we got back must be whole bytes. */
1169 : 16318249 : gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1170 : : && bit_base_misalignment % BITS_PER_UNIT == 0);
1171 : 16318249 : unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1172 : 16318249 : poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1173 : :
1174 : 16318249 : if (TREE_CODE (base) == MEM_REF)
1175 : : {
1176 : 6794968 : if (!integer_zerop (TREE_OPERAND (base, 1)))
1177 : : {
1178 : : /* Subtract MOFF from the base and add it to POFFSET instead.
1179 : : Adjust the misalignment to reflect the amount we subtracted. */
1180 : 1171332 : poly_offset_int moff = mem_ref_offset (base);
1181 : 1171332 : base_misalignment -= moff.force_shwi ();
1182 : 1171332 : tree mofft = wide_int_to_tree (sizetype, moff);
1183 : 1171332 : if (!poffset)
1184 : 1161464 : poffset = mofft;
1185 : : else
1186 : 9868 : poffset = size_binop (PLUS_EXPR, poffset, mofft);
1187 : : }
1188 : 6794968 : base = TREE_OPERAND (base, 0);
1189 : : }
1190 : : else
1191 : : {
1192 : 9523281 : if (may_be_nonaddressable_p (base))
1193 : 2069 : return opt_result::failure_at (stmt,
1194 : : "failed: base not addressable.\n");
1195 : 9521212 : base = build_fold_addr_expr (base);
1196 : : }
1197 : :
1198 : 16316180 : if (in_loop)
1199 : : {
1200 : 3070241 : if (!simple_iv (loop, loop, base, &base_iv, true))
1201 : 372921 : return opt_result::failure_at
1202 : 372921 : (stmt, "failed: evolution of base is not affine.\n");
1203 : : }
1204 : : else
1205 : : {
1206 : 13245939 : base_iv.base = base;
1207 : 13245939 : base_iv.step = ssize_int (0);
1208 : 13245939 : base_iv.no_overflow = true;
1209 : : }
1210 : :
1211 : 15943259 : if (!poffset)
1212 : : {
1213 : 13243164 : offset_iv.base = ssize_int (0);
1214 : 13243164 : offset_iv.step = ssize_int (0);
1215 : : }
1216 : : else
1217 : : {
1218 : 2700095 : if (!in_loop)
1219 : : {
1220 : 1470243 : offset_iv.base = poffset;
1221 : 1470243 : offset_iv.step = ssize_int (0);
1222 : : }
1223 : 1229852 : else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1224 : 81509 : return opt_result::failure_at
1225 : 81509 : (stmt, "failed: evolution of offset is not affine.\n");
1226 : : }
1227 : :
1228 : 15861750 : init = ssize_int (pbytepos);
1229 : :
1230 : : /* Subtract any constant component from the base and add it to INIT instead.
1231 : : Adjust the misalignment to reflect the amount we subtracted. */
1232 : 15861750 : split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1233 : 15861750 : init = size_binop (PLUS_EXPR, init, dinit);
1234 : 15861750 : base_misalignment -= TREE_INT_CST_LOW (dinit);
1235 : :
1236 : 15861750 : split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1237 : 15861750 : init = size_binop (PLUS_EXPR, init, dinit);
1238 : :
1239 : 15861750 : step = size_binop (PLUS_EXPR,
1240 : : fold_convert (ssizetype, base_iv.step),
1241 : : fold_convert (ssizetype, offset_iv.step));
1242 : :
1243 : 15861750 : base = canonicalize_base_object_address (base_iv.base);
1244 : :
1245 : : /* See if get_pointer_alignment can guarantee a higher alignment than
1246 : : the one we calculated above. */
1247 : 15861750 : unsigned int HOST_WIDE_INT alt_misalignment;
1248 : 15861750 : unsigned int alt_alignment;
1249 : 15861750 : get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1250 : :
1251 : : /* As above, these values must be whole bytes. */
1252 : 15861750 : gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1253 : : && alt_misalignment % BITS_PER_UNIT == 0);
1254 : 15861750 : alt_alignment /= BITS_PER_UNIT;
1255 : 15861750 : alt_misalignment /= BITS_PER_UNIT;
1256 : :
1257 : 15861750 : if (base_alignment < alt_alignment)
1258 : : {
1259 : 142275 : base_alignment = alt_alignment;
1260 : 142275 : base_misalignment = alt_misalignment;
1261 : : }
1262 : :
1263 : 15861750 : drb->base_address = base;
1264 : 15861750 : drb->offset = fold_convert (ssizetype, offset_iv.base);
1265 : 15861750 : drb->init = init;
1266 : 15861750 : drb->step = step;
1267 : 15861750 : if (known_misalignment (base_misalignment, base_alignment,
1268 : : &drb->base_misalignment))
1269 : 15861750 : drb->base_alignment = base_alignment;
1270 : : else
1271 : : {
1272 : : drb->base_alignment = known_alignment (base_misalignment);
1273 : : drb->base_misalignment = 0;
1274 : : }
1275 : 15861750 : drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1276 : 15861750 : drb->step_alignment = highest_pow2_factor (step);
1277 : :
1278 : 15861750 : if (dump_file && (dump_flags & TDF_DETAILS))
1279 : 61683 : fprintf (dump_file, "success.\n");
1280 : :
1281 : 15861750 : return opt_result::success ();
1282 : : }
1283 : :
1284 : : /* Return true if OP is a valid component reference for a DR access
1285 : : function. This accepts a subset of what handled_component_p accepts. */
1286 : :
1287 : : static bool
1288 : 5808276 : access_fn_component_p (tree op)
1289 : : {
1290 : 5808276 : switch (TREE_CODE (op))
1291 : : {
1292 : : case REALPART_EXPR:
1293 : : case IMAGPART_EXPR:
1294 : : case ARRAY_REF:
1295 : : return true;
1296 : :
1297 : 1854121 : case COMPONENT_REF:
1298 : 1854121 : return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1299 : :
1300 : 0 : default:
1301 : 0 : return false;
1302 : : }
1303 : : }
1304 : :
1305 : : /* Returns whether BASE can have a access_fn_component_p with BASE
1306 : : as base. */
1307 : :
1308 : : static bool
1309 : 1679018 : base_supports_access_fn_components_p (tree base)
1310 : : {
1311 : 1679018 : switch (TREE_CODE (TREE_TYPE (base)))
1312 : : {
1313 : : case COMPLEX_TYPE:
1314 : : case ARRAY_TYPE:
1315 : : case RECORD_TYPE:
1316 : : return true;
1317 : 1669808 : default:
1318 : 1669808 : return false;
1319 : : }
1320 : : }
1321 : :
1322 : : /* Determines the base object and the list of indices of memory reference
1323 : : DR, analyzed in LOOP and instantiated before NEST. */
1324 : :
1325 : : static void
1326 : 16481397 : dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1327 : : {
1328 : : /* If analyzing a basic-block there are no indices to analyze
1329 : : and thus no access functions. */
1330 : 16481397 : if (!nest)
1331 : : {
1332 : 13306068 : dri->base_object = ref;
1333 : 13306068 : dri->access_fns.create (0);
1334 : 13306068 : return;
1335 : : }
1336 : :
1337 : 3175329 : vec<tree> access_fns = vNULL;
1338 : :
1339 : : /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1340 : : into a two element array with a constant index. The base is
1341 : : then just the immediate underlying object. */
1342 : 3175329 : if (TREE_CODE (ref) == REALPART_EXPR)
1343 : : {
1344 : 41051 : ref = TREE_OPERAND (ref, 0);
1345 : 41051 : access_fns.safe_push (integer_zero_node);
1346 : : }
1347 : 3134278 : else if (TREE_CODE (ref) == IMAGPART_EXPR)
1348 : : {
1349 : 39630 : ref = TREE_OPERAND (ref, 0);
1350 : 39630 : access_fns.safe_push (integer_one_node);
1351 : : }
1352 : :
1353 : : /* Analyze access functions of dimensions we know to be independent.
1354 : : The list of component references handled here should be kept in
1355 : : sync with access_fn_component_p. */
1356 : 5616892 : while (handled_component_p (ref))
1357 : : {
1358 : 2583010 : if (TREE_CODE (ref) == ARRAY_REF)
1359 : : {
1360 : 1275512 : tree op = TREE_OPERAND (ref, 1);
1361 : 1275512 : tree access_fn = analyze_scalar_evolution (loop, op);
1362 : 1275512 : access_fn = instantiate_scev (nest, loop, access_fn);
1363 : 1275512 : access_fns.safe_push (access_fn);
1364 : : }
1365 : 1307498 : else if (TREE_CODE (ref) == COMPONENT_REF
1366 : 1307498 : && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1367 : : {
1368 : : /* For COMPONENT_REFs of records (but not unions!) use the
1369 : : FIELD_DECL offset as constant access function so we can
1370 : : disambiguate a[i].f1 and a[i].f2. */
1371 : 1166051 : tree off = component_ref_field_offset (ref);
1372 : 1166051 : off = size_binop (PLUS_EXPR,
1373 : : size_binop (MULT_EXPR,
1374 : : fold_convert (bitsizetype, off),
1375 : : bitsize_int (BITS_PER_UNIT)),
1376 : : DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1377 : 1166051 : access_fns.safe_push (off);
1378 : : }
1379 : : else
1380 : : /* If we have an unhandled component we could not translate
1381 : : to an access function stop analyzing. We have determined
1382 : : our base object in this case. */
1383 : : break;
1384 : :
1385 : 2441563 : ref = TREE_OPERAND (ref, 0);
1386 : : }
1387 : :
1388 : : /* If the address operand of a MEM_REF base has an evolution in the
1389 : : analyzed nest, add it as an additional independent access-function. */
1390 : 3175329 : if (TREE_CODE (ref) == MEM_REF)
1391 : : {
1392 : 2179721 : tree op = TREE_OPERAND (ref, 0);
1393 : 2179721 : tree access_fn = analyze_scalar_evolution (loop, op);
1394 : 2179721 : access_fn = instantiate_scev (nest, loop, access_fn);
1395 : 2179721 : STRIP_NOPS (access_fn);
1396 : 2179721 : if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1397 : : {
1398 : 1084316 : tree memoff = TREE_OPERAND (ref, 1);
1399 : 1084316 : tree base = initial_condition (access_fn);
1400 : 1084316 : tree orig_type = TREE_TYPE (base);
1401 : 1084316 : STRIP_USELESS_TYPE_CONVERSION (base);
1402 : 1084316 : tree off;
1403 : 1084316 : split_constant_offset (base, &base, &off);
1404 : 1084316 : STRIP_USELESS_TYPE_CONVERSION (base);
1405 : : /* Fold the MEM_REF offset into the evolutions initial
1406 : : value to make more bases comparable. */
1407 : 1084316 : if (!integer_zerop (memoff))
1408 : : {
1409 : 82529 : off = size_binop (PLUS_EXPR, off,
1410 : : fold_convert (ssizetype, memoff));
1411 : 82529 : memoff = build_int_cst (TREE_TYPE (memoff), 0);
1412 : : }
1413 : : /* Adjust the offset so it is a multiple of the access type
1414 : : size and thus we separate bases that can possibly be used
1415 : : to produce partial overlaps (which the access_fn machinery
1416 : : cannot handle). */
1417 : 1084316 : wide_int rem;
1418 : 1084316 : if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1419 : 1084180 : && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1420 : 2168179 : && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1421 : 1083863 : rem = wi::mod_trunc
1422 : 1083863 : (wi::to_wide (off),
1423 : 2167726 : wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1424 : 1083863 : SIGNED);
1425 : : else
1426 : : /* If we can't compute the remainder simply force the initial
1427 : : condition to zero. */
1428 : 453 : rem = wi::to_wide (off);
1429 : 1084316 : off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1430 : 1084316 : memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1431 : : /* And finally replace the initial condition. */
1432 : 2168632 : access_fn = chrec_replace_initial_condition
1433 : 1084316 : (access_fn, fold_convert (orig_type, off));
1434 : : /* ??? This is still not a suitable base object for
1435 : : dr_may_alias_p - the base object needs to be an
1436 : : access that covers the object as whole. With
1437 : : an evolution in the pointer this cannot be
1438 : : guaranteed.
1439 : : As a band-aid, mark the access so we can special-case
1440 : : it in dr_may_alias_p. */
1441 : 1084316 : tree old = ref;
1442 : 1084316 : ref = fold_build2_loc (EXPR_LOCATION (ref),
1443 : 1084316 : MEM_REF, TREE_TYPE (ref),
1444 : : base, memoff);
1445 : 1084316 : MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1446 : 1084316 : MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1447 : 1084316 : dri->unconstrained_base = true;
1448 : 1084316 : access_fns.safe_push (access_fn);
1449 : 1084316 : }
1450 : : }
1451 : 995608 : else if (DECL_P (ref))
1452 : : {
1453 : : /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1454 : 854161 : ref = build2 (MEM_REF, TREE_TYPE (ref),
1455 : : build_fold_addr_expr (ref),
1456 : : build_int_cst (reference_alias_ptr_type (ref), 0));
1457 : : }
1458 : :
1459 : 3175329 : dri->base_object = ref;
1460 : 3175329 : dri->access_fns = access_fns;
1461 : : }
1462 : :
1463 : : /* Extracts the alias analysis information from the memory reference DR. */
1464 : :
1465 : : static void
1466 : 16365845 : dr_analyze_alias (struct data_reference *dr)
1467 : : {
1468 : 16365845 : tree ref = DR_REF (dr);
1469 : 16365845 : tree base = get_base_address (ref), addr;
1470 : :
1471 : 16365845 : if (INDIRECT_REF_P (base)
1472 : 16365845 : || TREE_CODE (base) == MEM_REF)
1473 : : {
1474 : 6803948 : addr = TREE_OPERAND (base, 0);
1475 : 6803948 : if (TREE_CODE (addr) == SSA_NAME)
1476 : 6802632 : DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1477 : : }
1478 : 16365845 : }
1479 : :
1480 : : /* Frees data reference DR. */
1481 : :
1482 : : void
1483 : 16693597 : free_data_ref (data_reference_p dr)
1484 : : {
1485 : 16693597 : DR_ACCESS_FNS (dr).release ();
1486 : 16693597 : if (dr->alt_indices.base_object)
1487 : 115552 : dr->alt_indices.access_fns.release ();
1488 : 16693597 : free (dr);
1489 : 16693597 : }
1490 : :
1491 : : /* Analyze memory reference MEMREF, which is accessed in STMT.
1492 : : The reference is a read if IS_READ is true, otherwise it is a write.
1493 : : IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1494 : : within STMT, i.e. that it might not occur even if STMT is executed
1495 : : and runs to completion.
1496 : :
1497 : : Return the data_reference description of MEMREF. NEST is the outermost
1498 : : loop in which the reference should be instantiated, LOOP is the loop
1499 : : in which the data reference should be analyzed. */
1500 : :
1501 : : struct data_reference *
1502 : 16365845 : create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1503 : : bool is_read, bool is_conditional_in_stmt)
1504 : : {
1505 : 16365845 : struct data_reference *dr;
1506 : :
1507 : 16365845 : if (dump_file && (dump_flags & TDF_DETAILS))
1508 : : {
1509 : 63712 : fprintf (dump_file, "Creating dr for ");
1510 : 63712 : print_generic_expr (dump_file, memref, TDF_SLIM);
1511 : 63712 : fprintf (dump_file, "\n");
1512 : : }
1513 : :
1514 : 16365845 : dr = XCNEW (struct data_reference);
1515 : 16365845 : DR_STMT (dr) = stmt;
1516 : 16365845 : DR_REF (dr) = memref;
1517 : 16365845 : DR_IS_READ (dr) = is_read;
1518 : 16365845 : DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1519 : :
1520 : 29671913 : dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1521 : : nest != NULL ? loop : NULL, stmt);
1522 : 16365845 : dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1523 : 16365845 : dr_analyze_alias (dr);
1524 : :
1525 : 16365845 : if (dump_file && (dump_flags & TDF_DETAILS))
1526 : : {
1527 : 63712 : unsigned i;
1528 : 63712 : fprintf (dump_file, "\tbase_address: ");
1529 : 63712 : print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1530 : 63712 : fprintf (dump_file, "\n\toffset from base address: ");
1531 : 63712 : print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1532 : 63712 : fprintf (dump_file, "\n\tconstant offset from base address: ");
1533 : 63712 : print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1534 : 63712 : fprintf (dump_file, "\n\tstep: ");
1535 : 63712 : print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1536 : 63712 : fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1537 : 63712 : fprintf (dump_file, "\n\tbase misalignment: %d",
1538 : : DR_BASE_MISALIGNMENT (dr));
1539 : 63712 : fprintf (dump_file, "\n\toffset alignment: %d",
1540 : : DR_OFFSET_ALIGNMENT (dr));
1541 : 63712 : fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1542 : 63712 : fprintf (dump_file, "\n\tbase_object: ");
1543 : 63712 : print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1544 : 63712 : fprintf (dump_file, "\n");
1545 : 182406 : for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1546 : : {
1547 : 54982 : fprintf (dump_file, "\tAccess function %d: ", i);
1548 : 54982 : print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1549 : : }
1550 : : }
1551 : :
1552 : 16365845 : return dr;
1553 : : }
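     : :
     : : /* Illustrative sketch, not part of the original tree-data-ref.cc source:
     : :    a minimal loop whose store would be decomposed by create_data_ref
     : :    roughly as indicated in the trailing comments.  The function name, the
     : :    array and the concrete values are assumptions for a 32-bit int and a
     : :    loop numbered 1; the real trees depend on the target and the IL.  */
     : :
     : : extern int example_array[100];
     : :
     : : static void
     : : example_create_data_ref_input (int n)
     : : {
     : :   for (int i = 0; i < n; i++)
     : :     example_array[i + 1] = 0;   /* DR_BASE_ADDRESS: &example_array,
     : :                                    DR_STEP: 4, and a single access
     : :                                    function {1, +, 1}_1 for the index.  */
     : : }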
1554 : :
1555 : : /* A helper function that computes the order between two tree expressions
1556 : :    T1 and T2.  It is used in comparator functions that sort objects based
1557 : :    on the order of tree expressions.  The function returns -1, 0, or 1. */
1558 : :
1559 : : int
1560 : 401551983 : data_ref_compare_tree (tree t1, tree t2)
1561 : : {
1562 : 401551983 : int i, cmp;
1563 : 401551983 : enum tree_code code;
1564 : 401551983 : char tclass;
1565 : :
1566 : 401551983 : if (t1 == t2)
1567 : : return 0;
1568 : 183342198 : if (t1 == NULL)
1569 : : return -1;
1570 : 183231844 : if (t2 == NULL)
1571 : : return 1;
1572 : :
1573 : 183165551 : STRIP_USELESS_TYPE_CONVERSION (t1);
1574 : 183165551 : STRIP_USELESS_TYPE_CONVERSION (t2);
1575 : 183165551 : if (t1 == t2)
1576 : : return 0;
1577 : :
1578 : 182624860 : if (TREE_CODE (t1) != TREE_CODE (t2)
1579 : 13299755 : && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1580 : 18811300 : return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1581 : :
1582 : 169325105 : code = TREE_CODE (t1);
1583 : 169325105 : switch (code)
1584 : : {
1585 : 49754700 : case INTEGER_CST:
1586 : 49754700 : return tree_int_cst_compare (t1, t2);
1587 : :
1588 : 32 : case STRING_CST:
1589 : 32 : if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1590 : 32 : return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1591 : 0 : return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1592 : 0 : TREE_STRING_LENGTH (t1));
1593 : :
1594 : 14149744 : case SSA_NAME:
1595 : 14149744 : if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1596 : 14149744 : return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1597 : : break;
1598 : :
1599 : 105420629 : default:
1600 : 105420629 : if (POLY_INT_CST_P (t1))
1601 : : return compare_sizes_for_sort (wi::to_poly_widest (t1),
1602 : : wi::to_poly_widest (t2));
1603 : :
1604 : 105420629 : tclass = TREE_CODE_CLASS (code);
1605 : :
1606 : : /* For decls, compare their UIDs. */
1607 : 105420629 : if (tclass == tcc_declaration)
1608 : : {
1609 : 20243577 : if (DECL_UID (t1) != DECL_UID (t2))
1610 : 20243050 : return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1611 : : break;
1612 : : }
1613 : : /* For expressions, compare their operands recursively. */
1614 : 85177052 : else if (IS_EXPR_CODE_CLASS (tclass))
1615 : : {
1616 : 150969177 : for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1617 : : {
1618 : 97856511 : cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1619 : 97856511 : TREE_OPERAND (t2, i));
1620 : 97856511 : if (cmp != 0)
1621 : : return cmp;
1622 : : }
1623 : : }
1624 : : else
1625 : 0 : gcc_unreachable ();
1626 : : }
1627 : :
1628 : : return 0;
1629 : : }
1630 : :
1631 : : /* Return TRUE if it's possible to resolve data dependence DDR by a runtime
1632 : :    alias check. */
1633 : :
1634 : : opt_result
1635 : 178067 : runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1636 : : {
1637 : 178067 : if (dump_enabled_p ())
1638 : 7132 : dump_printf (MSG_NOTE,
1639 : : "consider run-time aliasing test between %T and %T\n",
1640 : 7132 : DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1641 : :
1642 : 178067 : if (!speed_p)
1643 : 0 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1644 : : "runtime alias check not supported when"
1645 : : " optimizing for size.\n");
1646 : :
1647 : : /* FORNOW: We don't support versioning with outer-loop in either
1648 : : vectorization or loop distribution. */
1649 : 178067 : if (loop != NULL && loop->inner != NULL)
1650 : 125 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1651 : : "runtime alias check not supported for"
1652 : : " outer loop.\n");
1653 : :
1654 : : /* FORNOW: We don't support handling different address spaces. */
1655 : 177942 : if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
1656 : 177942 : != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
1657 : 1 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1658 : : "runtime alias check between different "
1659 : : "address spaces not supported.\n");
1660 : :
1661 : 177941 : return opt_result::success ();
1662 : : }
1663 : :
1664 : : /* Operator == between two dr_with_seg_len objects.
1665 : :
1666 : :    This equality operator is used to make sure two data refs
1667 : :    are the same, so that we will consider combining the
1668 : :    aliasing checks of those two pairs of data-dependent data
1669 : :    refs. */
1670 : :
1671 : : static bool
1672 : 116430 : operator == (const dr_with_seg_len& d1,
1673 : : const dr_with_seg_len& d2)
1674 : : {
1675 : 116430 : return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1676 : 116430 : DR_BASE_ADDRESS (d2.dr), 0)
1677 : 89719 : && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1678 : 88551 : && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1679 : 82500 : && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1680 : 82240 : && known_eq (d1.access_size, d2.access_size)
1681 : 195365 : && d1.align == d2.align);
1682 : : }
1683 : :
1684 : : /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1685 : : so that we can combine aliasing checks in one scan. */
1686 : :
1687 : : static int
1688 : 1043027 : comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1689 : : {
1690 : 1043027 : const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1691 : 1043027 : const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1692 : 1043027 : const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1693 : 1043027 : const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1694 : :
1695 : :   /* For DR pairs (a, b) and (c, d), we only consider merging the alias checks
1696 : :      if a and c have the same base address and step, and b and d have the same
1697 : :      address and step.  If either a&c or b&d don't have the same address and
1698 : :      step, we don't care about the order of those two pairs after sorting. */
1699 : 1043027 : int comp_res;
1700 : :
1701 : 1043027 : if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1702 : 1043027 : DR_BASE_ADDRESS (b1.dr))) != 0)
1703 : : return comp_res;
1704 : 523941 : if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1705 : 523941 : DR_BASE_ADDRESS (b2.dr))) != 0)
1706 : : return comp_res;
1707 : 343812 : if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1708 : 343812 : DR_STEP (b1.dr))) != 0)
1709 : : return comp_res;
1710 : 343196 : if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1711 : 343196 : DR_STEP (b2.dr))) != 0)
1712 : : return comp_res;
1713 : 335633 : if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1714 : 335633 : DR_OFFSET (b1.dr))) != 0)
1715 : : return comp_res;
1716 : 315668 : if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1717 : 315668 : DR_INIT (b1.dr))) != 0)
1718 : : return comp_res;
1719 : 236466 : if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1720 : 236466 : DR_OFFSET (b2.dr))) != 0)
1721 : : return comp_res;
1722 : 220039 : if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1723 : 220039 : DR_INIT (b2.dr))) != 0)
1724 : : return comp_res;
1725 : :
1726 : : return 0;
1727 : : }
1728 : :
1729 : : /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1730 : :
1731 : : static void
1732 : 1061 : dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1733 : : {
1734 : 2122 : dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
1735 : 1061 : DR_REF (alias_pair->first.dr),
1736 : 1061 : DR_REF (alias_pair->second.dr));
1737 : :
1738 : 1061 : dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1739 : : alias_pair->first.seg_len);
1740 : 1061 : if (!operand_equal_p (alias_pair->first.seg_len,
1741 : 1061 : alias_pair->second.seg_len, 0))
1742 : 268 : dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1743 : :
1744 : 1061 : dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
1745 : 1061 : dump_dec (MSG_NOTE, alias_pair->first.access_size);
1746 : 1061 : if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1747 : : {
1748 : 272 : dump_printf (MSG_NOTE, " vs. ");
1749 : 272 : dump_dec (MSG_NOTE, alias_pair->second.access_size);
1750 : : }
1751 : :
1752 : 1061 : dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
1753 : : alias_pair->first.align);
1754 : 1061 : if (alias_pair->first.align != alias_pair->second.align)
1755 : 85 : dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1756 : :
1757 : 1061 : dump_printf (MSG_NOTE, "\n%sflags: ", indent);
1758 : 1061 : if (alias_pair->flags & DR_ALIAS_RAW)
1759 : 173 : dump_printf (MSG_NOTE, " RAW");
1760 : 1061 : if (alias_pair->flags & DR_ALIAS_WAR)
1761 : 855 : dump_printf (MSG_NOTE, " WAR");
1762 : 1061 : if (alias_pair->flags & DR_ALIAS_WAW)
1763 : 177 : dump_printf (MSG_NOTE, " WAW");
1764 : 1061 : if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1765 : 284 : dump_printf (MSG_NOTE, " ARBITRARY");
1766 : 1061 : if (alias_pair->flags & DR_ALIAS_SWAPPED)
1767 : 0 : dump_printf (MSG_NOTE, " SWAPPED");
1768 : 1061 : if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1769 : 0 : dump_printf (MSG_NOTE, " UNSWAPPED");
1770 : 1061 : if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1771 : 0 : dump_printf (MSG_NOTE, " MIXED_STEPS");
1772 : 1061 : if (alias_pair->flags == 0)
1773 : 0 : dump_printf (MSG_NOTE, " <none>");
1774 : 1061 : dump_printf (MSG_NOTE, "\n");
1775 : 1061 : }
1776 : :
1777 : : /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1778 : :    FACTOR is the number of iterations in which each data reference is accessed.
1779 : :
1780 : : Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1781 : : we create an expression:
1782 : :
1783 : : ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1784 : : || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1785 : :
1786 : : for aliasing checks. However, in some cases we can decrease the number
1787 : : of checks by combining two checks into one. For example, suppose we have
1788 : : another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1789 : : condition is satisfied:
1790 : :
1791 : : load_ptr_0 < load_ptr_1 &&
1792 : : load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1793 : :
1794 : :    (this condition means that, in each iteration of the vectorized loop, the
1795 : :    memory accessed through store_ptr_0 cannot lie between the memory of
1796 : :    load_ptr_0 and load_ptr_1.)
1797 : :
1798 : :    we can then use only the following expression to perform the aliasing
1799 : :    checks between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1800 : :
1801 : : ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1802 : : || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1803 : :
1804 : :    Note that we only consider the case in which load_ptr_0 and load_ptr_1
1805 : :    have the same base address. */
1806 : :
1807 : : void
1808 : 18557 : prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1809 : : poly_uint64)
1810 : : {
1811 : 18557 : if (alias_pairs->is_empty ())
1812 : 18557 : return;
1813 : :
1814 : : /* Canonicalize each pair so that the base components are ordered wrt
1815 : : data_ref_compare_tree. This allows the loop below to merge more
1816 : : cases. */
1817 : : unsigned int i;
1818 : : dr_with_seg_len_pair_t *alias_pair;
1819 : 74449 : FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1820 : : {
1821 : 56498 : data_reference_p dr_a = alias_pair->first.dr;
1822 : 56498 : data_reference_p dr_b = alias_pair->second.dr;
1823 : 56498 : int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1824 : : DR_BASE_ADDRESS (dr_b));
1825 : 56498 : if (comp_res == 0)
1826 : 1937 : comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1827 : 1937 : if (comp_res == 0)
1828 : 110 : comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1829 : 56498 : if (comp_res > 0)
1830 : : {
1831 : 18983 : std::swap (alias_pair->first, alias_pair->second);
1832 : 18983 : alias_pair->flags |= DR_ALIAS_SWAPPED;
1833 : : }
1834 : : else
1835 : 37515 : alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1836 : : }
1837 : :
1838 : : /* Sort the collected data ref pairs so that we can scan them once to
1839 : : combine all possible aliasing checks. */
1840 : 17951 : alias_pairs->qsort (comp_dr_with_seg_len_pair);
1841 : :
1842 : : /* Scan the sorted dr pairs and check if we can combine alias checks
1843 : : of two neighboring dr pairs. */
1844 : : unsigned int last = 0;
1845 : 56498 : for (i = 1; i < alias_pairs->length (); ++i)
1846 : : {
1847 : : /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1848 : 38547 : dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1849 : 38547 : dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1850 : :
1851 : 38547 : dr_with_seg_len *dr_a1 = &alias_pair1->first;
1852 : 38547 : dr_with_seg_len *dr_b1 = &alias_pair1->second;
1853 : 38547 : dr_with_seg_len *dr_a2 = &alias_pair2->first;
1854 : 38547 : dr_with_seg_len *dr_b2 = &alias_pair2->second;
1855 : :
1856 : : /* Remove duplicate data ref pairs. */
1857 : 38547 : if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1858 : : {
1859 : 17238 : if (dump_enabled_p ())
1860 : 1897 : dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1861 : 1897 : DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1862 : 1897 : DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1863 : 17238 : alias_pair1->flags |= alias_pair2->flags;
1864 : 55785 : continue;
1865 : : }
1866 : :
1867 : : /* Assume that we won't be able to merge the pairs, then correct
1868 : : if we do. */
1869 : 21309 : last += 1;
1870 : 21309 : if (last != i)
1871 : 4746 : (*alias_pairs)[last] = (*alias_pairs)[i];
1872 : :
1873 : 21309 : if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1874 : : {
1875 : : 	  /* We consider the case in which DR_B1 and DR_B2 are the same memrefs,
1876 : : 	     and DR_A1 and DR_A2 are two consecutive memrefs. */
1877 : 18027 : if (*dr_a1 == *dr_a2)
1878 : : {
1879 : 13216 : std::swap (dr_a1, dr_b1);
1880 : 13216 : std::swap (dr_a2, dr_b2);
1881 : : }
1882 : :
1883 : 18027 : poly_int64 init_a1, init_a2;
1884 : : /* Only consider cases in which the distance between the initial
1885 : : DR_A1 and the initial DR_A2 is known at compile time. */
1886 : 33257 : if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1887 : 18027 : DR_BASE_ADDRESS (dr_a2->dr), 0)
1888 : 3448 : || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1889 : 3448 : DR_OFFSET (dr_a2->dr), 0)
1890 : 2797 : || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1891 : 20824 : || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1892 : 15237 : continue;
1893 : :
1894 : : /* Don't combine if we can't tell which one comes first. */
1895 : 2797 : if (!ordered_p (init_a1, init_a2))
1896 : : continue;
1897 : :
1898 : : /* Work out what the segment length would be if we did combine
1899 : : DR_A1 and DR_A2:
1900 : :
1901 : : - If DR_A1 and DR_A2 have equal lengths, that length is
1902 : : also the combined length.
1903 : :
1904 : : - If DR_A1 and DR_A2 both have negative "lengths", the combined
1905 : : length is the lower bound on those lengths.
1906 : :
1907 : : - If DR_A1 and DR_A2 both have positive lengths, the combined
1908 : : length is the upper bound on those lengths.
1909 : :
1910 : : Other cases are unlikely to give a useful combination.
1911 : :
1912 : : The lengths both have sizetype, so the sign is taken from
1913 : : the step instead. */
1914 : 2797 : poly_uint64 new_seg_len = 0;
1915 : 2797 : bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1916 : 2797 : dr_a2->seg_len, 0);
1917 : 2797 : if (new_seg_len_p)
1918 : : {
1919 : 7 : poly_uint64 seg_len_a1, seg_len_a2;
1920 : 7 : if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1921 : 7 : || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1922 : 7 : continue;
1923 : :
1924 : 0 : tree indicator_a = dr_direction_indicator (dr_a1->dr);
1925 : 0 : if (TREE_CODE (indicator_a) != INTEGER_CST)
1926 : 0 : continue;
1927 : :
1928 : 0 : tree indicator_b = dr_direction_indicator (dr_a2->dr);
1929 : 0 : if (TREE_CODE (indicator_b) != INTEGER_CST)
1930 : 0 : continue;
1931 : :
1932 : 0 : int sign_a = tree_int_cst_sgn (indicator_a);
1933 : 0 : int sign_b = tree_int_cst_sgn (indicator_b);
1934 : :
1935 : 0 : if (sign_a <= 0 && sign_b <= 0)
1936 : 0 : new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1937 : 0 : else if (sign_a >= 0 && sign_b >= 0)
1938 : 0 : new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1939 : : else
1940 : 0 : continue;
1941 : : }
1942 : : /* At this point we're committed to merging the refs. */
1943 : :
1944 : : /* Make sure dr_a1 starts left of dr_a2. */
1945 : 2790 : if (maybe_gt (init_a1, init_a2))
1946 : : {
1947 : 0 : std::swap (*dr_a1, *dr_a2);
1948 : 0 : std::swap (init_a1, init_a2);
1949 : : }
1950 : :
1951 : : /* The DR_Bs are equal, so only the DR_As can introduce
1952 : : mixed steps. */
1953 : 2790 : if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1954 : 0 : alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1955 : :
1956 : 2790 : if (new_seg_len_p)
1957 : : {
1958 : 0 : dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1959 : 0 : new_seg_len);
1960 : 0 : dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1961 : : }
1962 : :
1963 : : /* This is always positive due to the swap above. */
1964 : 2790 : poly_uint64 diff = init_a2 - init_a1;
1965 : :
1966 : : /* The new check will start at DR_A1. Make sure that its access
1967 : : size encompasses the initial DR_A2. */
1968 : 2790 : if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1969 : : {
1970 : 1101 : dr_a1->access_size = upper_bound (dr_a1->access_size,
1971 : : diff + dr_a2->access_size);
1972 : 1101 : unsigned int new_align = known_alignment (dr_a1->access_size);
1973 : 1101 : dr_a1->align = MIN (dr_a1->align, new_align);
1974 : : }
1975 : 2790 : if (dump_enabled_p ())
1976 : 761 : dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1977 : 761 : DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1978 : 761 : DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1979 : 2790 : alias_pair1->flags |= alias_pair2->flags;
1980 : 2790 : last -= 1;
1981 : : }
1982 : : }
1983 : 17951 : alias_pairs->truncate (last + 1);
1984 : :
1985 : : /* Try to restore the original dr_with_seg_len order within each
1986 : : dr_with_seg_len_pair_t. If we ended up combining swapped and
1987 : : unswapped pairs into the same check, we have to invalidate any
1988 : : RAW, WAR and WAW information for it. */
1989 : 17951 : if (dump_enabled_p ())
1990 : 797 : dump_printf (MSG_NOTE, "merged alias checks:\n");
1991 : 54421 : FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1992 : : {
1993 : 36470 : unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1994 : 36470 : unsigned int swapped = (alias_pair->flags & swap_mask);
1995 : 36470 : if (swapped == DR_ALIAS_SWAPPED)
1996 : 11754 : std::swap (alias_pair->first, alias_pair->second);
1997 : 24716 : else if (swapped != DR_ALIAS_UNSWAPPED)
1998 : 1722 : alias_pair->flags |= DR_ALIAS_ARBITRARY;
1999 : 36470 : alias_pair->flags &= ~swap_mask;
2000 : 36470 : if (dump_enabled_p ())
2001 : 1061 : dump_alias_pair (alias_pair, " ");
2002 : : }
2003 : : }
2004 : :
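     : : /* Illustrative sketch, not part of the original source: a hand-expanded
     : :    scalar form of the merged check described in the comment before
     : :    prune_runtime_alias_test_list above.  The pointer names, the 4-byte gap
     : :    between the two loads and the segment lengths are hypothetical; the
     : :    single test is only valid under the precondition stated there
     : :    (gap - load_seg_len < store_seg_len).  */
     : :
     : : static inline bool
     : : merged_no_alias_check_example (char *store_ptr_0, char *load_ptr_0,
     : :                                unsigned long store_seg_len,
     : :                                unsigned long load_seg_len)
     : : {
     : :   char *load_ptr_1 = load_ptr_0 + 4;   /* second load, 4 bytes above.  */
     : :
     : :   /* One range test replaces the two checks store vs. load_ptr_0 and
     : :      store vs. load_ptr_1; true means the store cannot overlap either.  */
     : :   return (store_ptr_0 + store_seg_len <= load_ptr_0
     : :           || load_ptr_1 + load_seg_len <= store_ptr_0);
     : : }
     : :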
2005 : : /* A subroutine of create_intersect_range_checks, with a subset of the
2006 : : same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
2007 : :    to optimize cases in which the references form a simple RAW, WAR or
2008 : :    WAW dependence. */
2009 : :
2010 : : static bool
2011 : 4594 : create_ifn_alias_checks (tree *cond_expr,
2012 : : const dr_with_seg_len_pair_t &alias_pair)
2013 : : {
2014 : 4594 : const dr_with_seg_len& dr_a = alias_pair.first;
2015 : 4594 : const dr_with_seg_len& dr_b = alias_pair.second;
2016 : :
2017 : : /* Check for cases in which:
2018 : :
2019 : :      (a) we have a known RAW, WAR or WAW dependence;
2020 : : (b) the accesses are well-ordered in both the original and new code
2021 : : (see the comment above the DR_ALIAS_* flags for details); and
2022 : : (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2023 : 4594 : if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2024 : : return false;
2025 : :
2026 : : /* Make sure that both DRs access the same pattern of bytes,
2027 : : with a constant length and step. */
2028 : 3017 : poly_uint64 seg_len;
2029 : 3017 : if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2030 : 2616 : || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2031 : 2609 : || maybe_ne (dr_a.access_size, dr_b.access_size)
2032 : 2569 : || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2033 : 5586 : || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2034 : 463 : return false;
2035 : :
2036 : 2554 : unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2037 : 2554 : tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2038 : 2554 : tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2039 : :
2040 : :   /* See whether the target supports what we want to do.  WAW checks are
2041 : : equivalent to WAR checks here. */
2042 : 2518 : internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2043 : 2554 : ? IFN_CHECK_RAW_PTRS
2044 : : : IFN_CHECK_WAR_PTRS);
2045 : 2554 : unsigned int align = MIN (dr_a.align, dr_b.align);
2046 : 2554 : poly_uint64 full_length = seg_len + bytes;
2047 : 2554 : if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2048 : : full_length, align))
2049 : : {
2050 : 2554 : full_length = seg_len + dr_a.access_size;
2051 : 2554 : if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2052 : : full_length, align))
2053 : : return false;
2054 : : }
2055 : :
2056 : : /* Commit to using this form of test. */
2057 : 0 : addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2058 : 0 : addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2059 : :
2060 : 0 : addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2061 : 0 : addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2062 : :
2063 : 0 : *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2064 : : ifn, boolean_type_node,
2065 : : 4, addr_a, addr_b,
2066 : 0 : size_int (full_length),
2067 : 0 : size_int (align));
2068 : :
2069 : 0 : if (dump_enabled_p ())
2070 : : {
2071 : 0 : if (ifn == IFN_CHECK_RAW_PTRS)
2072 : 0 : dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2073 : : else
2074 : 0 : dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2075 : : }
2076 : : return true;
2077 : : }
2078 : :
2079 : : /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2080 : : free of aliases, using a condition based on index values instead
2081 : : of a condition based on addresses. Return true on success,
2082 : : storing the condition in *COND_EXPR.
2083 : :
2084 : : This can only be done if the two data references in ALIAS_PAIR access
2085 : : the same array object and the index is the only difference. For example,
2086 : : if the two data references are DR_A and DR_B:
2087 : :
2088 : : DR_A DR_B
2089 : : data-ref arr[i] arr[j]
2090 : : base_object arr arr
2091 : : index {i_0, +, 1}_loop {j_0, +, 1}_loop
2092 : :
2093 : : The addresses and their index are like:
2094 : :
2095 : : |<- ADDR_A ->| |<- ADDR_B ->|
2096 : : ------------------------------------------------------->
2097 : : | | | | | | | | | |
2098 : : ------------------------------------------------------->
2099 : : i_0 ... i_0+4 j_0 ... j_0+4
2100 : :
2101 : : We can create expression based on index rather than address:
2102 : :
2103 : : (unsigned) (i_0 - j_0 + 3) <= 6
2104 : :
2105 : : i.e. the indices are less than 4 apart.
2106 : :
2107 : : Note evolution step of index needs to be considered in comparison. */
2108 : :
2109 : : static bool
2110 : 4745 : create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2111 : : const dr_with_seg_len_pair_t &alias_pair)
2112 : : {
2113 : 4745 : const dr_with_seg_len &dr_a = alias_pair.first;
2114 : 4745 : const dr_with_seg_len &dr_b = alias_pair.second;
2115 : 4745 : if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2116 : 4745 : || integer_zerop (DR_STEP (dr_a.dr))
2117 : 4496 : || integer_zerop (DR_STEP (dr_b.dr))
2118 : 18113 : || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2119 : 350 : return false;
2120 : :
2121 : 4395 : poly_uint64 seg_len1, seg_len2;
2122 : 4395 : if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2123 : 4395 : || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2124 : 269 : return false;
2125 : :
2126 : 4126 : if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2127 : : return false;
2128 : :
2129 : 4126 : if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2130 : : return false;
2131 : :
2132 : 152 : if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2133 : : return false;
2134 : :
2135 : 152 : gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2136 : :
2137 : 152 : bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2138 : 152 : unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2139 : 152 : if (neg_step)
2140 : : {
2141 : 30 : abs_step = -abs_step;
2142 : 30 : seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2143 : 30 : seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2144 : : }
2145 : :
2146 : :   /* Infer the number of iterations with which the memory segment is accessed
2147 : :      by DR.  In other words, an alias is checked if the memory segment accessed
2148 : :      by DR_A in some iterations intersects with the memory segment accessed by
2149 : :      DR_B in the same number of iterations.
2150 : :      Note that the segment length is a linear function of the number of
2151 : :      iterations with DR_STEP as the coefficient. */
2152 : 152 : poly_uint64 niter_len1, niter_len2;
2153 : 152 : if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2154 : 152 : || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2155 : : return false;
2156 : :
2157 : : /* Divide each access size by the byte step, rounding up. */
2158 : 152 : poly_uint64 niter_access1, niter_access2;
2159 : 152 : if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2160 : : abs_step, &niter_access1)
2161 : 152 : || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2162 : : abs_step, &niter_access2))
2163 : : return false;
2164 : :
2165 : 152 : bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2166 : :
2167 : 152 : int found = -1;
2168 : 311 : for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2169 : : {
2170 : 160 : tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2171 : 160 : tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2172 : :       /* Two indices must be the same if they are not SCEVs, or not SCEVs wrt
2173 : :          the current loop being vectorized. */
2174 : 160 : if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2175 : 152 : || TREE_CODE (access2) != POLYNOMIAL_CHREC
2176 : 152 : || CHREC_VARIABLE (access1) != (unsigned)loop->num
2177 : 312 : || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2178 : : {
2179 : 8 : if (operand_equal_p (access1, access2, 0))
2180 : 7 : continue;
2181 : :
2182 : : return false;
2183 : : }
2184 : 152 : if (found >= 0)
2185 : : return false;
2186 : 152 : found = i;
2187 : : }
2188 : :
2189 : : /* Ought not to happen in practice, since if all accesses are equal then the
2190 : : alias should be decidable at compile time. */
2191 : 151 : if (found < 0)
2192 : : return false;
2193 : :
2194 : : /* The two indices must have the same step. */
2195 : 151 : tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2196 : 151 : tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2197 : 151 : if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2198 : : return false;
2199 : :
2200 : 151 : tree idx_step = CHREC_RIGHT (access1);
2201 : : /* Index must have const step, otherwise DR_STEP won't be constant. */
2202 : 151 : gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2203 : : /* Index must evaluate in the same direction as DR. */
2204 : 151 : gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2205 : :
2206 : 151 : tree min1 = CHREC_LEFT (access1);
2207 : 151 : tree min2 = CHREC_LEFT (access2);
2208 : 151 : if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2209 : : return false;
2210 : :
2211 : :   /* Ideally, the alias could be checked against the loop's control IV, but
2212 : :      we would need to prove a linear mapping between the control IV and the
2213 : :      reference index.  Although that should be true, we instead check against
2214 : :      the (array) index of the data reference.  Like the segment length, the
2215 : :      index length is a linear function of the number of iterations, with
2216 : :      idx_step as the coefficient, i.e., niter_len * idx_step. */
2217 : 151 : offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2218 : : SIGNED);
2219 : 151 : if (neg_step)
2220 : 30 : abs_idx_step = -abs_idx_step;
2221 : 302 : poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2222 : 302 : poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2223 : 151 : poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2224 : 151 : poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2225 : :
2226 : 151 : gcc_assert (known_ge (idx_len1, 0)
2227 : : && known_ge (idx_len2, 0)
2228 : : && known_ge (idx_access1, 0)
2229 : : && known_ge (idx_access2, 0));
2230 : :
2231 : : /* Each access has the following pattern, with lengths measured
2232 : : in units of INDEX:
2233 : :
2234 : : <-- idx_len -->
2235 : : <--- A: -ve step --->
2236 : : +-----+-------+-----+-------+-----+
2237 : : | n-1 | ..... | 0 | ..... | n-1 |
2238 : : +-----+-------+-----+-------+-----+
2239 : : <--- B: +ve step --->
2240 : : <-- idx_len -->
2241 : : |
2242 : : min
2243 : :
2244 : : where "n" is the number of scalar iterations covered by the segment
2245 : : and where each access spans idx_access units.
2246 : :
2247 : : A is the range of bytes accessed when the step is negative,
2248 : : B is the range when the step is positive.
2249 : :
2250 : : When checking for general overlap, we need to test whether
2251 : : the range:
2252 : :
2253 : : [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2254 : :
2255 : : overlaps:
2256 : :
2257 : : [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2258 : :
2259 : : where:
2260 : :
2261 : : low_offsetN = +ve step ? 0 : -idx_lenN;
2262 : : high_offsetN = +ve step ? idx_lenN : 0;
2263 : :
2264 : : This is equivalent to testing whether:
2265 : :
2266 : : min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2267 : : && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2268 : :
2269 : : Converting this into a single test, there is an overlap if:
2270 : :
2271 : : 0 <= min2 - min1 + bias <= limit
2272 : :
2273 : : where bias = high_offset2 + idx_access2 - 1 - low_offset1
2274 : : limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2275 : : + (high_offset2 - low_offset2 + idx_access2 - 1)
2276 : : i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2277 : :
2278 : : Combining the tests requires limit to be computable in an unsigned
2279 : : form of the index type; if it isn't, we fall back to the usual
2280 : : pointer-based checks.
2281 : :
2282 : : We can do better if DR_B is a write and if DR_A and DR_B are
2283 : : well-ordered in both the original and the new code (see the
2284 : : comment above the DR_ALIAS_* flags for details). In this case
2285 : : we know that for each i in [0, n-1], the write performed by
2286 : : access i of DR_B occurs after access numbers j<=i of DR_A in
2287 : : both the original and the new code. Any write or anti
2288 : : dependencies wrt those DR_A accesses are therefore maintained.
2289 : :
2290 : : We just need to make sure that each individual write in DR_B does not
2291 : : overlap any higher-indexed access in DR_A; such DR_A accesses happen
2292 : : after the DR_B access in the original code but happen before it in
2293 : : the new code.
2294 : :
2295 : : We know the steps for both accesses are equal, so by induction, we
2296 : : just need to test whether the first write of DR_B overlaps a later
2297 : : access of DR_A. In other words, we need to move min1 along by
2298 : : one iteration:
2299 : :
2300 : : min1' = min1 + idx_step
2301 : :
2302 : : and use the ranges:
2303 : :
2304 : : [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2305 : :
2306 : : and:
2307 : :
2308 : : [min2, min2 + idx_access2 - 1]
2309 : :
2310 : : where:
2311 : :
2312 : : low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2313 : : high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */
2314 : 151 : if (waw_or_war_p)
2315 : 120 : idx_len1 -= abs_idx_step;
2316 : :
2317 : 151 : poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2318 : 151 : if (!waw_or_war_p)
2319 : 151 : limit += idx_len2;
2320 : :
2321 : 151 : tree utype = unsigned_type_for (TREE_TYPE (min1));
2322 : 151 : if (!wi::fits_to_tree_p (limit, utype))
2323 : : return false;
2324 : :
2325 : 151 : poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2326 : 151 : poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2327 : 151 : poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2328 : : /* Equivalent to adding IDX_STEP to MIN1. */
2329 : 151 : if (waw_or_war_p)
2330 : 120 : bias -= wi::to_offset (idx_step);
2331 : :
2332 : 151 : tree subject = fold_build2 (MINUS_EXPR, utype,
2333 : : fold_convert (utype, min2),
2334 : : fold_convert (utype, min1));
2335 : 151 : subject = fold_build2 (PLUS_EXPR, utype, subject,
2336 : : wide_int_to_tree (utype, bias));
2337 : 151 : tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2338 : : wide_int_to_tree (utype, limit));
2339 : 151 : if (*cond_expr)
2340 : 0 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2341 : : *cond_expr, part_cond_expr);
2342 : : else
2343 : 151 : *cond_expr = part_cond_expr;
2344 : 151 : if (dump_enabled_p ())
2345 : : {
2346 : 133 : if (waw_or_war_p)
2347 : 103 : dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2348 : : else
2349 : 30 : dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2350 : : }
2351 : : return true;
2352 : : }
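     : :
     : : /* Illustrative sketch, not part of the original source: the scalar shape
     : :    of the condition built at the end of create_intersect_range_checks_index
     : :    above.  MIN1/MIN2 stand for the CHREC_LEFT values and BIAS/LIMIT for the
     : :    constants derived in the long comment; all parameter names are
     : :    placeholders.  */
     : :
     : : static inline bool
     : : index_no_alias_test_example (unsigned long min1, unsigned long min2,
     : :                              unsigned long bias, unsigned long limit)
     : : {
     : :   /* The accesses cannot alias iff the biased index distance falls
     : :      outside [0, limit]; doing the arithmetic in an unsigned type folds
     : :      both bounds into a single comparison.  */
     : :   return min2 - min1 + bias > limit;
     : : }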
2353 : :
2354 : : /* A subroutine of create_intersect_range_checks, with a subset of the
2355 : : same arguments. Try to optimize cases in which the second access
2356 : : is a write and in which some overlap is valid. */
2357 : :
2358 : : static bool
2359 : 4594 : create_waw_or_war_checks (tree *cond_expr,
2360 : : const dr_with_seg_len_pair_t &alias_pair)
2361 : : {
2362 : 4594 : const dr_with_seg_len& dr_a = alias_pair.first;
2363 : 4594 : const dr_with_seg_len& dr_b = alias_pair.second;
2364 : :
2365 : : /* Check for cases in which:
2366 : :
2367 : : (a) DR_B is always a write;
2368 : : (b) the accesses are well-ordered in both the original and new code
2369 : : (see the comment above the DR_ALIAS_* flags for details); and
2370 : : (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2371 : 4594 : if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2372 : : return false;
2373 : :
2374 : : /* Check for equal (but possibly variable) steps. */
2375 : 2974 : tree step = DR_STEP (dr_a.dr);
2376 : 2974 : if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2377 : : return false;
2378 : :
2379 : : /* Make sure that we can operate on sizetype without loss of precision. */
2380 : 2580 : tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2381 : 2580 : if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2382 : : return false;
2383 : :
2384 : : /* All addresses involved are known to have a common alignment ALIGN.
2385 : : We can therefore subtract ALIGN from an exclusive endpoint to get
2386 : : an inclusive endpoint. In the best (and common) case, ALIGN is the
2387 : : same as the access sizes of both DRs, and so subtracting ALIGN
2388 : : cancels out the addition of an access size. */
2389 : 2580 : unsigned int align = MIN (dr_a.align, dr_b.align);
2390 : 2580 : poly_uint64 last_chunk_a = dr_a.access_size - align;
2391 : 2580 : poly_uint64 last_chunk_b = dr_b.access_size - align;
2392 : :
2393 : : /* Get a boolean expression that is true when the step is negative. */
2394 : 2580 : tree indicator = dr_direction_indicator (dr_a.dr);
2395 : 2580 : tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2396 : : fold_convert (ssizetype, indicator),
2397 : : ssize_int (0));
2398 : :
2399 : : /* Get lengths in sizetype. */
2400 : 2580 : tree seg_len_a
2401 : 2580 : = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2402 : 2580 : step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2403 : :
2404 : : /* Each access has the following pattern:
2405 : :
2406 : : <- |seg_len| ->
2407 : : <--- A: -ve step --->
2408 : : +-----+-------+-----+-------+-----+
2409 : : | n-1 | ..... | 0 | ..... | n-1 |
2410 : : +-----+-------+-----+-------+-----+
2411 : : <--- B: +ve step --->
2412 : : <- |seg_len| ->
2413 : : |
2414 : : base address
2415 : :
2416 : : where "n" is the number of scalar iterations covered by the segment.
2417 : :
2418 : : A is the range of bytes accessed when the step is negative,
2419 : : B is the range when the step is positive.
2420 : :
2421 : : We know that DR_B is a write. We also know (from checking that
2422 : : DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2423 : : the write performed by access i of DR_B occurs after access numbers
2424 : : j<=i of DR_A in both the original and the new code. Any write or
2425 : : anti dependencies wrt those DR_A accesses are therefore maintained.
2426 : :
2427 : : We just need to make sure that each individual write in DR_B does not
2428 : : overlap any higher-indexed access in DR_A; such DR_A accesses happen
2429 : : after the DR_B access in the original code but happen before it in
2430 : : the new code.
2431 : :
2432 : : We know the steps for both accesses are equal, so by induction, we
2433 : : just need to test whether the first write of DR_B overlaps a later
2434 : : access of DR_A. In other words, we need to move addr_a along by
2435 : : one iteration:
2436 : :
2437 : : addr_a' = addr_a + step
2438 : :
2439 : : and check whether:
2440 : :
2441 : : [addr_b, addr_b + last_chunk_b]
2442 : :
2443 : : overlaps:
2444 : :
2445 : : [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2446 : :
2447 : : where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2448 : :
2449 : : low_offset_a = +ve step ? 0 : seg_len_a - step
2450 : : high_offset_a = +ve step ? seg_len_a - step : 0
2451 : :
2452 : : This is equivalent to testing whether:
2453 : :
2454 : : addr_a' + low_offset_a <= addr_b + last_chunk_b
2455 : : && addr_b <= addr_a' + high_offset_a + last_chunk_a
2456 : :
2457 : : Converting this into a single test, there is an overlap if:
2458 : :
2459 : : 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2460 : :
2461 : : where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2462 : :
2463 : : If DR_A is performed, limit + |step| - last_chunk_b is known to be
2464 : : less than the size of the object underlying DR_A. We also know
2465 : : that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2466 : : guaranteed at compile time. There can therefore be no overflow if
2467 : : "limit" is calculated in an unsigned type with pointer precision. */
2468 : 2580 : tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2469 : : DR_OFFSET (dr_a.dr));
2470 : 2580 : addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2471 : :
2472 : 2580 : tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2473 : : DR_OFFSET (dr_b.dr));
2474 : 2580 : addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2475 : :
2476 : : /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2477 : 2580 : addr_a = fold_build_pointer_plus (addr_a, step);
2478 : 2580 : tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2479 : : seg_len_a, step);
2480 : 2580 : if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2481 : 3 : seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2482 : :
2483 : 2580 : tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2484 : : seg_len_a_minus_step, size_zero_node);
2485 : 2580 : if (!CONSTANT_CLASS_P (low_offset_a))
2486 : 3 : low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2487 : :
2488 : : /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2489 : : but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2490 : 2580 : tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2491 : : low_offset_a);
2492 : :
2493 : : /* The amount added to addr_b - addr_a'. */
2494 : 2580 : tree bias = fold_build2 (MINUS_EXPR, sizetype,
2495 : : size_int (last_chunk_b), low_offset_a);
2496 : :
2497 : 2580 : tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2498 : 2580 : limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2499 : : size_int (last_chunk_a + last_chunk_b));
2500 : :
2501 : 2580 : tree subject = fold_build2 (MINUS_EXPR, sizetype,
2502 : : fold_convert (sizetype, addr_b),
2503 : : fold_convert (sizetype, addr_a));
2504 : 2580 : subject = fold_build2 (PLUS_EXPR, sizetype, subject, bias);
2505 : :
2506 : 2580 : *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2507 : 2580 : if (dump_enabled_p ())
2508 : 335 : dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2509 : : return true;
2510 : : }
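     : :
     : : /* Illustrative sketch, not part of the original source: the scalar shape
     : :    of the address-based WAR/WAW test built above, specialised to a known
     : :    positive STEP so that low_offset_a is 0 and high_offset_a is
     : :    seg_len_a - step.  All names are placeholders.  */
     : :
     : : static inline bool
     : : waw_or_war_test_example (char *addr_a, char *addr_b, unsigned long step,
     : :                          unsigned long seg_len_a,
     : :                          unsigned long last_chunk_a,
     : :                          unsigned long last_chunk_b)
     : : {
     : :   /* Advance ADDR_A by one iteration, then test whether the biased
     : :      distance to ADDR_B lies outside [0, limit]; true means the remaining
     : :      DR_A accesses cannot overlap the first DR_B write.  */
     : :   unsigned long subject
     : :     = (unsigned long) (addr_b - (addr_a + step)) + last_chunk_b;
     : :   unsigned long limit = (seg_len_a - step) + last_chunk_a + last_chunk_b;
     : :   return subject > limit;
     : : }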
2511 : :
2512 : : /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2513 : :    every address ADDR accessed by D:
2514 : :
2515 : :      *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2516 : :
2517 : :    In this case, every element accessed by D is aligned to at least
2518 : :    ALIGN bytes.
2519 : :
2520 : :    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2521 : :
2522 : :      *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT. */
2523 : :
2524 : : static void
2525 : 4028 : get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2526 : : tree *seg_max_out, HOST_WIDE_INT align)
2527 : : {
2528 : : /* Each access has the following pattern:
2529 : :
2530 : : <- |seg_len| ->
2531 : : <--- A: -ve step --->
2532 : : +-----+-------+-----+-------+-----+
2533 : :        | n-1 | ..... |  0  | ..... | n-1 |
2534 : : +-----+-------+-----+-------+-----+
2535 : : <--- B: +ve step --->
2536 : : <- |seg_len| ->
2537 : : |
2538 : : base address
2539 : :
2540 : : where "n" is the number of scalar iterations covered by the segment.
2541 : : (This should be VF for a particular pair if we know that both steps
2542 : : are the same, otherwise it will be the full number of scalar loop
2543 : : iterations.)
2544 : :
2545 : : A is the range of bytes accessed when the step is negative,
2546 : : B is the range when the step is positive.
2547 : :
2548 : : If the access size is "access_size" bytes, the lowest addressed byte is:
2549 : :
2550 : : base + (step < 0 ? seg_len : 0) [LB]
2551 : :
2552 : : and the highest addressed byte is always below:
2553 : :
2554 : : base + (step < 0 ? 0 : seg_len) + access_size [UB]
2555 : :
2556 : : Thus:
2557 : :
2558 : : LB <= ADDR < UB
2559 : :
2560 : : If ALIGN is nonzero, all three values are aligned to at least ALIGN
2561 : : bytes, so:
2562 : :
2563 : : LB <= ADDR <= UB - ALIGN
2564 : :
2565 : : where "- ALIGN" folds naturally with the "+ access_size" and often
2566 : : cancels it out.
2567 : :
2568 : : We don't try to simplify LB and UB beyond this (e.g. by using
2569 : : MIN and MAX based on whether seg_len rather than the stride is
2570 : : negative) because it is possible for the absolute size of the
2571 : : segment to overflow the range of a ssize_t.
2572 : :
2573 : : Keeping the pointer_plus outside of the cond_expr should allow
2574 : : the cond_exprs to be shared with other alias checks. */
2575 : 4028 : tree indicator = dr_direction_indicator (d.dr);
2576 : 4028 : tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2577 : : fold_convert (ssizetype, indicator),
2578 : : ssize_int (0));
2579 : 4028 : tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2580 : : DR_OFFSET (d.dr));
2581 : 4028 : addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2582 : 4028 : tree seg_len
2583 : 4028 : = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2584 : :
2585 : 4028 : tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2586 : : seg_len, size_zero_node);
2587 : 4028 : tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2588 : : size_zero_node, seg_len);
2589 : 4028 : max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2590 : : size_int (d.access_size - align));
2591 : :
2592 : 4028 : *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2593 : 4028 : *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2594 : 4028 : }
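     : :
     : : /* Illustrative sketch, not part of the original source: the scalar
     : :    counterpart of the bounds computed by get_segment_min_max above, with
     : :    STEP standing in for the direction indicator.  A negative SEG_LEN
     : :    (negative step) reaches below the base, as in the diagram.  */
     : :
     : : static inline void
     : : segment_min_max_example (char *base, long step, long seg_len,
     : :                          unsigned long access_size, unsigned long align,
     : :                          char **min_out, char **max_out)
     : : {
     : :   /* LB = base + (step < 0 ? seg_len : 0); the exclusive UB gains
     : :      access_size and loses ALIGN to become inclusive when everything
     : :      is ALIGN-aligned.  */
     : :   *min_out = base + (step < 0 ? seg_len : 0);
     : :   *max_out = base + (step < 0 ? 0 : seg_len) + (access_size - align);
     : : }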
2595 : :
2596 : : /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2597 : :    storing the condition in *COND_EXPR.  The fallback is to generate
2598 : :    a test that the two accesses do not overlap:
2599 : :
2600 : : end_a <= start_b || end_b <= start_a. */
2601 : :
2602 : : static void
2603 : 4745 : create_intersect_range_checks (class loop *loop, tree *cond_expr,
2604 : : const dr_with_seg_len_pair_t &alias_pair)
2605 : : {
2606 : 4745 : const dr_with_seg_len& dr_a = alias_pair.first;
2607 : 4745 : const dr_with_seg_len& dr_b = alias_pair.second;
2608 : 4745 : *cond_expr = NULL_TREE;
2609 : 4745 : if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2610 : 2731 : return;
2611 : :
2612 : 4594 : if (create_ifn_alias_checks (cond_expr, alias_pair))
2613 : : return;
2614 : :
2615 : 4594 : if (create_waw_or_war_checks (cond_expr, alias_pair))
2616 : : return;
2617 : :
2618 : 2014 : unsigned HOST_WIDE_INT min_align;
2619 : 2014 : tree_code cmp_code;
2620 : : /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2621 : : are equivalent. This is just an optimization heuristic. */
2622 : 2014 : if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2623 : 1908 : && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2624 : : {
2625 : : /* In this case adding access_size to seg_len is likely to give
2626 : : a simple X * step, where X is either the number of scalar
2627 : : iterations or the vectorization factor. We're better off
2628 : : keeping that, rather than subtracting an alignment from it.
2629 : :
2630 : : In this case the maximum values are exclusive and so there is
2631 : : no alias if the maximum of one segment equals the minimum
2632 : : of another. */
2633 : : min_align = 0;
2634 : : cmp_code = LE_EXPR;
2635 : : }
2636 : : else
2637 : : {
2638 : : /* Calculate the minimum alignment shared by all four pointers,
2639 : : then arrange for this alignment to be subtracted from the
2640 : : exclusive maximum values to get inclusive maximum values.
2641 : : This "- min_align" is cumulative with a "+ access_size"
2642 : : in the calculation of the maximum values. In the best
2643 : : (and common) case, the two cancel each other out, leaving
2644 : : us with an inclusive bound based only on seg_len. In the
2645 : : worst case we're simply adding a smaller number than before.
2646 : :
2647 : : Because the maximum values are inclusive, there is an alias
2648 : : if the maximum value of one segment is equal to the minimum
2649 : : value of the other. */
2650 : 202 : min_align = std::min (dr_a.align, dr_b.align);
2651 : 202 : cmp_code = LT_EXPR;
2652 : : }
2653 : :
2654 : 2014 : tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2655 : 2014 : get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2656 : 2014 : get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2657 : :
2658 : 2014 : *cond_expr
2659 : 2014 : = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2660 : : fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2661 : : fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2662 : 2014 : if (dump_enabled_p ())
2663 : 274 : dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2664 : : }
2665 : :
2666 : : /* Create a conditional expression that represents the run-time checks for
2667 : :    overlapping of address ranges represented by a list of data reference
2668 : :    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2669 : : COND_EXPR is the conditional expression to be used in the if statement
2670 : : that controls which version of the loop gets executed at runtime. */
2671 : :
2672 : : void
2673 : 3096 : create_runtime_alias_checks (class loop *loop,
2674 : : const vec<dr_with_seg_len_pair_t> *alias_pairs,
2675 : : tree * cond_expr)
2676 : : {
2677 : 3096 : tree part_cond_expr;
2678 : :
2679 : 3096 : fold_defer_overflow_warnings ();
2680 : 14033 : for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2681 : : {
2682 : 4745 : gcc_assert (alias_pair.flags);
2683 : 4745 : if (dump_enabled_p ())
2684 : 742 : dump_printf (MSG_NOTE,
2685 : : "create runtime check for data references %T and %T\n",
2686 : 742 : DR_REF (alias_pair.first.dr),
2687 : 742 : DR_REF (alias_pair.second.dr));
2688 : :
2689 : : /* Create condition expression for each pair data references. */
2690 : 4745 : create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2691 : 4745 : if (*cond_expr)
2692 : 4665 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2693 : : *cond_expr, part_cond_expr);
2694 : : else
2695 : 80 : *cond_expr = part_cond_expr;
2696 : : }
2697 : 3096 : fold_undefer_and_ignore_overflow_warnings ();
2698 : 3096 : }
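     : :
     : : /* Illustrative sketch, not part of the original source: the shape of the
     : :    loop versioning a caller typically performs with the combined condition
     : :    returned in *COND_EXPR.  The two body callbacks are placeholders.  */
     : :
     : : static void
     : : versioned_loop_example (bool no_alias_cond, void (*transformed_body) (void),
     : :                         void (*original_body) (void))
     : : {
     : :   if (no_alias_cond)
     : :     transformed_body ();   /* all runtime alias checks passed */
     : :   else
     : :     original_body ();      /* conservative fallback copy */
     : : }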
2699 : :
2700 : : /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2701 : : expressions. */
2702 : : static bool
2703 : 0 : dr_equal_offsets_p1 (tree offset1, tree offset2)
2704 : : {
2705 : 0 : bool res;
2706 : :
2707 : 0 : STRIP_NOPS (offset1);
2708 : 0 : STRIP_NOPS (offset2);
2709 : :
2710 : 0 : if (offset1 == offset2)
2711 : : return true;
2712 : :
2713 : 0 : if (TREE_CODE (offset1) != TREE_CODE (offset2)
2714 : 0 : || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2715 : : return false;
2716 : :
2717 : 0 : res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2718 : 0 : TREE_OPERAND (offset2, 0));
2719 : :
2720 : 0 : if (!res || !BINARY_CLASS_P (offset1))
2721 : : return res;
2722 : :
2723 : 0 : res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2724 : 0 : TREE_OPERAND (offset2, 1));
2725 : :
2726 : 0 : return res;
2727 : : }
2728 : :
2729 : : /* Check if DRA and DRB have equal offsets. */
2730 : : bool
2731 : 0 : dr_equal_offsets_p (struct data_reference *dra,
2732 : : struct data_reference *drb)
2733 : : {
2734 : 0 : tree offset1, offset2;
2735 : :
2736 : 0 : offset1 = DR_OFFSET (dra);
2737 : 0 : offset2 = DR_OFFSET (drb);
2738 : :
2739 : 0 : return dr_equal_offsets_p1 (offset1, offset2);
2740 : : }
2741 : :
2742 : : /* Returns true if FNA == FNB. */
2743 : :
2744 : : static bool
2745 : 0 : affine_function_equal_p (affine_fn fna, affine_fn fnb)
2746 : : {
2747 : 0 : unsigned i, n = fna.length ();
2748 : :
2749 : 0 : if (n != fnb.length ())
2750 : : return false;
2751 : :
2752 : 0 : for (i = 0; i < n; i++)
2753 : 0 : if (!operand_equal_p (fna[i], fnb[i], 0))
2754 : : return false;
2755 : :
2756 : : return true;
2757 : : }
2758 : :
2759 : : /* If all the functions in CF are the same, returns one of them,
2760 : : otherwise returns NULL. */
2761 : :
2762 : : static affine_fn
2763 : 2261248 : common_affine_function (conflict_function *cf)
2764 : : {
2765 : 2261248 : unsigned i;
2766 : 2261248 : affine_fn comm;
2767 : :
2768 : 2261248 : if (!CF_NONTRIVIAL_P (cf))
2769 : 0 : return affine_fn ();
2770 : :
2771 : 2261248 : comm = cf->fns[0];
2772 : :
2773 : 2261248 : for (i = 1; i < cf->n; i++)
2774 : 0 : if (!affine_function_equal_p (comm, cf->fns[i]))
2775 : 0 : return affine_fn ();
2776 : :
2777 : 2261248 : return comm;
2778 : : }
2779 : :
2780 : : /* Returns the base of the affine function FN. */
2781 : :
2782 : : static tree
2783 : 1301251 : affine_function_base (affine_fn fn)
2784 : : {
2785 : 0 : return fn[0];
2786 : : }
2787 : :
2788 : : /* Returns true if FN is a constant. */
2789 : :
2790 : : static bool
2791 : 1301561 : affine_function_constant_p (affine_fn fn)
2792 : : {
2793 : 1301561 : unsigned i;
2794 : 1301561 : tree coef;
2795 : :
2796 : 1357730 : for (i = 1; fn.iterate (i, &coef); i++)
2797 : 56479 : if (!integer_zerop (coef))
2798 : : return false;
2799 : :
2800 : : return true;
2801 : : }
2802 : :
2803 : : /* Returns true if FN is the zero constant function. */
2804 : :
2805 : : static bool
2806 : 170937 : affine_function_zero_p (affine_fn fn)
2807 : : {
2808 : 170937 : return (integer_zerop (affine_function_base (fn))
2809 : 170937 : && affine_function_constant_p (fn));
2810 : : }
2811 : :
2812 : : /* Returns a signed integer type with the largest precision from TA
2813 : : and TB. */
2814 : :
2815 : : static tree
2816 : 1703407 : signed_type_for_types (tree ta, tree tb)
2817 : : {
2818 : 1703407 : if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2819 : 181 : return signed_type_for (ta);
2820 : : else
2821 : 1703226 : return signed_type_for (tb);
2822 : : }
2823 : :
2824 : : /* Applies operation OP on affine functions FNA and FNB, and returns the
2825 : : result. */
2826 : :
2827 : : static affine_fn
2828 : 1130624 : affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2829 : : {
2830 : 1130624 : unsigned i, n, m;
2831 : 1130624 : affine_fn ret;
2832 : 1130624 : tree coef;
2833 : :
2834 : 3391872 : if (fnb.length () > fna.length ())
2835 : : {
2836 : 0 : n = fna.length ();
2837 : 0 : m = fnb.length ();
2838 : : }
2839 : : else
2840 : : {
2841 : 1130624 : n = fnb.length ();
2842 : : m = fna.length ();
2843 : : }
2844 : :
2845 : 1130624 : ret.create (m);
2846 : 2317727 : for (i = 0; i < n; i++)
2847 : : {
2848 : 2374206 : tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2849 : 1187103 : TREE_TYPE (fnb[i]));
2850 : 1187103 : ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2851 : : }
2852 : :
2853 : 1130624 : for (; fna.iterate (i, &coef); i++)
2854 : 0 : ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2855 : : coef, integer_zero_node));
2856 : 1130624 : for (; fnb.iterate (i, &coef); i++)
2857 : 0 : ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2858 : : integer_zero_node, coef));
2859 : :
2860 : 1130624 : return ret;
2861 : : }
2862 : :
2863 : : /* Returns the sum of affine functions FNA and FNB. */
2864 : :
2865 : : static affine_fn
2866 : 0 : affine_fn_plus (affine_fn fna, affine_fn fnb)
2867 : : {
2868 : 0 : return affine_fn_op (PLUS_EXPR, fna, fnb);
2869 : : }
2870 : :
2871 : : /* Returns the difference of affine functions FNA and FNB. */
2872 : :
2873 : : static affine_fn
2874 : 1130624 : affine_fn_minus (affine_fn fna, affine_fn fnb)
2875 : : {
2876 : 0 : return affine_fn_op (MINUS_EXPR, fna, fnb);
2877 : : }
2878 : :
2879 : : /* Frees affine function FN. */
2880 : :
2881 : : static void
2882 : 3588352 : affine_fn_free (affine_fn fn)
2883 : : {
2884 : 0 : fn.release ();
2885 : 0 : }
2886 : :
2887 : : /* Determine the distance for each subscript in the data dependence
2888 : :    relation DDR. */
2889 : :
2890 : : static void
2891 : 3046305 : compute_subscript_distance (struct data_dependence_relation *ddr)
2892 : : {
2893 : 3046305 : conflict_function *cf_a, *cf_b;
2894 : 3046305 : affine_fn fn_a, fn_b, diff;
2895 : :
2896 : 3046305 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2897 : : {
2898 : : unsigned int i;
2899 : :
2900 : 4176929 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2901 : : {
2902 : 1130624 : struct subscript *subscript;
2903 : :
2904 : 1130624 : subscript = DDR_SUBSCRIPT (ddr, i);
2905 : 1130624 : cf_a = SUB_CONFLICTS_IN_A (subscript);
2906 : 1130624 : cf_b = SUB_CONFLICTS_IN_B (subscript);
2907 : :
2908 : 1130624 : fn_a = common_affine_function (cf_a);
2909 : 1130624 : fn_b = common_affine_function (cf_b);
2910 : 1130624 : if (!fn_a.exists () || !fn_b.exists ())
2911 : : {
2912 : 0 : SUB_DISTANCE (subscript) = chrec_dont_know;
2913 : 0 : return;
2914 : : }
2915 : 1130624 : diff = affine_fn_minus (fn_a, fn_b);
2916 : :
2917 : 1130624 : if (affine_function_constant_p (diff))
2918 : 1130314 : SUB_DISTANCE (subscript) = affine_function_base (diff);
2919 : : else
2920 : 310 : SUB_DISTANCE (subscript) = chrec_dont_know;
2921 : :
2922 : 1130624 : affine_fn_free (diff);
2923 : : }
2924 : : }
2925 : : }
2926 : :
2927 : : /* Returns the conflict function for "unknown". */
2928 : :
2929 : : static conflict_function *
2930 : 7848050 : conflict_fn_not_known (void)
2931 : : {
2932 : 0 : conflict_function *fn = XCNEW (conflict_function);
2933 : 7848050 : fn->n = NOT_KNOWN;
2934 : :
2935 : 7848050 : return fn;
2936 : : }
2937 : :
2938 : : /* Returns the conflict function for "independent". */
2939 : :
2940 : : static conflict_function *
2941 : 4231424 : conflict_fn_no_dependence (void)
2942 : : {
2943 : 0 : conflict_function *fn = XCNEW (conflict_function);
2944 : 4231424 : fn->n = NO_DEPENDENCE;
2945 : :
2946 : 4231424 : return fn;
2947 : : }
2948 : :
2949 : : /* Returns true if the address of OBJ is invariant in LOOP. */
2950 : :
2951 : : static bool
2952 : 3216523 : object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2953 : : {
2954 : 3347804 : while (handled_component_p (obj))
2955 : : {
2956 : 136638 : if (TREE_CODE (obj) == ARRAY_REF)
2957 : : {
2958 : 9637 : for (int i = 1; i < 4; ++i)
2959 : 8567 : if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2960 : 8567 : loop->num))
2961 : : return false;
2962 : : }
2963 : 130211 : else if (TREE_CODE (obj) == COMPONENT_REF)
2964 : : {
2965 : 109880 : if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2966 : 109880 : loop->num))
2967 : : return false;
2968 : : }
2969 : 131281 : obj = TREE_OPERAND (obj, 0);
2970 : : }
2971 : :
2972 : 3211166 : if (!INDIRECT_REF_P (obj)
2973 : 3211166 : && TREE_CODE (obj) != MEM_REF)
2974 : : return true;
2975 : :
2976 : 3187282 : return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2977 : 6374564 : loop->num);
2978 : : }
2979 : :
2980 : : /* Helper for contains_ssa_ref_p. */
2981 : :
2982 : : static bool
2983 : 84420 : contains_ssa_ref_p_1 (tree, tree *idx, void *data)
2984 : : {
2985 : 84420 : if (TREE_CODE (*idx) == SSA_NAME)
2986 : : {
2987 : 78841 : *(bool *)data = true;
2988 : 78841 : return false;
2989 : : }
2990 : : return true;
2991 : : }
2992 : :
2993 : : /* Returns true if the reference REF contains a SSA index. */
2994 : :
2995 : : static bool
2996 : 241023 : contains_ssa_ref_p (tree ref)
2997 : : {
2998 : 241023 : bool res = false;
2999 : 0 : for_each_index (&ref, contains_ssa_ref_p_1, &res);
3000 : 241023 : return res;
3001 : : }
3002 : :
3003 : : /* Returns false if we can prove that data references A and B do not alias,
3004 : : true otherwise. If LOOP_NEST is false no cross-iteration aliases are
3005 : : considered. */
3006 : :
3007 : : bool
3008 : 14317653 : dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
3009 : : class loop *loop_nest)
3010 : : {
3011 : 14317653 : tree addr_a = DR_BASE_OBJECT (a);
3012 : 14317653 : tree addr_b = DR_BASE_OBJECT (b);
3013 : :
3014 : : /* If we are not processing a loop nest but scalar code we
3015 : : do not need to care about possible cross-iteration dependences
3016 : : and thus can process the full original reference. Do so,
3017 : : similar to how loop invariant motion applies extra offset-based
3018 : : disambiguation. */
3019 : 14317653 : if (!loop_nest)
3020 : : {
3021 : 7793864 : tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
3022 : 7793864 : tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
3023 : :
3024 : 7793864 : if (DR_BASE_ADDRESS (a)
3025 : 7778726 : && DR_BASE_ADDRESS (b)
3026 : 7778409 : && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
3027 : 6928645 : && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
3028 : 6835604 : && tree_size_a
3029 : 6835604 : && tree_size_b
3030 : 6835595 : && poly_int_tree_p (tree_size_a)
3031 : 6835572 : && poly_int_tree_p (tree_size_b)
3032 : 14629436 : && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
3033 : 6835572 : wi::to_poly_widest (tree_size_a),
3034 : 6835572 : wi::to_poly_widest (DR_INIT (b)),
3035 : 6835572 : wi::to_poly_widest (tree_size_b)))
3036 : : {
3037 : 4910265 : gcc_assert (integer_zerop (DR_STEP (a))
3038 : : && integer_zerop (DR_STEP (b)));
3039 : 4910294 : return false;
3040 : : }
3041 : :
3042 : 11534396 : aff_tree off1, off2;
3043 : : poly_widest_int size1, size2;
3044 : 2883599 : get_inner_reference_aff (DR_REF (a), &off1, &size1);
3045 : 2883599 : get_inner_reference_aff (DR_REF (b), &off2, &size2);
3046 : 2883599 : aff_combination_scale (&off1, -1);
3047 : 2883599 : aff_combination_add (&off2, &off1);
3048 : 2883599 : if (aff_comb_cannot_overlap_p (&off2, size1, size2))
3049 : 29 : return false;
3050 : 2883599 : }
3051 : :
3052 : 9407359 : if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3053 : 6911147 : && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3054 : : /* For cross-iteration dependences the cliques must be valid for the
3055 : : whole loop, not just individual iterations. */
3056 : 6633813 : && (!loop_nest
3057 : 6323766 : || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3058 : 5487778 : || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3059 : 6464458 : && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3060 : 15670725 : && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3061 : : return false;
3062 : :
3063 : : /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3064 : : do not know the size of the base-object. So we cannot do any
3065 : : offset/overlap based analysis but have to rely on points-to
3066 : : information only. */
3067 : 9200368 : if (TREE_CODE (addr_a) == MEM_REF
3068 : 9200368 : && (DR_UNCONSTRAINED_BASE (a)
3069 : 4373080 : || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3070 : : {
3071 : : /* For true dependences we can apply TBAA. */
3072 : 4024071 : if (flag_strict_aliasing
3073 : 3747908 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3074 : 4173778 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3075 : 149707 : get_alias_set (DR_REF (b))))
3076 : : return false;
3077 : 4002235 : if (TREE_CODE (addr_b) == MEM_REF)
3078 : 3899497 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3079 : 7798994 : TREE_OPERAND (addr_b, 0));
3080 : : else
3081 : 102738 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3082 : 102738 : build_fold_addr_expr (addr_b));
3083 : : }
3084 : 5176297 : else if (TREE_CODE (addr_b) == MEM_REF
3085 : 5176297 : && (DR_UNCONSTRAINED_BASE (b)
3086 : 2718340 : || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3087 : : {
3088 : : /* For true dependences we can apply TBAA. */
3089 : 347453 : if (flag_strict_aliasing
3090 : 253405 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3091 : 428604 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3092 : 81151 : get_alias_set (DR_REF (b))))
3093 : : return false;
3094 : 330769 : if (TREE_CODE (addr_a) == MEM_REF)
3095 : 219911 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3096 : 439822 : TREE_OPERAND (addr_b, 0));
3097 : : else
3098 : 110858 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3099 : 221716 : TREE_OPERAND (addr_b, 0));
3100 : : }
3101 : : /* If dr_analyze_innermost failed to handle a component we are
3102 : : possibly left with a non-base in which case we didn't analyze
3103 : : a possible evolution of the base when analyzing a loop. */
3104 : 4828844 : else if (loop_nest
3105 : 7112715 : && ((handled_component_p (addr_a) && contains_ssa_ref_p (addr_a))
3106 : 83177 : || (handled_component_p (addr_b) && contains_ssa_ref_p (addr_b))))
3107 : : {
3108 : : /* For true dependences we can apply TBAA. */
3109 : 78841 : if (flag_strict_aliasing
3110 : 78199 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3111 : 88282 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3112 : 9441 : get_alias_set (DR_REF (b))))
3113 : : return false;
3114 : 74671 : if (TREE_CODE (addr_a) == MEM_REF)
3115 : 3356 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3116 : 3356 : build_fold_addr_expr (addr_b));
3117 : 71315 : else if (TREE_CODE (addr_b) == MEM_REF)
3118 : 6515 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3119 : 13030 : TREE_OPERAND (addr_b, 0));
3120 : : else
3121 : 64800 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3122 : 64800 : build_fold_addr_expr (addr_b));
3123 : : }
3124 : :
3125 : : /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3126 : : that is being subsetted in the loop nest. */
3127 : 4750003 : if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3128 : 3145440 : return refs_output_dependent_p (addr_a, addr_b);
3129 : 1604563 : else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3130 : 441607 : return refs_anti_dependent_p (addr_a, addr_b);
3131 : 1162956 : return refs_may_alias_p (addr_a, addr_b);
3132 : : }
3133 : :
3134 : : /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3135 : : if it is meaningful to compare their associated access functions
3136 : : when checking for dependencies. */
3137 : :
3138 : : static bool
3139 : 2904138 : access_fn_components_comparable_p (tree ref_a, tree ref_b)
3140 : : {
3141 : : /* Allow pairs of component refs from the following sets:
3142 : :
3143 : : { REALPART_EXPR, IMAGPART_EXPR }
3144 : : { COMPONENT_REF }
3145 : : { ARRAY_REF }. */
3146 : 2904138 : tree_code code_a = TREE_CODE (ref_a);
3147 : 2904138 : tree_code code_b = TREE_CODE (ref_b);
3148 : 2904138 : if (code_a == IMAGPART_EXPR)
3149 : 35330 : code_a = REALPART_EXPR;
3150 : 2904138 : if (code_b == IMAGPART_EXPR)
3151 : 41429 : code_b = REALPART_EXPR;
3152 : 2904138 : if (code_a != code_b)
3153 : : return false;
3154 : :
3155 : 2846743 : if (TREE_CODE (ref_a) == COMPONENT_REF)
3156 : : /* ??? We cannot simply use the type of operand #0 of the refs here as
3157 : : the Fortran compiler smuggles type punning into COMPONENT_REFs.
3158 : : Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3159 : 898406 : return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3160 : 898406 : == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3161 : :
3162 : 1948337 : return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3163 : 3896674 : TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3164 : : }
3165 : :
3166 : : /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3167 : : is true when the main indices of A and B were not comparable so we try again
3168 : : with alternate indices computed on an indirect reference. */
3169 : :
3170 : : struct data_dependence_relation *
3171 : 6536584 : initialize_data_dependence_relation (struct data_dependence_relation *res,
3172 : : vec<loop_p> loop_nest,
3173 : : bool use_alt_indices)
3174 : : {
3175 : 6536584 : struct data_reference *a = DDR_A (res);
3176 : 6536584 : struct data_reference *b = DDR_B (res);
3177 : 6536584 : unsigned int i;
3178 : :
3179 : 6536584 : struct indices *indices_a = &a->indices;
3180 : 6536584 : struct indices *indices_b = &b->indices;
3181 : 6536584 : if (use_alt_indices)
3182 : : {
3183 : 382377 : if (TREE_CODE (DR_REF (a)) != MEM_REF)
3184 : 250093 : indices_a = &a->alt_indices;
3185 : 382377 : if (TREE_CODE (DR_REF (b)) != MEM_REF)
3186 : 274420 : indices_b = &b->alt_indices;
3187 : : }
3188 : 6536584 : unsigned int num_dimensions_a = indices_a->access_fns.length ();
3189 : 6536584 : unsigned int num_dimensions_b = indices_b->access_fns.length ();
3190 : 6536584 : if (num_dimensions_a == 0 || num_dimensions_b == 0)
3191 : : {
3192 : 2256566 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3193 : 2256566 : return res;
3194 : : }
3195 : :
3196 : : /* For unconstrained bases, the root (highest-indexed) subscript
3197 : : describes a variation in the base of the original DR_REF rather
3198 : : than a component access. We have no type that accurately describes
3199 : : the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3200 : : applying this subscript) so limit the search to the last real
3201 : : component access.
3202 : :
3203 : : E.g. for:
3204 : :
3205 : : void
3206 : : f (int a[][8], int b[][8])
3207 : : {
3208 : : for (int i = 0; i < 8; ++i)
3209 : : a[i * 2][0] = b[i][0];
3210 : : }
3211 : :
3212 : : the a and b accesses have a single ARRAY_REF component reference [0]
3213 : : but have two subscripts. */
3214 : 4280018 : if (indices_a->unconstrained_base)
3215 : 2356681 : num_dimensions_a -= 1;
3216 : 4280018 : if (indices_b->unconstrained_base)
3217 : 2327375 : num_dimensions_b -= 1;
3218 : :
3219 : : /* These structures describe sequences of component references in
3220 : : DR_REF (A) and DR_REF (B). Each component reference is tied to a
3221 : : specific access function. */
3222 : 4280018 : struct {
3223 : : /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3224 : : DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3225 : : indices. In C notation, these are the indices of the rightmost
3226 : : component references; e.g. for a sequence .b.c.d, the start
3227 : : index is for .d. */
3228 : : unsigned int start_a;
3229 : : unsigned int start_b;
3230 : :
3231 : : /* The sequence contains LENGTH consecutive access functions from
3232 : : each DR. */
3233 : : unsigned int length;
3234 : :
3235 : : /* The enclosing objects for the A and B sequences respectively,
3236 : : i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3237 : : and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3238 : : tree object_a;
3239 : : tree object_b;
3240 : 4280018 : } full_seq = {}, struct_seq = {};
3241 : :
3242 : : /* Before each iteration of the loop:
3243 : :
3244 : : - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3245 : : - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3246 : 4280018 : unsigned int index_a = 0;
3247 : 4280018 : unsigned int index_b = 0;
3248 : 4280018 : tree ref_a = DR_REF (a);
3249 : 4280018 : tree ref_b = DR_REF (b);
3250 : :
3251 : : /* Now walk the component references from the final DR_REFs back up to
3252 : : the enclosing base objects. Each component reference corresponds
3253 : : to one access function in the DR, with access function 0 being for
3254 : : the final DR_REF and the highest-indexed access function being the
3255 : : one that is applied to the base of the DR.
3256 : :
3257 : : Look for a sequence of component references whose access functions
3258 : : are comparable (see access_fn_components_comparable_p). If more
3259 : : than one such sequence exists, pick the one nearest the base
3260 : : (which is the leftmost sequence in C notation). Store this sequence
3261 : : in FULL_SEQ.
3262 : :
3263 : : For example, if we have:
3264 : :
3265 : : struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3266 : :
3267 : : A: a[0][i].s.c.d
3268 : : B: __real b[0][i].s.e[i].f
3269 : :
3270 : : (where d is the same type as the real component of f) then the access
3271 : : functions would be:
3272 : :
3273 : : 0 1 2 3
3274 : : A: .d .c .s [i]
3275 : :
3276 : : 0 1 2 3 4 5
3277 : : B: __real .f [i] .e .s [i]
3278 : :
3279 : : The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3280 : : and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3281 : : COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3282 : : the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3283 : : so is comparable. The A3/B5 column contains two ARRAY_REFs that
3284 : : index foo[10] arrays, so is again comparable. The sequence is
3285 : : therefore:
3286 : :
3287 : : A: [1, 3] (i.e. [i].s.c)
3288 : : B: [3, 5] (i.e. [i].s.e)
3289 : :
3290 : : Also look for sequences of component references whose access
3291 : : functions are comparable and whose enclosing objects have the same
3292 : : RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3293 : : example, STRUCT_SEQ would be:
3294 : :
3295 : : A: [1, 2] (i.e. s.c)
3296 : : B: [3, 4] (i.e. s.e) */
3297 : 7173935 : while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3298 : : {
3299 : : /* The alternate indices form always has a single dimension
3300 : : with unconstrained base. */
3301 : 2904138 : gcc_assert (!use_alt_indices);
3302 : :
3303 : : /* REF_A and REF_B must be one of the component access types
3304 : : allowed by dr_analyze_indices. */
3305 : 2904138 : gcc_checking_assert (access_fn_component_p (ref_a));
3306 : 2904138 : gcc_checking_assert (access_fn_component_p (ref_b));
3307 : :
3308 : : /* Get the immediately-enclosing objects for REF_A and REF_B,
3309 : : i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3310 : : and DR_ACCESS_FN (B, INDEX_B). */
3311 : 2904138 : tree object_a = TREE_OPERAND (ref_a, 0);
3312 : 2904138 : tree object_b = TREE_OPERAND (ref_b, 0);
3313 : :
3314 : 2904138 : tree type_a = TREE_TYPE (object_a);
3315 : 2904138 : tree type_b = TREE_TYPE (object_b);
3316 : 2904138 : if (access_fn_components_comparable_p (ref_a, ref_b))
3317 : : {
3318 : : /* This pair of component accesses is comparable for dependence
3319 : : analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3320 : : DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3321 : 2659960 : if (full_seq.start_a + full_seq.length != index_a
3322 : 2600939 : || full_seq.start_b + full_seq.length != index_b)
3323 : : {
3324 : : /* The accesses don't extend the current sequence,
3325 : : so start a new one here. */
3326 : 89905 : full_seq.start_a = index_a;
3327 : 89905 : full_seq.start_b = index_b;
3328 : 89905 : full_seq.length = 0;
3329 : : }
3330 : :
3331 : : /* Add this pair of references to the sequence. */
3332 : 2659960 : full_seq.length += 1;
3333 : 2659960 : full_seq.object_a = object_a;
3334 : 2659960 : full_seq.object_b = object_b;
3335 : :
3336 : : /* If the enclosing objects are structures (and thus have the
3337 : : same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3338 : 2659960 : if (TREE_CODE (type_a) == RECORD_TYPE)
3339 : 727476 : struct_seq = full_seq;
3340 : :
3341 : : /* Move to the next containing reference for both A and B. */
3342 : 2659960 : ref_a = object_a;
3343 : 2659960 : ref_b = object_b;
3344 : 2659960 : index_a += 1;
3345 : 2659960 : index_b += 1;
3346 : 2659960 : continue;
3347 : : }
3348 : :
3349 : : /* The components are not comparable: advance past the smaller enclosing type(s) so the type sizes converge. */
3350 : 244178 : if (!COMPLETE_TYPE_P (type_a)
3351 : 241449 : || !COMPLETE_TYPE_P (type_b)
3352 : 235832 : || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3353 : 478414 : || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3354 : : break;
3355 : :
3356 : 233957 : unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3357 : 233957 : unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3358 : 233957 : if (size_a <= size_b)
3359 : : {
3360 : 132420 : index_a += 1;
3361 : 132420 : ref_a = object_a;
3362 : : }
3363 : 233957 : if (size_b <= size_a)
3364 : : {
3365 : 114435 : index_b += 1;
3366 : 114435 : ref_b = object_b;
3367 : : }
3368 : : }
3369 : :
3370 : : /* See whether FULL_SEQ ends at the base and whether the two bases
3371 : : are equal. We do not care about TBAA or alignment info so we can
3372 : : use OEP_ADDRESS_OF to avoid false negatives. */
3373 : 4280018 : tree base_a = indices_a->base_object;
3374 : 4280018 : tree base_b = indices_b->base_object;
3375 : 4280018 : bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3376 : 4085403 : && full_seq.start_b + full_seq.length == num_dimensions_b
3377 : 3940107 : && (indices_a->unconstrained_base
3378 : 3940107 : == indices_b->unconstrained_base)
3379 : 3932826 : && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3380 : 3444476 : && (types_compatible_p (TREE_TYPE (base_a),
3381 : 3444476 : TREE_TYPE (base_b))
3382 : 843248 : || (!base_supports_access_fn_components_p (base_a)
3383 : 835770 : && !base_supports_access_fn_components_p (base_b)
3384 : 834038 : && operand_equal_p
3385 : 834038 : (TYPE_SIZE (TREE_TYPE (base_a)),
3386 : 834038 : TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3387 : 7288048 : && (!loop_nest.exists ()
3388 : 3008030 : || (object_address_invariant_in_loop_p
3389 : 3008030 : (loop_nest[0], base_a))));
3390 : :
3391 : : /* If the bases are the same, we can include the base variation too.
3392 : : E.g. the b accesses in:
3393 : :
3394 : : for (int i = 0; i < n; ++i)
3395 : : b[i + 4][0] = b[i][0];
3396 : :
3397 : : have a definite dependence distance of 4, while for:
3398 : :
3399 : : for (int i = 0; i < n; ++i)
3400 : : a[i + 4][0] = b[i][0];
3401 : :
3402 : : the dependence distance depends on the gap between a and b.
3403 : :
3404 : : If the bases are different then we can only rely on the sequence
3405 : : rooted at a structure access, since arrays are allowed to overlap
3406 : : arbitrarily and change shape arbitrarily. E.g. we treat this as
3407 : : valid code:
3408 : :
3409 : : int a[256];
3410 : : ...
3411 : : ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3412 : :
3413 : : where two lvalues with the same int[4][3] type overlap, and where
3414 : : both lvalues are distinct from the object's declared type. */
3415 : 2903367 : if (same_base_p)
3416 : : {
3417 : 2903367 : if (indices_a->unconstrained_base)
3418 : 1404163 : full_seq.length += 1;
3419 : : }
3420 : : else
3421 : : full_seq = struct_seq;
3422 : :
3423 : : /* Punt if we didn't find a suitable sequence. */
3424 : 4280018 : if (full_seq.length == 0)
3425 : : {
3426 : 1124849 : if (use_alt_indices
3427 : 984985 : || (TREE_CODE (DR_REF (a)) == MEM_REF
3428 : 734470 : && TREE_CODE (DR_REF (b)) == MEM_REF)
3429 : 384179 : || may_be_nonaddressable_p (DR_REF (a))
3430 : 1508779 : || may_be_nonaddressable_p (DR_REF (b)))
3431 : : {
3432 : : /* Fully exhausted possibilities. */
3433 : 742472 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3434 : 742472 : return res;
3435 : : }
3436 : :
3437 : : /* Try evaluating both DRs as dereferences of pointers. */
3438 : 382377 : if (!a->alt_indices.base_object
3439 : 172995 : && TREE_CODE (DR_REF (a)) != MEM_REF)
3440 : : {
3441 : 40711 : tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3442 : : build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3443 : : build_int_cst
3444 : : (reference_alias_ptr_type (DR_REF (a)), 0));
3445 : 122133 : dr_analyze_indices (&a->alt_indices, alt_ref,
3446 : 40711 : loop_preheader_edge (loop_nest[0]),
3447 : : loop_containing_stmt (DR_STMT (a)));
3448 : : }
3449 : 382377 : if (!b->alt_indices.base_object
3450 : 182798 : && TREE_CODE (DR_REF (b)) != MEM_REF)
3451 : : {
3452 : 74841 : tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3453 : : build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3454 : : build_int_cst
3455 : : (reference_alias_ptr_type (DR_REF (b)), 0));
3456 : 224523 : dr_analyze_indices (&b->alt_indices, alt_ref,
3457 : 74841 : loop_preheader_edge (loop_nest[0]),
3458 : : loop_containing_stmt (DR_STMT (b)));
3459 : : }
3460 : 382377 : return initialize_data_dependence_relation (res, loop_nest, true);
3461 : : }
3462 : :
3463 : 3155169 : if (!same_base_p)
3464 : : {
3465 : : /* Partial overlap is possible for different bases when strict aliasing
3466 : : is not in effect. It's also possible if either base involves a union
3467 : : access; e.g. for:
3468 : :
3469 : : struct s1 { int a[2]; };
3470 : : struct s2 { struct s1 b; int c; };
3471 : : struct s3 { int d; struct s1 e; };
3472 : : union u { struct s2 f; struct s3 g; } *p, *q;
3473 : :
3474 : : the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3475 : : "p->g.e" (base "p->g") and might partially overlap the s1 at
3476 : : "q->g.e" (base "q->g"). */
3477 : 251802 : if (!flag_strict_aliasing
3478 : 231163 : || ref_contains_union_access_p (full_seq.object_a)
3479 : 440575 : || ref_contains_union_access_p (full_seq.object_b))
3480 : : {
3481 : 63029 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3482 : 63029 : return res;
3483 : : }
3484 : :
3485 : 188773 : DDR_COULD_BE_INDEPENDENT_P (res) = true;
3486 : 188773 : if (!loop_nest.exists ()
3487 : 377546 : || (object_address_invariant_in_loop_p (loop_nest[0],
3488 : 188773 : full_seq.object_a)
3489 : 19720 : && object_address_invariant_in_loop_p (loop_nest[0],
3490 : 19720 : full_seq.object_b)))
3491 : : {
3492 : 9337 : DDR_OBJECT_A (res) = full_seq.object_a;
3493 : 9337 : DDR_OBJECT_B (res) = full_seq.object_b;
3494 : : }
3495 : : }
3496 : :
3497 : 3092140 : DDR_AFFINE_P (res) = true;
3498 : 3092140 : DDR_ARE_DEPENDENT (res) = NULL_TREE;
3499 : 3092140 : DDR_SUBSCRIPTS (res).create (full_seq.length);
3500 : 3092140 : DDR_LOOP_NEST (res) = loop_nest;
3501 : 3092140 : DDR_SELF_REFERENCE (res) = false;
3502 : :
3503 : 6986531 : for (i = 0; i < full_seq.length; ++i)
3504 : : {
3505 : 3894391 : struct subscript *subscript;
3506 : :
3507 : 3894391 : subscript = XNEW (struct subscript);
3508 : 3894391 : SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3509 : 3894391 : SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3510 : 3894391 : SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3511 : 3894391 : SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3512 : 3894391 : SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3513 : 3894391 : SUB_DISTANCE (subscript) = chrec_dont_know;
3514 : 3894391 : DDR_SUBSCRIPTS (res).safe_push (subscript);
3515 : : }
3516 : :
3517 : : return res;
3518 : : }
3519 : :
3520 : : /* Initialize a data dependence relation between data accesses A and
3521 : : B. LOOP_NEST is the vector of loops surrounding the references; its
3522 : : length gives the size of the classic distance/direction vectors. */
3523 : :
3524 : : struct data_dependence_relation *
3525 : 13089312 : initialize_data_dependence_relation (struct data_reference *a,
3526 : : struct data_reference *b,
3527 : : vec<loop_p> loop_nest)
3528 : : {
3529 : 13089312 : data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3530 : 13089312 : DDR_A (res) = a;
3531 : 13089312 : DDR_B (res) = b;
3532 : 13089312 : DDR_LOOP_NEST (res).create (0);
3533 : 13089312 : DDR_SUBSCRIPTS (res).create (0);
3534 : 13089312 : DDR_DIR_VECTS (res).create (0);
3535 : 13089312 : DDR_DIST_VECTS (res).create (0);
3536 : :
3537 : 13089312 : if (a == NULL || b == NULL)
3538 : : {
3539 : 0 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3540 : 0 : return res;
3541 : : }
3542 : :
3543 : : /* If the data references do not alias, then they are independent. */
3544 : 19595810 : if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3545 : : {
3546 : 6935105 : DDR_ARE_DEPENDENT (res) = chrec_known;
3547 : 6935105 : return res;
3548 : : }
3549 : :
3550 : 6154207 : return initialize_data_dependence_relation (res, loop_nest, false);
3551 : : }
3552 : :
3553 : :
3554 : : /* Frees memory used by the conflict function F. */
3555 : :
3556 : : static void
3557 : 14537202 : free_conflict_function (conflict_function *f)
3558 : : {
3559 : 14537202 : unsigned i;
3560 : :
3561 : 14537202 : if (CF_NONTRIVIAL_P (f))
3562 : : {
3563 : 4915456 : for (i = 0; i < f->n; i++)
3564 : 2457728 : affine_fn_free (f->fns[i]);
3565 : : }
3566 : 14537202 : free (f);
3567 : 14537202 : }
3568 : :
3569 : : /* Frees memory used by SUBSCRIPTS. */
3570 : :
3571 : : static void
3572 : 3092140 : free_subscripts (vec<subscript_p> subscripts)
3573 : : {
3574 : 13170811 : for (subscript_p s : subscripts)
3575 : : {
3576 : 3894391 : free_conflict_function (s->conflicting_iterations_in_a);
3577 : 3894391 : free_conflict_function (s->conflicting_iterations_in_b);
3578 : 3894391 : free (s);
3579 : : }
3580 : 3092140 : subscripts.release ();
3581 : 3092140 : }
3582 : :
3583 : : /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3584 : : description. */
3585 : :
3586 : : static inline void
3587 : 2197739 : finalize_ddr_dependent (struct data_dependence_relation *ddr,
3588 : : tree chrec)
3589 : : {
3590 : 2197739 : DDR_ARE_DEPENDENT (ddr) = chrec;
3591 : 2197739 : free_subscripts (DDR_SUBSCRIPTS (ddr));
3592 : 2197739 : DDR_SUBSCRIPTS (ddr).create (0);
3593 : 53486 : }
3594 : :
3595 : : /* The dependence relation DDR cannot be represented by a distance
3596 : : vector. */
3597 : :
3598 : : static inline void
3599 : 1916 : non_affine_dependence_relation (struct data_dependence_relation *ddr)
3600 : : {
3601 : 1916 : if (dump_file && (dump_flags & TDF_DETAILS))
3602 : 71 : fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3603 : :
3604 : 1916 : DDR_AFFINE_P (ddr) = false;
3605 : 1916 : }
3606 : :
3607 : :
3608 : :
3609 : : /* This section contains the classic Banerjee tests. */
3610 : :
3611 : : /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3612 : : variables, i.e., if the ZIV (Zero Index Variable) test is true. */
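: : /* E.g., for the accesses A[4] and A[10] both access functions are the
: : integer constants 4 and 10, so the subscript is ZIV; the pair
: : ({0, +, 1}_1, 4) is not. */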
3613 : :
3614 : : static inline bool
3615 : 2201686 : ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3616 : : {
3617 : 2201686 : return (evolution_function_is_constant_p (chrec_a)
3618 : 2690370 : && evolution_function_is_constant_p (chrec_b));
3619 : : }
3620 : :
3621 : : /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3622 : : variable, i.e., if the SIV (Single Index Variable) test is true. */
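: : /* E.g., (4, {0, +, 1}_1) and ({0, +, 1}_1, {2, +, 2}_1) are SIV pairs,
: : while ({0, +, 1}_1, {0, +, 1}_2) is not, since the two chrecs evolve
: : in different loops. */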
3623 : :
3624 : : static bool
3625 : 1714480 : siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3626 : : {
3627 : 3427485 : if ((evolution_function_is_constant_p (chrec_a)
3628 : 1478 : && evolution_function_is_univariate_p (chrec_b))
3629 : 3427485 : || (evolution_function_is_constant_p (chrec_b)
3630 : 1214 : && evolution_function_is_univariate_p (chrec_a)))
3631 : 2686 : return true;
3632 : :
3633 : 1711794 : if (evolution_function_is_univariate_p (chrec_a)
3634 : 1711794 : && evolution_function_is_univariate_p (chrec_b))
3635 : : {
3636 : 1685452 : switch (TREE_CODE (chrec_a))
3637 : : {
3638 : 1685452 : case POLYNOMIAL_CHREC:
3639 : 1685452 : switch (TREE_CODE (chrec_b))
3640 : : {
3641 : 1685452 : case POLYNOMIAL_CHREC:
3642 : 1685452 : if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3643 : : return false;
3644 : : /* FALLTHRU */
3645 : :
3646 : : default:
3647 : : return true;
3648 : : }
3649 : :
3650 : : default:
3651 : : return true;
3652 : : }
3653 : : }
3654 : :
3655 : : return false;
3656 : : }
3657 : :
3658 : : /* Creates a conflict function with N dimensions. The affine functions
3659 : : in each dimension follow. */
3660 : :
3661 : : static conflict_function *
3662 : 2457728 : conflict_fn (unsigned n, ...)
3663 : : {
3664 : 2457728 : unsigned i;
3665 : 2457728 : conflict_function *ret = XCNEW (conflict_function);
3666 : 2457728 : va_list ap;
3667 : :
3668 : 2457728 : gcc_assert (n > 0 && n <= MAX_DIM);
3669 : 2457728 : va_start (ap, n);
3670 : :
3671 : 2457728 : ret->n = n;
3672 : 4915456 : for (i = 0; i < n; i++)
3673 : 2457728 : ret->fns[i] = va_arg (ap, affine_fn);
3674 : 2457728 : va_end (ap);
3675 : :
3676 : 2457728 : return ret;
3677 : : }
3678 : :
3679 : : /* Returns constant affine function with value CST. */
3680 : :
3681 : : static affine_fn
3682 : 2344334 : affine_fn_cst (tree cst)
3683 : : {
3684 : 2344334 : affine_fn fn;
3685 : 2344334 : fn.create (1);
3686 : 2344334 : fn.quick_push (cst);
3687 : 2344334 : return fn;
3688 : : }
3689 : :
3690 : : /* Returns affine function with single variable, CST + COEF * x_DIM. */
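: : /* E.g., for CST = 0, DIM = 2 and COEF = 3 this builds the coefficient
: : vector (0, 0, 3), representing 0 + 3 * x_2. */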
3691 : :
3692 : : static affine_fn
3693 : 113394 : affine_fn_univar (tree cst, unsigned dim, tree coef)
3694 : : {
3695 : 113394 : affine_fn fn;
3696 : 113394 : fn.create (dim + 1);
3697 : 113394 : unsigned i;
3698 : :
3699 : 113394 : gcc_assert (dim > 0);
3700 : 113394 : fn.quick_push (cst);
3701 : 226788 : for (i = 1; i < dim; i++)
3702 : 0 : fn.quick_push (integer_zero_node);
3703 : 113394 : fn.quick_push (coef);
3704 : 113394 : return fn;
3705 : : }
3706 : :
3707 : : /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3708 : : *OVERLAPS_B are initialized to the functions that describe the
3709 : : relation between the elements accessed twice by CHREC_A and
3710 : : CHREC_B. For k >= 0, the following property is verified:
3711 : :
3712 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
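: : /* E.g., for CHREC_A = 4 and CHREC_B = 4 the difference is zero, so the
: : accesses conflict at every iteration; for CHREC_A = 4 and CHREC_B = 6
: : they never conflict and *LAST_CONFLICTS is set to zero. */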
3713 : :
3714 : : static void
3715 : 487206 : analyze_ziv_subscript (tree chrec_a,
3716 : : tree chrec_b,
3717 : : conflict_function **overlaps_a,
3718 : : conflict_function **overlaps_b,
3719 : : tree *last_conflicts)
3720 : : {
3721 : 487206 : tree type, difference;
3722 : 487206 : dependence_stats.num_ziv++;
3723 : :
3724 : 487206 : if (dump_file && (dump_flags & TDF_DETAILS))
3725 : 20091 : fprintf (dump_file, "(analyze_ziv_subscript \n");
3726 : :
3727 : 487206 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3728 : 487206 : chrec_a = chrec_convert (type, chrec_a, NULL);
3729 : 487206 : chrec_b = chrec_convert (type, chrec_b, NULL);
3730 : 487206 : difference = chrec_fold_minus (type, chrec_a, chrec_b);
3731 : :
3732 : 487206 : switch (TREE_CODE (difference))
3733 : : {
3734 : 487206 : case INTEGER_CST:
3735 : 487206 : if (integer_zerop (difference))
3736 : : {
3737 : : /* The difference is equal to zero: the accessed index
3738 : : overlaps for each iteration in the loop. */
3739 : 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3740 : 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3741 : 0 : *last_conflicts = chrec_dont_know;
3742 : 0 : dependence_stats.num_ziv_dependent++;
3743 : : }
3744 : : else
3745 : : {
3746 : : /* The accesses do not overlap. */
3747 : 487206 : *overlaps_a = conflict_fn_no_dependence ();
3748 : 487206 : *overlaps_b = conflict_fn_no_dependence ();
3749 : 487206 : *last_conflicts = integer_zero_node;
3750 : 487206 : dependence_stats.num_ziv_independent++;
3751 : : }
3752 : : break;
3753 : :
3754 : 0 : default:
3755 : : /* We're not sure whether the indexes overlap. For the moment,
3756 : : conservatively answer "don't know". */
3757 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3758 : 0 : fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3759 : :
3760 : 0 : *overlaps_a = conflict_fn_not_known ();
3761 : 0 : *overlaps_b = conflict_fn_not_known ();
3762 : 0 : *last_conflicts = chrec_dont_know;
3763 : 0 : dependence_stats.num_ziv_unimplemented++;
3764 : 0 : break;
3765 : : }
3766 : :
3767 : 487206 : if (dump_file && (dump_flags & TDF_DETAILS))
3768 : 20091 : fprintf (dump_file, ")\n");
3769 : 487206 : }
3770 : :
3771 : : /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3772 : : and only if it fits the unsigned int type. If this is not the case, or the
3773 : : bound on the number of iterations of LOOP could not be derived, returns
3774 : : chrec_dont_know. */
3775 : :
3776 : : static tree
3777 : 0 : max_stmt_executions_tree (class loop *loop)
3778 : : {
3779 : 0 : widest_int nit;
3780 : :
3781 : 0 : if (!max_stmt_executions (loop, &nit))
3782 : 0 : return chrec_dont_know;
3783 : :
3784 : 0 : if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3785 : 0 : return chrec_dont_know;
3786 : :
3787 : 0 : return wide_int_to_tree (unsigned_type_node, nit);
3788 : 0 : }
3789 : :
3790 : : /* Determine whether the CHREC is always positive/negative. If the expression
3791 : : cannot be statically analyzed, return false; otherwise store the answer in
3792 : : *VALUE. */
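: : /* E.g., *VALUE is set to true for the chrec 5 and to false for -3;
: : for 0, or when the sign cannot be determined, false is returned. */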
3793 : :
3794 : : static bool
3795 : 4170 : chrec_is_positive (tree chrec, bool *value)
3796 : : {
3797 : 4170 : bool value0, value1, value2;
3798 : 4170 : tree end_value, nb_iter;
3799 : :
3800 : 4170 : switch (TREE_CODE (chrec))
3801 : : {
3802 : 0 : case POLYNOMIAL_CHREC:
3803 : 0 : if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3804 : 0 : || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3805 : 0 : return false;
3806 : :
3807 : : /* FIXME -- overflows. */
3808 : 0 : if (value0 == value1)
3809 : : {
3810 : 0 : *value = value0;
3811 : 0 : return true;
3812 : : }
3813 : :
3814 : : /* Otherwise the chrec has the form "{-197, +, 2}_1",
3815 : : and the proof consists of showing that the sign never
3816 : : changes during the execution of the loop, from 0 to
3817 : : loop->nb_iterations. */
3818 : 0 : if (!evolution_function_is_affine_p (chrec))
3819 : : return false;
3820 : :
3821 : 0 : nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3822 : 0 : if (chrec_contains_undetermined (nb_iter))
3823 : : return false;
3824 : :
3825 : : #if 0
3826 : : /* TODO -- If the test is after the exit, we may decrease the number of
3827 : : iterations by one. */
3828 : : if (after_exit)
3829 : : nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3830 : : #endif
3831 : :
3832 : 0 : end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3833 : :
3834 : 0 : if (!chrec_is_positive (end_value, &value2))
3835 : : return false;
3836 : :
3837 : 0 : *value = value0;
3838 : 0 : return value0 == value1;
3839 : :
3840 : 4170 : case INTEGER_CST:
3841 : 4170 : switch (tree_int_cst_sgn (chrec))
3842 : : {
3843 : 1904 : case -1:
3844 : 1904 : *value = false;
3845 : 1904 : break;
3846 : 2266 : case 1:
3847 : 2266 : *value = true;
3848 : 2266 : break;
3849 : : default:
3850 : : return false;
3851 : : }
3852 : : return true;
3853 : :
3854 : : default:
3855 : : return false;
3856 : : }
3857 : : }
3858 : :
3859 : :
3860 : : /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3861 : : constant, and CHREC_B is an affine function. *OVERLAPS_A and
3862 : : *OVERLAPS_B are initialized to the functions that describe the
3863 : : relation between the elements accessed twice by CHREC_A and
3864 : : CHREC_B. For k >= 0, the following property is verified:
3865 : :
3866 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
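: : /* E.g., for CHREC_A = 12 and CHREC_B = {10, +, 1}_1, B accesses element
: : 12 at iteration 2, so *OVERLAPS_A = {0}, *OVERLAPS_B = {2} and
: : *LAST_CONFLICTS = 1. */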
3867 : :
3868 : : static void
3869 : 2686 : analyze_siv_subscript_cst_affine (tree chrec_a,
3870 : : tree chrec_b,
3871 : : conflict_function **overlaps_a,
3872 : : conflict_function **overlaps_b,
3873 : : tree *last_conflicts)
3874 : : {
3875 : 2686 : bool value0, value1, value2;
3876 : 2686 : tree type, difference, tmp;
3877 : :
3878 : 2686 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3879 : 2686 : chrec_a = chrec_convert (type, chrec_a, NULL);
3880 : 2686 : chrec_b = chrec_convert (type, chrec_b, NULL);
3881 : 2686 : difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3882 : :
3883 : : /* Special case overlap in the first iteration. */
3884 : 2686 : if (integer_zerop (difference))
3885 : : {
3886 : 599 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3887 : 599 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3888 : 599 : *last_conflicts = integer_one_node;
3889 : 599 : return;
3890 : : }
3891 : :
3892 : 2087 : if (!chrec_is_positive (initial_condition (difference), &value0))
3893 : : {
3894 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3895 : 0 : fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3896 : :
3897 : 0 : dependence_stats.num_siv_unimplemented++;
3898 : 0 : *overlaps_a = conflict_fn_not_known ();
3899 : 0 : *overlaps_b = conflict_fn_not_known ();
3900 : 0 : *last_conflicts = chrec_dont_know;
3901 : 0 : return;
3902 : : }
3903 : : else
3904 : : {
3905 : 2087 : if (value0 == false)
3906 : : {
3907 : 1690 : if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3908 : 1690 : || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3909 : : {
3910 : 4 : if (dump_file && (dump_flags & TDF_DETAILS))
3911 : 0 : fprintf (dump_file, "siv test failed: chrec not positive.\n");
3912 : :
3913 : 4 : *overlaps_a = conflict_fn_not_known ();
3914 : 4 : *overlaps_b = conflict_fn_not_known ();
3915 : 4 : *last_conflicts = chrec_dont_know;
3916 : 4 : dependence_stats.num_siv_unimplemented++;
3917 : 4 : return;
3918 : : }
3919 : : else
3920 : : {
3921 : 1686 : if (value1 == true)
3922 : : {
3923 : : /* Example:
3924 : : chrec_a = 12
3925 : : chrec_b = {10, +, 1}
3926 : : */
3927 : :
3928 : 1686 : if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3929 : : {
3930 : 1405 : HOST_WIDE_INT numiter;
3931 : 1405 : class loop *loop = get_chrec_loop (chrec_b);
3932 : :
3933 : 1405 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3934 : 1405 : tmp = fold_build2 (EXACT_DIV_EXPR, type,
3935 : : fold_build1 (ABS_EXPR, type, difference),
3936 : : CHREC_RIGHT (chrec_b));
3937 : 1405 : *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3938 : 1405 : *last_conflicts = integer_one_node;
3939 : :
3940 : :
3941 : : /* Perform weak-zero siv test to see if overlap is
3942 : : outside the loop bounds. */
3943 : 1405 : numiter = max_stmt_executions_int (loop);
3944 : :
3945 : 1405 : if (numiter >= 0
3946 : 1405 : && compare_tree_int (tmp, numiter) > 0)
3947 : : {
3948 : 0 : free_conflict_function (*overlaps_a);
3949 : 0 : free_conflict_function (*overlaps_b);
3950 : 0 : *overlaps_a = conflict_fn_no_dependence ();
3951 : 0 : *overlaps_b = conflict_fn_no_dependence ();
3952 : 0 : *last_conflicts = integer_zero_node;
3953 : 0 : dependence_stats.num_siv_independent++;
3954 : 0 : return;
3955 : : }
3956 : 1405 : dependence_stats.num_siv_dependent++;
3957 : 1405 : return;
3958 : : }
3959 : :
3960 : : /* When the step does not divide the difference, there are
3961 : : no overlaps. */
3962 : : else
3963 : : {
3964 : 281 : *overlaps_a = conflict_fn_no_dependence ();
3965 : 281 : *overlaps_b = conflict_fn_no_dependence ();
3966 : 281 : *last_conflicts = integer_zero_node;
3967 : 281 : dependence_stats.num_siv_independent++;
3968 : 281 : return;
3969 : : }
3970 : : }
3971 : :
3972 : : else
3973 : : {
3974 : : /* Example:
3975 : : chrec_a = 12
3976 : : chrec_b = {10, +, -1}
3977 : :
3978 : : In this case, chrec_a will not overlap with chrec_b. */
3979 : 0 : *overlaps_a = conflict_fn_no_dependence ();
3980 : 0 : *overlaps_b = conflict_fn_no_dependence ();
3981 : 0 : *last_conflicts = integer_zero_node;
3982 : 0 : dependence_stats.num_siv_independent++;
3983 : 0 : return;
3984 : : }
3985 : : }
3986 : : }
3987 : : else
3988 : : {
3989 : 397 : if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3990 : 397 : || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3991 : : {
3992 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3993 : 0 : fprintf (dump_file, "siv test failed: chrec not positive.\n");
3994 : :
3995 : 0 : *overlaps_a = conflict_fn_not_known ();
3996 : 0 : *overlaps_b = conflict_fn_not_known ();
3997 : 0 : *last_conflicts = chrec_dont_know;
3998 : 0 : dependence_stats.num_siv_unimplemented++;
3999 : 0 : return;
4000 : : }
4001 : : else
4002 : : {
4003 : 397 : if (value2 == false)
4004 : : {
4005 : : /* Example:
4006 : : chrec_a = 3
4007 : : chrec_b = {10, +, -1}
4008 : : */
4009 : 214 : if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
4010 : : {
4011 : 109 : HOST_WIDE_INT numiter;
4012 : 109 : class loop *loop = get_chrec_loop (chrec_b);
4013 : :
4014 : 109 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4015 : 109 : tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
4016 : : CHREC_RIGHT (chrec_b));
4017 : 109 : *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
4018 : 109 : *last_conflicts = integer_one_node;
4019 : :
4020 : : /* Perform weak-zero siv test to see if overlap is
4021 : : outside the loop bounds. */
4022 : 109 : numiter = max_stmt_executions_int (loop);
4023 : :
4024 : 109 : if (numiter >= 0
4025 : 109 : && compare_tree_int (tmp, numiter) > 0)
4026 : : {
4027 : 0 : free_conflict_function (*overlaps_a);
4028 : 0 : free_conflict_function (*overlaps_b);
4029 : 0 : *overlaps_a = conflict_fn_no_dependence ();
4030 : 0 : *overlaps_b = conflict_fn_no_dependence ();
4031 : 0 : *last_conflicts = integer_zero_node;
4032 : 0 : dependence_stats.num_siv_independent++;
4033 : 0 : return;
4034 : : }
4035 : 109 : dependence_stats.num_siv_dependent++;
4036 : 109 : return;
4037 : : }
4038 : :
4039 : : /* When the step does not divide the difference, there
4040 : : are no overlaps. */
4041 : : else
4042 : : {
4043 : 105 : *overlaps_a = conflict_fn_no_dependence ();
4044 : 105 : *overlaps_b = conflict_fn_no_dependence ();
4045 : 105 : *last_conflicts = integer_zero_node;
4046 : 105 : dependence_stats.num_siv_independent++;
4047 : 105 : return;
4048 : : }
4049 : : }
4050 : : else
4051 : : {
4052 : : /* Example:
4053 : : chrec_a = 3
4054 : : chrec_b = {4, +, 1}
4055 : :
4056 : : In this case, chrec_a will not overlap with chrec_b. */
4057 : 183 : *overlaps_a = conflict_fn_no_dependence ();
4058 : 183 : *overlaps_b = conflict_fn_no_dependence ();
4059 : 183 : *last_conflicts = integer_zero_node;
4060 : 183 : dependence_stats.num_siv_independent++;
4061 : 183 : return;
4062 : : }
4063 : : }
4064 : : }
4065 : : }
4066 : : }
4067 : :
4068 : : /* Recursive helper function for initializing the matrix A. Returns
4069 : : the initial value of CHREC. */
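: : /* E.g., for CHREC = {{3, +, 5}_1, +, 7}_2, INDEX = 0 and MULT = 1,
: : this stores A[0][0] = 7 and A[1][0] = 5, and returns 3. */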
4070 : :
4071 : : static tree
4072 : 3330718 : initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4073 : : {
4074 : 6661428 : gcc_assert (chrec);
4075 : :
4076 : 6661428 : switch (TREE_CODE (chrec))
4077 : : {
4078 : 3330718 : case POLYNOMIAL_CHREC:
4079 : 3330718 : HOST_WIDE_INT chrec_right;
4080 : 3330718 : if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4081 : 8 : return chrec_dont_know;
4082 : 3330710 : chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4083 : : /* We want to be able to negate without overflow. */
4084 : 3330710 : if (chrec_right == HOST_WIDE_INT_MIN)
4085 : 0 : return chrec_dont_know;
4086 : 3330710 : A[index][0] = mult * chrec_right;
4087 : 3330710 : return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4088 : :
4089 : 0 : case PLUS_EXPR:
4090 : 0 : case MULT_EXPR:
4091 : 0 : case MINUS_EXPR:
4092 : 0 : {
4093 : 0 : tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4094 : 0 : tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4095 : :
4096 : 0 : return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4097 : : }
4098 : :
4099 : 0 : CASE_CONVERT:
4100 : 0 : {
4101 : 0 : tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4102 : 0 : return chrec_convert (chrec_type (chrec), op, NULL);
4103 : : }
4104 : :
4105 : 0 : case BIT_NOT_EXPR:
4106 : 0 : {
4107 : : /* Handle ~X as -1 - X. */
4108 : 0 : tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4109 : 0 : return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4110 : 0 : build_int_cst (TREE_TYPE (chrec), -1), op);
4111 : : }
4112 : :
4113 : 3330710 : case INTEGER_CST:
4114 : 3330710 : return cst_and_fits_in_hwi (chrec) ? chrec : chrec_dont_know;
4115 : :
4116 : 0 : default:
4117 : 0 : gcc_unreachable ();
4118 : : return NULL_TREE;
4119 : : }
4120 : : }
4121 : :
4122 : : #define FLOOR_DIV(x,y) ((x) / (y))
4123 : :
4124 : : /* Solves the special case of the Diophantine equation:
4125 : : | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4126 : :
4127 : : Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4128 : : number of iterations that loops X and Y run. The overlaps will be
4129 : : constructed as evolutions in dimension DIM. */
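: : /* E.g., for STEP_A = 4, STEP_B = 6 and NITER = 100: gcd (4, 6) = 2, so
: : *OVERLAPS_A = {0, +, 3}_DIM and *OVERLAPS_B = {0, +, 2}_DIM (both
: : sides then advance by 12 per conflict), and *LAST_CONFLICTS
: : = MIN (100/3, 100/2) = 33. */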
4130 : :
4131 : : static void
4132 : 58 : compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4133 : : HOST_WIDE_INT step_a,
4134 : : HOST_WIDE_INT step_b,
4135 : : affine_fn *overlaps_a,
4136 : : affine_fn *overlaps_b,
4137 : : tree *last_conflicts, int dim)
4138 : : {
4139 : 58 : if (((step_a > 0 && step_b > 0)
4140 : 8 : || (step_a < 0 && step_b < 0)))
4141 : : {
4142 : 54 : HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4143 : 54 : HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4144 : :
4145 : 54 : gcd_steps_a_b = gcd (step_a, step_b);
4146 : 54 : step_overlaps_a = step_b / gcd_steps_a_b;
4147 : 54 : step_overlaps_b = step_a / gcd_steps_a_b;
4148 : :
4149 : 54 : if (niter > 0)
4150 : : {
4151 : 54 : tau2 = FLOOR_DIV (niter, step_overlaps_a);
4152 : 54 : tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4153 : 54 : last_conflict = tau2;
4154 : 54 : *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4155 : : }
4156 : : else
4157 : 0 : *last_conflicts = chrec_dont_know;
4158 : :
4159 : 54 : *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4160 : : build_int_cst (NULL_TREE,
4161 : 54 : step_overlaps_a));
4162 : 54 : *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4163 : : build_int_cst (NULL_TREE,
4164 : 54 : step_overlaps_b));
4165 : 54 : }
4166 : :
4167 : : else
4168 : : {
4169 : 4 : *overlaps_a = affine_fn_cst (integer_zero_node);
4170 : 4 : *overlaps_b = affine_fn_cst (integer_zero_node);
4171 : 4 : *last_conflicts = integer_zero_node;
4172 : : }
4173 : 58 : }
4174 : :
4175 : : /* Solves the special case of a Diophantine equation where CHREC_A is
4176 : : an affine bivariate function, and CHREC_B is an affine univariate
4177 : : function. For example,
4178 : :
4179 : : | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4180 : :
4181 : : has the following overlapping functions:
4182 : :
4183 : : | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4184 : : | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4185 : : | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4186 : :
4187 : : FORNOW: This is a specialized implementation for a case occurring in
4188 : : a common benchmark. Implement the general algorithm. */
4189 : :
4190 : : static void
4191 : 0 : compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4192 : : conflict_function **overlaps_a,
4193 : : conflict_function **overlaps_b,
4194 : : tree *last_conflicts)
4195 : : {
4196 : 0 : bool xz_p, yz_p, xyz_p;
4197 : 0 : HOST_WIDE_INT step_x, step_y, step_z;
4198 : 0 : HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4199 : 0 : affine_fn overlaps_a_xz, overlaps_b_xz;
4200 : 0 : affine_fn overlaps_a_yz, overlaps_b_yz;
4201 : 0 : affine_fn overlaps_a_xyz, overlaps_b_xyz;
4202 : 0 : affine_fn ova1, ova2, ovb;
4203 : 0 : tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4204 : :
4205 : 0 : step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4206 : 0 : step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4207 : 0 : step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4208 : :
4209 : 0 : niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4210 : 0 : niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4211 : 0 : niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4212 : :
4213 : 0 : if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4214 : : {
4215 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4216 : 0 : fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4217 : :
4218 : 0 : *overlaps_a = conflict_fn_not_known ();
4219 : 0 : *overlaps_b = conflict_fn_not_known ();
4220 : 0 : *last_conflicts = chrec_dont_know;
4221 : 0 : return;
4222 : : }
4223 : :
4224 : 0 : niter = MIN (niter_x, niter_z);
4225 : 0 : compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4226 : : &overlaps_a_xz,
4227 : : &overlaps_b_xz,
4228 : : &last_conflicts_xz, 1);
4229 : 0 : niter = MIN (niter_y, niter_z);
4230 : 0 : compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4231 : : &overlaps_a_yz,
4232 : : &overlaps_b_yz,
4233 : : &last_conflicts_yz, 2);
4234 : 0 : niter = MIN (niter_x, niter_z);
4235 : 0 : niter = MIN (niter_y, niter);
4236 : 0 : compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4237 : : &overlaps_a_xyz,
4238 : : &overlaps_b_xyz,
4239 : : &last_conflicts_xyz, 3);
4240 : :
4241 : 0 : xz_p = !integer_zerop (last_conflicts_xz);
4242 : 0 : yz_p = !integer_zerop (last_conflicts_yz);
4243 : 0 : xyz_p = !integer_zerop (last_conflicts_xyz);
4244 : :
4245 : 0 : if (xz_p || yz_p || xyz_p)
4246 : : {
4247 : 0 : ova1 = affine_fn_cst (integer_zero_node);
4248 : 0 : ova2 = affine_fn_cst (integer_zero_node);
4249 : 0 : ovb = affine_fn_cst (integer_zero_node);
4250 : 0 : if (xz_p)
4251 : : {
4252 : 0 : affine_fn t0 = ova1;
4253 : 0 : affine_fn t2 = ovb;
4254 : :
4255 : 0 : ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4256 : 0 : ovb = affine_fn_plus (ovb, overlaps_b_xz);
4257 : 0 : affine_fn_free (t0);
4258 : 0 : affine_fn_free (t2);
4259 : 0 : *last_conflicts = last_conflicts_xz;
4260 : : }
4261 : 0 : if (yz_p)
4262 : : {
4263 : 0 : affine_fn t0 = ova2;
4264 : 0 : affine_fn t2 = ovb;
4265 : :
4266 : 0 : ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4267 : 0 : ovb = affine_fn_plus (ovb, overlaps_b_yz);
4268 : 0 : affine_fn_free (t0);
4269 : 0 : affine_fn_free (t2);
4270 : 0 : *last_conflicts = last_conflicts_yz;
4271 : : }
4272 : 0 : if (xyz_p)
4273 : : {
4274 : 0 : affine_fn t0 = ova1;
4275 : 0 : affine_fn t2 = ova2;
4276 : 0 : affine_fn t4 = ovb;
4277 : :
4278 : 0 : ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4279 : 0 : ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4280 : 0 : ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4281 : 0 : affine_fn_free (t0);
4282 : 0 : affine_fn_free (t2);
4283 : 0 : affine_fn_free (t4);
4284 : 0 : *last_conflicts = last_conflicts_xyz;
4285 : : }
4286 : 0 : *overlaps_a = conflict_fn (2, ova1, ova2);
4287 : 0 : *overlaps_b = conflict_fn (1, ovb);
4288 : 0 : }
4289 : : else
4290 : : {
4291 : 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4292 : 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4293 : 0 : *last_conflicts = integer_zero_node;
4294 : : }
4295 : :
4296 : 0 : affine_fn_free (overlaps_a_xz);
4297 : 0 : affine_fn_free (overlaps_b_xz);
4298 : 0 : affine_fn_free (overlaps_a_yz);
4299 : 0 : affine_fn_free (overlaps_b_yz);
4300 : 0 : affine_fn_free (overlaps_a_xyz);
4301 : 0 : affine_fn_free (overlaps_b_xyz);
4302 : : }
4303 : :
4304 : : /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4305 : :
4306 : : static void
4307 : 3374632 : lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4308 : : int size)
4309 : : {
4310 : 3374632 : memcpy (vec2, vec1, size * sizeof (*vec1));
4311 : 0 : }
4312 : :
4313 : : /* Copy the elements of M x N matrix MAT1 to MAT2. */
4314 : :
4315 : : static void
4316 : 1665289 : lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4317 : : int m, int n)
4318 : : {
4319 : 1665289 : int i;
4320 : :
4321 : 4995867 : for (i = 0; i < m; i++)
4322 : 3330578 : lambda_vector_copy (mat1[i], mat2[i], n);
4323 : 1665289 : }
4324 : :
4325 : : /* Store the N x N identity matrix in MAT. */
4326 : :
4327 : : static void
4328 : 1665289 : lambda_matrix_id (lambda_matrix mat, int size)
4329 : : {
4330 : 1665289 : int i, j;
4331 : :
4332 : 4995867 : for (i = 0; i < size; i++)
4333 : 9991734 : for (j = 0; j < size; j++)
4334 : 9991734 : mat[i][j] = (i == j) ? 1 : 0;
4335 : 1665289 : }
4336 : :
4337 : : /* Return the index of the first nonzero element of vector VEC1 between
4338 : : START and N. We must have START <= N.
4339 : : Returns N if VEC1 is the zero vector. */
4340 : :
4341 : : static int
4342 : 1665289 : lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4343 : : {
4344 : 1665289 : int j = start;
4345 : 1665289 : while (j < n && vec1[j] == 0)
4346 : 0 : j++;
4347 : 1665289 : return j;
4348 : : }
4349 : :
4350 : : /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4351 : : R2 = R2 + CONST1 * R1. */
4352 : :
4353 : : static bool
4354 : 3330848 : lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4355 : : lambda_int const1)
4356 : : {
4357 : 3330848 : int i;
4358 : :
4359 : 3330848 : if (const1 == 0)
4360 : : return true;
4361 : :
4362 : 8326535 : for (i = 0; i < n; i++)
4363 : : {
4364 : 4995921 : bool ovf;
4365 : 4995921 : lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4366 : 4995921 : if (ovf)
4367 : 3330848 : return false;
4368 : 4995921 : lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4369 : 4995921 : if (ovf || tem2 == HOST_WIDE_INT_MIN)
4370 : : return false;
4371 : 4995921 : mat[r2][i] = tem2;
4372 : : }
4373 : :
4374 : : return true;
4375 : : }
4376 : :
4377 : : /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4378 : : and store the result in VEC2. */
4379 : :
4380 : : static void
4381 : 1654074 : lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4382 : : int size, lambda_int const1)
4383 : : {
4384 : 1654074 : int i;
4385 : :
4386 : 1654074 : if (const1 == 0)
4387 : 0 : lambda_vector_clear (vec2, size);
4388 : : else
4389 : 4962222 : for (i = 0; i < size; i++)
4390 : 3308148 : vec2[i] = const1 * vec1[i];
4391 : 1654074 : }
4392 : :
4393 : : /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4394 : :
4395 : : static void
4396 : 1654074 : lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4397 : : int size)
4398 : : {
4399 : 0 : lambda_vector_mult_const (vec1, vec2, size, -1);
4400 : 0 : }
4401 : :
4402 : : /* Negate row R1 of matrix MAT which has N columns. */
4403 : :
4404 : : static void
4405 : 1654074 : lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4406 : : {
4407 : 0 : lambda_vector_negate (mat[r1], mat[r1], n);
4408 : 1654074 : }
4409 : :
4410 : : /* Return true if two vectors are equal. */
4411 : :
4412 : : static bool
4413 : 353556 : lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4414 : : {
4415 : 353556 : int i;
4416 : 354669 : for (i = 0; i < size; i++)
4417 : 354404 : if (vec1[i] != vec2[i])
4418 : : return false;
4419 : : return true;
4420 : : }
4421 : :
4422 : : /* Given an M x N integer matrix A, this function determines an M x
4423 : : M unimodular matrix U, and an M x N echelon matrix S such that
4424 : : "U.A = S". This decomposition is also known as "right Hermite". Return false if a row operation overflows.
4425 : :
4426 : : Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4427 : : Restructuring Compilers" Utpal Banerjee. */
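 : :
 : : /* For instance, for the 2 x 1 matrix A = (2, -6)^T, one valid
 : : decomposition (illustrative only, the factorization is not unique) is
 : : | U = [ 1 0 ] S = U.A = [ 2 ]
 : : | [ 3 1 ] [ 0 ]
 : : U is unimodular (det U = 1), S is in echelon form, and S[0][0] = 2
 : : = gcd (2, 6); this is how analyze_subscript_affine_affine below
 : : obtains gcd_alpha_beta. */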
4428 : :
4429 : : static bool
4430 : 1665289 : lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4431 : : lambda_matrix S, lambda_matrix U)
4432 : : {
4433 : 1665289 : int i, j, i0 = 0;
4434 : :
4435 : 1665289 : lambda_matrix_copy (A, S, m, n);
4436 : 1665289 : lambda_matrix_id (U, m);
4437 : :
4438 : 3330578 : for (j = 0; j < n; j++)
4439 : : {
4440 : 3330578 : if (lambda_vector_first_nz (S[j], m, i0) < m)
4441 : : {
4442 : 1665289 : ++i0;
4443 : 3330578 : for (i = m - 1; i >= i0; i--)
4444 : : {
4445 : 3330713 : while (S[i][j] != 0)
4446 : : {
4447 : 1665424 : lambda_int factor, a, b;
4448 : :
4449 : 1665424 : a = S[i-1][j];
4450 : 1665424 : b = S[i][j];
4451 : 1665424 : gcc_assert (a != HOST_WIDE_INT_MIN);
4452 : 1665424 : factor = a / b;
4453 : :
4454 : 1665424 : if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4455 : : return false;
4456 : 1665424 : std::swap (S[i], S[i-1]);
4457 : :
4458 : 1665424 : if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4459 : : return false;
4460 : 1665424 : std::swap (U[i], U[i-1]);
4461 : : }
4462 : : }
4463 : : }
4464 : : }
4465 : :
4466 : : return true;
4467 : : }
4468 : :
4469 : : /* Determines the overlapping elements due to accesses CHREC_A and
4470 : : CHREC_B, that are affine functions. This function cannot handle
4471 : : symbolic evolution functions, ie. when initial conditions are
4472 : : parameters, because it uses lambda matrices of integers. */
4473 : :
4474 : : static void
4475 : 1665359 : analyze_subscript_affine_affine (tree chrec_a,
4476 : : tree chrec_b,
4477 : : conflict_function **overlaps_a,
4478 : : conflict_function **overlaps_b,
4479 : : tree *last_conflicts)
4480 : : {
4481 : 1665359 : unsigned nb_vars_a, nb_vars_b, dim;
4482 : 1665359 : lambda_int gamma, gcd_alpha_beta;
4483 : 1665359 : lambda_matrix A, U, S;
4484 : 1665359 : struct obstack scratch_obstack;
4485 : :
4486 : 1665359 : if (eq_evolutions_p (chrec_a, chrec_b))
4487 : : {
4488 : : /* The accessed index overlaps for each iteration in the
4489 : : loop. */
4490 : 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4491 : 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4492 : 0 : *last_conflicts = chrec_dont_know;
4493 : 0 : return;
4494 : : }
4495 : 1665359 : if (dump_file && (dump_flags & TDF_DETAILS))
4496 : 17960 : fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4497 : :
4498 : : /* For determining the initial intersection, we have to solve a
4499 : : Diophantine equation. This is the most time consuming part.
4500 : :
4501 : : To answer the question "Is there a dependence?" we have
4502 : : to prove that there exists a solution to the Diophantine
4503 : : equation, and that the solution is in the iteration domain,
4504 : : i.e. the solution is positive or zero, and that the solution
4505 : : happens before the upper bound loop.nb_iterations. Otherwise
4506 : : there is no dependence. This function outputs a description of
4507 : : the iterations that hold the intersections. */
4508 : :
4509 : 1665359 : nb_vars_a = nb_vars_in_chrec (chrec_a);
4510 : 1665359 : nb_vars_b = nb_vars_in_chrec (chrec_b);
4511 : :
4512 : 1665359 : gcc_obstack_init (&scratch_obstack);
4513 : :
4514 : 1665359 : dim = nb_vars_a + nb_vars_b;
4515 : 1665359 : U = lambda_matrix_new (dim, dim, &scratch_obstack);
4516 : 1665359 : A = lambda_matrix_new (dim, 1, &scratch_obstack);
4517 : 1665359 : S = lambda_matrix_new (dim, 1, &scratch_obstack);
4518 : :
4519 : 1665359 : tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4520 : 1665359 : tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4521 : 1665359 : if (init_a == chrec_dont_know
4522 : 1665347 : || init_b == chrec_dont_know)
4523 : : {
4524 : 12 : if (dump_file && (dump_flags & TDF_DETAILS))
4525 : 0 : fprintf (dump_file, "affine-affine test failed: "
4526 : : "representation issue.\n");
4527 : 12 : *overlaps_a = conflict_fn_not_known ();
4528 : 12 : *overlaps_b = conflict_fn_not_known ();
4529 : 12 : *last_conflicts = chrec_dont_know;
4530 : 12 : goto end_analyze_subs_aa;
4531 : : }
4532 : 1665347 : gamma = int_cst_value (init_b) - int_cst_value (init_a);
4533 : :
4534 : : /* Don't do all the hard work of solving the Diophantine equation
4535 : : when we already know the solution: for example,
4536 : : | {3, +, 1}_1
4537 : : | {3, +, 4}_2
4538 : : | gamma = 3 - 3 = 0.
4539 : : Then the first overlap occurs during the first iterations:
4540 : : | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4541 : : */
4542 : 1665347 : if (gamma == 0)
4543 : : {
4544 : 58 : if (nb_vars_a == 1 && nb_vars_b == 1)
4545 : : {
4546 : 58 : HOST_WIDE_INT step_a, step_b;
4547 : 58 : HOST_WIDE_INT niter, niter_a, niter_b;
4548 : 58 : affine_fn ova, ovb;
4549 : :
4550 : 58 : niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4551 : 58 : niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4552 : 58 : niter = MIN (niter_a, niter_b);
4553 : 58 : step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4554 : 58 : step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4555 : :
4556 : 58 : compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4557 : : &ova, &ovb,
4558 : : last_conflicts, 1);
4559 : 58 : *overlaps_a = conflict_fn (1, ova);
4560 : 58 : *overlaps_b = conflict_fn (1, ovb);
4561 : : }
4562 : :
4563 : 0 : else if (nb_vars_a == 2 && nb_vars_b == 1)
4564 : 0 : compute_overlap_steps_for_affine_1_2
4565 : 0 : (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4566 : :
4567 : 0 : else if (nb_vars_a == 1 && nb_vars_b == 2)
4568 : 0 : compute_overlap_steps_for_affine_1_2
4569 : 0 : (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4570 : :
4571 : : else
4572 : : {
4573 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4574 : 0 : fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4575 : 0 : *overlaps_a = conflict_fn_not_known ();
4576 : 0 : *overlaps_b = conflict_fn_not_known ();
4577 : 0 : *last_conflicts = chrec_dont_know;
4578 : : }
4579 : 58 : goto end_analyze_subs_aa;
4580 : : }
4581 : :
4582 : : /* U.A = S */
4583 : 1665289 : if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4584 : : {
4585 : 0 : *overlaps_a = conflict_fn_not_known ();
4586 : 0 : *overlaps_b = conflict_fn_not_known ();
4587 : 0 : *last_conflicts = chrec_dont_know;
4588 : 0 : goto end_analyze_subs_aa;
4589 : : }
4590 : :
4591 : 1665289 : if (S[0][0] < 0)
4592 : : {
4593 : 1654074 : S[0][0] *= -1;
4594 : 1654074 : lambda_matrix_row_negate (U, dim, 0);
4595 : : }
4596 : 1665289 : gcd_alpha_beta = S[0][0];
4597 : :
4598 : : /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4599 : : but that is quite a strange case. Instead of ICEing, answer
4600 : : don't know. */
4601 : 1665289 : if (gcd_alpha_beta == 0)
4602 : : {
4603 : 0 : *overlaps_a = conflict_fn_not_known ();
4604 : 0 : *overlaps_b = conflict_fn_not_known ();
4605 : 0 : *last_conflicts = chrec_dont_know;
4606 : 0 : goto end_analyze_subs_aa;
4607 : : }
4608 : :
4609 : : /* The classic "gcd-test". */
4610 : 1665289 : if (!int_divides_p (gcd_alpha_beta, gamma))
4611 : : {
4612 : : /* The "gcd-test" has determined that there is no integer
4613 : : solution, i.e. there is no dependence. */
4614 : 1553191 : *overlaps_a = conflict_fn_no_dependence ();
4615 : 1553191 : *overlaps_b = conflict_fn_no_dependence ();
4616 : 1553191 : *last_conflicts = integer_zero_node;
4617 : : }
4618 : :
4619 : : /* Both access functions are univariate. This includes SIV and MIV cases. */
4620 : 112098 : else if (nb_vars_a == 1 && nb_vars_b == 1)
4621 : : {
4622 : : /* Both functions should have the same evolution sign. */
4623 : 112098 : if (((A[0][0] > 0 && -A[1][0] > 0)
4624 : 7547 : || (A[0][0] < 0 && -A[1][0] < 0)))
4625 : : {
4626 : : /* The solutions are given by:
4627 : : |
4628 : : | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
4629 : : | [u21 u22] [y0]
4630 : :
4631 : : For a given integer t. Using the following variables,
4632 : :
4633 : : | i0 = u11 * gamma / gcd_alpha_beta
4634 : : | j0 = u12 * gamma / gcd_alpha_beta
4635 : : | i1 = u21
4636 : : | j1 = u22
4637 : :
4638 : : the solutions are:
4639 : :
4640 : : | x0 = i0 + i1 * t,
4641 : : | y0 = j0 + j1 * t. */
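 : :
 : : /* As an illustration (with one possible U; the factor is not unique):
 : : for chrec_a = {0, +, 2}_1 and chrec_b = {2, +, 6}_1 we solve
 : : | 2*x - 6*y = gamma = 2
 : : and with gcd_alpha_beta = 2 and U = [[1, 0], [3, 1]] we get
 : : | i0 = 1, j0 = 0, i1 = 3, j1 = 1,
 : : i.e. x0 = 1 + 3*t and y0 = t: indeed
 : : | chrec_a (1 + 3*t) = 2 + 6*t = chrec_b (t). */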
4642 : 111719 : HOST_WIDE_INT i0, j0, i1, j1;
4643 : :
4644 : 111719 : i0 = U[0][0] * gamma / gcd_alpha_beta;
4645 : 111719 : j0 = U[0][1] * gamma / gcd_alpha_beta;
4646 : 111719 : i1 = U[1][0];
4647 : 111719 : j1 = U[1][1];
4648 : :
4649 : 111719 : if ((i1 == 0 && i0 < 0)
4650 : 111719 : || (j1 == 0 && j0 < 0))
4651 : : {
4652 : : /* There is no solution.
4653 : : FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4654 : : falls in here, but for the moment we don't look at the
4655 : : upper bound of the iteration domain. */
4656 : 0 : *overlaps_a = conflict_fn_no_dependence ();
4657 : 0 : *overlaps_b = conflict_fn_no_dependence ();
4658 : 0 : *last_conflicts = integer_zero_node;
4659 : 55076 : goto end_analyze_subs_aa;
4660 : : }
4661 : :
4662 : 111719 : if (i1 > 0 && j1 > 0)
4663 : : {
4664 : 111719 : HOST_WIDE_INT niter_a
4665 : 111719 : = max_stmt_executions_int (get_chrec_loop (chrec_a));
4666 : 111719 : HOST_WIDE_INT niter_b
4667 : 111719 : = max_stmt_executions_int (get_chrec_loop (chrec_b));
4668 : 111719 : HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4669 : :
4670 : : /* (X0, Y0) is a solution of the Diophantine equation:
4671 : : "chrec_a (X0) = chrec_b (Y0)". */
4672 : 111719 : HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4673 : : CEIL (-j0, j1));
4674 : 111719 : HOST_WIDE_INT x0 = i1 * tau1 + i0;
4675 : 111719 : HOST_WIDE_INT y0 = j1 * tau1 + j0;
4676 : :
4677 : : /* (X1, Y1) is the smallest positive solution of the eq
4678 : : "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4679 : : first conflict occurs. */
4680 : 111719 : HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4681 : 111719 : HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4682 : 111719 : HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4683 : :
4684 : 111719 : if (niter > 0)
4685 : : {
4686 : : /* If the overlap occurs outside of the bounds of the
4687 : : loop, there is no dependence. */
4688 : 103862 : if (x1 >= niter_a || y1 >= niter_b)
4689 : : {
4690 : 55076 : *overlaps_a = conflict_fn_no_dependence ();
4691 : 55076 : *overlaps_b = conflict_fn_no_dependence ();
4692 : 55076 : *last_conflicts = integer_zero_node;
4693 : 55076 : goto end_analyze_subs_aa;
4694 : : }
4695 : :
4696 : : /* max stmt executions can get quite large, avoid
4697 : : overflows by using wide ints here. */
4698 : 48786 : widest_int tau2
4699 : 97572 : = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4700 : 146358 : wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4701 : 48786 : widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4702 : 48786 : if (wi::min_precision (last_conflict, SIGNED)
4703 : 48786 : <= TYPE_PRECISION (integer_type_node))
4704 : 44107 : *last_conflicts
4705 : 44107 : = build_int_cst (integer_type_node,
4706 : 44107 : last_conflict.to_shwi ());
4707 : : else
4708 : 4679 : *last_conflicts = chrec_dont_know;
4709 : 48786 : }
4710 : : else
4711 : 7857 : *last_conflicts = chrec_dont_know;
4712 : :
4713 : 56643 : *overlaps_a
4714 : 56643 : = conflict_fn (1,
4715 : 56643 : affine_fn_univar (build_int_cst (NULL_TREE, x1),
4716 : : 1,
4717 : 56643 : build_int_cst (NULL_TREE, i1)));
4718 : 56643 : *overlaps_b
4719 : 56643 : = conflict_fn (1,
4720 : 56643 : affine_fn_univar (build_int_cst (NULL_TREE, y1),
4721 : : 1,
4722 : 56643 : build_int_cst (NULL_TREE, j1)));
4723 : 56643 : }
4724 : : else
4725 : : {
4726 : : /* FIXME: For the moment, the upper bound of the
4727 : : iteration domain for i and j is not checked. */
4728 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4729 : 0 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4730 : 0 : *overlaps_a = conflict_fn_not_known ();
4731 : 0 : *overlaps_b = conflict_fn_not_known ();
4732 : 0 : *last_conflicts = chrec_dont_know;
4733 : : }
4734 : 56643 : }
4735 : : else
4736 : : {
4737 : 379 : if (dump_file && (dump_flags & TDF_DETAILS))
4738 : 19 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4739 : 379 : *overlaps_a = conflict_fn_not_known ();
4740 : 379 : *overlaps_b = conflict_fn_not_known ();
4741 : 379 : *last_conflicts = chrec_dont_know;
4742 : : }
4743 : : }
4744 : : else
4745 : : {
4746 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4747 : 0 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4748 : 0 : *overlaps_a = conflict_fn_not_known ();
4749 : 0 : *overlaps_b = conflict_fn_not_known ();
4750 : 0 : *last_conflicts = chrec_dont_know;
4751 : : }
4752 : :
4753 : 1665359 : end_analyze_subs_aa:
4754 : 1665359 : obstack_free (&scratch_obstack, NULL);
4755 : 1665359 : if (dump_file && (dump_flags & TDF_DETAILS))
4756 : : {
4757 : 17960 : fprintf (dump_file, " (overlaps_a = ");
4758 : 17960 : dump_conflict_function (dump_file, *overlaps_a);
4759 : 17960 : fprintf (dump_file, ")\n (overlaps_b = ");
4760 : 17960 : dump_conflict_function (dump_file, *overlaps_b);
4761 : 17960 : fprintf (dump_file, "))\n");
4762 : : }
4763 : : }
4764 : :
4765 : : /* Returns true when analyze_subscript_affine_affine can be used for
4766 : : determining the dependence relation between chrec_a and chrec_b,
4767 : : which contain symbols. This function modifies chrec_a and chrec_b
4768 : : such that the analysis result is the same, and such that they don't
4769 : : contain symbols, so that they can safely be passed to the analyzer.
4770 : :
4771 : : Example: The analysis of the following tuples of evolutions produce
4772 : : the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4773 : : vs. {0, +, 1}_1
4774 : :
4775 : : {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4776 : : {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4777 : : */
4778 : :
4779 : : static bool
4780 : 51805 : can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4781 : : {
4782 : 51805 : tree diff, type, left_a, left_b, right_b;
4783 : :
4784 : 51805 : if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4785 : 51805 : || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4786 : : /* FIXME: For the moment not handled. Might be refined later. */
4787 : 15167 : return false;
4788 : :
4789 : 36638 : type = chrec_type (*chrec_a);
4790 : 36638 : left_a = CHREC_LEFT (*chrec_a);
4791 : 36638 : left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4792 : 36638 : diff = chrec_fold_minus (type, left_a, left_b);
4793 : :
4794 : 73276 : if (!evolution_function_is_constant_p (diff))
4795 : 4924 : return false;
4796 : :
4797 : 31714 : if (dump_file && (dump_flags & TDF_DETAILS))
4798 : 105 : fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4799 : :
4800 : 31714 : *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4801 : 31714 : diff, CHREC_RIGHT (*chrec_a));
4802 : 31714 : right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4803 : 31714 : *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4804 : : build_int_cst (type, 0),
4805 : : right_b);
4806 : 31714 : return true;
4807 : : }
4808 : :
4809 : : /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4810 : : *OVERLAPS_B are initialized to the functions that describe the
4811 : : relation between the elements accessed twice by CHREC_A and
4812 : : CHREC_B. For k >= 0, the following property is verified:
4813 : :
4814 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4815 : :
4816 : : static void
4817 : 1688068 : analyze_siv_subscript (tree chrec_a,
4818 : : tree chrec_b,
4819 : : conflict_function **overlaps_a,
4820 : : conflict_function **overlaps_b,
4821 : : tree *last_conflicts,
4822 : : int loop_nest_num)
4823 : : {
4824 : 1688068 : dependence_stats.num_siv++;
4825 : :
4826 : 1688068 : if (dump_file && (dump_flags & TDF_DETAILS))
4827 : 21005 : fprintf (dump_file, "(analyze_siv_subscript \n");
4828 : :
4829 : 1688068 : if (evolution_function_is_constant_p (chrec_a)
4830 : 1688068 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4831 : 1475 : analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4832 : : overlaps_a, overlaps_b, last_conflicts);
4833 : :
4834 : 1686593 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4835 : 3373186 : && evolution_function_is_constant_p (chrec_b))
4836 : 1211 : analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4837 : : overlaps_b, overlaps_a, last_conflicts);
4838 : :
4839 : 1685382 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4840 : 1685382 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4841 : : {
4842 : 1685382 : if (!chrec_contains_symbols (chrec_a)
4843 : 1685382 : && !chrec_contains_symbols (chrec_b))
4844 : : {
4845 : 1633577 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4846 : : overlaps_a, overlaps_b,
4847 : : last_conflicts);
4848 : :
4849 : 1633577 : if (CF_NOT_KNOWN_P (*overlaps_a)
4850 : 1633206 : || CF_NOT_KNOWN_P (*overlaps_b))
4851 : 371 : dependence_stats.num_siv_unimplemented++;
4852 : 1633206 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4853 : 55695 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4854 : 1577511 : dependence_stats.num_siv_independent++;
4855 : : else
4856 : 55695 : dependence_stats.num_siv_dependent++;
4857 : : }
4858 : 51805 : else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4859 : : &chrec_b))
4860 : : {
4861 : 31714 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4862 : : overlaps_a, overlaps_b,
4863 : : last_conflicts);
4864 : :
4865 : 31714 : if (CF_NOT_KNOWN_P (*overlaps_a)
4866 : 31698 : || CF_NOT_KNOWN_P (*overlaps_b))
4867 : 16 : dependence_stats.num_siv_unimplemented++;
4868 : 31698 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4869 : 956 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4870 : 30742 : dependence_stats.num_siv_independent++;
4871 : : else
4872 : 956 : dependence_stats.num_siv_dependent++;
4873 : : }
4874 : : else
4875 : 20091 : goto siv_subscript_dontknow;
4876 : : }
4877 : :
4878 : : else
4879 : : {
4880 : 20091 : siv_subscript_dontknow:;
4881 : 20091 : if (dump_file && (dump_flags & TDF_DETAILS))
4882 : 2887 : fprintf (dump_file, " siv test failed: unimplemented");
4883 : 20091 : *overlaps_a = conflict_fn_not_known ();
4884 : 20091 : *overlaps_b = conflict_fn_not_known ();
4885 : 20091 : *last_conflicts = chrec_dont_know;
4886 : 20091 : dependence_stats.num_siv_unimplemented++;
4887 : : }
4888 : :
4889 : 1688068 : if (dump_file && (dump_flags & TDF_DETAILS))
4890 : 21005 : fprintf (dump_file, ")\n");
4891 : 1688068 : }
4892 : :
4893 : : /* Returns false if we can prove that the greatest common divisor of the steps
4894 : : of CHREC does not divide CST, false otherwise. */
4895 : :
4896 : : static bool
4897 : 20660 : gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4898 : : {
4899 : 20660 : HOST_WIDE_INT cd = 0, val;
4900 : 20660 : tree step;
4901 : :
4902 : 20660 : if (!tree_fits_shwi_p (cst))
4903 : : return true;
4904 : 20660 : val = tree_to_shwi (cst);
4905 : :
4906 : 61834 : while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4907 : : {
4908 : 41318 : step = CHREC_RIGHT (chrec);
4909 : 41318 : if (!tree_fits_shwi_p (step))
4910 : : return true;
4911 : 41174 : cd = gcd (cd, tree_to_shwi (step));
4912 : 41174 : chrec = CHREC_LEFT (chrec);
4913 : : }
4914 : :
4915 : 20516 : return val % cd == 0;
4916 : : }
4917 : :
4918 : : /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4919 : : LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4920 : : functions that describe the relation between the elements accessed
4921 : : twice by CHREC_A and CHREC_B. For k >= 0, the following property
4922 : : is verified:
4923 : :
4924 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4925 : :
4926 : : static void
4927 : 26412 : analyze_miv_subscript (tree chrec_a,
4928 : : tree chrec_b,
4929 : : conflict_function **overlaps_a,
4930 : : conflict_function **overlaps_b,
4931 : : tree *last_conflicts,
4932 : : class loop *loop_nest)
4933 : : {
4934 : 26412 : tree type, difference;
4935 : :
4936 : 26412 : dependence_stats.num_miv++;
4937 : 26412 : if (dump_file && (dump_flags & TDF_DETAILS))
4938 : 27 : fprintf (dump_file, "(analyze_miv_subscript \n");
4939 : :
4940 : 26412 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4941 : 26412 : chrec_a = chrec_convert (type, chrec_a, NULL);
4942 : 26412 : chrec_b = chrec_convert (type, chrec_b, NULL);
4943 : 26412 : difference = chrec_fold_minus (type, chrec_a, chrec_b);
4944 : :
4945 : 26412 : if (eq_evolutions_p (chrec_a, chrec_b))
4946 : : {
4947 : : /* Access functions are the same: all the elements are accessed
4948 : : in the same order. */
4949 : 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4950 : 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4951 : 0 : *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4952 : 0 : dependence_stats.num_miv_dependent++;
4953 : : }
4954 : :
4955 : 26412 : else if (evolution_function_is_constant_p (difference)
4956 : 20690 : && evolution_function_is_affine_multivariate_p (chrec_a,
4957 : : loop_nest->num)
4958 : 47072 : && !gcd_of_steps_may_divide_p (chrec_a, difference))
4959 : : {
4960 : : /* testsuite/.../ssa-chrec-33.c
4961 : : {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4962 : :
4963 : : The difference is 1, and all the evolution steps are multiples
4964 : : of 2, consequently there are no overlapping elements. */
4965 : 19670 : *overlaps_a = conflict_fn_no_dependence ();
4966 : 19670 : *overlaps_b = conflict_fn_no_dependence ();
4967 : 19670 : *last_conflicts = integer_zero_node;
4968 : 19670 : dependence_stats.num_miv_independent++;
4969 : : }
4970 : :
4971 : 6742 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4972 : 104 : && !chrec_contains_symbols (chrec_a, loop_nest)
4973 : 95 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4974 : 6810 : && !chrec_contains_symbols (chrec_b, loop_nest))
4975 : : {
4976 : : /* testsuite/.../ssa-chrec-35.c
4977 : : {0, +, 1}_2 vs. {0, +, 1}_3
4978 : : the overlapping elements are respectively located at iterations:
4979 : : {0, +, 1}_x and {0, +, 1}_x,
4980 : : in other words, we have the equality:
4981 : : {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4982 : :
4983 : : Other examples:
4984 : : {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4985 : : {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4986 : :
4987 : : {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4988 : : {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4989 : : */
4990 : 68 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4991 : : overlaps_a, overlaps_b, last_conflicts);
4992 : :
4993 : 68 : if (CF_NOT_KNOWN_P (*overlaps_a)
4994 : 64 : || CF_NOT_KNOWN_P (*overlaps_b))
4995 : 4 : dependence_stats.num_miv_unimplemented++;
4996 : 64 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4997 : 50 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4998 : 14 : dependence_stats.num_miv_independent++;
4999 : : else
5000 : 50 : dependence_stats.num_miv_dependent++;
5001 : : }
5002 : :
5003 : : else
5004 : : {
5005 : : /* When the analysis is too difficult, answer "don't know". */
5006 : 6674 : if (dump_file && (dump_flags & TDF_DETAILS))
5007 : 23 : fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
5008 : :
5009 : 6674 : *overlaps_a = conflict_fn_not_known ();
5010 : 6674 : *overlaps_b = conflict_fn_not_known ();
5011 : 6674 : *last_conflicts = chrec_dont_know;
5012 : 6674 : dependence_stats.num_miv_unimplemented++;
5013 : : }
5014 : :
5015 : 26412 : if (dump_file && (dump_flags & TDF_DETAILS))
5016 : 27 : fprintf (dump_file, ")\n");
5017 : 26412 : }
5018 : :
5019 : : /* Determines the iterations for which CHREC_A is equal to CHREC_B
5020 : : with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
5021 : : OVERLAP_ITERATIONS_B are initialized with two functions that
5022 : : describe the iterations that contain conflicting elements.
5023 : :
5024 : : Remark: For an integer k >= 0, the following equality is true:
5025 : :
5026 : : CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
5027 : : */
5028 : :
5029 : : static void
5030 : 3374210 : analyze_overlapping_iterations (tree chrec_a,
5031 : : tree chrec_b,
5032 : : conflict_function **overlap_iterations_a,
5033 : : conflict_function **overlap_iterations_b,
5034 : : tree *last_conflicts, class loop *loop_nest)
5035 : : {
5036 : 3374210 : unsigned int lnn = loop_nest->num;
5037 : :
5038 : 3374210 : dependence_stats.num_subscript_tests++;
5039 : :
5040 : 3374210 : if (dump_file && (dump_flags & TDF_DETAILS))
5041 : : {
5042 : 54194 : fprintf (dump_file, "(analyze_overlapping_iterations \n");
5043 : 54194 : fprintf (dump_file, " (chrec_a = ");
5044 : 54194 : print_generic_expr (dump_file, chrec_a);
5045 : 54194 : fprintf (dump_file, ")\n (chrec_b = ");
5046 : 54194 : print_generic_expr (dump_file, chrec_b);
5047 : 54194 : fprintf (dump_file, ")\n");
5048 : : }
5049 : :
5050 : 3374210 : if (chrec_a == NULL_TREE
5051 : 3374210 : || chrec_b == NULL_TREE
5052 : 3374210 : || chrec_contains_undetermined (chrec_a)
5053 : 6748420 : || chrec_contains_undetermined (chrec_b))
5054 : : {
5055 : 0 : dependence_stats.num_subscript_undetermined++;
5056 : :
5057 : 0 : *overlap_iterations_a = conflict_fn_not_known ();
5058 : 0 : *overlap_iterations_b = conflict_fn_not_known ();
5059 : : }
5060 : :
5061 : : /* If they are the same chrec, and are affine, they overlap
5062 : : on every iteration. */
5063 : 3374210 : else if (eq_evolutions_p (chrec_a, chrec_b)
5064 : 3374210 : && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5065 : 478478 : || operand_equal_p (chrec_a, chrec_b, 0)))
5066 : : {
5067 : 1170050 : dependence_stats.num_same_subscript_function++;
5068 : 1170050 : *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
5069 : 1170050 : *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
5070 : 1170050 : *last_conflicts = chrec_dont_know;
5071 : : }
5072 : :
5073 : : /* If they aren't the same, and aren't affine, we can't do anything
5074 : : yet. */
5075 : 2204160 : else if ((chrec_contains_symbols (chrec_a)
5076 : 2144693 : || chrec_contains_symbols (chrec_b))
5077 : 2205021 : && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5078 : 58136 : || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5079 : : {
5080 : 2474 : dependence_stats.num_subscript_undetermined++;
5081 : 2474 : *overlap_iterations_a = conflict_fn_not_known ();
5082 : 2474 : *overlap_iterations_b = conflict_fn_not_known ();
5083 : : }
5084 : :
5085 : 2201686 : else if (ziv_subscript_p (chrec_a, chrec_b))
5086 : 487206 : analyze_ziv_subscript (chrec_a, chrec_b,
5087 : : overlap_iterations_a, overlap_iterations_b,
5088 : : last_conflicts);
5089 : :
5090 : 1714480 : else if (siv_subscript_p (chrec_a, chrec_b))
5091 : 1688068 : analyze_siv_subscript (chrec_a, chrec_b,
5092 : : overlap_iterations_a, overlap_iterations_b,
5093 : : last_conflicts, lnn);
5094 : :
5095 : : else
5096 : 26412 : analyze_miv_subscript (chrec_a, chrec_b,
5097 : : overlap_iterations_a, overlap_iterations_b,
5098 : : last_conflicts, loop_nest);
5099 : :
5100 : 3374210 : if (dump_file && (dump_flags & TDF_DETAILS))
5101 : : {
5102 : 54194 : fprintf (dump_file, " (overlap_iterations_a = ");
5103 : 54194 : dump_conflict_function (dump_file, *overlap_iterations_a);
5104 : 54194 : fprintf (dump_file, ")\n (overlap_iterations_b = ");
5105 : 54194 : dump_conflict_function (dump_file, *overlap_iterations_b);
5106 : 54194 : fprintf (dump_file, "))\n");
5107 : : }
5108 : 3374210 : }
5109 : :
5110 : : /* Helper function for uniquely inserting distance vectors. */
5111 : :
5112 : : static void
5113 : 1068973 : save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5114 : : {
5115 : 1598727 : for (lambda_vector v : DDR_DIST_VECTS (ddr))
5116 : 531129 : if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5117 : : return;
5118 : :
5119 : 1068708 : DDR_DIST_VECTS (ddr).safe_push (dist_v);
5120 : : }
5121 : :
5122 : : /* Helper function for uniquely inserting direction vectors. */
5123 : :
5124 : : static void
5125 : 1068708 : save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5126 : : {
5127 : 1597667 : for (lambda_vector v : DDR_DIR_VECTS (ddr))
5128 : 529539 : if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5129 : : return;
5130 : :
5131 : 1068708 : DDR_DIR_VECTS (ddr).safe_push (dir_v);
5132 : : }
5133 : :
5134 : : /* Add a distance of 1 on all the loops outer to the loop at INDEX. If we
5135 : : haven't yet determined a distance for this outer loop, push a new
5136 : : distance vector composed of the previous distance, and a distance
5137 : : of 1 for this outer loop. Example:
5138 : :
5139 : : | loop_1
5140 : : | loop_2
5141 : : | A[10]
5142 : : | endloop_2
5143 : : | endloop_1
5144 : :
5145 : : Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5146 : : save (0, 1), then we have to save (1, 0). */
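 : :
 : : /* The (1, 0) vector reflects that A[10], accessed at iteration (i, j),
 : : is accessed again at (i + 1, j') for any j', i.e. the dependence is
 : : also carried by loop_1. */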
5147 : :
5148 : : static void
5149 : 16625 : add_outer_distances (struct data_dependence_relation *ddr,
5150 : : lambda_vector dist_v, int index)
5151 : : {
5152 : : /* For each outer loop for which no distance has been determined
5153 : : yet, the accesses are in dependence of distance 1 in the loop. */
5154 : 19808 : while (--index >= 0)
5155 : : {
5156 : 6366 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5157 : 3183 : lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5158 : 3183 : save_v[index] = 1;
5159 : 3183 : save_dist_v (ddr, save_v);
5160 : : }
5161 : 16625 : }
5162 : :
5163 : : /* Return false when we fail to represent the data dependence as a
5164 : : distance vector. A_INDEX is the index of the first reference
5165 : : (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5166 : : second reference. INIT_B is set to true when a component has been
5167 : : added to the distance vector DIST_V. INDEX_CARRY is then set to
5168 : : the index in DIST_V that carries the dependence. */
5169 : :
5170 : : static bool
5171 : 57988 : build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5172 : : unsigned int a_index, unsigned int b_index,
5173 : : lambda_vector dist_v, bool *init_b,
5174 : : int *index_carry)
5175 : : {
5176 : 57988 : unsigned i;
5177 : 115976 : lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5178 : 57988 : class loop *loop = DDR_LOOP_NEST (ddr)[0];
5179 : :
5180 : 131190 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5181 : : {
5182 : 75118 : tree access_fn_a, access_fn_b;
5183 : 75118 : struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5184 : :
5185 : 75118 : if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5186 : : {
5187 : 310 : non_affine_dependence_relation (ddr);
5188 : 310 : return false;
5189 : : }
5190 : :
5191 : 74808 : access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5192 : 74808 : access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5193 : :
5194 : 74808 : if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5195 : 56847 : && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5196 : : {
5197 : 56254 : HOST_WIDE_INT dist;
5198 : 56254 : int index;
5199 : 56254 : int var_a = CHREC_VARIABLE (access_fn_a);
5200 : 56254 : int var_b = CHREC_VARIABLE (access_fn_b);
5201 : :
5202 : 56254 : if (var_a != var_b
5203 : 56254 : || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5204 : : {
5205 : 34 : non_affine_dependence_relation (ddr);
5206 : 34 : return false;
5207 : : }
5208 : :
5209 : : /* When data references are collected in a loop while data
5210 : : dependences are analyzed in a loop nest nested in that loop,
5211 : : there can be more access functions than loops. Skip access
5212 : : functions of loops not in the loop nest.
5213 : :
5214 : : See PR89725 for more information. */
5215 : 56220 : if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5216 : 2 : continue;
5217 : :
5218 : 56218 : dist = int_cst_value (SUB_DISTANCE (subscript));
5219 : 56218 : index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5220 : 56218 : *index_carry = MIN (index, *index_carry);
5221 : :
5222 : : /* This is the subscript coupling test. If we have already
5223 : : recorded a distance for this loop (a distance coming from
5224 : : another subscript), it should be the same. For example,
5225 : : in the following code, there is no dependence:
5226 : :
5227 : : | loop i = 0, N, 1
5228 : : | T[i+1][i] = ...
5229 : : | ... = T[i][i]
5230 : : | endloop
5231 : : */
5232 : 56218 : if (init_v[index] != 0 && dist_v[index] != dist)
5233 : : {
5234 : 0 : finalize_ddr_dependent (ddr, chrec_known);
5235 : 0 : return false;
5236 : : }
5237 : :
5238 : 56218 : dist_v[index] = dist;
5239 : 56218 : init_v[index] = 1;
5240 : 56218 : *init_b = true;
5241 : 56218 : }
5242 : 18554 : else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5243 : : {
5244 : : /* This can be for example an affine vs. constant dependence
5245 : : (T[i] vs. T[3]) that is not an affine dependence and is
5246 : : not representable as a distance vector. */
5247 : 1572 : non_affine_dependence_relation (ddr);
5248 : 1572 : return false;
5249 : : }
5250 : : }
5251 : :
5252 : : return true;
5253 : : }
5254 : :
5255 : : /* Return true when the DDR contains only access functions that are
5256 : : invariant with respect to loop number LNUM. */
5257 : :
5258 : : static bool
5259 : 844068 : invariant_access_functions (const struct data_dependence_relation *ddr,
5260 : : int lnum)
5261 : : {
5262 : 2847299 : for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5263 : 988226 : if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5264 : 988226 : || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5265 : 673131 : return false;
5266 : :
5267 : : return true;
5268 : : }
5269 : :
5270 : : /* Helper function for the case where DDR_A and DDR_B are the same
5271 : : multivariate access function with a constant step. For an example
5272 : : see pr34635-1.c. */
5273 : :
5274 : : static void
5275 : 4357 : add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5276 : : {
5277 : 4357 : int x_1, x_2;
5278 : 4357 : tree c_1 = CHREC_LEFT (c_2);
5279 : 4357 : tree c_0 = CHREC_LEFT (c_1);
5280 : 4357 : lambda_vector dist_v;
5281 : 4357 : HOST_WIDE_INT v1, v2, cd;
5282 : :
5283 : : /* Polynomials with more than 2 variables are not handled yet. When
5284 : : the evolution steps are parameters, it is not possible to
5285 : : represent the dependence using classical distance vectors. */
5286 : 4357 : if (TREE_CODE (c_0) != INTEGER_CST
5287 : 2989 : || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5288 : 6733 : || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5289 : : {
5290 : 1989 : DDR_AFFINE_P (ddr) = false;
5291 : 1989 : return;
5292 : : }
5293 : :
5294 : 2368 : x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5295 : 2368 : x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5296 : :
5297 : : /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
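 : : /* That is: the access function f (i_1, i_2) = 2 * i_1 + 3 * i_2
 : : reaches the same element again when i_1 advances by 3 and i_2 by -2,
 : : since 3 * 2 + (-2) * 3 = 0. */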
5298 : 4736 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5299 : 2368 : v1 = int_cst_value (CHREC_RIGHT (c_1));
5300 : 2368 : v2 = int_cst_value (CHREC_RIGHT (c_2));
5301 : 2368 : cd = gcd (v1, v2);
5302 : 2368 : v1 /= cd;
5303 : 2368 : v2 /= cd;
5304 : :
5305 : 2368 : if (v2 < 0)
5306 : : {
5307 : 9 : v2 = -v2;
5308 : 9 : v1 = -v1;
5309 : : }
5310 : :
5311 : 2368 : dist_v[x_1] = v2;
5312 : 2368 : dist_v[x_2] = -v1;
5313 : 2368 : save_dist_v (ddr, dist_v);
5314 : :
5315 : 2368 : add_outer_distances (ddr, dist_v, x_1);
5316 : : }
5317 : :
5318 : : /* Helper function for the case where DDR_A and DDR_B are the same
5319 : : access functions. */
5320 : :
5321 : : static void
5322 : 18707 : add_other_self_distances (struct data_dependence_relation *ddr)
5323 : : {
5324 : 18707 : lambda_vector dist_v;
5325 : 18707 : unsigned i;
5326 : 18707 : int index_carry = DDR_NB_LOOPS (ddr);
5327 : 18707 : subscript *sub;
5328 : 18707 : class loop *loop = DDR_LOOP_NEST (ddr)[0];
5329 : :
5330 : 39972 : FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5331 : : {
5332 : 26069 : tree access_fun = SUB_ACCESS_FN (sub, 0);
5333 : :
5334 : 26069 : if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5335 : : {
5336 : 18832 : if (!evolution_function_is_univariate_p (access_fun, loop->num))
5337 : : {
5338 : 4804 : if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5339 : : {
5340 : 447 : DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5341 : 447 : return;
5342 : : }
5343 : :
5344 : 4357 : access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5345 : :
5346 : 4357 : if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5347 : 4357 : add_multivariate_self_dist (ddr, access_fun);
5348 : : else
5349 : : /* The evolution step is not constant: it varies in
5350 : : the outer loop, so this cannot be represented by a
5351 : : distance vector. For example in pr34635.c the
5352 : : evolution is {0, +, {0, +, 4}_1}_2. */
5353 : 0 : DDR_AFFINE_P (ddr) = false;
5354 : :
5355 : 4357 : return;
5356 : : }
5357 : :
5358 : : /* When data references are collected in a loop while data
5359 : : dependences are analyzed in a loop nest nested in that loop,
5360 : : there can be more access functions than loops. Skip access
5361 : : functions of loops not in the loop nest.
5362 : :
5363 : : See PR89725 for more information. */
5364 : 14028 : if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5365 : : loop))
5366 : 0 : continue;
5367 : :
5368 : 21424 : index_carry = MIN (index_carry,
5369 : : index_in_loop_nest (CHREC_VARIABLE (access_fun),
5370 : : DDR_LOOP_NEST (ddr)));
5371 : : }
5372 : : }
5373 : :
5374 : 27806 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5375 : 13903 : add_outer_distances (ddr, dist_v, index_carry);
5376 : : }
5377 : :
5378 : : static void
5379 : 170937 : insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5380 : : {
5381 : 341874 : lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5382 : :
5383 : 170937 : dist_v[0] = 1;
5384 : 170937 : save_dist_v (ddr, dist_v);
5385 : 170937 : }
5386 : :
5387 : : /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5388 : : is the case for example when access functions are the same and
5389 : : equal to a constant, as in:
5390 : :
5391 : : | loop_1
5392 : : | A[3] = ...
5393 : : | ... = A[3]
5394 : : | endloop_1
5395 : :
5396 : : in which case the distance vectors are (0) and (1). */
5397 : :
5398 : : static void
5399 : 170937 : add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5400 : : {
5401 : 170937 : unsigned i, j;
5402 : :
5403 : 170937 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5404 : : {
5405 : 170937 : subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5406 : 170937 : conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5407 : 170937 : conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5408 : :
5409 : 170937 : for (j = 0; j < ca->n; j++)
5410 : 170937 : if (affine_function_zero_p (ca->fns[j]))
5411 : : {
5412 : 170937 : insert_innermost_unit_dist_vector (ddr);
5413 : 170937 : return;
5414 : : }
5415 : :
5416 : 0 : for (j = 0; j < cb->n; j++)
5417 : 0 : if (affine_function_zero_p (cb->fns[j]))
5418 : : {
5419 : 0 : insert_innermost_unit_dist_vector (ddr);
5420 : 0 : return;
5421 : : }
5422 : : }
5423 : : }
5424 : :
5425 : : /* Return true when the DDR contains two data references that have the
5426 : : same access functions. */
5427 : :
5428 : : static inline bool
5429 : 894405 : same_access_functions (const struct data_dependence_relation *ddr)
5430 : : {
5431 : 3745143 : for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5432 : 1112265 : if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5433 : 1112265 : SUB_ACCESS_FN (sub, 1)))
5434 : : return false;
5435 : :
5436 : : return true;
5437 : : }
5438 : :
5439 : : /* Compute the classic per loop distance vector. DDR is the data
5440 : : dependence relation to build a vector from. Return false when fail
5441 : : to represent the data dependence as a distance vector. */
5442 : :
5443 : : static bool
5444 : 3038654 : build_classic_dist_vector (struct data_dependence_relation *ddr,
5445 : : class loop *loop_nest)
5446 : : {
5447 : 3038654 : bool init_b = false;
5448 : 3038654 : int index_carry = DDR_NB_LOOPS (ddr);
5449 : 3038654 : lambda_vector dist_v;
5450 : :
5451 : 3038654 : if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5452 : : return false;
5453 : :
5454 : 894405 : if (same_access_functions (ddr))
5455 : : {
5456 : : /* Save the 0 vector. */
5457 : 1688136 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5458 : 844068 : save_dist_v (ddr, dist_v);
5459 : :
5460 : 844068 : if (invariant_access_functions (ddr, loop_nest->num))
5461 : 170937 : add_distance_for_zero_overlaps (ddr);
5462 : :
5463 : 844068 : if (DDR_NB_LOOPS (ddr) > 1)
5464 : 18707 : add_other_self_distances (ddr);
5465 : :
5466 : 844068 : return true;
5467 : : }
5468 : :
5469 : 100674 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5470 : 50337 : if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5471 : : return false;
5472 : :
5473 : : /* Save the distance vector if we initialized one. */
5474 : 48421 : if (init_b)
5475 : : {
5476 : : /* Verify a basic constraint: classic distance vectors should
5477 : : always be lexicographically positive.
5478 : :
5479 : : Data references are collected in the order of execution of
5480 : : the program, thus for the following loop
5481 : :
5482 : : | for (i = 1; i < 100; i++)
5483 : : | for (j = 1; j < 100; j++)
5484 : : | {
5485 : : | t = T[j+1][i-1]; // A
5486 : : | T[j][i] = t + 2; // B
5487 : : | }
5488 : :
5489 : : references are collected following the direction of the wind:
5490 : : A then B. The data dependence tests are performed also
5491 : : following this order, such that we're looking at the distance
5492 : : separating the elements accessed by A from the elements later
5493 : : accessed by B. But in this example, the distance returned by
5494 : : test_dep (A, B) is lexicographically negative (-1, 1), that
5495 : : means that the access A occurs later than B with respect to
5496 : : the outer loop, ie. we're actually looking upwind. In this
5497 : : case we solve test_dep (B, A) looking downwind to the
5498 : : lexicographically positive solution, that returns the
5499 : : distance vector (1, -1). */
5500 : 96842 : if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5501 : : {
5502 : 7550 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5503 : 7550 : if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5504 : : return false;
5505 : 7546 : compute_subscript_distance (ddr);
5506 : 7546 : if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5507 : : &index_carry))
5508 : : return false;
5509 : 7546 : save_dist_v (ddr, save_v);
5510 : 7546 : DDR_REVERSED_P (ddr) = true;
5511 : :
5512 : : /* In this case there is a dependence forward for all the
5513 : : outer loops:
5514 : :
5515 : : | for (k = 1; k < 100; k++)
5516 : : | for (i = 1; i < 100; i++)
5517 : : | for (j = 1; j < 100; j++)
5518 : : | {
5519 : : | t = T[j+1][i-1]; // A
5520 : : | T[j][i] = t + 2; // B
5521 : : | }
5522 : :
5523 : : the vectors are:
5524 : : (0, 1, -1)
5525 : : (1, 1, -1)
5526 : : (1, -1, 1)
5527 : : */
5528 : 7546 : if (DDR_NB_LOOPS (ddr) > 1)
5529 : : {
5530 : 72 : add_outer_distances (ddr, save_v, index_carry);
5531 : 72 : add_outer_distances (ddr, dist_v, index_carry);
5532 : : }
5533 : : }
5534 : : else
5535 : : {
5536 : 40871 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5537 : 40871 : lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5538 : :
5539 : 40871 : if (DDR_NB_LOOPS (ddr) > 1)
5540 : : {
5541 : 105 : lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5542 : :
5543 : 105 : if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5544 : : return false;
5545 : 105 : compute_subscript_distance (ddr);
5546 : 105 : if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5547 : : &index_carry))
5548 : : return false;
5549 : :
5550 : 105 : save_dist_v (ddr, save_v);
5551 : 105 : add_outer_distances (ddr, dist_v, index_carry);
5552 : 105 : add_outer_distances (ddr, opposite_v, index_carry);
5553 : : }
5554 : : else
5555 : 40766 : save_dist_v (ddr, save_v);
5556 : : }
5557 : : }
5558 : : else
5559 : : {
5560 : : /* There is a distance of 1 on all the outer loops: Example:
5561 : : there is a dependence of distance 1 on loop_1 for the array A.
5562 : :
5563 : : | loop_1
5564 : : | A[5] = ...
5565 : : | endloop
5566 : : */
5567 : 0 : add_outer_distances (ddr, dist_v,
5568 : : lambda_vector_first_nz (dist_v,
5569 : 0 : DDR_NB_LOOPS (ddr), 0));
5570 : : }
5571 : :
5572 : : return true;
5573 : : }
5574 : :
5575 : : /* Return the direction for a given distance.
5576 : : FIXME: Computing dir this way is suboptimal, since dir can catch
5577 : : cases that dist is unable to represent. */
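 : :
 : : /* For example, dist 1 maps to dir_positive, 0 to dir_equal and -1 to
 : : dir_negative; build_classic_dir_vector below applies this
 : : componentwise, so the distance vector (1, 0, -1) becomes
 : : (dir_positive, dir_equal, dir_negative). */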
5578 : :
5579 : : static inline enum data_dependence_direction
5580 : 1093238 : dir_from_dist (int dist)
5581 : : {
5582 : 1093238 : if (dist > 0)
5583 : : return dir_positive;
5584 : 868570 : else if (dist < 0)
5585 : : return dir_negative;
5586 : : else
5587 : 866165 : return dir_equal;
5588 : : }
5589 : :
5590 : : /* Compute the classic per loop direction vector. DDR is the data
5591 : : dependence relation to build a vector from. */
5592 : :
5593 : : static void
5594 : 892485 : build_classic_dir_vector (struct data_dependence_relation *ddr)
5595 : : {
5596 : 892485 : unsigned i, j;
5597 : 892485 : lambda_vector dist_v;
5598 : :
5599 : 1961193 : FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5600 : : {
5601 : 2137416 : lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5602 : :
5603 : 3230654 : for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5604 : 1961808 : dir_v[j] = dir_from_dist (dist_v[j]);
5605 : :
5606 : 1068708 : save_dir_v (ddr, dir_v);
5607 : : }
5608 : 892485 : }
5609 : :
5610 : : /* Helper function. Returns true when there is a dependence between the
5611 : : data references. A_INDEX is the index of the first reference (0 for
5612 : : DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
5613 : :
5614 : : static bool
5615 : 3046309 : subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5616 : : unsigned int a_index, unsigned int b_index,
5617 : : class loop *loop_nest)
5618 : : {
5619 : 3046309 : unsigned int i;
5620 : 3046309 : tree last_conflicts;
5621 : 3046309 : struct subscript *subscript;
5622 : 3046309 : tree res = NULL_TREE;
5623 : :
5624 : 4304807 : for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5625 : : {
5626 : 3374210 : conflict_function *overlaps_a, *overlaps_b;
5627 : :
5628 : 3374210 : analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5629 : : SUB_ACCESS_FN (subscript, b_index),
5630 : : &overlaps_a, &overlaps_b,
5631 : : &last_conflicts, loop_nest);
5632 : :
5633 : 3374210 : if (SUB_CONFLICTS_IN_A (subscript))
5634 : 3374210 : free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5635 : 3374210 : if (SUB_CONFLICTS_IN_B (subscript))
5636 : 3374210 : free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5637 : :
5638 : 3374210 : SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5639 : 3374210 : SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5640 : 3374210 : SUB_LAST_CONFLICT (subscript) = last_conflicts;
5641 : :
5642 : : /* If there is any undetermined conflict function we have to
5643 : : give a conservative answer in case we cannot prove that
5644 : : no dependence exists when analyzing another subscript. */
5645 : 3374210 : if (CF_NOT_KNOWN_P (overlaps_a)
5646 : 3344576 : || CF_NOT_KNOWN_P (overlaps_b))
5647 : : {
5648 : 29634 : res = chrec_dont_know;
5649 : 29634 : continue;
5650 : : }
5651 : :
5652 : : /* When there is a subscript with no dependence we can stop. */
5653 : 3344576 : else if (CF_NO_DEPENDENCE_P (overlaps_a)
5654 : 1228864 : || CF_NO_DEPENDENCE_P (overlaps_b))
5655 : : {
5656 : 2115712 : res = chrec_known;
5657 : 2115712 : break;
5658 : : }
5659 : : }
5660 : :
5661 : 3046309 : if (res == NULL_TREE)
5662 : : return true;
5663 : :
5664 : 2144253 : if (res == chrec_known)
5665 : 2115712 : dependence_stats.num_dependence_independent++;
5666 : : else
5667 : 28541 : dependence_stats.num_dependence_undetermined++;
5668 : 2144253 : finalize_ddr_dependent (ddr, res);
5669 : 2144253 : return false;
5670 : : }
5671 : :
5672 : : /* Computes the conflicting iterations in LOOP_NEST, and initializes DDR. */
5673 : :
5674 : : static void
5675 : 3038654 : subscript_dependence_tester (struct data_dependence_relation *ddr,
5676 : : class loop *loop_nest)
5677 : : {
5678 : 3038654 : if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5679 : 894405 : dependence_stats.num_dependence_dependent++;
5680 : :
5681 : 3038654 : compute_subscript_distance (ddr);
5682 : 3038654 : if (build_classic_dist_vector (ddr, loop_nest))
5683 : : {
5684 : 892485 : if (dump_file && (dump_flags & TDF_DETAILS))
5685 : : {
5686 : 3594 : unsigned i;
5687 : :
5688 : 3594 : fprintf (dump_file, "(build_classic_dist_vector\n");
5689 : 10851 : for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5690 : : {
5691 : 3663 : fprintf (dump_file, " dist_vector = (");
5692 : 3663 : print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5693 : 7326 : DDR_NB_LOOPS (ddr));
5694 : 3663 : fprintf (dump_file, " )\n");
5695 : : }
5696 : 3594 : fprintf (dump_file, ")\n");
5697 : : }
5698 : :
5699 : 892485 : build_classic_dir_vector (ddr);
5700 : : }
5701 : 3038654 : }
5702 : :
5703 : : /* Returns true when all the access functions of A are affine or
5704 : : constant with respect to LOOP_NEST. */
5705 : :
5706 : : static bool
5707 : 6133620 : access_functions_are_affine_or_constant_p (const struct data_reference *a,
5708 : : const class loop *loop_nest)
5709 : : {
5710 : 6133620 : vec<tree> fns = DR_ACCESS_FNS (a);
5711 : 26627398 : for (tree t : fns)
5712 : 8280024 : if (!evolution_function_is_invariant_p (t, loop_nest->num)
5713 : 8280024 : && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5714 : : return false;
5715 : :
5716 : : return true;
5717 : : }
5718 : :
5719 : : /* This computes the affine dependence relation between A and B with
5720 : : respect to LOOP_NEST. CHREC_KNOWN is used for representing the
5721 : : independence between two accesses, while CHREC_DONT_KNOW is used
5722 : : for representing the unknown relation.
5723 : :
5724 : : Note that it is possible to stop the computation of the dependence
5725 : : relation the first time we detect a CHREC_KNOWN element for a given
5726 : : subscript. */
5727 : :
5728 : : void
5729 : 6506498 : compute_affine_dependence (struct data_dependence_relation *ddr,
5730 : : class loop *loop_nest)
5731 : : {
5732 : 6506498 : struct data_reference *dra = DDR_A (ddr);
5733 : 6506498 : struct data_reference *drb = DDR_B (ddr);
5734 : :
5735 : 6506498 : if (dump_file && (dump_flags & TDF_DETAILS))
5736 : : {
5737 : 126828 : fprintf (dump_file, "(compute_affine_dependence\n");
5738 : 126828 : fprintf (dump_file, " ref_a: ");
5739 : 126828 : print_generic_expr (dump_file, DR_REF (dra));
5740 : 126828 : fprintf (dump_file, ", stmt_a: ");
5741 : 126828 : print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5742 : 126828 : fprintf (dump_file, " ref_b: ");
5743 : 126828 : print_generic_expr (dump_file, DR_REF (drb));
5744 : 126828 : fprintf (dump_file, ", stmt_b: ");
5745 : 126828 : print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5746 : : }
5747 : :
5748 : : /* Analyze only when the dependence relation is not yet known. */
5749 : 6506498 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5750 : : {
5751 : 3092140 : dependence_stats.num_dependence_tests++;
5752 : :
5753 : 3092140 : if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5754 : 3092140 : && access_functions_are_affine_or_constant_p (drb, loop_nest))
5755 : 3038654 : subscript_dependence_tester (ddr, loop_nest);
5756 : :
5757 : : /* As a last case, if the dependence cannot be determined, or if
5758 : : the dependence is considered too difficult to determine, answer
5759 : : "don't know". */
5760 : : else
5761 : : {
5762 : 53486 : dependence_stats.num_dependence_undetermined++;
5763 : :
5764 : 53486 : if (dump_file && (dump_flags & TDF_DETAILS))
5765 : : {
5766 : 156 : fprintf (dump_file, "Data ref a:\n");
5767 : 156 : dump_data_reference (dump_file, dra);
5768 : 156 : fprintf (dump_file, "Data ref b:\n");
5769 : 156 : dump_data_reference (dump_file, drb);
5770 : 156 : fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5771 : : }
5772 : 53486 : finalize_ddr_dependent (ddr, chrec_dont_know);
5773 : : }
5774 : : }
5775 : :
5776 : 6506498 : if (dump_file && (dump_flags & TDF_DETAILS))
5777 : : {
5778 : 126828 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5779 : 112337 : fprintf (dump_file, ") -> no dependence\n");
5780 : 14491 : else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5781 : 10828 : fprintf (dump_file, ") -> dependence analysis failed\n");
5782 : : else
5783 : 3663 : fprintf (dump_file, ")\n");
5784 : : }
5785 : 6506498 : }
5786 : :
5787 : : /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5788 : : the data references in DATAREFS, in the LOOP_NEST. When
5789 : : COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5790 : : relations. Return true when successful, i.e. the number of data
5791 : : references is small enough to be handled. */
5792 : :
5793 : : bool
5794 : 412763 : compute_all_dependences (const vec<data_reference_p> &datarefs,
5795 : : vec<ddr_p> *dependence_relations,
5796 : : const vec<loop_p> &loop_nest,
5797 : : bool compute_self_and_rr)
5798 : : {
5799 : 412763 : struct data_dependence_relation *ddr;
5800 : 412763 : struct data_reference *a, *b;
5801 : 412763 : unsigned int i, j;
5802 : :
5803 : 412763 : if ((int) datarefs.length ()
5804 : 412763 : > param_loop_max_datarefs_for_datadeps)
5805 : : {
5806 : 0 : struct data_dependence_relation *ddr;
5807 : :
5808 : : /* Insert a single relation into dependence_relations:
5809 : : chrec_dont_know. */
5810 : 0 : ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5811 : 0 : dependence_relations->safe_push (ddr);
5812 : 0 : return false;
5813 : : }
5814 : :
5815 : 3093008 : FOR_EACH_VEC_ELT (datarefs, i, a)
5816 : 7664183 : for (j = i + 1; datarefs.iterate (j, &b); j++)
5817 : 4983938 : if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5818 : : {
5819 : 4636387 : ddr = initialize_data_dependence_relation (a, b, loop_nest);
5820 : 4636387 : dependence_relations->safe_push (ddr);
5821 : 4636387 : if (loop_nest.exists ())
5822 : 4596580 : compute_affine_dependence (ddr, loop_nest[0]);
5823 : : }
5824 : :
5825 : 412763 : if (compute_self_and_rr)
5826 : 999735 : FOR_EACH_VEC_ELT (datarefs, i, a)
5827 : : {
5828 : 746018 : ddr = initialize_data_dependence_relation (a, a, loop_nest);
5829 : 746018 : dependence_relations->safe_push (ddr);
5830 : 746018 : if (loop_nest.exists ())
5831 : 746018 : compute_affine_dependence (ddr, loop_nest[0]);
5832 : : }
5833 : :
5834 : : return true;
5835 : : }
5836 : :
5837 : : /* Describes a location of a memory reference. */
5838 : :
5839 : : struct data_ref_loc
5840 : : {
5841 : : /* The memory reference. */
5842 : : tree ref;
5843 : :
5844 : : /* True if the memory reference is read. */
5845 : : bool is_read;
5846 : :
5847 : : /* True if the data reference is conditional within the containing
5848 : : statement, i.e. if it might not occur even when the statement
5849 : : is executed and runs to completion. */
5850 : : bool is_conditional_in_stmt;
5851 : : };
5852 : :
5853 : :
5854 : : /* Stores the locations of memory references in STMT to REFERENCES. Returns
5855 : : true if STMT clobbers memory, false otherwise. */
5856 : :
5857 : : static bool
5858 : 47116860 : get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5859 : : {
5860 : 47116860 : bool clobbers_memory = false;
5861 : 47116860 : data_ref_loc ref;
5862 : 47116860 : tree op0, op1;
5863 : 47116860 : enum gimple_code stmt_code = gimple_code (stmt);
5864 : :
5865 : : /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5866 : : Since we cannot model data references to accesses that are not
5867 : : spelled out explicitly, give up if such accesses may occur. */
5868 : 47116860 : if (stmt_code == GIMPLE_CALL
5869 : 47116860 : && !(gimple_call_flags (stmt) & ECF_CONST))
5870 : : {
5871 : : /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
5872 : 3835958 : if (gimple_call_internal_p (stmt))
5873 : 49536 : switch (gimple_call_internal_fn (stmt))
5874 : : {
5875 : 5611 : case IFN_GOMP_SIMD_LANE:
5876 : 5611 : {
5877 : 5611 : class loop *loop = gimple_bb (stmt)->loop_father;
5878 : 5611 : tree uid = gimple_call_arg (stmt, 0);
5879 : 5611 : gcc_assert (TREE_CODE (uid) == SSA_NAME);
5880 : 5611 : if (loop == NULL
5881 : 5611 : || loop->simduid != SSA_NAME_VAR (uid))
5882 : : clobbers_memory = true;
5883 : : break;
5884 : : }
5885 : : case IFN_MASK_LOAD:
5886 : : case IFN_MASK_STORE:
5887 : : break;
5888 : 1902 : case IFN_MASK_CALL:
5889 : 1902 : {
5890 : 1902 : tree orig_fndecl
5891 : 1902 : = gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
5892 : 1902 : if (!orig_fndecl
5893 : 1902 : || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
5894 : : clobbers_memory = true;
5895 : : }
5896 : : break;
5897 : : default:
5898 : 3874374 : clobbers_memory = true;
5899 : : break;
5900 : : }
5901 : 3786422 : else if (gimple_call_builtin_p (stmt, BUILT_IN_PREFETCH))
5902 : : clobbers_memory = false;
5903 : : else
5904 : 3874374 : clobbers_memory = true;
5905 : : }
5906 : 43280902 : else if (stmt_code == GIMPLE_ASM
5907 : 43280902 : && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5908 : 8303 : || gimple_vuse (stmt)))
5909 : : clobbers_memory = true;
5910 : :
5911 : 98822011 : if (!gimple_vuse (stmt))
5912 : : return clobbers_memory;
5913 : :
5914 : 18501819 : if (stmt_code == GIMPLE_ASSIGN)
5915 : : {
5916 : 13783662 : tree base;
5917 : 13783662 : op0 = gimple_assign_lhs (stmt);
5918 : 13783662 : op1 = gimple_assign_rhs1 (stmt);
5919 : :
5920 : 13783662 : if (DECL_P (op1)
5921 : 13783662 : || (REFERENCE_CLASS_P (op1)
5922 : 6690636 : && (base = get_base_address (op1))
5923 : 6690636 : && TREE_CODE (base) != SSA_NAME
5924 : 6690615 : && !is_gimple_min_invariant (base)))
5925 : : {
5926 : 7488278 : ref.ref = op1;
5927 : 7488278 : ref.is_read = true;
5928 : 7488278 : ref.is_conditional_in_stmt = false;
5929 : 7488278 : references->safe_push (ref);
5930 : : }
5931 : : }
5932 : 4718157 : else if (stmt_code == GIMPLE_CALL)
5933 : : {
5934 : 3830595 : unsigned i = 0, n;
5935 : 3830595 : tree ptr, type;
5936 : 3830595 : unsigned int align;
5937 : :
5938 : 3830595 : ref.is_read = false;
5939 : 3830595 : if (gimple_call_internal_p (stmt))
5940 : 41997 : switch (gimple_call_internal_fn (stmt))
5941 : : {
5942 : 1792 : case IFN_MASK_LOAD:
5943 : 1792 : if (gimple_call_lhs (stmt) == NULL_TREE)
5944 : : break;
5945 : 1792 : ref.is_read = true;
5946 : : /* FALLTHRU */
5947 : 3171 : case IFN_MASK_STORE:
5948 : 3171 : ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5949 : 3171 : align = tree_to_shwi (gimple_call_arg (stmt, 1));
5950 : 3171 : if (ref.is_read)
5951 : 1792 : type = TREE_TYPE (gimple_call_lhs (stmt));
5952 : : else
5953 : 1379 : type = TREE_TYPE (gimple_call_arg (stmt, 3));
5954 : 3171 : if (TYPE_ALIGN (type) != align)
5955 : 1007 : type = build_aligned_type (type, align);
5956 : 3171 : ref.is_conditional_in_stmt = true;
5957 : 3171 : ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5958 : : ptr);
5959 : 3171 : references->safe_push (ref);
5960 : 3171 : return false;
5961 : : case IFN_MASK_CALL:
5962 : 3827424 : i = 1;
5963 : : gcc_fallthrough ();
5964 : : default:
5965 : : break;
5966 : : }
5967 : :
5968 : 3827424 : op0 = gimple_call_lhs (stmt);
5969 : 3827424 : n = gimple_call_num_args (stmt);
5970 : 15407090 : for (; i < n; i++)
5971 : : {
5972 : 7752242 : op1 = gimple_call_arg (stmt, i);
5973 : :
5974 : 7752242 : if (DECL_P (op1)
5975 : 7752242 : || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5976 : : {
5977 : 490758 : ref.ref = op1;
5978 : 490758 : ref.is_read = true;
5979 : 490758 : ref.is_conditional_in_stmt = false;
5980 : 490758 : references->safe_push (ref);
5981 : : }
5982 : : }
5983 : : }
5984 : : else
5985 : : return clobbers_memory;
5986 : :
5987 : 17611086 : if (op0
5988 : 17611086 : && (DECL_P (op0)
5989 : 14320258 : || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5990 : : {
5991 : 7153959 : ref.ref = op0;
5992 : 7153959 : ref.is_read = false;
5993 : 7153959 : ref.is_conditional_in_stmt = false;
5994 : 7153959 : references->safe_push (ref);
5995 : : }
5996 : : return clobbers_memory;
5997 : : }
5998 : :
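 : : /* Editorial illustration, not part of GCC: for the GIMPLE statements
 : :
 : : _5 = b[i_1];
 : : a[i_1] = _5;
 : :
 : : the routine above records b[i_1] as a non-conditional read and a[i_1]
 : : as a non-conditional write. For an internal call such as
 : :
 : : x_3 = .MASK_LOAD (ptr_4, 32, mask_7);
 : :
 : : it records a single conditional read, built as a MEM_REF of ptr_4
 : : whose type is the type of the lhs, re-aligned to the alignment encoded
 : : in the second argument. */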
5999 : :
6000 : : /* Returns true if the loop-nest has any data reference. */
6001 : :
6002 : : bool
6003 : 753 : loop_nest_has_data_refs (loop_p loop)
6004 : : {
6005 : 753 : basic_block *bbs = get_loop_body (loop);
6006 : 753 : auto_vec<data_ref_loc, 3> references;
6007 : :
6008 : 1004 : for (unsigned i = 0; i < loop->num_nodes; i++)
6009 : : {
6010 : 933 : basic_block bb = bbs[i];
6011 : 933 : gimple_stmt_iterator bsi;
6012 : :
6013 : 3227 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6014 : : {
6015 : 2043 : gimple *stmt = gsi_stmt (bsi);
6016 : 2043 : get_references_in_stmt (stmt, &references);
6017 : 2043 : if (references.length ())
6018 : : {
6019 : 682 : free (bbs);
6020 : 682 : return true;
6021 : : }
6022 : : }
6023 : : }
6024 : 71 : free (bbs);
6025 : 71 : return false;
6026 : 753 : }
6027 : :
6028 : : /* Stores the data references in STMT to DATAREFS. Returns an opt_result
6029 : : failure if there is an unanalyzable reference, success otherwise.
6030 : : NEST is the outermost loop of the loop nest in which the references should be analyzed. */
6031 : :
6032 : : opt_result
6033 : 47100414 : find_data_references_in_stmt (class loop *nest, gimple *stmt,
6034 : : vec<data_reference_p> *datarefs)
6035 : : {
6036 : 47100414 : auto_vec<data_ref_loc, 2> references;
6037 : 47100414 : data_reference_p dr;
6038 : :
6039 : 47100414 : if (get_references_in_stmt (stmt, &references))
6040 : 3874369 : return opt_result::failure_at (stmt, "statement clobbers memory: %G",
6041 : : stmt);
6042 : :
6043 : 144055723 : for (const data_ref_loc &ref : references)
6044 : : {
6045 : 14377588 : dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
6046 : 14377588 : loop_containing_stmt (stmt), ref.ref,
6047 : 14377588 : stmt, ref.is_read, ref.is_conditional_in_stmt);
6048 : 14377588 : gcc_assert (dr != NULL);
6049 : 14377588 : datarefs->safe_push (dr);
6050 : : }
6051 : :
6052 : 43226045 : return opt_result::success ();
6053 : 47100414 : }
6054 : :
6055 : : /* Stores the data references in STMT to DATAREFS. If there is an
6056 : : unanalyzable reference, returns false, otherwise returns true.
6057 : : NEST is the outermost loop of the loop nest in which the references
6058 : : should be instantiated, LOOP is the loop in which the references
6059 : : should be analyzed. */
6060 : :
6061 : : bool
6062 : 14403 : graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
6063 : : vec<data_reference_p> *datarefs)
6064 : : {
6065 : 14403 : auto_vec<data_ref_loc, 2> references;
6066 : 14403 : bool ret = true;
6067 : 14403 : data_reference_p dr;
6068 : :
6069 : 14403 : if (get_references_in_stmt (stmt, &references))
6070 : : return false;
6071 : :
6072 : 46097 : for (const data_ref_loc &ref : references)
6073 : : {
6074 : 5806 : dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
6075 : 2903 : ref.is_conditional_in_stmt);
6076 : 2903 : gcc_assert (dr != NULL);
6077 : 2903 : datarefs->safe_push (dr);
6078 : : }
6079 : :
6080 : : return ret;
6081 : 14403 : }
6082 : :
6083 : : /* Search the data references in basic block BB, and record the
6084 : : information into DATAREFS. Returns chrec_dont_know when failing to
6085 : : analyze a difficult case, returns NULL_TREE otherwise. */
6086 : :
6087 : : tree
6088 : 2336910 : find_data_references_in_bb (class loop *loop, basic_block bb,
6089 : : vec<data_reference_p> *datarefs)
6090 : : {
6091 : 2336910 : gimple_stmt_iterator bsi;
6092 : :
6093 : 20297286 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6094 : : {
6095 : 15953324 : gimple *stmt = gsi_stmt (bsi);
6096 : :
6097 : 15953324 : if (!find_data_references_in_stmt (loop, stmt, datarefs))
6098 : : {
6099 : 329858 : struct data_reference *res;
6100 : 329858 : res = XCNEW (struct data_reference);
6101 : 329858 : datarefs->safe_push (res);
6102 : :
6103 : 329858 : return chrec_dont_know;
6104 : : }
6105 : : }
6106 : :
6107 : : return NULL_TREE;
6108 : : }
6109 : :
6110 : : /* Search the data references in LOOP, and record the information into
6111 : : DATAREFS. Returns chrec_dont_know when failing to analyze a
6112 : : difficult case, returns NULL_TREE otherwise.
6113 : :
6114 : : TODO: This function should be made smarter so that it can handle address
6115 : : arithmetic as if it were a set of array accesses, etc. */
6116 : :
6117 : : tree
6118 : 796157 : find_data_references_in_loop (class loop *loop,
6119 : : vec<data_reference_p> *datarefs)
6120 : : {
6121 : 796157 : basic_block bb, *bbs;
6122 : 796157 : unsigned int i;
6123 : :
6124 : 796157 : bbs = get_loop_body_in_dom_order (loop);
6125 : :
6126 : 3551954 : for (i = 0; i < loop->num_nodes; i++)
6127 : : {
6128 : 2257264 : bb = bbs[i];
6129 : :
6130 : 2257264 : if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6131 : : {
6132 : 297624 : free (bbs);
6133 : 297624 : return chrec_dont_know;
6134 : : }
6135 : : }
6136 : 498533 : free (bbs);
6137 : :
6138 : 498533 : return NULL_TREE;
6139 : : }
6140 : :
6141 : : /* Return the alignment in bytes that DRB is guaranteed to have at all
6142 : : times. */
6143 : :
6144 : : unsigned int
6145 : 413799 : dr_alignment (innermost_loop_behavior *drb)
6146 : : {
6147 : : /* Get the alignment of BASE_ADDRESS + INIT. */
6148 : 413799 : unsigned int alignment = drb->base_alignment;
6149 : 413799 : unsigned int misalignment = (drb->base_misalignment
6150 : 413799 : + TREE_INT_CST_LOW (drb->init));
6151 : 413799 : if (misalignment != 0)
6152 : 174720 : alignment = MIN (alignment, misalignment & -misalignment);
6153 : :
6154 : : /* Cap it to the alignment of OFFSET. */
6155 : 413799 : if (!integer_zerop (drb->offset))
6156 : 33654 : alignment = MIN (alignment, drb->offset_alignment);
6157 : :
6158 : : /* Cap it to the alignment of STEP. */
6159 : 413799 : if (!integer_zerop (drb->step))
6160 : 246384 : alignment = MIN (alignment, drb->step_alignment);
6161 : :
6162 : 413799 : return alignment;
6163 : : }
6164 : :
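 : : /* Editorial worked example: with base_alignment == 16,
 : : base_misalignment == 0 and init == 12, the combined misalignment is
 : : 12 and 12 & -12 == 4, so the guaranteed alignment drops to 4 bytes.
 : : A nonzero step with step_alignment == 2 would cap the result further
 : : to 2. */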
6165 : : /* If BASE is a pointer-typed SSA name, try to find the object that it
6166 : : is based on. Return this object X on success and store the alignment
6167 : : in bytes of BASE - &X in *ALIGNMENT_OUT. */
6168 : :
6169 : : static tree
6170 : 649931 : get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6171 : : {
6172 : 649931 : if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6173 : : return NULL_TREE;
6174 : :
6175 : 303696 : gimple *def = SSA_NAME_DEF_STMT (base);
6176 : 303696 : base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6177 : :
6178 : : /* Peel chrecs and record the minimum alignment preserved by
6179 : : all steps. */
6180 : 303696 : unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6181 : 616742 : while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6182 : : {
6183 : 9350 : unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6184 : 9350 : alignment = MIN (alignment, step_alignment);
6185 : 9350 : base = CHREC_LEFT (base);
6186 : : }
6187 : :
6188 : : /* Punt if the expression is too complicated to handle. */
6189 : 303696 : if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6190 : : return NULL_TREE;
6191 : :
6192 : : /* The only useful cases are those for which a dereference folds to something
6193 : : other than an INDIRECT_REF. */
6194 : 303664 : tree ref_type = TREE_TYPE (TREE_TYPE (base));
6195 : 303664 : tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6196 : 303664 : if (!ref)
6197 : : return NULL_TREE;
6198 : :
6199 : : /* Analyze the base to which the steps we peeled were applied. */
6200 : 1966 : poly_int64 bitsize, bitpos, bytepos;
6201 : 1966 : machine_mode mode;
6202 : 1966 : int unsignedp, reversep, volatilep;
6203 : 1966 : tree offset;
6204 : 1966 : base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6205 : : &unsignedp, &reversep, &volatilep);
6206 : 649931 : if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6207 : : return NULL_TREE;
6208 : :
6209 : : /* Restrict the alignment to that guaranteed by the offsets. */
6210 : 1966 : unsigned int bytepos_alignment = known_alignment (bytepos);
6211 : 1966 : if (bytepos_alignment != 0)
6212 : 1822 : alignment = MIN (alignment, bytepos_alignment);
6213 : 1966 : if (offset)
6214 : : {
6215 : 0 : unsigned int offset_alignment = highest_pow2_factor (offset);
6216 : 0 : alignment = MIN (alignment, offset_alignment);
6217 : : }
6218 : :
6219 : 1966 : *alignment_out = alignment;
6220 : 1966 : return base;
6221 : : }
6222 : :
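 : : /* Editorial worked example: suppose BASE is an SSA pointer whose
 : : scalar evolution is {&x[0], +, 16}_1 for a global "int x[100]".
 : : Peeling the chrec records a step alignment of 16; the dereference of
 : : the remaining base &x[0] folds to x[0]; and get_inner_reference
 : : reports the object x at byte position 0. The function therefore
 : : returns x with *ALIGNMENT_OUT == 16, i.e. BASE always points a
 : : multiple of 16 bytes past &x. */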
6223 : : /* Return the object whose alignment would need to be changed in order
6224 : : to increase the alignment of ADDR. Store the maximum achievable
6225 : : alignment in *MAX_ALIGNMENT. */
6226 : :
6227 : : tree
6228 : 649931 : get_base_for_alignment (tree addr, unsigned int *max_alignment)
6229 : : {
6230 : 649931 : tree base = get_base_for_alignment_1 (addr, max_alignment);
6231 : 649931 : if (base)
6232 : : return base;
6233 : :
6234 : 647965 : if (TREE_CODE (addr) == ADDR_EXPR)
6235 : 271829 : addr = TREE_OPERAND (addr, 0);
6236 : 647965 : *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6237 : 647965 : return addr;
6238 : : }
6239 : :
6240 : : /* Recursive helper function for find_loop_nest. */
6241 : :
6242 : : static bool
6243 : 143794 : find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6244 : : {
6245 : : /* Inner loops of the nest should not contain siblings. Example:
6246 : : when there are two consecutive loops,
6247 : :
6248 : : | loop_0
6249 : : | loop_1
6250 : : | A[{0, +, 1}_1]
6251 : : | endloop_1
6252 : : | loop_2
6253 : : | A[{0, +, 1}_2]
6254 : : | endloop_2
6255 : : | endloop_0
6256 : :
6257 : : the dependence relation cannot be captured by the distance
6258 : : abstraction. */
6259 : 143794 : if (loop->next)
6260 : : return false;
6261 : :
6262 : 121856 : loop_nest->safe_push (loop);
6263 : 121856 : if (loop->inner)
6264 : 42302 : return find_loop_nest_1 (loop->inner, loop_nest);
6265 : : return true;
6266 : : }
6267 : :
6268 : : /* Return false when the LOOP is not well nested. Otherwise return
6269 : : true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
6270 : : contain the loops from the outermost to the innermost, as they will
6271 : : appear in the classic distance vector. */
6272 : :
6273 : : bool
6274 : 999918 : find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6275 : : {
6276 : 999918 : loop_nest->safe_push (loop);
6277 : 999918 : if (loop->inner)
6278 : 101492 : return find_loop_nest_1 (loop->inner, loop_nest);
6279 : : return true;
6280 : : }
6281 : :
6282 : : /* Returns true when the data dependences have been computed, false otherwise.
6283 : : Given a loop nest LOOP, the following vectors are returned:
6284 : : DATAREFS is initialized to all the data references contained in this loop,
6285 : : DEPENDENCE_RELATIONS contains the relations between the data references.
6286 : : Compute read-read and self relations if
6287 : : COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
6288 : :
6289 : : bool
6290 : 399251 : compute_data_dependences_for_loop (class loop *loop,
6291 : : bool compute_self_and_read_read_dependences,
6292 : : vec<loop_p> *loop_nest,
6293 : : vec<data_reference_p> *datarefs,
6294 : : vec<ddr_p> *dependence_relations)
6295 : : {
6296 : 399251 : bool res = true;
6297 : :
6298 : 399251 : memset (&dependence_stats, 0, sizeof (dependence_stats));
6299 : :
6300 : : /* If the loop nest is not well formed, or one of the data references
6301 : : is not computable, give up without spending time to compute other
6302 : : dependences. */
6303 : 399251 : if (!loop
6304 : 399251 : || !find_loop_nest (loop, loop_nest)
6305 : 399249 : || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6306 : 652898 : || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6307 : : compute_self_and_read_read_dependences))
6308 : : res = false;
6309 : :
6310 : 399251 : if (dump_file && (dump_flags & TDF_STATS))
6311 : : {
6312 : 157 : fprintf (dump_file, "Dependence tester statistics:\n");
6313 : :
6314 : 157 : fprintf (dump_file, "Number of dependence tests: %d\n",
6315 : : dependence_stats.num_dependence_tests);
6316 : 157 : fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6317 : : dependence_stats.num_dependence_dependent);
6318 : 157 : fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6319 : : dependence_stats.num_dependence_independent);
6320 : 157 : fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6321 : : dependence_stats.num_dependence_undetermined);
6322 : :
6323 : 157 : fprintf (dump_file, "Number of subscript tests: %d\n",
6324 : : dependence_stats.num_subscript_tests);
6325 : 157 : fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6326 : : dependence_stats.num_subscript_undetermined);
6327 : 157 : fprintf (dump_file, "Number of same subscript function: %d\n",
6328 : : dependence_stats.num_same_subscript_function);
6329 : :
6330 : 157 : fprintf (dump_file, "Number of ziv tests: %d\n",
6331 : : dependence_stats.num_ziv);
6332 : 157 : fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6333 : : dependence_stats.num_ziv_dependent);
6334 : 157 : fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6335 : : dependence_stats.num_ziv_independent);
6336 : 157 : fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6337 : : dependence_stats.num_ziv_unimplemented);
6338 : :
6339 : 157 : fprintf (dump_file, "Number of siv tests: %d\n",
6340 : : dependence_stats.num_siv);
6341 : 157 : fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6342 : : dependence_stats.num_siv_dependent);
6343 : 157 : fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6344 : : dependence_stats.num_siv_independent);
6345 : 157 : fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6346 : : dependence_stats.num_siv_unimplemented);
6347 : :
6348 : 157 : fprintf (dump_file, "Number of miv tests: %d\n",
6349 : : dependence_stats.num_miv);
6350 : 157 : fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6351 : : dependence_stats.num_miv_dependent);
6352 : 157 : fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6353 : : dependence_stats.num_miv_independent);
6354 : 157 : fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6355 : : dependence_stats.num_miv_unimplemented);
6356 : : }
6357 : :
6358 : 399251 : return res;
6359 : : }
6360 : :
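 : : /* Editorial note: the caller owns all three output vectors.
 : : free_dependence_relation below does not free the two data references
 : : of a relation, so the usual cleanup pairs the two helpers defined
 : : next, e.g.
 : :
 : : free_dependence_relations (ddrs);
 : : free_data_refs (datarefs);
 : :
 : : where DDRS and DATAREFS are the hypothetical vectors passed to the
 : : function above. */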
6361 : : /* Free the memory used by a data dependence relation DDR. */
6362 : :
6363 : : void
6364 : 13089312 : free_dependence_relation (struct data_dependence_relation *ddr)
6365 : : {
6366 : 13089312 : if (ddr == NULL)
6367 : : return;
6368 : :
6369 : 13089312 : if (DDR_SUBSCRIPTS (ddr).exists ())
6370 : 894401 : free_subscripts (DDR_SUBSCRIPTS (ddr));
6371 : 13089312 : DDR_DIST_VECTS (ddr).release ();
6372 : 13089312 : DDR_DIR_VECTS (ddr).release ();
6373 : :
6374 : 13089312 : free (ddr);
6375 : : }
6376 : :
6377 : : /* Free the memory used by the data dependence relations from
6378 : : DEPENDENCE_RELATIONS. */
6379 : :
6380 : : void
6381 : 3020940 : free_dependence_relations (vec<ddr_p>& dependence_relations)
6382 : : {
6383 : 9538458 : for (data_dependence_relation *ddr : dependence_relations)
6384 : 5385654 : if (ddr)
6385 : 5385654 : free_dependence_relation (ddr);
6386 : :
6387 : 3020940 : dependence_relations.release ();
6388 : 3020940 : }
6389 : :
6390 : : /* Free the memory used by the data references from DATAREFS. */
6391 : :
6392 : : void
6393 : 3316255 : free_data_refs (vec<data_reference_p>& datarefs)
6394 : : {
6395 : 19428760 : for (data_reference *dr : datarefs)
6396 : 12473753 : free_data_ref (dr);
6397 : 3316255 : datarefs.release ();
6398 : 3316255 : }
6399 : :
6400 : : /* Common routine implementing both dr_direction_indicator and
6401 : : dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
6402 : : to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6403 : : Return the step as the indicator otherwise. */
6404 : :
6405 : : static tree
6406 : 45866 : dr_step_indicator (struct data_reference *dr, int useful_min)
6407 : : {
6408 : 45866 : tree step = DR_STEP (dr);
6409 : 45866 : if (!step)
6410 : : return NULL_TREE;
6411 : 45866 : STRIP_NOPS (step);
6412 : : /* Look for cases where the step is scaled by a positive constant
6413 : : integer, which will often be the access size. If the multiplication
6414 : : doesn't change the sign (due to overflow effects) then we can
6415 : : test the unscaled value instead. */
6416 : 45866 : if (TREE_CODE (step) == MULT_EXPR
6417 : 4917 : && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6418 : 50752 : && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6419 : : {
6420 : 4886 : tree factor = TREE_OPERAND (step, 1);
6421 : 4886 : step = TREE_OPERAND (step, 0);
6422 : :
6423 : : /* Strip widening and truncating conversions as well as nops. */
6424 : 1165 : if (CONVERT_EXPR_P (step)
6425 : 4886 : && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6426 : 3721 : step = TREE_OPERAND (step, 0);
6427 : 4886 : tree type = TREE_TYPE (step);
6428 : :
6429 : : /* Get the range of step values that would not cause overflow. */
6430 : 9772 : widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6431 : 4886 : / wi::to_widest (factor));
6432 : 9772 : widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6433 : 4886 : / wi::to_widest (factor));
6434 : :
6435 : : /* Get the range of values that the unconverted step actually has. */
6436 : 4886 : wide_int step_min, step_max;
6437 : 4886 : int_range_max vr;
6438 : 4886 : if (TREE_CODE (step) != SSA_NAME
6439 : 9700 : || !get_range_query (cfun)->range_of_expr (vr, step)
6440 : 9736 : || vr.undefined_p ())
6441 : : {
6442 : 36 : step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6443 : 36 : step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6444 : : }
6445 : : else
6446 : : {
6447 : 4850 : step_min = vr.lower_bound ();
6448 : 4850 : step_max = vr.upper_bound ();
6449 : : }
6450 : :
6451 : : /* Check whether the unconverted step has an acceptable range. */
6452 : 4886 : signop sgn = TYPE_SIGN (type);
6453 : 9772 : if (wi::les_p (minv, widest_int::from (step_min, sgn))
6454 : 12622 : && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6455 : : {
6456 : 1416 : if (wi::ge_p (step_min, useful_min, sgn))
6457 : 432 : return ssize_int (useful_min);
6458 : 984 : else if (wi::lt_p (step_max, 0, sgn))
6459 : 0 : return ssize_int (-1);
6460 : : else
6461 : 984 : return fold_convert (ssizetype, step);
6462 : : }
6463 : 4886 : }
6464 : 44450 : return DR_STEP (dr);
6465 : : }
6466 : :
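 : : /* Editorial worked example: suppose DR_STEP (dr) is the hypothetical
 : : expression "n_7 * 4" where range information shows the signed int n_7
 : : to be in [1, 1000]. The factor 4 is stripped, the range of n_7 fits
 : : within the overflow-safe bounds, and since its minimum (1) is >=
 : : USEFUL_MIN the function returns ssize_int (USEFUL_MIN): callers of
 : : dr_zero_step_indicator learn the step is known nonzero, and callers
 : : of dr_direction_indicator learn it is known nonnegative, without
 : : knowing the runtime value of n_7. */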
6467 : : /* Return a value that is negative iff DR has a negative step. */
6468 : :
6469 : : tree
6470 : 11134 : dr_direction_indicator (struct data_reference *dr)
6471 : : {
6472 : 11134 : return dr_step_indicator (dr, 0);
6473 : : }
6474 : :
6475 : : /* Return a value that is zero iff DR has a zero step. */
6476 : :
6477 : : tree
6478 : 34732 : dr_zero_step_indicator (struct data_reference *dr)
6479 : : {
6480 : 34732 : return dr_step_indicator (dr, 1);
6481 : : }
6482 : :
6483 : : /* Return true if DR is known to have a nonnegative (but possibly zero)
6484 : : step. */
6485 : :
6486 : : bool
6487 : 4526 : dr_known_forward_stride_p (struct data_reference *dr)
6488 : : {
6489 : 4526 : tree indicator = dr_direction_indicator (dr);
6490 : 4526 : tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6491 : : fold_convert (ssizetype, indicator),
6492 : : ssize_int (0));
6493 : 4526 : return neg_step_val && integer_zerop (neg_step_val);
6494 : : }
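 : : /* Editorial example: for a DR whose DR_STEP is the constant 4, the
 : : direction indicator is 4 itself, "4 < 0" folds to false, and the
 : : function returns true; for a symbolic step whose sign cannot be
 : : folded at compile time, fold_binary returns NULL_TREE and the
 : : function conservatively returns false. */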
|