Branch data Line data Source code
1 : : /* Data references and dependences detectors.
2 : : Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 : : Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : /* This pass walks a given loop structure searching for array
22 : : references. The information about the array accesses is recorded
23 : : in DATA_REFERENCE structures.
24 : :
25 : : The basic test for determining the dependences is:
26 : : given two access functions chrec1 and chrec2 to the same array, and
27 : : x and y two vectors from the iteration domain, the same element of
28 : : the array is accessed twice at iterations x and y if and only if:
29 : : | chrec1 (x) == chrec2 (y).
30 : :
31 : : The goals of this analysis are:
32 : :
33 : : - to determine the independence: the relation between two
34 : : independent accesses is qualified with the chrec_known (this
35 : : information allows a loop parallelization),
36 : :
37 : : - when two data references access the same data, to qualify the
38 : : dependence relation with classic dependence representations:
39 : :
40 : : - distance vectors
41 : : - direction vectors
42 : : - loop carried level dependence
43 : : - polyhedron dependence
44 : : or with the chains of recurrences based representation,
45 : :
46 : : - to define a knowledge base for storing the data dependence
47 : : information,
48 : :
49 : : - to define an interface to access this data.
50 : :
51 : :
52 : : Definitions:
53 : :
54 : : - subscript: given two array accesses a subscript is the tuple
55 : : composed of the access functions for a given dimension. Example:
56 : : Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 : : (f1, g1), (f2, g2), (f3, g3).
58 : :
59 : : - Diophantine equation: an equation whose coefficients and
60 : : solutions are integer constants, for example the equation
61 : : | 3*x + 2*y = 1
62 : : has an integer solution x = 1 and y = -1.
63 : :
64 : : References:
65 : :
66 : : - "Advanced Compilation for High Performance Computing" by Randy
67 : : Allen and Ken Kennedy.
68 : : http://citeseer.ist.psu.edu/goff91practical.html
69 : :
70 : : - "Loop Transformations for Restructuring Compilers - The Foundations"
71 : : by Utpal Banerjee.
72 : :
73 : :
74 : : */
75 : :
76 : : #include "config.h"
77 : : #include "system.h"
78 : : #include "coretypes.h"
79 : : #include "backend.h"
80 : : #include "rtl.h"
81 : : #include "tree.h"
82 : : #include "gimple.h"
83 : : #include "gimple-pretty-print.h"
84 : : #include "alias.h"
85 : : #include "fold-const.h"
86 : : #include "expr.h"
87 : : #include "gimple-iterator.h"
88 : : #include "tree-ssa-loop-niter.h"
89 : : #include "tree-ssa-loop.h"
90 : : #include "tree-ssa.h"
91 : : #include "cfgloop.h"
92 : : #include "tree-data-ref.h"
93 : : #include "tree-scalar-evolution.h"
94 : : #include "dumpfile.h"
95 : : #include "tree-affine.h"
96 : : #include "builtins.h"
97 : : #include "tree-eh.h"
98 : : #include "ssa.h"
99 : : #include "internal-fn.h"
100 : : #include "vr-values.h"
101 : : #include "range-op.h"
102 : : #include "tree-ssa-loop-ivopts.h"
103 : : #include "calls.h"
104 : :
/* Counters describing the dependence tests, collected in the single
   file-local object DEPENDENCE_STATS.  NOTE(review): the code that
   updates and prints these counters is not visible here; the grouping
   comments below are inferred from the field names only -- confirm
   against the users.  */
105 : : static struct datadep_stats
106 : : {
/* Whole data-dependence tests, presumably broken down by outcome.  */
107 : : int num_dependence_tests;
108 : : int num_dependence_dependent;
109 : : int num_dependence_independent;
110 : : int num_dependence_undetermined;
111 : :
/* Presumably per-subscript test counts.  */
112 : : int num_subscript_tests;
113 : : int num_subscript_undetermined;
114 : : int num_same_subscript_function;
115 : :
/* Counters for the tests that the field names call ZIV.  */
116 : : int num_ziv;
117 : : int num_ziv_independent;
118 : : int num_ziv_dependent;
119 : : int num_ziv_unimplemented;
120 : :
/* Counters for the tests that the field names call SIV.  */
121 : : int num_siv;
122 : : int num_siv_independent;
123 : : int num_siv_dependent;
124 : : int num_siv_unimplemented;
125 : :
/* Counters for the tests that the field names call MIV.  */
126 : : int num_miv;
127 : : int num_miv_independent;
128 : : int num_miv_dependent;
129 : : int num_miv_unimplemented;
130 : : } dependence_stats;
131 : :
/* Forward declaration of a file-local helper; its definition is not in
   this part of the file.  */
132 : : static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
133 : : unsigned int, unsigned int,
134 : : class loop *);
135 : : /* Returns true iff A divides B. */
136 : :
137 : : static inline bool
138 : 1514 : tree_fold_divides_p (const_tree a, const_tree b)
139 : : {
140 : 1514 : gcc_assert (TREE_CODE (a) == INTEGER_CST);
141 : 1514 : gcc_assert (TREE_CODE (b) == INTEGER_CST);
142 : 1514 : return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
143 : : }
144 : :
145 : : /* Returns true iff A divides B. */
146 : :
147 : : static inline bool
148 : 990720 : int_divides_p (lambda_int a, lambda_int b)
149 : : {
150 : 990720 : return ((b % a) == 0);
151 : : }
152 : :
153 : : /* Return true if reference REF contains a union access. */
154 : :
155 : : static bool
156 : 324525 : ref_contains_union_access_p (tree ref)
157 : : {
158 : 374827 : while (handled_component_p (ref))
159 : : {
160 : 51451 : ref = TREE_OPERAND (ref, 0);
161 : 102902 : if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
162 : 51451 : || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
163 : : return true;
164 : : }
165 : : return false;
166 : : }
167 : :
168 : :
169 : :
170 : : /* Dump into FILE all the data references from DATAREFS. */
171 : :
172 : : static void
173 : 0 : dump_data_references (FILE *file, vec<data_reference_p> datarefs)
174 : : {
175 : 0 : for (data_reference *dr : datarefs)
176 : 0 : dump_data_reference (file, dr);
177 : 0 : }
178 : :
179 : : /* Unified dump into FILE all the data references from DATAREFS. */
180 : :
181 : : DEBUG_FUNCTION void
182 : 0 : debug (vec<data_reference_p> &ref)
183 : : {
184 : 0 : dump_data_references (stderr, ref);
185 : 0 : }
186 : :
187 : : DEBUG_FUNCTION void
188 : 0 : debug (vec<data_reference_p> *ptr)
189 : : {
190 : 0 : if (ptr)
191 : 0 : debug (*ptr);
192 : : else
193 : 0 : fprintf (stderr, "<nil>\n");
194 : 0 : }
195 : :
196 : :
197 : : /* Dump into STDERR all the data references from DATAREFS. */
198 : :
199 : : DEBUG_FUNCTION void
200 : 0 : debug_data_references (vec<data_reference_p> datarefs)
201 : : {
202 : 0 : dump_data_references (stderr, datarefs);
203 : 0 : }
204 : :
205 : : /* Print to STDERR the data_reference DR. */
206 : :
207 : : DEBUG_FUNCTION void
208 : 0 : debug_data_reference (struct data_reference *dr)
209 : : {
210 : 0 : dump_data_reference (stderr, dr);
211 : 0 : }
212 : :
213 : : /* Dump function for a DATA_REFERENCE structure. */
214 : :
215 : : void
216 : 3732 : dump_data_reference (FILE *outf,
217 : : struct data_reference *dr)
218 : : {
219 : 3732 : unsigned int i;
220 : :
221 : 3732 : fprintf (outf, "#(Data Ref: \n");
222 : 3732 : fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
223 : 3732 : fprintf (outf, "# stmt: ");
224 : 3732 : print_gimple_stmt (outf, DR_STMT (dr), 0);
225 : 3732 : fprintf (outf, "# ref: ");
226 : 3732 : print_generic_stmt (outf, DR_REF (dr));
227 : 3732 : fprintf (outf, "# base_object: ");
228 : 3732 : print_generic_stmt (outf, DR_BASE_OBJECT (dr));
229 : :
230 : 19344 : for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
231 : : {
232 : 4084 : fprintf (outf, "# Access function %d: ", i);
233 : 4084 : print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
234 : : }
235 : 3732 : fprintf (outf, "#)\n");
236 : 3732 : }
237 : :
238 : : /* Unified dump function for a DATA_REFERENCE structure. */
239 : :
240 : : DEBUG_FUNCTION void
241 : 0 : debug (data_reference &ref)
242 : : {
243 : 0 : dump_data_reference (stderr, &ref);
244 : 0 : }
245 : :
246 : : DEBUG_FUNCTION void
247 : 0 : debug (data_reference *ptr)
248 : : {
249 : 0 : if (ptr)
250 : 0 : debug (*ptr);
251 : : else
252 : 0 : fprintf (stderr, "<nil>\n");
253 : 0 : }
254 : :
255 : :
256 : : /* Dumps the affine function described by FN to the file OUTF. */
257 : :
258 : : DEBUG_FUNCTION void
259 : 34484 : dump_affine_function (FILE *outf, affine_fn fn)
260 : : {
261 : 34484 : unsigned i;
262 : 34484 : tree coef;
263 : :
264 : 34484 : print_generic_expr (outf, fn[0], TDF_SLIM);
265 : 71742 : for (i = 1; fn.iterate (i, &coef); i++)
266 : : {
267 : 2774 : fprintf (outf, " + ");
268 : 2774 : print_generic_expr (outf, coef, TDF_SLIM);
269 : 2774 : fprintf (outf, " * x_%u", i);
270 : : }
271 : 34484 : }
272 : :
273 : : /* Dumps the conflict function CF to the file OUTF. */
274 : :
275 : : DEBUG_FUNCTION void
276 : 153590 : dump_conflict_function (FILE *outf, conflict_function *cf)
277 : : {
278 : 153590 : unsigned i;
279 : :
280 : 153590 : if (cf->n == NO_DEPENDENCE)
281 : 113274 : fprintf (outf, "no dependence");
282 : 40316 : else if (cf->n == NOT_KNOWN)
283 : 5832 : fprintf (outf, "not known");
284 : : else
285 : : {
286 : 68968 : for (i = 0; i < cf->n; i++)
287 : : {
288 : 34484 : if (i != 0)
289 : 0 : fprintf (outf, " ");
290 : 34484 : fprintf (outf, "[");
291 : 34484 : dump_affine_function (outf, cf->fns[i]);
292 : 34484 : fprintf (outf, "]");
293 : : }
294 : : }
295 : 153590 : }
296 : :
297 : : /* Dump function for a SUBSCRIPT structure. */
298 : :
299 : : DEBUG_FUNCTION void
300 : 900 : dump_subscript (FILE *outf, struct subscript *subscript)
301 : : {
302 : 900 : conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
303 : :
304 : 900 : fprintf (outf, "\n (subscript \n");
305 : 900 : fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
306 : 900 : dump_conflict_function (outf, cf);
307 : 900 : if (CF_NONTRIVIAL_P (cf))
308 : : {
309 : 900 : tree last_iteration = SUB_LAST_CONFLICT (subscript);
310 : 900 : fprintf (outf, "\n last_conflict: ");
311 : 900 : print_generic_expr (outf, last_iteration);
312 : : }
313 : :
314 : 900 : cf = SUB_CONFLICTS_IN_B (subscript);
315 : 900 : fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
316 : 900 : dump_conflict_function (outf, cf);
317 : 900 : if (CF_NONTRIVIAL_P (cf))
318 : : {
319 : 900 : tree last_iteration = SUB_LAST_CONFLICT (subscript);
320 : 900 : fprintf (outf, "\n last_conflict: ");
321 : 900 : print_generic_expr (outf, last_iteration);
322 : : }
323 : :
324 : 900 : fprintf (outf, "\n (Subscript distance: ");
325 : 900 : print_generic_expr (outf, SUB_DISTANCE (subscript));
326 : 900 : fprintf (outf, " ))\n");
327 : 900 : }
328 : :
329 : : /* Print the classic direction vector DIRV to OUTF. */
330 : :
331 : : DEBUG_FUNCTION void
332 : 836 : print_direction_vector (FILE *outf,
333 : : lambda_vector dirv,
334 : : int length)
335 : : {
336 : 836 : int eq;
337 : :
338 : 1804 : for (eq = 0; eq < length; eq++)
339 : : {
340 : 968 : enum data_dependence_direction dir = ((enum data_dependence_direction)
341 : 968 : dirv[eq]);
342 : :
343 : 968 : switch (dir)
344 : : {
345 : 139 : case dir_positive:
346 : 139 : fprintf (outf, " +");
347 : 139 : break;
348 : 6 : case dir_negative:
349 : 6 : fprintf (outf, " -");
350 : 6 : break;
351 : 823 : case dir_equal:
352 : 823 : fprintf (outf, " =");
353 : 823 : break;
354 : 0 : case dir_positive_or_equal:
355 : 0 : fprintf (outf, " +=");
356 : 0 : break;
357 : 0 : case dir_positive_or_negative:
358 : 0 : fprintf (outf, " +-");
359 : 0 : break;
360 : 0 : case dir_negative_or_equal:
361 : 0 : fprintf (outf, " -=");
362 : 0 : break;
363 : 0 : case dir_star:
364 : 0 : fprintf (outf, " *");
365 : 0 : break;
366 : 0 : default:
367 : 0 : fprintf (outf, "indep");
368 : 0 : break;
369 : : }
370 : : }
371 : 836 : fprintf (outf, "\n");
372 : 836 : }
373 : :
374 : : /* Print a vector of direction vectors. */
375 : :
376 : : DEBUG_FUNCTION void
377 : 0 : print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
378 : : int length)
379 : : {
380 : 0 : for (lambda_vector v : dir_vects)
381 : 0 : print_direction_vector (outf, v, length);
382 : 0 : }
383 : :
384 : : /* Print out a vector VEC of length N to OUTFILE. */
385 : :
386 : : DEBUG_FUNCTION void
387 : 1239 : print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
388 : : {
389 : 1239 : int i;
390 : :
391 : 2664 : for (i = 0; i < n; i++)
392 : 1425 : fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
393 : 1239 : fprintf (outfile, "\n");
394 : 1239 : }
395 : :
396 : : /* Print a vector of distance vectors. */
397 : :
398 : : DEBUG_FUNCTION void
399 : 0 : print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
400 : : int length)
401 : : {
402 : 0 : for (lambda_vector v : dist_vects)
403 : 0 : print_lambda_vector (outf, v, length);
404 : 0 : }
405 : :
406 : : /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
407 : :
408 : : DEBUG_FUNCTION void
409 : 1682 : dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
410 : : {
411 : 1682 : struct data_reference *dra, *drb;
412 : :
413 : 1682 : fprintf (outf, "(Data Dep: \n");
414 : :
415 : 1682 : if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
416 : : {
417 : 402 : if (ddr)
418 : : {
419 : 402 : dra = DDR_A (ddr);
420 : 402 : drb = DDR_B (ddr);
421 : 402 : if (dra)
422 : 402 : dump_data_reference (outf, dra);
423 : : else
424 : 0 : fprintf (outf, " (nil)\n");
425 : 402 : if (drb)
426 : 402 : dump_data_reference (outf, drb);
427 : : else
428 : 0 : fprintf (outf, " (nil)\n");
429 : : }
430 : 402 : fprintf (outf, " (don't know)\n)\n");
431 : 402 : return;
432 : : }
433 : :
434 : 1280 : dra = DDR_A (ddr);
435 : 1280 : drb = DDR_B (ddr);
436 : 1280 : dump_data_reference (outf, dra);
437 : 1280 : dump_data_reference (outf, drb);
438 : :
439 : 1280 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
440 : 464 : fprintf (outf, " (no dependence)\n");
441 : :
442 : 816 : else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
443 : : {
444 : : unsigned int i;
445 : : class loop *loopi;
446 : :
447 : : subscript *sub;
448 : 1716 : FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
449 : : {
450 : 900 : fprintf (outf, " access_fn_A: ");
451 : 900 : print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
452 : 900 : fprintf (outf, " access_fn_B: ");
453 : 900 : print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
454 : 900 : dump_subscript (outf, sub);
455 : : }
456 : :
457 : 816 : fprintf (outf, " loop nest: (");
458 : 2554 : FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
459 : 922 : fprintf (outf, "%d ", loopi->num);
460 : 816 : fprintf (outf, ")\n");
461 : :
462 : 4115 : for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
463 : : {
464 : 836 : fprintf (outf, " distance_vector: ");
465 : 836 : print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
466 : 1672 : DDR_NB_LOOPS (ddr));
467 : : }
468 : :
469 : 3299 : for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
470 : : {
471 : 836 : fprintf (outf, " direction_vector: ");
472 : 836 : print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
473 : 1672 : DDR_NB_LOOPS (ddr));
474 : : }
475 : : }
476 : :
477 : 1280 : fprintf (outf, ")\n");
478 : : }
479 : :
480 : : /* Debug version. */
481 : :
482 : : DEBUG_FUNCTION void
483 : 0 : debug_data_dependence_relation (const struct data_dependence_relation *ddr)
484 : : {
485 : 0 : dump_data_dependence_relation (stderr, ddr);
486 : 0 : }
487 : :
488 : : /* Dump into FILE all the dependence relations from DDRS. */
489 : :
490 : : DEBUG_FUNCTION void
491 : 337 : dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
492 : : {
493 : 2663 : for (auto ddr : ddrs)
494 : 1682 : dump_data_dependence_relation (file, ddr);
495 : 337 : }
496 : :
497 : : DEBUG_FUNCTION void
498 : 0 : debug (vec<ddr_p> &ref)
499 : : {
500 : 0 : dump_data_dependence_relations (stderr, ref);
501 : 0 : }
502 : :
503 : : DEBUG_FUNCTION void
504 : 0 : debug (vec<ddr_p> *ptr)
505 : : {
506 : 0 : if (ptr)
507 : 0 : debug (*ptr);
508 : : else
509 : 0 : fprintf (stderr, "<nil>\n");
510 : 0 : }
511 : :
512 : :
513 : : /* Dump to STDERR all the dependence relations from DDRS. */
514 : :
515 : : DEBUG_FUNCTION void
516 : 0 : debug_data_dependence_relations (vec<ddr_p> ddrs)
517 : : {
518 : 0 : dump_data_dependence_relations (stderr, ddrs);
519 : 0 : }
520 : :
521 : : /* Dumps the distance and direction vectors in FILE. DDRS contains
522 : : the dependence relations, and VECT_SIZE is the size of the
523 : : dependence vectors, or in other words the number of loops in the
524 : : considered nest. */
525 : :
526 : : DEBUG_FUNCTION void
527 : 0 : dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
528 : : {
529 : 0 : for (data_dependence_relation *ddr : ddrs)
530 : 0 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
531 : : {
532 : 0 : for (lambda_vector v : DDR_DIST_VECTS (ddr))
533 : : {
534 : 0 : fprintf (file, "DISTANCE_V (");
535 : 0 : print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
536 : 0 : fprintf (file, ")\n");
537 : : }
538 : :
539 : 0 : for (lambda_vector v : DDR_DIR_VECTS (ddr))
540 : : {
541 : 0 : fprintf (file, "DIRECTION_V (");
542 : 0 : print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
543 : 0 : fprintf (file, ")\n");
544 : : }
545 : : }
546 : :
547 : 0 : fprintf (file, "\n\n");
548 : 0 : }
549 : :
550 : : /* Dumps the data dependence relations DDRS in FILE. */
551 : :
552 : : DEBUG_FUNCTION void
553 : 0 : dump_ddrs (FILE *file, vec<ddr_p> ddrs)
554 : : {
555 : 0 : for (data_dependence_relation *ddr : ddrs)
556 : 0 : dump_data_dependence_relation (file, ddr);
557 : :
558 : 0 : fprintf (file, "\n\n");
559 : 0 : }
560 : :
561 : : DEBUG_FUNCTION void
562 : 0 : debug_ddrs (vec<ddr_p> ddrs)
563 : : {
564 : 0 : dump_ddrs (stderr, ddrs);
565 : 0 : }
566 : :
567 : : /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
568 : : OP0 CODE OP1, where:
569 : :
570 : : - OP0 CODE OP1 has integral type TYPE
571 : : - the range of OP0 is given by OP0_RANGE and
572 : : - the range of OP1 is given by OP1_RANGE.
573 : :
574 : : Independently of RESULT_RANGE, try to compute:
575 : :
576 : : DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
577 : : - (sizetype) (OP0 CODE OP1)
578 : :
579 : : as a constant and subtract DELTA from the ssizetype constant in *OFF.
580 : : Return true on success, or false if DELTA is not known at compile time.
581 : :
582 : : Truncation and sign changes are known to distribute over CODE, i.e.
583 : :
584 : : (itype) (A CODE B) == (itype) A CODE (itype) B
585 : :
586 : : for any integral type ITYPE whose precision is no greater than the
587 : : precision of A and B. */
588 : :
589 : : static bool
590 : 3687592 : compute_distributive_range (tree type, value_range &op0_range,
591 : : tree_code code, value_range &op1_range,
592 : : tree *off, value_range *result_range)
593 : : {
594 : 3687592 : gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
595 : 3687592 : if (result_range)
596 : : {
597 : 978411 : range_op_handler op (code);
598 : 978411 : if (!op.fold_range (*result_range, type, op0_range, op1_range))
599 : 0 : result_range->set_varying (type);
600 : : }
601 : :
602 : : /* The distributive property guarantees that if TYPE is no narrower
603 : : than SIZETYPE,
604 : :
605 : : (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
606 : :
607 : : and so we can treat DELTA as zero. */
608 : 3687592 : if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
609 : : return true;
610 : :
611 : : /* If overflow is undefined, we can assume that:
612 : :
613 : : X == (ssizetype) OP0 CODE (ssizetype) OP1
614 : :
615 : : is within the range of TYPE, i.e.:
616 : :
617 : : X == (ssizetype) (TYPE) X
618 : :
619 : : Distributing the (TYPE) truncation over X gives:
620 : :
621 : : X == (ssizetype) (OP0 CODE OP1)
622 : :
623 : : Casting both sides to sizetype and distributing the sizetype cast
624 : : over X gives:
625 : :
626 : : (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
627 : :
628 : : and so we can treat DELTA as zero. */
629 : 223901 : if (TYPE_OVERFLOW_UNDEFINED (type))
630 : : return true;
631 : :
632 : : /* Compute the range of:
633 : :
634 : : (ssizetype) OP0 CODE (ssizetype) OP1
635 : :
636 : : The distributive property guarantees that this has the same bitpattern as:
637 : :
638 : : (sizetype) OP0 CODE (sizetype) OP1
639 : :
640 : : but its range is more conducive to analysis. */
641 : 65365 : range_cast (op0_range, ssizetype);
642 : 65365 : range_cast (op1_range, ssizetype);
643 : 65365 : value_range wide_range;
644 : 65365 : range_op_handler op (code);
645 : 65365 : bool saved_flag_wrapv = flag_wrapv;
646 : 65365 : flag_wrapv = 1;
647 : 65365 : if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
648 : 0 : wide_range.set_varying (ssizetype);;
649 : 65365 : flag_wrapv = saved_flag_wrapv;
650 : 65365 : if (wide_range.num_pairs () != 1
651 : 65365 : || wide_range.varying_p () || wide_range.undefined_p ())
652 : : return false;
653 : :
654 : 65197 : wide_int lb = wide_range.lower_bound ();
655 : 65197 : wide_int ub = wide_range.upper_bound ();
656 : :
657 : : /* Calculate the number of times that each end of the range overflows or
658 : : underflows TYPE. We can only calculate DELTA if the numbers match. */
659 : 65197 : unsigned int precision = TYPE_PRECISION (type);
660 : 65197 : if (!TYPE_UNSIGNED (type))
661 : : {
662 : 191 : wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
663 : 191 : lb -= type_min;
664 : 191 : ub -= type_min;
665 : 191 : }
666 : 65197 : wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
667 : 65197 : lb &= upper_bits;
668 : 65197 : ub &= upper_bits;
669 : 65197 : if (lb != ub)
670 : : return false;
671 : :
672 : : /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
673 : : negative values indicating underflow. The low PRECISION bits of LB
674 : : are clear, so DELTA is therefore LB (== UB). */
675 : 17152 : *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
676 : 17152 : return true;
677 : 65365 : }
678 : :
679 : : /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
680 : : given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and
681 : : FROM_TYPE are integral types. */
682 : :
683 : : static bool
684 : 2445027 : nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
685 : : {
686 : 2445027 : gcc_assert (INTEGRAL_TYPE_P (to_type)
687 : : && INTEGRAL_TYPE_P (from_type)
688 : : && !TYPE_OVERFLOW_TRAPS (to_type)
689 : : && !TYPE_OVERFLOW_TRAPS (from_type));
690 : :
691 : : /* Converting to something no narrower than sizetype and then to sizetype
692 : : is equivalent to converting directly to sizetype. */
693 : 2445027 : if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
694 : : return true;
695 : :
696 : : /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */
697 : 75687 : if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
698 : 75687 : && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
699 : : return true;
700 : :
701 : : /* For narrowing conversions, we could in principle test whether
702 : : the bits in FROM_TYPE but not in TO_TYPE have a fixed value
703 : : and apply a constant adjustment.
704 : :
705 : : For other conversions (which involve a sign change) we could
706 : : check that the signs are always equal, and apply a constant
707 : : adjustment if the signs are negative.
708 : :
709 : : However, both cases should be rare. */
710 : 60588 : return range_fits_type_p (&range, TYPE_PRECISION (to_type),
711 : 121176 : TYPE_SIGN (to_type));
712 : : }
713 : :
/* Forward declaration of the overload that split_constant_offset_1
   below passes TYPE, RESULT_RANGE, CACHE and LIMIT through to; its
   definition is not in this part of the file.  */
714 : : static void
715 : : split_constant_offset (tree type, tree *var, tree *off,
716 : : value_range *result_range,
717 : : hash_map<tree, std::pair<tree, tree> > &cache,
718 : : unsigned *limit);
719 : :
720 : : /* Helper function for split_constant_offset. If TYPE is a pointer type,
721 : : try to express OP0 CODE OP1 as:
722 : :
723 : : POINTER_PLUS <*VAR, (sizetype) *OFF>
724 : :
725 : : where:
726 : :
727 : : - *VAR has type TYPE
728 : : - *OFF is a constant of type ssizetype.
729 : :
730 : : If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
731 : :
732 : : *VAR + (sizetype) *OFF
733 : :
734 : : where:
735 : :
736 : : - *VAR has type sizetype
737 : : - *OFF is a constant of type ssizetype.
738 : :
739 : : In both cases, OP0 CODE OP1 has type TYPE.
740 : :
741 : : Return true on success. A false return value indicates that we can't
742 : : do better than set *OFF to zero.
743 : :
744 : : When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
745 : : if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
746 : :
747 : : CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
748 : : visited. LIMIT counts down the number of SSA names that we are
749 : : allowed to process before giving up. */
750 : :
751 : : static bool
752 : 51612755 : split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
753 : : tree *var, tree *off, value_range *result_range,
754 : : hash_map<tree, std::pair<tree, tree> > &cache,
755 : : unsigned *limit)
756 : : {
757 : 51612755 : tree var0, var1;
758 : 51612755 : tree off0, off1;
759 : 51612755 : value_range op0_range, op1_range;
760 : :
/* Start with both outputs cleared; the cases below fill them in.  */
761 : 51612755 : *var = NULL_TREE;
762 : 51612755 : *off = NULL_TREE;
763 : :
/* Integral types whose overflow traps are not handled at all.  */
764 : 51612755 : if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
765 : : return false;
766 : :
767 : 51612239 : switch (code)
768 : : {
/* A constant is all offset: the variable part is zero and, if a range
   was requested, it is the single value OP0.  */
769 : 15779042 : case INTEGER_CST:
770 : 15779042 : *var = size_int (0);
771 : 15779042 : *off = fold_convert (ssizetype, op0);
772 : 15779042 : if (result_range)
773 : : {
774 : 998473 : wide_int w = wi::to_wide (op0);
775 : 998473 : result_range->set (TREE_TYPE (op0), w, w);
776 : 998473 : }
777 : : return true;
778 : :
/* Split both operands, then recombine the variable parts and the
   offsets separately.  No operand ranges are requested here.  */
779 : 2172446 : case POINTER_PLUS_EXPR:
780 : 2172446 : split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
781 : 2172446 : split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
782 : 2172446 : *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
783 : 2172446 : *off = size_binop (PLUS_EXPR, off0, off1);
784 : 2172446 : return true;
785 : :
/* Combine the operand offsets with CODE, then let
   compute_distributive_range adjust *OFF or report failure.  */
786 : 1821404 : case PLUS_EXPR:
787 : 1821404 : case MINUS_EXPR:
788 : 1821404 : split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
789 : 1821404 : split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
790 : 1821404 : *off = size_binop (code, off0, off1);
791 : 1821404 : if (!compute_distributive_range (type, op0_range, code, op1_range,
792 : : off, result_range))
793 : : return false;
794 : 1774240 : *var = fold_build2 (code, sizetype, var0, var1);
795 : 1774240 : return true;
796 : :
/* Only multiplication by an INTEGER_CST is handled; the constant's
   range is the single value OP1.  */
797 : 2309470 : case MULT_EXPR:
798 : 2309470 : if (TREE_CODE (op1) != INTEGER_CST)
799 : : return false;
800 : :
801 : 1866188 : split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
802 : 1866188 : op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
803 : 1866188 : *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
804 : 1866188 : if (!compute_distributive_range (type, op0_range, code, op1_range,
805 : : off, result_range))
806 : : return false;
807 : 1865139 : *var = fold_build2 (MULT_EXPR, sizetype, var0,
808 : : fold_convert (sizetype, op1));
809 : 1865139 : return true;
810 : :
/* Decompose the address operand with get_inner_reference: the base
   address becomes the variable part and the byte position (plus the
   constant part of any variable offset) becomes the offset.  */
811 : 9476626 : case ADDR_EXPR:
812 : 9476626 : {
813 : 9476626 : tree base, poffset;
814 : 9476626 : poly_int64 pbitsize, pbitpos, pbytepos;
815 : 9476626 : machine_mode pmode;
816 : 9476626 : int punsignedp, preversep, pvolatilep;
817 : :
818 : 9476626 : op0 = TREE_OPERAND (op0, 0);
819 : 9476626 : base
820 : 9476626 : = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
821 : : &punsignedp, &preversep, &pvolatilep);
822 : :
/* The bit position must be a whole number of bytes.  */
823 : 9499649 : if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
824 : : return false;
825 : 9476626 : base = build_fold_addr_expr (base);
826 : 9476626 : off0 = ssize_int (pbytepos);
827 : :
828 : 9476626 : if (poffset)
829 : : {
830 : 70 : split_constant_offset (poffset, &poffset, &off1, nullptr,
831 : : cache, limit);
832 : 70 : off0 = size_binop (PLUS_EXPR, off0, off1);
833 : 70 : base = fold_build_pointer_plus (base, poffset);
834 : : }
835 : :
836 : 9476626 : var0 = fold_convert (type, base);
837 : :
838 : : /* If variable length types are involved, punt, otherwise casts
839 : : might be converted into ARRAY_REFs in gimplify_conversion.
840 : : To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
841 : : possibly no longer appears in current GIMPLE, might resurface.
842 : : This perhaps could run
843 : : if (CONVERT_EXPR_P (var0))
844 : : {
845 : : gimplify_conversion (&var0);
846 : : // Attempt to fill in any within var0 found ARRAY_REF's
847 : : // element size from corresponding op embedded ARRAY_REF,
848 : : // if unsuccessful, just punt.
849 : : } */
/* Strip every pointer level from TYPE before testing its size.  */
850 : 19298431 : while (POINTER_TYPE_P (type))
851 : 9821805 : type = TREE_TYPE (type);
852 : 9476626 : if (int_size_in_bytes (type) < 0)
853 : : return false;
854 : :
855 : 9453603 : *var = var0;
856 : 9453603 : *off = off0;
857 : 9453603 : return true;
858 : : }
859 : :
/* Look through the statement that defines OP0 and split its right-hand
   side instead.  Only GIMPLE_ASSIGN definitions are followed.  */
860 : 13496487 : case SSA_NAME:
861 : 13496487 : {
862 : 13496487 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
863 : : return false;
864 : :
865 : 13496234 : gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
866 : 13496234 : enum tree_code subcode;
867 : :
868 : 13496234 : if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
869 : : return false;
870 : :
871 : 7479536 : subcode = gimple_assign_rhs_code (def_stmt);
872 : :
873 : : /* We are using a cache to avoid un-CSEing large amounts of code. */
874 : 7479536 : bool use_cache = false;
875 : 7479536 : if (!has_single_use (op0)
876 : 7479536 : && (subcode == POINTER_PLUS_EXPR
877 : 3860616 : || subcode == PLUS_EXPR
878 : : || subcode == MINUS_EXPR
879 : : || subcode == MULT_EXPR
880 : : || subcode == ADDR_EXPR
881 : : || CONVERT_EXPR_CODE_P (subcode)))
882 : : {
883 : 1856640 : use_cache = true;
884 : 1856640 : bool existed;
885 : 1856640 : std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
886 : 1856640 : if (existed)
887 : : {
/* A cached zero offset is treated as failure.  That is also the value
   stored just below (paired with OP0 itself) before OP0's definition is
   processed.  */
888 : 33700 : if (integer_zerop (e.second))
889 : 33700 : return false;
890 : 1538 : *var = e.first;
891 : 1538 : *off = e.second;
892 : : /* The caller sets the range in this case. */
893 : 1538 : return true;
894 : : }
895 : 1822940 : e = std::make_pair (op0, ssize_int (0));
896 : : }
897 : :
/* Give up once the budget of SSA names is exhausted.  */
898 : 7445836 : if (*limit == 0)
899 : : return false;
900 : 7444802 : --*limit;
901 : :
902 : 7444802 : var0 = gimple_assign_rhs1 (def_stmt);
903 : 7444802 : var1 = gimple_assign_rhs2 (def_stmt);
904 : :
905 : 7444802 : bool res = split_constant_offset_1 (type, var0, subcode, var1,
906 : : var, off, nullptr, cache, limit);
/* The entry is looked up again rather than through E.
   NOTE(review): presumably because the recursive call can insert into
   CACHE and invalidate the reference -- confirm.  */
907 : 7444802 : if (res && use_cache)
908 : 1625391 : *cache.get (op0) = std::make_pair (*var, *off);
909 : : /* The caller sets the range in this case. */
910 : : return res;
911 : : }
912 : 3458944 : CASE_CONVERT:
913 : 3458944 : {
914 : : /* We can only handle the following conversions:
915 : :
916 : : - Conversions from one pointer type to another pointer type.
917 : :
918 : : - Conversions from one non-trapping integral type to another
919 : : non-trapping integral type. In this case, the recursive
920 : : call makes sure that:
921 : :
922 : : (sizetype) OP0
923 : :
924 : : can be expressed as a sizetype operation involving VAR and OFF,
925 : : and all we need to do is check whether:
926 : :
927 : : (sizetype) OP0 == (sizetype) (TYPE) OP0
928 : :
929 : : - Conversions from a non-trapping sizetype-size integral type to
930 : : a like-sized pointer type. In this case, the recursive call
931 : : makes sure that:
932 : :
933 : : (sizetype) OP0 == *VAR + (sizetype) *OFF
934 : :
935 : : and we can convert that to:
936 : :
937 : : POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
938 : :
939 : : - Conversions from a sizetype-sized pointer type to a like-sized
940 : : non-trapping integral type. In this case, the recursive call
941 : : makes sure that:
942 : :
943 : : OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
944 : :
945 : : where the POINTER_PLUS and *VAR have the same precision as
946 : : TYPE (and the same precision as sizetype). Then:
947 : :
948 : : (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF. */
949 : 3458944 : tree itype = TREE_TYPE (op0);
950 : 3458944 : if ((POINTER_TYPE_P (itype)
951 : 2534534 : || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
952 : 3458632 : && (POINTER_TYPE_P (type)
953 : 2481598 : || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
954 : 6917576 : && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
955 : 125766 : || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
956 : 125766 : && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
957 : : {
/* Result is a pointer: the variable part is converted to TYPE.  */
958 : 3458621 : if (POINTER_TYPE_P (type))
959 : : {
960 : 977023 : split_constant_offset (op0, var, off, nullptr, cache, limit);
961 : 977023 : *var = fold_convert (type, *var);
962 : : }
/* Pointer to integer: the variable part is converted to sizetype.  */
963 : 2481598 : else if (POINTER_TYPE_P (itype))
964 : : {
965 : 36571 : split_constant_offset (op0, var, off, nullptr, cache, limit);
966 : 36571 : *var = fold_convert (sizetype, *var);
967 : : }
/* Integer to integer: this needs the operand's range to check that the
   conversion does not change the sizetype value.  */
968 : : else
969 : : {
970 : 2445027 : split_constant_offset (op0, var, off, &op0_range,
971 : : cache, limit);
972 : 2445027 : if (!nop_conversion_for_offset_p (type, itype, op0_range))
973 : : return false;
974 : 2399345 : if (result_range)
975 : : {
976 : 1324039 : *result_range = op0_range;
977 : 1324039 : range_cast (*result_range, type);
978 : : }
979 : : }
980 : 3412939 : return true;
981 : : }
982 : : return false;
983 : : }
984 : :
985 : : default:
986 : : return false;
987 : : }
988 : 51612755 : }
989 : :
990 : : /* If EXP has pointer type, try to express it as:
991 : :
992 : : POINTER_PLUS <*VAR, (sizetype) *OFF>
993 : :
994 : : where:
995 : :
996 : : - *VAR has the same type as EXP
997 : : - *OFF is a constant of type ssizetype.
998 : :
999 : : If EXP has an integral type, try to express (sizetype) EXP as:
1000 : :
1001 : : *VAR + (sizetype) *OFF
1002 : :
1003 : : where:
1004 : :
1005 : : - *VAR has type sizetype
1006 : : - *OFF is a constant of type ssizetype.
1007 : :
1008 : : If EXP_RANGE is nonnull, set it to the range of EXP.
1009 : :
1010 : : CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1011 : : visited. LIMIT counts down the number of SSA names that we are
1012 : : allowed to process before giving up.

   The function always produces a result: when split_constant_offset_1
   is not tried or returns false, *VAR is set to EXP itself (converted
   to sizetype if EXP has integral type) and *OFF is set to zero.  */
1013 : :
1014 : : static void
1015 : 44168023 : split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1016 : : hash_map<tree, std::pair<tree, tree> > &cache,
1017 : : unsigned *limit)
1018 : : {
1019 : 44168023 : tree type = TREE_TYPE (exp), op0, op1;
1020 : 44168023 : enum tree_code code;
1021 : :
1022 : 44168023 : code = TREE_CODE (exp);
        /* Phase 1: if the caller wants a range, start from TYPE
           (presumably the full range of TYPE -- confirm against
           value_range) and refine it for SSA names using the range query
           and the known nonzero bits.  */
1023 : 44168023 : if (exp_range)
1024 : : {
1025 : 7954023 : *exp_range = type;
1026 : 7954023 : if (code == SSA_NAME)
1027 : : {
1028 : 4596970 : value_range vr;
1029 : 9193940 : get_range_query (cfun)->range_of_expr (vr, exp);
1030 : 4596970 : if (vr.undefined_p ())
1031 : 4569 : vr.set_varying (TREE_TYPE (exp));
1032 : 4596970 : tree vr_min, vr_max;
1033 : 4596970 : value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
1034 : 4596970 : wide_int var_min = wi::to_wide (vr_min);
1035 : 4596970 : wide_int var_max = wi::to_wide (vr_max);
1036 : 4596970 : wide_int var_nonzero = get_nonzero_bits (exp);
1037 : 13790910 : vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1038 : : &var_min, &var_max,
1039 : : var_nonzero,
1040 : 4596970 : TYPE_SIGN (type));
1041 : : /* This check for VR_VARYING is here because the old code
1042 : : using get_range_info would return VR_RANGE for the entire
1043 : : domain, instead of VR_VARYING. The new code normalizes
1044 : : full-domain ranges to VR_VARYING. */
1045 : 4596970 : if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1046 : 4473436 : *exp_range = value_range (type, var_min, var_max);
1047 : 4596970 : }
1048 : : }
1049 : :
        /* Phase 2: try the real decomposition.  Chrecs and expressions
           whose code is a GIMPLE ternary right-hand side are not handed
           to split_constant_offset_1 and go straight to the fallback.  */
1050 : 44168023 : if (!tree_is_chrec (exp)
1051 : 44168018 : && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1052 : : {
1053 : 44167953 : extract_ops_from_tree (exp, &code, &op0, &op1);
1054 : 44167953 : if (split_constant_offset_1 (type, op0, code, op1, var, off,
1055 : : exp_range, cache, limit))
1056 : 34458947 : return;
1057 : : }
1058 : :
        /* Phase 3 (fallback): nothing could be split off, so EXP is the
           variable part and the constant offset is zero.  */
1059 : 9709076 : *var = exp;
1060 : 9709076 : if (INTEGRAL_TYPE_P (type))
1061 : 3102832 : *var = fold_convert (sizetype, *var);
1062 : 9709076 : *off = ssize_int (0);
1063 : :
        /* SSA names already had their range computed in phase 1; for any
           other CODE ask the range query and keep the answer unless the
           query fails or the result is undefined.  */
1064 : 9709076 : value_range r;
1065 : 2806695 : if (exp_range && code != SSA_NAME
1066 : 118902 : && get_range_query (cfun)->range_of_expr (r, exp)
1067 : 9768527 : && !r.undefined_p ())
1068 : 59451 : *exp_range = r;
1069 : 9709076 : }
1070 : :
1071 : : /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same
1072 : : type as EXP while OFF has type ssizetype.

   This is the public entry point.  It runs the worker above with no
   range output, a def-chain budget taken from
   param_ssa_name_def_chain_limit, and a function-local static cache
   that is allocated on first use and emptied again before returning,
   so no cached pairs survive from one call to the next.  The final
   fold_convert restores EXP's type on *VAR, since the worker may have
   produced a sizetype value for integral EXP.  */
1073 : :
1074 : : void
1075 : 30855444 : split_constant_offset (tree exp, tree *var, tree *off)
1076 : : {
1077 : 30855444 : unsigned limit = param_ssa_name_def_chain_limit;
1078 : 30855444 : static hash_map<tree, std::pair<tree, tree> > *cache;
1079 : 30855444 : if (!cache)
1080 : 74897 : cache = new hash_map<tree, std::pair<tree, tree> > (37);
1081 : 30855444 : split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1082 : 30855444 : *var = fold_convert (TREE_TYPE (exp), *var);
1083 : 30855444 : cache->empty ();
1084 : 30855444 : }
1085 : :
1086 : : /* Returns the address ADDR of an object in a canonical shape (without nop
1087 : : casts, and with type of pointer to the object).

   Three outcomes are possible after stripping nops:
   - the stripped value is not of pointer type: the original ADDR is
     returned unchanged, cast included;
   - the stripped value is a pointer but not an ADDR_EXPR: the stripped
     value is returned;
   - the stripped value is an ADDR_EXPR: the address is rebuilt from its
     operand with build_fold_addr_expr.  */
1088 : :
1089 : : static tree
1090 : 14455709 : canonicalize_base_object_address (tree addr)
1091 : : {
1092 : 14455709 : tree orig = addr;
1093 : :
1094 : 14455709 : STRIP_NOPS (addr);
1095 : :
1096 : : /* The base address may be obtained by casting from integer, in that case
1097 : : keep the cast. */
1098 : 14455709 : if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1099 : : return orig;
1100 : :
1101 : 14393841 : if (TREE_CODE (addr) != ADDR_EXPR)
1102 : : return addr;
1103 : :
1104 : 8924781 : return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1105 : : }
1106 : :
1107 : : /* Analyze the behavior of memory reference REF within STMT.
1108 : : There are two modes:
1109 : :
1110 : : - BB analysis. In this case we simply split the address into base,
1111 : : init and offset components, without reference to any containing loop.
1112 : : The resulting base and offset are general expressions and they can
1113 : : vary arbitrarily from one iteration of the containing loop to the next.
1114 : : The step is always zero.
1115 : :
1116 : : - loop analysis. In this case we analyze the reference both wrt LOOP
1117 : : and on the basis that the reference occurs (is "used") in LOOP;
1118 : : see the comment above analyze_scalar_evolution_in_loop for more
1119 : : information about this distinction. The base, init, offset and
1120 : : step fields are all invariant in LOOP.
1121 : :
1122 : : Perform BB analysis if LOOP is null, or if LOOP is the function's
1123 : : dummy outermost loop. In other cases perform loop analysis.
1124 : :
1125 : : Return a successful opt_result if the analysis succeeded and store the
1126 : : results in DRB if so. Otherwise return a failure located at STMT; DRB
   is not written on any failure path.  BB analysis can only fail for
   accesses at a non-byte-aligned bit position, reversed-storage
   accesses, or accesses whose base may be nonaddressable; loop analysis
   can additionally fail when the base or the offset does not have an
   affine evolution in LOOP. */
1127 : :
1128 : : opt_result
1129 : 14846002 : dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1130 : : class loop *loop, const gimple *stmt)
1131 : : {
1132 : 14846002 : poly_int64 pbitsize, pbitpos;
1133 : 14846002 : tree base, poffset;
1134 : 14846002 : machine_mode pmode;
1135 : 14846002 : int punsignedp, preversep, pvolatilep;
1136 : 14846002 : affine_iv base_iv, offset_iv;
1137 : 14846002 : tree init, dinit, step;
        /* Loop analysis is selected only for a non-null LOOP with a
           nonzero loop number.  */
1138 : 14846002 : bool in_loop = (loop && loop->num);
1139 : :
1140 : 14846002 : if (dump_file && (dump_flags & TDF_DETAILS))
1141 : 60960 : fprintf (dump_file, "analyze_innermost: ");
1142 : :
1143 : 14846002 : base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1144 : : &punsignedp, &preversep, &pvolatilep);
1145 : 14846002 : gcc_assert (base != NULL_TREE);
1146 : :
        /* The bit position must be a whole number of bytes; PBYTEPOS
           receives the byte position when it is.  */
1147 : 14846002 : poly_int64 pbytepos;
1148 : 14846002 : if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1149 : 29438 : return opt_result::failure_at (stmt,
1150 : : "failed: bit offset alignment.\n");
1151 : :
1152 : 14816564 : if (preversep)
1153 : 417 : return opt_result::failure_at (stmt,
1154 : : "failed: reverse storage order.\n");
1155 : :
1156 : : /* Calculate the alignment and misalignment for the inner reference. */
1157 : 14816147 : unsigned int HOST_WIDE_INT bit_base_misalignment;
1158 : 14816147 : unsigned int bit_base_alignment;
1159 : 14816147 : get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1160 : :
1161 : : /* There are no bitfield references remaining in BASE, so the values
1162 : : we got back must be whole bytes. */
1163 : 14816147 : gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1164 : : && bit_base_misalignment % BITS_PER_UNIT == 0);
1165 : 14816147 : unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1166 : 14816147 : poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1167 : :
        /* Turn BASE from an object into an address: for a MEM_REF take
           its pointer operand (folding the MEM_REF offset into POFFSET),
           otherwise take the address of the object itself.  */
1168 : 14816147 : if (TREE_CODE (base) == MEM_REF)
1169 : : {
1170 : 5947670 : if (!integer_zerop (TREE_OPERAND (base, 1)))
1171 : : {
1172 : : /* Subtract MOFF from the base and add it to POFFSET instead.
1173 : : Adjust the misalignment to reflect the amount we subtracted. */
1174 : 999843 : poly_offset_int moff = mem_ref_offset (base);
1175 : 999843 : base_misalignment -= moff.force_shwi ();
1176 : 999843 : tree mofft = wide_int_to_tree (sizetype, moff);
1177 : 999843 : if (!poffset)
1178 : 991330 : poffset = mofft;
1179 : : else
1180 : 8513 : poffset = size_binop (PLUS_EXPR, poffset, mofft);
1181 : : }
1182 : 5947670 : base = TREE_OPERAND (base, 0);
1183 : : }
1184 : : else
1185 : : {
        /* This failure is independent of IN_LOOP, so it can also occur
           in BB analysis.  */
1186 : 8868477 : if (may_be_nonaddressable_p (base))
1187 : 2044 : return opt_result::failure_at (stmt,
1188 : : "failed: base not addressable.\n");
1189 : 8866433 : base = build_fold_addr_expr (base);
1190 : : }
1191 : :
        /* Loop analysis requires simple_iv to describe the base as an
           affine induction variable of LOOP; BB analysis takes the base
           as-is with a zero step.  */
1192 : 14814103 : if (in_loop)
1193 : : {
1194 : 2621030 : if (!simple_iv (loop, loop, base, &base_iv, true))
1195 : 279687 : return opt_result::failure_at
1196 : 279687 : (stmt, "failed: evolution of base is not affine.\n");
1197 : : }
1198 : : else
1199 : : {
1200 : 12193073 : base_iv.base = base;
1201 : 12193073 : base_iv.step = ssize_int (0);
1202 : 12193073 : base_iv.no_overflow = true;
1203 : : }
1204 : :
        /* Same treatment for the variable offset; a missing offset is
           simply zero with zero step.  */
1205 : 14534416 : if (!poffset)
1206 : : {
1207 : 12179752 : offset_iv.base = ssize_int (0);
1208 : 12179752 : offset_iv.step = ssize_int (0);
1209 : : }
1210 : : else
1211 : : {
1212 : 2354664 : if (!in_loop)
1213 : : {
1214 : 1263062 : offset_iv.base = poffset;
1215 : 1263062 : offset_iv.step = ssize_int (0);
1216 : : }
1217 : 1091602 : else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1218 : 78707 : return opt_result::failure_at
1219 : 78707 : (stmt, "failed: evolution of offset is not affine.\n");
1220 : : }
1221 : :
        /* INIT accumulates every constant byte offset, starting with the
           byte position reported by get_inner_reference.  */
1222 : 14455709 : init = ssize_int (pbytepos);
1223 : :
1224 : : /* Subtract any constant component from the base and add it to INIT instead.
1225 : : Adjust the misalignment to reflect the amount we subtracted. */
1226 : 14455709 : split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1227 : 14455709 : init = size_binop (PLUS_EXPR, init, dinit);
1228 : 14455709 : base_misalignment -= TREE_INT_CST_LOW (dinit);
1229 : :
        /* Likewise move the constant part of the offset into INIT.  Unlike
           the base case above, BASE_MISALIGNMENT is not adjusted here.  */
1230 : 14455709 : split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1231 : 14455709 : init = size_binop (PLUS_EXPR, init, dinit);
1232 : :
        /* The overall step is the sum of the base and offset steps, both
           converted to ssizetype.  */
1233 : 14455709 : step = size_binop (PLUS_EXPR,
1234 : : fold_convert (ssizetype, base_iv.step),
1235 : : fold_convert (ssizetype, offset_iv.step));
1236 : :
1237 : 14455709 : base = canonicalize_base_object_address (base_iv.base);
1238 : :
1239 : : /* See if get_pointer_alignment can guarantee a higher alignment than
1240 : : the one we calculated above. */
1241 : 14455709 : unsigned int HOST_WIDE_INT alt_misalignment;
1242 : 14455709 : unsigned int alt_alignment;
1243 : 14455709 : get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1244 : :
1245 : : /* As above, these values must be whole bytes. */
1246 : 14455709 : gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1247 : : && alt_misalignment % BITS_PER_UNIT == 0);
1248 : 14455709 : alt_alignment /= BITS_PER_UNIT;
1249 : 14455709 : alt_misalignment /= BITS_PER_UNIT;
1250 : :
1251 : 14455709 : if (base_alignment < alt_alignment)
1252 : : {
1253 : 109542 : base_alignment = alt_alignment;
1254 : 109542 : base_misalignment = alt_misalignment;
1255 : : }
1256 : :
        /* All checks passed: publish the results.  */
1257 : 14455709 : drb->base_address = base;
1258 : 14455709 : drb->offset = fold_convert (ssizetype, offset_iv.base);
1259 : 14455709 : drb->init = init;
1260 : 14455709 : drb->step = step;
        /* When known_misalignment cannot produce a misalignment for
           BASE_ALIGNMENT, fall back to the alignment that known_alignment
           derives from BASE_MISALIGNMENT and record a misalignment of
           zero.  */
1261 : 14455709 : if (known_misalignment (base_misalignment, base_alignment,
1262 : : &drb->base_misalignment))
1263 : 14455709 : drb->base_alignment = base_alignment;
1264 : : else
1265 : : {
1266 : 0 : drb->base_alignment = known_alignment (base_misalignment);
1267 : 0 : drb->base_misalignment = 0;
1268 : : }
1269 : 14455709 : drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1270 : 14455709 : drb->step_alignment = highest_pow2_factor (step);
1271 : :
1272 : 14455709 : if (dump_file && (dump_flags & TDF_DETAILS))
1273 : 57532 : fprintf (dump_file, "success.\n");
1274 : :
1275 : 14455709 : return opt_result::success ();
1276 : : }
1277 : :
1278 : : /* Return true if OP is a valid component reference for a DR access
1279 : : function. This accepts a subset of what handled_component_p accepts.

   REALPART_EXPR, IMAGPART_EXPR and ARRAY_REF are always accepted.  A
   COMPONENT_REF is accepted only when the object it selects from has
   RECORD_TYPE.  Every other code is rejected.  */
1280 : :
1281 : : static bool
1282 : 22159626 : access_fn_component_p (tree op)
1283 : : {
1284 : 22159626 : switch (TREE_CODE (op))
1285 : : {
1286 : : case REALPART_EXPR:
1287 : : case IMAGPART_EXPR:
1288 : : case ARRAY_REF:
1289 : : return true;
1290 : :
1291 : 14721368 : case COMPONENT_REF:
1292 : 14721368 : return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1293 : :
1294 : 0 : default:
1295 : 0 : return false;
1296 : : }
1297 : : }
1298 : :
1299 : : /* Returns whether BASE can have an access_fn_component_p with BASE
1300 : : as base. This is decided purely on the type of BASE: true for
   COMPLEX_TYPE, ARRAY_TYPE and RECORD_TYPE, false for anything else. */
1301 : :
1302 : : static bool
1303 : 314266 : base_supports_access_fn_components_p (tree base)
1304 : : {
1305 : 314266 : switch (TREE_CODE (TREE_TYPE (base)))
1306 : : {
1307 : : case COMPLEX_TYPE:
1308 : : case ARRAY_TYPE:
1309 : : case RECORD_TYPE:
1310 : : return true;
1311 : 303419 : default:
1312 : 303419 : return false;
1313 : : }
1314 : : }
1315 : :
1316 : : /* Determines the base object and the list of indices of memory reference
1317 : : REF, analyzed in LOOP and instantiated before NEST, and stores them in
   DRI->base_object and DRI->access_fns.

   If NEST is null (basic-block analysis) the base object is REF itself
   and the access-function vector is created empty.

   Otherwise the reference is peeled from the outside in.  Each
   ARRAY_REF index and each COMPONENT_REF of a record contributes one
   access function; peeling stops at the first component that is
   neither.  If what remains is a MEM_REF whose address evolves as a
   POLYNOMIAL_CHREC, that evolution is pushed as a further access
   function, the base object is rewritten as a fresh MEM_REF, and
   DRI->unconstrained_base is set.  If what remains is a DECL, it is
   wrapped in a MEM_REF of its address.  */
1318 : :
1319 : : static void
1320 : 14917814 : dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1321 : : {
1322 : : /* If analyzing a basic-block there are no indices to analyze
1323 : : and thus no access functions. */
1324 : 14917814 : if (!nest)
1325 : : {
1326 : 12224259 : dri->base_object = ref;
1327 : 12224259 : dri->access_fns.create (0);
1328 : 12224259 : return;
1329 : : }
1330 : :
1331 : 2693555 : vec<tree> access_fns = vNULL;
1332 : :
1333 : : /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1334 : : into a two element array with a constant index. The base is
1335 : : then just the immediate underlying object. */
1336 : 2693555 : if (TREE_CODE (ref) == REALPART_EXPR)
1337 : : {
1338 : 39551 : ref = TREE_OPERAND (ref, 0);
1339 : 39551 : access_fns.safe_push (integer_zero_node);
1340 : : }
1341 : 2654004 : else if (TREE_CODE (ref) == IMAGPART_EXPR)
1342 : : {
1343 : 39189 : ref = TREE_OPERAND (ref, 0);
1344 : 39189 : access_fns.safe_push (integer_one_node);
1345 : : }
1346 : :
1347 : : /* Analyze access functions of dimensions we know to be independent.
1348 : : The list of component references handled here should be kept in
1349 : : sync with access_fn_component_p. */
1350 : 4928195 : while (handled_component_p (ref))
1351 : : {
1352 : 2345053 : if (TREE_CODE (ref) == ARRAY_REF)
1353 : : {
        /* The access function of an array dimension is the scalar
           evolution of its index, instantiated with respect to NEST.  */
1354 : 1153753 : tree op = TREE_OPERAND (ref, 1);
1355 : 1153753 : tree access_fn = analyze_scalar_evolution (loop, op);
1356 : 1153753 : access_fn = instantiate_scev (nest, loop, access_fn);
1357 : 1153753 : access_fns.safe_push (access_fn);
1358 : : }
1359 : 1191300 : else if (TREE_CODE (ref) == COMPONENT_REF
1360 : 1191300 : && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1361 : : {
1362 : : /* For COMPONENT_REFs of records (but not unions!) use the
1363 : : FIELD_DECL offset as constant access function so we can
1364 : : disambiguate a[i].f1 and a[i].f2. The value pushed is the
   field offset scaled by BITS_PER_UNIT plus DECL_FIELD_BIT_OFFSET,
   computed in bitsizetype. */
1365 : 1080887 : tree off = component_ref_field_offset (ref);
1366 : 1080887 : off = size_binop (PLUS_EXPR,
1367 : : size_binop (MULT_EXPR,
1368 : : fold_convert (bitsizetype, off),
1369 : : bitsize_int (BITS_PER_UNIT)),
1370 : : DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1371 : 1080887 : access_fns.safe_push (off);
1372 : : }
1373 : : else
1374 : : /* If we have an unhandled component we could not translate
1375 : : to an access function stop analyzing. We have determined
1376 : : our base object in this case. */
1377 : : break;
1378 : :
1379 : 2234640 : ref = TREE_OPERAND (ref, 0);
1380 : : }
1381 : :
1382 : : /* If the address operand of a MEM_REF base has an evolution in the
1383 : : analyzed nest, add it as an additional independent access-function. */
1384 : 2693555 : if (TREE_CODE (ref) == MEM_REF)
1385 : : {
1386 : 1804256 : tree op = TREE_OPERAND (ref, 0);
1387 : 1804256 : tree access_fn = analyze_scalar_evolution (loop, op);
1388 : 1804256 : access_fn = instantiate_scev (nest, loop, access_fn);
1389 : 1804256 : STRIP_NOPS (access_fn);
1390 : 1804256 : if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1391 : : {
1392 : 901608 : tree memoff = TREE_OPERAND (ref, 1);
1393 : 901608 : tree base = initial_condition (access_fn);
1394 : 901608 : tree orig_type = TREE_TYPE (base);
1395 : 901608 : STRIP_USELESS_TYPE_CONVERSION (base);
1396 : 901608 : tree off;
1397 : 901608 : split_constant_offset (base, &base, &off);
1398 : 901608 : STRIP_USELESS_TYPE_CONVERSION (base);
1399 : : /* Fold the MEM_REF offset into the evolutions initial
1400 : : value to make more bases comparable. */
1401 : 901608 : if (!integer_zerop (memoff))
1402 : : {
1403 : 70367 : off = size_binop (PLUS_EXPR, off,
1404 : : fold_convert (ssizetype, memoff));
1405 : 70367 : memoff = build_int_cst (TREE_TYPE (memoff), 0);
1406 : : }
1407 : : /* Adjust the offset so it is a multiple of the access type
1408 : : size and thus we separate bases that can possibly be used
1409 : : to produce partial overlaps (which the access_fn machinery
1410 : : cannot handle). */
1411 : 901608 : wide_int rem;
1412 : 901608 : if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1413 : 901472 : && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1414 : 1802897 : && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1415 : 901289 : rem = wi::mod_trunc
1416 : 901289 : (wi::to_wide (off),
1417 : 1802578 : wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1418 : 901289 : SIGNED);
1419 : : else
1420 : : /* If we can't compute the remainder simply force the initial
1421 : : condition to zero. */
1422 : 319 : rem = wi::to_wide (off);
        /* Split OFF: the multiple of the access size stays in the
           evolution's initial condition, the remainder REM becomes the
           new MEM_REF offset.  */
1423 : 901608 : off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1424 : 901608 : memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1425 : : /* And finally replace the initial condition. */
1426 : 1803216 : access_fn = chrec_replace_initial_condition
1427 : 901608 : (access_fn, fold_convert (orig_type, off));
1428 : : /* ??? This is still not a suitable base object for
1429 : : dr_may_alias_p - the base object needs to be an
1430 : : access that covers the object as whole. With
1431 : : an evolution in the pointer this cannot be
1432 : : guaranteed.
1433 : : As a band-aid, mark the access so we can special-case
1434 : : it in dr_may_alias_p. */
1435 : 901608 : tree old = ref;
1436 : 901608 : ref = fold_build2_loc (EXPR_LOCATION (ref),
1437 : 901608 : MEM_REF, TREE_TYPE (ref),
1438 : : base, memoff);
        /* Carry the dependence clique/base over from the MEM_REF that
           was just replaced.  */
1439 : 901608 : MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1440 : 901608 : MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1441 : 901608 : dri->unconstrained_base = true;
1442 : 901608 : access_fns.safe_push (access_fn);
1443 : 901608 : }
1444 : : }
1445 : 889299 : else if (DECL_P (ref))
1446 : : {
1447 : : /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1448 : 778886 : ref = build2 (MEM_REF, TREE_TYPE (ref),
1449 : : build_fold_addr_expr (ref),
1450 : 778886 : build_int_cst (reference_alias_ptr_type (ref), 0));
1451 : : }
1452 : :
1453 : 2693555 : dri->base_object = ref;
1454 : 2693555 : dri->access_fns = access_fns;
1455 : : }
1456 : :
1457 : : /* Extracts the alias analysis information from the memory reference DR.

   The only field written is DR_PTR_INFO, and only when the base address
   of DR_REF is an INDIRECT_REF or MEM_REF whose address operand is an
   SSA_NAME; in that case it receives that name's SSA_NAME_PTR_INFO.
   In every other case DR is left untouched.  */
1458 : :
1459 : : static void
1460 : 14835820 : dr_analyze_alias (struct data_reference *dr)
1461 : : {
1462 : 14835820 : tree ref = DR_REF (dr);
1463 : 14835820 : tree base = get_base_address (ref), addr;
1464 : :
1465 : 14835820 : if (INDIRECT_REF_P (base)
1466 : 14835820 : || TREE_CODE (base) == MEM_REF)
1467 : : {
1468 : 5955612 : addr = TREE_OPERAND (base, 0);
1469 : 5955612 : if (TREE_CODE (addr) == SSA_NAME)
1470 : 5954506 : DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1471 : : }
1472 : 14835820 : }
1473 : :
1474 : : /* Frees data reference DR: releases its access-function vector, releases
   the alternate indices' access-function vector when an alternate base
   object has been recorded, and finally frees DR itself. */
1475 : :
1476 : : void
1477 : 15125860 : free_data_ref (data_reference_p dr)
1478 : : {
1479 : 15125860 : DR_ACCESS_FNS (dr).release ();
1480 : 15125860 : if (dr->alt_indices.base_object)
1481 : 81994 : dr->alt_indices.access_fns.release ();
1482 : 15125860 : free (dr);
1483 : 15125860 : }
1484 : :
1485 : : /* Analyze memory reference MEMREF, which is accessed in STMT.
1486 : : The reference is a read if IS_READ is true, otherwise it is a write.
1487 : : IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1488 : : within STMT, i.e. that it might not occur even if STMT is executed
1489 : : and runs to completion.
1490 : :
1491 : : Return the data_reference description of MEMREF. NEST is the outermost
1492 : : loop in which the reference should be instantiated, LOOP is the loop
1493 : : in which the data reference should be analyzed.

   The returned structure is allocated zero-initialized with XCNEW and a
   non-null pointer is returned unconditionally; release it with
   free_data_ref.  When NEST is null, the innermost-behavior analysis is
   run with a null loop.  */
1494 : :
1495 : : struct data_reference *
1496 : 14835820 : create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1497 : : bool is_read, bool is_conditional_in_stmt)
1498 : : {
1499 : 14835820 : struct data_reference *dr;
1500 : :
1501 : 14835820 : if (dump_file && (dump_flags & TDF_DETAILS))
1502 : : {
1503 : 59745 : fprintf (dump_file, "Creating dr for ");
1504 : 59745 : print_generic_expr (dump_file, memref, TDF_SLIM);
1505 : 59745 : fprintf (dump_file, "\n");
1506 : : }
1507 : :
1508 : 14835820 : dr = XCNEW (struct data_reference);
1509 : 14835820 : DR_STMT (dr) = stmt;
1510 : 14835820 : DR_REF (dr) = memref;
1511 : 14835820 : DR_IS_READ (dr) = is_read;
1512 : 14835820 : DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1513 : :
        /* The opt_result returned by dr_analyze_innermost is not examined
           here; on failure the innermost fields simply keep the zero
           values XCNEW gave them.
           NOTE(review): callers presumably detect failure through those
           zeroed fields -- confirm against the call sites.  */
1514 : 27060079 : dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1515 : : nest != NULL ? loop : NULL, stmt);
1516 : 14835820 : dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1517 : 14835820 : dr_analyze_alias (dr);
1518 : :
        /* Detailed dump of everything that was just computed.  */
1519 : 14835820 : if (dump_file && (dump_flags & TDF_DETAILS))
1520 : : {
1521 : 59745 : unsigned i;
1522 : 59745 : fprintf (dump_file, "\tbase_address: ");
1523 : 59745 : print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1524 : 59745 : fprintf (dump_file, "\n\toffset from base address: ");
1525 : 59745 : print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1526 : 59745 : fprintf (dump_file, "\n\tconstant offset from base address: ");
1527 : 59745 : print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1528 : 59745 : fprintf (dump_file, "\n\tstep: ");
1529 : 59745 : print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1530 : 59745 : fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1531 : 59745 : fprintf (dump_file, "\n\tbase misalignment: %d",
1532 : : DR_BASE_MISALIGNMENT (dr));
1533 : 59745 : fprintf (dump_file, "\n\toffset alignment: %d",
1534 : : DR_OFFSET_ALIGNMENT (dr));
1535 : 59745 : fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1536 : 59745 : fprintf (dump_file, "\n\tbase_object: ");
1537 : 59745 : print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1538 : 59745 : fprintf (dump_file, "\n");
1539 : 270322 : for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1540 : : {
1541 : 53804 : fprintf (dump_file, "\tAccess function %d: ", i);
1542 : 53804 : print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1543 : : }
1544 : : }
1545 : :
1546 : 14835820 : return dr;
1547 : : }
1548 : :
1549 : : /* A helper function computes order between two tree expressions T1 and T2.
1550 : : This is used in comparator functions sorting objects based on the order
1551 : : of tree expressions. The function returns -1, 0, or 1.

   Ordering rules, in the order they are applied:
   - identical pointers compare equal; a null tree sorts before any
     non-null tree;
   - useless type conversions are stripped from both sides first;
   - trees with different codes are ordered by tree code, except that
     two conversion expressions are compared as if their codes matched;
   - INTEGER_CSTs are ordered by value, STRING_CSTs by length and then
     contents, SSA_NAMEs by version, declarations by DECL_UID;
   - other expressions are compared operand by operand, starting from
     the last operand.
   Any tree that is neither one of the above nor an expression hits
   gcc_unreachable.  */
1552 : :
1553 : : int
1554 : 364362999 : data_ref_compare_tree (tree t1, tree t2)
1555 : : {
1556 : 364362999 : int i, cmp;
1557 : 364362999 : enum tree_code code;
1558 : 364362999 : char tclass;
1559 : :
1560 : 364362999 : if (t1 == t2)
1561 : : return 0;
1562 : 166601249 : if (t1 == NULL)
1563 : : return -1;
1564 : 166531414 : if (t2 == NULL)
1565 : : return 1;
1566 : :
1567 : 166489318 : STRIP_USELESS_TYPE_CONVERSION (t1);
1568 : 166489318 : STRIP_USELESS_TYPE_CONVERSION (t2);
1569 : 166489318 : if (t1 == t2)
1570 : : return 0;
1571 : :
1572 : 165938988 : if (TREE_CODE (t1) != TREE_CODE (t2)
1573 : 11890492 : && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1574 : 16897718 : return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1575 : :
1576 : 154048496 : code = TREE_CODE (t1);
1577 : 154048496 : switch (code)
1578 : : {
1579 : 44837068 : case INTEGER_CST:
1580 : 44837068 : return tree_int_cst_compare (t1, t2);
1581 : :
1582 : 0 : case STRING_CST:
1583 : 0 : if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1584 : 0 : return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
        /* NOTE(review): memcmp only guarantees the sign of its result,
           so this path may return values other than -1/0/1, contrary to
           the header comment.  Harmless if callers test only the sign --
           confirm.  */
1585 : 0 : return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1586 : 0 : TREE_STRING_LENGTH (t1));
1587 : :
1588 : 12451229 : case SSA_NAME:
1589 : 12451229 : if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1590 : 12451229 : return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1591 : : break;
1592 : :
1593 : 96760199 : default:
1594 : 96760199 : if (POLY_INT_CST_P (t1))
1595 : : return compare_sizes_for_sort (wi::to_poly_widest (t1),
1596 : : wi::to_poly_widest (t2));
1597 : :
1598 : 96760199 : tclass = TREE_CODE_CLASS (code);
1599 : :
1600 : : /* For decls, compare their UIDs. */
1601 : 96760199 : if (tclass == tcc_declaration)
1602 : : {
1603 : 18798997 : if (DECL_UID (t1) != DECL_UID (t2))
1604 : 18798358 : return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1605 : : break;
1606 : : }
1607 : : /* For expressions, compare their operands recursively, last operand
   first; the first difference found decides the result. */
1608 : 77961202 : else if (IS_EXPR_CODE_CLASS (tclass))
1609 : : {
1610 : 137760329 : for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1611 : : {
1612 : 89567394 : cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1613 : 89567394 : TREE_OPERAND (t2, i));
1614 : 89567394 : if (cmp != 0)
1615 : 29768267 : return cmp;
1616 : : }
1617 : : }
1618 : : else
1619 : 0 : gcc_unreachable ();
1620 : : }
1621 : :
        /* Reached when no difference was found: equal SSA versions, equal
           DECL_UIDs, or all operands equal.  */
1622 : : return 0;
1623 : : }
1624 : :
1625 : : /* Return a successful opt_result if it's possible to resolve data
1626 : : dependence DDR by runtime alias check, otherwise a failure located at
   the statement of DDR's first data reference.

   The check is refused in three situations:
   - SPEED_P is false, i.e. we are optimizing for size;
   - LOOP is non-null and has an inner loop;
   - the base addresses of the two references point into different
     address spaces. */
1627 : :
1628 : : opt_result
1629 : 111178 : runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1630 : : {
1631 : 111178 : if (dump_enabled_p ())
1632 : 6252 : dump_printf (MSG_NOTE,
1633 : : "consider run-time aliasing test between %T and %T\n",
1634 : 6252 : DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1635 : :
1636 : 111178 : if (!speed_p)
1637 : 0 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1638 : : "runtime alias check not supported when"
1639 : : " optimizing for size.\n");
1640 : :
1641 : : /* FORNOW: We don't support versioning with outer-loop in either
1642 : : vectorization or loop distribution. */
1643 : 111178 : if (loop != NULL && loop->inner != NULL)
1644 : 143 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1645 : : "runtime alias check not supported for"
1646 : : " outer loop.\n");
1647 : :
1648 : : /* FORNOW: We don't support handling different address spaces. */
1649 : 111035 : if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
1650 : 111035 : != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
1651 : 4 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1652 : : "runtime alias check between different "
1653 : : "address spaces not supported.\n");
1654 : :
1655 : 111031 : return opt_result::success ();
1656 : : }
1657 : :
1658 : : /* Operator == between two dr_with_seg_len objects.
1659 : :
1660 : : This equality operator is used to make sure two data refs
1661 : : are the same one so that we will consider to combine the
1662 : : aliasing checks of those two pairs of data dependent data
1663 : : refs.

   Two objects are equal when all of the following match: base address
   (operand_equal_p), DR_OFFSET, DR_INIT and segment length
   (data_ref_compare_tree), access size (known_eq) and alignment.
   DR_STEP is not part of this comparison. */
1664 : :
1665 : : static bool
1666 : 103622 : operator == (const dr_with_seg_len& d1,
1667 : : const dr_with_seg_len& d2)
1668 : : {
1669 : 103622 : return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1670 : 103622 : DR_BASE_ADDRESS (d2.dr), 0)
1671 : 79429 : && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1672 : 78538 : && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1673 : 73997 : && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1674 : 73870 : && known_eq (d1.access_size, d2.access_size)
1675 : 175035 : && d1.align == d2.align);
1676 : : }
1677 : :
1678 : : /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1679 : : so that we can combine aliasing checks in one scan.

   PA_ and PB_ point to the two dr_with_seg_len_pair_t objects being
   compared.  The sort keys, in decreasing priority, are: base address
   of the first refs, base address of the second refs, step of the
   first refs, step of the second refs, offset and init of the first
   refs, then offset and init of the second refs.  Each key is compared
   with data_ref_compare_tree and the first nonzero result is
   returned. */
1680 : :
1681 : : static int
1682 : 960249 : comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1683 : : {
1684 : 960249 : const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1685 : 960249 : const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1686 : 960249 : const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1687 : 960249 : const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1688 : :
1689 : : /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1690 : : if a and c have the same basic address and step, and b and d have the same
1691 : : address and step. Therefore, if any a&c or b&d don't have the same address
1692 : : and step, we don't care the order of those two pairs after sorting. */
1693 : 960249 : int comp_res;
1694 : :
1695 : 960249 : if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1696 : 960249 : DR_BASE_ADDRESS (b1.dr))) != 0)
1697 : : return comp_res;
1698 : 468743 : if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1699 : 468743 : DR_BASE_ADDRESS (b2.dr))) != 0)
1700 : : return comp_res;
1701 : 303539 : if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1702 : 303539 : DR_STEP (b1.dr))) != 0)
1703 : : return comp_res;
1704 : 303433 : if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1705 : 303433 : DR_STEP (b2.dr))) != 0)
1706 : : return comp_res;
1707 : 295877 : if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1708 : 295877 : DR_OFFSET (b1.dr))) != 0)
1709 : : return comp_res;
1710 : 282298 : if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1711 : 282298 : DR_INIT (b1.dr))) != 0)
1712 : : return comp_res;
1713 : 199367 : if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1714 : 199367 : DR_OFFSET (b2.dr))) != 0)
1715 : : return comp_res;
1716 : 194776 : if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1717 : 194776 : DR_INIT (b2.dr))) != 0)
1718 : : return comp_res;
1719 : :
1720 : : return 0;
1721 : : }
1722 : :
1723 : : /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1724 : :
1725 : : static void
1726 : 773 : dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1727 : : {
1728 : 1546 : dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
1729 : 773 : DR_REF (alias_pair->first.dr),
1730 : 773 : DR_REF (alias_pair->second.dr));
1731 : :
1732 : 773 : dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1733 : : alias_pair->first.seg_len);
1734 : 773 : if (!operand_equal_p (alias_pair->first.seg_len,
1735 : 773 : alias_pair->second.seg_len, 0))
1736 : 147 : dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1737 : :
1738 : 773 : dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
1739 : 773 : dump_dec (MSG_NOTE, alias_pair->first.access_size);
1740 : 773 : if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1741 : : {
1742 : 151 : dump_printf (MSG_NOTE, " vs. ");
1743 : 151 : dump_dec (MSG_NOTE, alias_pair->second.access_size);
1744 : : }
1745 : :
1746 : 773 : dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
1747 : : alias_pair->first.align);
1748 : 773 : if (alias_pair->first.align != alias_pair->second.align)
1749 : 72 : dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1750 : :
1751 : 773 : dump_printf (MSG_NOTE, "\n%sflags: ", indent);
1752 : 773 : if (alias_pair->flags & DR_ALIAS_RAW)
1753 : 87 : dump_printf (MSG_NOTE, " RAW");
1754 : 773 : if (alias_pair->flags & DR_ALIAS_WAR)
1755 : 607 : dump_printf (MSG_NOTE, " WAR");
1756 : 773 : if (alias_pair->flags & DR_ALIAS_WAW)
1757 : 134 : dump_printf (MSG_NOTE, " WAW");
1758 : 773 : if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1759 : 154 : dump_printf (MSG_NOTE, " ARBITRARY");
1760 : 773 : if (alias_pair->flags & DR_ALIAS_SWAPPED)
1761 : 0 : dump_printf (MSG_NOTE, " SWAPPED");
1762 : 773 : if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1763 : 0 : dump_printf (MSG_NOTE, " UNSWAPPED");
1764 : 773 : if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1765 : 0 : dump_printf (MSG_NOTE, " MIXED_STEPS");
1766 : 773 : if (alias_pair->flags == 0)
1767 : 0 : dump_printf (MSG_NOTE, " <none>");
1768 : 773 : dump_printf (MSG_NOTE, "\n");
1769 : 773 : }
1770 : :
1771 : : /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1772 : : FACTOR is number of iterations that each data reference is accessed.
1773 : :
1774 : : Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1775 : : we create an expression:
1776 : :
1777 : : ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1778 : : || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1779 : :
1780 : : for aliasing checks. However, in some cases we can decrease the number
1781 : : of checks by combining two checks into one. For example, suppose we have
1782 : : another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1783 : : condition is satisfied:
1784 : :
1785 : : load_ptr_0 < load_ptr_1 &&
1786 : : load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1787 : :
1788 : : (this condition means, in each iteration of vectorized loop, the accessed
1789 : : memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1790 : : load_ptr_1.)
1791 : :
1792 : : we then can use only the following expression to finish the alising checks
1793 : : between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1794 : :
1795 : : ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1796 : : || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1797 : :
1798 : : Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1799 : : basic address. */
1800 : :
1801 : : void
1802 : 13097 : prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1803 : : poly_uint64)
1804 : : {
1805 : 13097 : if (alias_pairs->is_empty ())
1806 : 13097 : return;
1807 : :
1808 : : /* Canonicalize each pair so that the base components are ordered wrt
1809 : : data_ref_compare_tree. This allows the loop below to merge more
1810 : : cases. */
1811 : : unsigned int i;
1812 : : dr_with_seg_len_pair_t *alias_pair;
1813 : 58980 : FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1814 : : {
1815 : 46372 : data_reference_p dr_a = alias_pair->first.dr;
1816 : 46372 : data_reference_p dr_b = alias_pair->second.dr;
1817 : 46372 : int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1818 : : DR_BASE_ADDRESS (dr_b));
1819 : 46372 : if (comp_res == 0)
1820 : 1341 : comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1821 : 1341 : if (comp_res == 0)
1822 : 36 : comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1823 : 46372 : if (comp_res > 0)
1824 : : {
1825 : 14973 : std::swap (alias_pair->first, alias_pair->second);
1826 : 14973 : alias_pair->flags |= DR_ALIAS_SWAPPED;
1827 : : }
1828 : : else
1829 : 31399 : alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1830 : : }
1831 : :
1832 : : /* Sort the collected data ref pairs so that we can scan them once to
1833 : : combine all possible aliasing checks. */
1834 : 12608 : alias_pairs->qsort (comp_dr_with_seg_len_pair);
1835 : :
1836 : : /* Scan the sorted dr pairs and check if we can combine alias checks
1837 : : of two neighboring dr pairs. */
1838 : : unsigned int last = 0;
1839 : 92744 : for (i = 1; i < alias_pairs->length (); ++i)
1840 : : {
1841 : : /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1842 : 33764 : dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1843 : 33764 : dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1844 : :
1845 : 33764 : dr_with_seg_len *dr_a1 = &alias_pair1->first;
1846 : 33764 : dr_with_seg_len *dr_b1 = &alias_pair1->second;
1847 : 33764 : dr_with_seg_len *dr_a2 = &alias_pair2->first;
1848 : 33764 : dr_with_seg_len *dr_b2 = &alias_pair2->second;
1849 : :
1850 : : /* Remove duplicate data ref pairs. */
1851 : 33764 : if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1852 : : {
1853 : 14505 : if (dump_enabled_p ())
1854 : 1301 : dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1855 : 1301 : DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1856 : 1301 : DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1857 : 14505 : alias_pair1->flags |= alias_pair2->flags;
1858 : 48269 : continue;
1859 : : }
1860 : :
1861 : : /* Assume that we won't be able to merge the pairs, then correct
1862 : : if we do. */
1863 : 19259 : last += 1;
1864 : 19259 : if (last != i)
1865 : 4532 : (*alias_pairs)[last] = (*alias_pairs)[i];
1866 : :
1867 : 19259 : if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1868 : : {
1869 : : /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1870 : : and DR_A1 and DR_A2 are two consecutive memrefs. */
1871 : 16835 : if (*dr_a1 == *dr_a2)
1872 : : {
1873 : 12784 : std::swap (dr_a1, dr_b1);
1874 : 12784 : std::swap (dr_a2, dr_b2);
1875 : : }
1876 : :
1877 : 16835 : poly_int64 init_a1, init_a2;
1878 : : /* Only consider cases in which the distance between the initial
1879 : : DR_A1 and the initial DR_A2 is known at compile time. */
1880 : 30924 : if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1881 : 16835 : DR_BASE_ADDRESS (dr_a2->dr), 0)
1882 : 3260 : || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1883 : 3260 : DR_OFFSET (dr_a2->dr), 0)
1884 : 2746 : || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1885 : 19581 : || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1886 : 14095 : continue;
1887 : :
1888 : : /* Don't combine if we can't tell which one comes first. */
1889 : 2746 : if (!ordered_p (init_a1, init_a2))
1890 : : continue;
1891 : :
1892 : : /* Work out what the segment length would be if we did combine
1893 : : DR_A1 and DR_A2:
1894 : :
1895 : : - If DR_A1 and DR_A2 have equal lengths, that length is
1896 : : also the combined length.
1897 : :
1898 : : - If DR_A1 and DR_A2 both have negative "lengths", the combined
1899 : : length is the lower bound on those lengths.
1900 : :
1901 : : - If DR_A1 and DR_A2 both have positive lengths, the combined
1902 : : length is the upper bound on those lengths.
1903 : :
1904 : : Other cases are unlikely to give a useful combination.
1905 : :
1906 : : The lengths both have sizetype, so the sign is taken from
1907 : : the step instead. */
1908 : 2746 : poly_uint64 new_seg_len = 0;
1909 : 2746 : bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1910 : 2746 : dr_a2->seg_len, 0);
1911 : 2746 : if (new_seg_len_p)
1912 : : {
1913 : 6 : poly_uint64 seg_len_a1, seg_len_a2;
1914 : 6 : if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1915 : 6 : || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1916 : 6 : continue;
1917 : :
1918 : 0 : tree indicator_a = dr_direction_indicator (dr_a1->dr);
1919 : 0 : if (TREE_CODE (indicator_a) != INTEGER_CST)
1920 : 0 : continue;
1921 : :
1922 : 0 : tree indicator_b = dr_direction_indicator (dr_a2->dr);
1923 : 0 : if (TREE_CODE (indicator_b) != INTEGER_CST)
1924 : 0 : continue;
1925 : :
1926 : 0 : int sign_a = tree_int_cst_sgn (indicator_a);
1927 : 0 : int sign_b = tree_int_cst_sgn (indicator_b);
1928 : :
1929 : 0 : if (sign_a <= 0 && sign_b <= 0)
1930 : 0 : new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1931 : 0 : else if (sign_a >= 0 && sign_b >= 0)
1932 : 0 : new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1933 : : else
1934 : 0 : continue;
1935 : : }
1936 : : /* At this point we're committed to merging the refs. */
1937 : :
1938 : : /* Make sure dr_a1 starts left of dr_a2. */
1939 : 2740 : if (maybe_gt (init_a1, init_a2))
1940 : : {
1941 : 0 : std::swap (*dr_a1, *dr_a2);
1942 : 0 : std::swap (init_a1, init_a2);
1943 : : }
1944 : :
1945 : : /* The DR_Bs are equal, so only the DR_As can introduce
1946 : : mixed steps. */
1947 : 2740 : if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1948 : 0 : alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1949 : :
1950 : 2740 : if (new_seg_len_p)
1951 : : {
1952 : 0 : dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1953 : : new_seg_len);
1954 : 0 : dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1955 : : }
1956 : :
1957 : : /* This is always positive due to the swap above. */
1958 : 2740 : poly_uint64 diff = init_a2 - init_a1;
1959 : :
1960 : : /* The new check will start at DR_A1. Make sure that its access
1961 : : size encompasses the initial DR_A2. */
1962 : 2740 : if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1963 : : {
1964 : 1033 : dr_a1->access_size = upper_bound (dr_a1->access_size,
1965 : : diff + dr_a2->access_size);
1966 : 1033 : unsigned int new_align = known_alignment (dr_a1->access_size);
1967 : 1033 : dr_a1->align = MIN (dr_a1->align, new_align);
1968 : : }
1969 : 2740 : if (dump_enabled_p ())
1970 : 225 : dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1971 : 225 : DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1972 : 225 : DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1973 : 2740 : alias_pair1->flags |= alias_pair2->flags;
1974 : 2740 : last -= 1;
1975 : : }
1976 : : }
1977 : 12608 : alias_pairs->truncate (last + 1);
1978 : :
1979 : : /* Try to restore the original dr_with_seg_len order within each
1980 : : dr_with_seg_len_pair_t. If we ended up combining swapped and
1981 : : unswapped pairs into the same check, we have to invalidate any
1982 : : RAW, WAR and WAW information for it. */
1983 : 12608 : if (dump_enabled_p ())
1984 : 665 : dump_printf (MSG_NOTE, "merged alias checks:\n");
1985 : 41735 : FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1986 : : {
1987 : 29127 : unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1988 : 29127 : unsigned int swapped = (alias_pair->flags & swap_mask);
1989 : 29127 : if (swapped == DR_ALIAS_SWAPPED)
1990 : 8898 : std::swap (alias_pair->first, alias_pair->second);
1991 : 20229 : else if (swapped != DR_ALIAS_UNSWAPPED)
1992 : 1375 : alias_pair->flags |= DR_ALIAS_ARBITRARY;
1993 : 29127 : alias_pair->flags &= ~swap_mask;
1994 : 29127 : if (dump_enabled_p ())
1995 : 773 : dump_alias_pair (alias_pair, " ");
1996 : : }
1997 : : }
1998 : :
1999 : : /* A subroutine of create_intersect_range_checks, with a subset of the
2000 : : same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
2001 : : to optimize cases in which the references form a simple RAW, WAR or
2002 : : WAR dependence. */
2003 : :
2004 : : static bool
2005 : 3975 : create_ifn_alias_checks (tree *cond_expr,
2006 : : const dr_with_seg_len_pair_t &alias_pair)
2007 : : {
2008 : 3975 : const dr_with_seg_len& dr_a = alias_pair.first;
2009 : 3975 : const dr_with_seg_len& dr_b = alias_pair.second;
2010 : :
2011 : : /* Check for cases in which:
2012 : :
2013 : : (a) we have a known RAW, WAR or WAR dependence
2014 : : (b) the accesses are well-ordered in both the original and new code
2015 : : (see the comment above the DR_ALIAS_* flags for details); and
2016 : : (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2017 : 3975 : if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2018 : : return false;
2019 : :
2020 : : /* Make sure that both DRs access the same pattern of bytes,
2021 : : with a constant length and step. */
2022 : 2754 : poly_uint64 seg_len;
2023 : 2754 : if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2024 : 2398 : || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2025 : 2394 : || maybe_ne (dr_a.access_size, dr_b.access_size)
2026 : 2354 : || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2027 : 5108 : || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2028 : 409 : return false;
2029 : :
2030 : 2345 : unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2031 : 2345 : tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2032 : 2345 : tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2033 : :
2034 : : /* See whether the target suports what we want to do. WAW checks are
2035 : : equivalent to WAR checks here. */
2036 : 4690 : internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2037 : 2345 : ? IFN_CHECK_RAW_PTRS
2038 : : : IFN_CHECK_WAR_PTRS);
2039 : 2345 : unsigned int align = MIN (dr_a.align, dr_b.align);
2040 : 2345 : poly_uint64 full_length = seg_len + bytes;
2041 : 2345 : if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2042 : : full_length, align))
2043 : : {
2044 : 2345 : full_length = seg_len + dr_a.access_size;
2045 : 2345 : if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2046 : : full_length, align))
2047 : : return false;
2048 : : }
2049 : :
2050 : : /* Commit to using this form of test. */
2051 : 0 : addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2052 : 0 : addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2053 : :
2054 : 0 : addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2055 : 0 : addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2056 : :
2057 : 0 : *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2058 : : ifn, boolean_type_node,
2059 : : 4, addr_a, addr_b,
2060 : : size_int (full_length),
2061 : : size_int (align));
2062 : :
2063 : 0 : if (dump_enabled_p ())
2064 : : {
2065 : 0 : if (ifn == IFN_CHECK_RAW_PTRS)
2066 : 0 : dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2067 : : else
2068 : 0 : dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2069 : : }
2070 : : return true;
2071 : : }
2072 : :
2073 : : /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2074 : : free of aliases, using a condition based on index values instead
2075 : : of a condition based on addresses. Return true on success,
2076 : : storing the condition in *COND_EXPR.
2077 : :
2078 : : This can only be done if the two data references in ALIAS_PAIR access
2079 : : the same array object and the index is the only difference. For example,
2080 : : if the two data references are DR_A and DR_B:
2081 : :
2082 : : DR_A DR_B
2083 : : data-ref arr[i] arr[j]
2084 : : base_object arr arr
2085 : : index {i_0, +, 1}_loop {j_0, +, 1}_loop
2086 : :
2087 : : The addresses and their index are like:
2088 : :
2089 : : |<- ADDR_A ->| |<- ADDR_B ->|
2090 : : ------------------------------------------------------->
2091 : : | | | | | | | | | |
2092 : : ------------------------------------------------------->
2093 : : i_0 ... i_0+4 j_0 ... j_0+4
2094 : :
2095 : : We can create expression based on index rather than address:
2096 : :
2097 : : (unsigned) (i_0 - j_0 + 3) <= 6
2098 : :
2099 : : i.e. the indices are less than 4 apart.
2100 : :
2101 : : Note evolution step of index needs to be considered in comparison. */
2102 : :
2103 : : static bool
2104 : 4123 : create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2105 : : const dr_with_seg_len_pair_t &alias_pair)
2106 : : {
2107 : 4123 : const dr_with_seg_len &dr_a = alias_pair.first;
2108 : 4123 : const dr_with_seg_len &dr_b = alias_pair.second;
2109 : 4123 : if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2110 : 4123 : || integer_zerop (DR_STEP (dr_a.dr))
2111 : 3870 : || integer_zerop (DR_STEP (dr_b.dr))
2112 : 15664 : || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2113 : 320 : return false;
2114 : :
2115 : 3803 : poly_uint64 seg_len1, seg_len2;
2116 : 3803 : if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2117 : 3803 : || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2118 : 201 : return false;
2119 : :
2120 : 3602 : if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2121 : : return false;
2122 : :
2123 : 3602 : if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2124 : : return false;
2125 : :
2126 : 149 : if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2127 : : return false;
2128 : :
2129 : 149 : gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2130 : :
2131 : 149 : bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2132 : 149 : unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2133 : 149 : if (neg_step)
2134 : : {
2135 : 30 : abs_step = -abs_step;
2136 : 30 : seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2137 : 30 : seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2138 : : }
2139 : :
2140 : : /* Infer the number of iterations with which the memory segment is accessed
2141 : : by DR. In other words, alias is checked if memory segment accessed by
2142 : : DR_A in some iterations intersect with memory segment accessed by DR_B
2143 : : in the same amount iterations.
2144 : : Note segnment length is a linear function of number of iterations with
2145 : : DR_STEP as the coefficient. */
2146 : 149 : poly_uint64 niter_len1, niter_len2;
2147 : 149 : if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2148 : 149 : || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2149 : : return false;
2150 : :
2151 : : /* Divide each access size by the byte step, rounding up. */
2152 : 149 : poly_uint64 niter_access1, niter_access2;
2153 : 149 : if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2154 : : abs_step, &niter_access1)
2155 : 149 : || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2156 : : abs_step, &niter_access2))
2157 : : return false;
2158 : :
2159 : 149 : bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2160 : :
2161 : 149 : int found = -1;
2162 : 610 : for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2163 : : {
2164 : 157 : tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2165 : 157 : tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2166 : : /* Two indices must be the same if they are not scev, or not scev wrto
2167 : : current loop being vecorized. */
2168 : 157 : if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2169 : 149 : || TREE_CODE (access2) != POLYNOMIAL_CHREC
2170 : 149 : || CHREC_VARIABLE (access1) != (unsigned)loop->num
2171 : 306 : || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2172 : : {
2173 : 8 : if (operand_equal_p (access1, access2, 0))
2174 : 7 : continue;
2175 : :
2176 : : return false;
2177 : : }
2178 : 149 : if (found >= 0)
2179 : : return false;
2180 : 149 : found = i;
2181 : : }
2182 : :
2183 : : /* Ought not to happen in practice, since if all accesses are equal then the
2184 : : alias should be decidable at compile time. */
2185 : 148 : if (found < 0)
2186 : : return false;
2187 : :
2188 : : /* The two indices must have the same step. */
2189 : 148 : tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2190 : 148 : tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2191 : 148 : if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2192 : : return false;
2193 : :
2194 : 148 : tree idx_step = CHREC_RIGHT (access1);
2195 : : /* Index must have const step, otherwise DR_STEP won't be constant. */
2196 : 148 : gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2197 : : /* Index must evaluate in the same direction as DR. */
2198 : 148 : gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2199 : :
2200 : 148 : tree min1 = CHREC_LEFT (access1);
2201 : 148 : tree min2 = CHREC_LEFT (access2);
2202 : 148 : if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2203 : : return false;
2204 : :
2205 : : /* Ideally, alias can be checked against loop's control IV, but we
2206 : : need to prove linear mapping between control IV and reference
2207 : : index. Although that should be true, we check against (array)
2208 : : index of data reference. Like segment length, index length is
2209 : : linear function of the number of iterations with index_step as
2210 : : the coefficient, i.e, niter_len * idx_step. */
2211 : 148 : offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2212 : : SIGNED);
2213 : 148 : if (neg_step)
2214 : 30 : abs_idx_step = -abs_idx_step;
2215 : 296 : poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2216 : 296 : poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2217 : 148 : poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2218 : 148 : poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2219 : :
2220 : 148 : gcc_assert (known_ge (idx_len1, 0)
2221 : : && known_ge (idx_len2, 0)
2222 : : && known_ge (idx_access1, 0)
2223 : : && known_ge (idx_access2, 0));
2224 : :
2225 : : /* Each access has the following pattern, with lengths measured
2226 : : in units of INDEX:
2227 : :
2228 : : <-- idx_len -->
2229 : : <--- A: -ve step --->
2230 : : +-----+-------+-----+-------+-----+
2231 : : | n-1 | ..... | 0 | ..... | n-1 |
2232 : : +-----+-------+-----+-------+-----+
2233 : : <--- B: +ve step --->
2234 : : <-- idx_len -->
2235 : : |
2236 : : min
2237 : :
2238 : : where "n" is the number of scalar iterations covered by the segment
2239 : : and where each access spans idx_access units.
2240 : :
2241 : : A is the range of bytes accessed when the step is negative,
2242 : : B is the range when the step is positive.
2243 : :
2244 : : When checking for general overlap, we need to test whether
2245 : : the range:
2246 : :
2247 : : [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2248 : :
2249 : : overlaps:
2250 : :
2251 : : [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2252 : :
2253 : : where:
2254 : :
2255 : : low_offsetN = +ve step ? 0 : -idx_lenN;
2256 : : high_offsetN = +ve step ? idx_lenN : 0;
2257 : :
2258 : : This is equivalent to testing whether:
2259 : :
2260 : : min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2261 : : && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2262 : :
2263 : : Converting this into a single test, there is an overlap if:
2264 : :
2265 : : 0 <= min2 - min1 + bias <= limit
2266 : :
2267 : : where bias = high_offset2 + idx_access2 - 1 - low_offset1
2268 : : limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2269 : : + (high_offset2 - low_offset2 + idx_access2 - 1)
2270 : : i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2271 : :
2272 : : Combining the tests requires limit to be computable in an unsigned
2273 : : form of the index type; if it isn't, we fall back to the usual
2274 : : pointer-based checks.
2275 : :
2276 : : We can do better if DR_B is a write and if DR_A and DR_B are
2277 : : well-ordered in both the original and the new code (see the
2278 : : comment above the DR_ALIAS_* flags for details). In this case
2279 : : we know that for each i in [0, n-1], the write performed by
2280 : : access i of DR_B occurs after access numbers j<=i of DR_A in
2281 : : both the original and the new code. Any write or anti
2282 : : dependencies wrt those DR_A accesses are therefore maintained.
2283 : :
2284 : : We just need to make sure that each individual write in DR_B does not
2285 : : overlap any higher-indexed access in DR_A; such DR_A accesses happen
2286 : : after the DR_B access in the original code but happen before it in
2287 : : the new code.
2288 : :
2289 : : We know the steps for both accesses are equal, so by induction, we
2290 : : just need to test whether the first write of DR_B overlaps a later
2291 : : access of DR_A. In other words, we need to move min1 along by
2292 : : one iteration:
2293 : :
2294 : : min1' = min1 + idx_step
2295 : :
2296 : : and use the ranges:
2297 : :
2298 : : [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2299 : :
2300 : : and:
2301 : :
2302 : : [min2, min2 + idx_access2 - 1]
2303 : :
2304 : : where:
2305 : :
2306 : : low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2307 : : high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */
2308 : 148 : if (waw_or_war_p)
2309 : 120 : idx_len1 -= abs_idx_step;
2310 : :
2311 : 148 : poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2312 : 148 : if (!waw_or_war_p)
2313 : 148 : limit += idx_len2;
2314 : :
2315 : 148 : tree utype = unsigned_type_for (TREE_TYPE (min1));
2316 : 148 : if (!wi::fits_to_tree_p (limit, utype))
2317 : : return false;
2318 : :
2319 : 148 : poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2320 : 148 : poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2321 : 148 : poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2322 : : /* Equivalent to adding IDX_STEP to MIN1. */
2323 : 148 : if (waw_or_war_p)
2324 : 120 : bias -= wi::to_offset (idx_step);
2325 : :
2326 : 148 : tree subject = fold_build2 (MINUS_EXPR, utype,
2327 : : fold_convert (utype, min2),
2328 : : fold_convert (utype, min1));
2329 : 148 : subject = fold_build2 (PLUS_EXPR, utype, subject,
2330 : : wide_int_to_tree (utype, bias));
2331 : 148 : tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2332 : : wide_int_to_tree (utype, limit));
2333 : 148 : if (*cond_expr)
2334 : 0 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2335 : : *cond_expr, part_cond_expr);
2336 : : else
2337 : 148 : *cond_expr = part_cond_expr;
2338 : 148 : if (dump_enabled_p ())
2339 : : {
2340 : 130 : if (waw_or_war_p)
2341 : 103 : dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2342 : : else
2343 : 27 : dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2344 : : }
2345 : : return true;
2346 : : }
2347 : :
2348 : : /* A subroutine of create_intersect_range_checks, with a subset of the
2349 : : same arguments. Try to optimize cases in which the second access
2350 : : is a write and in which some overlap is valid. */
2351 : :
2352 : : static bool
2353 : 3975 : create_waw_or_war_checks (tree *cond_expr,
2354 : : const dr_with_seg_len_pair_t &alias_pair)
2355 : : {
2356 : 3975 : const dr_with_seg_len& dr_a = alias_pair.first;
2357 : 3975 : const dr_with_seg_len& dr_b = alias_pair.second;
2358 : :
2359 : : /* Check for cases in which:
2360 : :
2361 : : (a) DR_B is always a write;
2362 : : (b) the accesses are well-ordered in both the original and new code
2363 : : (see the comment above the DR_ALIAS_* flags for details); and
2364 : : (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2365 : 3975 : if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2366 : : return false;
2367 : :
2368 : : /* Check for equal (but possibly variable) steps. */
2369 : 2714 : tree step = DR_STEP (dr_a.dr);
2370 : 2714 : if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2371 : : return false;
2372 : :
2373 : : /* Make sure that we can operate on sizetype without loss of precision. */
2374 : 2365 : tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2375 : 2365 : if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2376 : : return false;
2377 : :
2378 : : /* All addresses involved are known to have a common alignment ALIGN.
2379 : : We can therefore subtract ALIGN from an exclusive endpoint to get
2380 : : an inclusive endpoint. In the best (and common) case, ALIGN is the
2381 : : same as the access sizes of both DRs, and so subtracting ALIGN
2382 : : cancels out the addition of an access size. */
2383 : 2365 : unsigned int align = MIN (dr_a.align, dr_b.align);
2384 : 2365 : poly_uint64 last_chunk_a = dr_a.access_size - align;
2385 : 2365 : poly_uint64 last_chunk_b = dr_b.access_size - align;
2386 : :
2387 : : /* Get a boolean expression that is true when the step is negative. */
2388 : 2365 : tree indicator = dr_direction_indicator (dr_a.dr);
2389 : 2365 : tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2390 : : fold_convert (ssizetype, indicator),
2391 : : ssize_int (0));
2392 : :
2393 : : /* Get lengths in sizetype. */
2394 : 2365 : tree seg_len_a
2395 : 2365 : = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2396 : 2365 : step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2397 : :
2398 : : /* Each access has the following pattern:
2399 : :
2400 : : <- |seg_len| ->
2401 : : <--- A: -ve step --->
2402 : : +-----+-------+-----+-------+-----+
2403 : : | n-1 | ..... | 0 | ..... | n-1 |
2404 : : +-----+-------+-----+-------+-----+
2405 : : <--- B: +ve step --->
2406 : : <- |seg_len| ->
2407 : : |
2408 : : base address
2409 : :
2410 : : where "n" is the number of scalar iterations covered by the segment.
2411 : :
2412 : : A is the range of bytes accessed when the step is negative,
2413 : : B is the range when the step is positive.
2414 : :
2415 : : We know that DR_B is a write. We also know (from checking that
2416 : : DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2417 : : the write performed by access i of DR_B occurs after access numbers
2418 : : j<=i of DR_A in both the original and the new code. Any write or
2419 : : anti dependencies wrt those DR_A accesses are therefore maintained.
2420 : :
2421 : : We just need to make sure that each individual write in DR_B does not
2422 : : overlap any higher-indexed access in DR_A; such DR_A accesses happen
2423 : : after the DR_B access in the original code but happen before it in
2424 : : the new code.
2425 : :
2426 : : We know the steps for both accesses are equal, so by induction, we
2427 : : just need to test whether the first write of DR_B overlaps a later
2428 : : access of DR_A. In other words, we need to move addr_a along by
2429 : : one iteration:
2430 : :
2431 : : addr_a' = addr_a + step
2432 : :
2433 : : and check whether:
2434 : :
2435 : : [addr_b, addr_b + last_chunk_b]
2436 : :
2437 : : overlaps:
2438 : :
2439 : : [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2440 : :
2441 : : where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2442 : :
2443 : : low_offset_a = +ve step ? 0 : seg_len_a - step
2444 : : high_offset_a = +ve step ? seg_len_a - step : 0
2445 : :
2446 : : This is equivalent to testing whether:
2447 : :
2448 : : addr_a' + low_offset_a <= addr_b + last_chunk_b
2449 : : && addr_b <= addr_a' + high_offset_a + last_chunk_a
2450 : :
2451 : : Converting this into a single test, there is an overlap if:
2452 : :
2453 : : 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2454 : :
2455 : : where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2456 : :
2457 : : If DR_A is performed, limit + |step| - last_chunk_b is known to be
2458 : : less than the size of the object underlying DR_A. We also know
2459 : : that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2460 : : guaranteed at compile time. There can therefore be no overflow if
2461 : : "limit" is calculated in an unsigned type with pointer precision. */
2462 : 2365 : tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2463 : : DR_OFFSET (dr_a.dr));
2464 : 2365 : addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2465 : :
2466 : 2365 : tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2467 : : DR_OFFSET (dr_b.dr));
2468 : 2365 : addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2469 : :
2470 : : /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2471 : 2365 : addr_a = fold_build_pointer_plus (addr_a, step);
2472 : 2365 : tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2473 : : seg_len_a, step);
2474 : 2365 : if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2475 : 0 : seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2476 : :
2477 : 2365 : tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2478 : : seg_len_a_minus_step, size_zero_node);
2479 : 2365 : if (!CONSTANT_CLASS_P (low_offset_a))
2480 : 0 : low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2481 : :
2482 : : /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2483 : : but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2484 : 2365 : tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2485 : : low_offset_a);
2486 : :
2487 : : /* The amount added to addr_b - addr_a'. */
2488 : 2365 : tree bias = fold_build2 (MINUS_EXPR, sizetype,
2489 : : size_int (last_chunk_b), low_offset_a);
2490 : :
2491 : 2365 : tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2492 : 2365 : limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2493 : : size_int (last_chunk_a + last_chunk_b));
2494 : :
2495 : 2365 : tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2496 : 2365 : subject = fold_build2 (PLUS_EXPR, sizetype,
2497 : : fold_convert (sizetype, subject), bias);
2498 : :
2499 : 2365 : *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2500 : 2365 : if (dump_enabled_p ())
2501 : 291 : dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2502 : : return true;
2503 : : }
2504 : :
2505 : : /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2506 : : every address ADDR accessed by D:
2507 : :
2508 : : *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2509 : :
2510 : : In this case, every element accessed by D is aligned to at least
2511 : : ALIGN bytes.
2512 : :
2513 : : If ALIGN is zero then instead set *SEG_MIN_OUT and *SEG_MAX_OUT so that:
2514 : :
2515 : : *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT. */
2516 : :
2517 : : static void
2518 : 3220 : get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2519 : : tree *seg_max_out, HOST_WIDE_INT align)
2520 : : {
2521 : : /* Each access has the following pattern:
2522 : :
2523 : : <- |seg_len| ->
2524 : : <--- A: -ve step --->
2525 : : +-----+-------+-----+-------+-----+
2526 : : | n-1 | ..... | 0 | ..... | n-1 |
2527 : : +-----+-------+-----+-------+-----+
2528 : : <--- B: +ve step --->
2529 : : <- |seg_len| ->
2530 : : |
2531 : : base address
2532 : :
2533 : : where "n" is the number of scalar iterations covered by the segment.
2534 : : (This should be VF for a particular pair if we know that both steps
2535 : : are the same, otherwise it will be the full number of scalar loop
2536 : : iterations.)
2537 : :
2538 : : A is the range of bytes accessed when the step is negative,
2539 : : B is the range when the step is positive.
2540 : :
2541 : : If the access size is "access_size" bytes, the lowest addressed byte is:
2542 : :
2543 : : base + (step < 0 ? seg_len : 0) [LB]
2544 : :
2545 : : and the highest addressed byte is always below:
2546 : :
2547 : : base + (step < 0 ? 0 : seg_len) + access_size [UB]
2548 : :
2549 : : Thus:
2550 : :
2551 : : LB <= ADDR < UB
2552 : :
2553 : : If ALIGN is nonzero, all three values are aligned to at least ALIGN
2554 : : bytes, so:
2555 : :
2556 : : LB <= ADDR <= UB - ALIGN
2557 : :
2558 : : where "- ALIGN" folds naturally with the "+ access_size" and often
2559 : : cancels it out.
2560 : :
2561 : : We don't try to simplify LB and UB beyond this (e.g. by using
2562 : : MIN and MAX based on whether seg_len rather than the stride is
2563 : : negative) because it is possible for the absolute size of the
2564 : : segment to overflow the range of a ssize_t.
2565 : :
2566 : : Keeping the pointer_plus outside of the cond_expr should allow
2567 : : the cond_exprs to be shared with other alias checks. */
2568 : 3220 : tree indicator = dr_direction_indicator (d.dr);
2569 : 3220 : tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2570 : : fold_convert (ssizetype, indicator),
2571 : : ssize_int (0));
2572 : 3220 : tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2573 : : DR_OFFSET (d.dr));
2574 : 3220 : addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr)); /* "base" above: base address + offset + init. */
2575 : 3220 : tree seg_len
2576 : 3220 : = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2577 : : /* MIN_REACH and MAX_REACH are the LB and UB offsets from ADDR_BASE described above; MAX_REACH also folds in "+ access_size - ALIGN". */
2578 : 3220 : tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2579 : : seg_len, size_zero_node);
2580 : 3220 : tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2581 : : size_zero_node, seg_len);
2582 : 3220 : max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2583 : : size_int (d.access_size - align));
2584 : :
2585 : 3220 : *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2586 : 3220 : *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2587 : 3220 : }
2588 : :
2589 : : /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2590 : : storing the condition in *COND_EXPR. The helpers create_intersect_range_checks_index,
2591 : : create_ifn_alias_checks and create_waw_or_war_checks are tried first, in that order, and the first one to return true ends the search.
2592 : : The fallback is to generate a test that the two accesses do not overlap:
2593 : : end_a <= start_b || end_b <= start_a. */
2594 : :
2595 : : static void
2596 : 4123 : create_intersect_range_checks (class loop *loop, tree *cond_expr,
2597 : : const dr_with_seg_len_pair_t &alias_pair)
2598 : : {
2599 : 4123 : const dr_with_seg_len& dr_a = alias_pair.first;
2600 : 4123 : const dr_with_seg_len& dr_b = alias_pair.second;
2601 : 4123 : *cond_expr = NULL_TREE;
2602 : 4123 : if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2603 : 2513 : return;
2604 : :
2605 : 3975 : if (create_ifn_alias_checks (cond_expr, alias_pair))
2606 : : return;
2607 : :
2608 : 3975 : if (create_waw_or_war_checks (cond_expr, alias_pair))
2609 : : return;
2610 : : /* None of the helpers applied: build the generic segment overlap test. */
2611 : 1610 : unsigned HOST_WIDE_INT min_align;
2612 : 1610 : tree_code cmp_code;
2613 : : /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2614 : : are equivalent. This is just an optimization heuristic. */
2615 : 1610 : if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2616 : 1516 : && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2617 : : {
2618 : : /* In this case adding access_size to seg_len is likely to give
2619 : : a simple X * step, where X is either the number of scalar
2620 : : iterations or the vectorization factor. We're better off
2621 : : keeping that, rather than subtracting an alignment from it.
2622 : :
2623 : : In this case the maximum values are exclusive and so there is
2624 : : no alias if the maximum of one segment equals the minimum
2625 : : of another. */
2626 : : min_align = 0;
2627 : : cmp_code = LE_EXPR;
2628 : : }
2629 : : else
2630 : : {
2631 : : /* Calculate the minimum alignment shared by all four pointers,
2632 : : then arrange for this alignment to be subtracted from the
2633 : : exclusive maximum values to get inclusive maximum values.
2634 : : This "- min_align" is cumulative with a "+ access_size"
2635 : : in the calculation of the maximum values. In the best
2636 : : (and common) case, the two cancel each other out, leaving
2637 : : us with an inclusive bound based only on seg_len. In the
2638 : : worst case we're simply adding a smaller number than before.
2639 : :
2640 : : Because the maximum values are inclusive, there is an alias
2641 : : if the maximum value of one segment is equal to the minimum
2642 : : value of the other. */
2643 : 183 : min_align = MIN (dr_a.align, dr_b.align);
2644 : 183 : cmp_code = LT_EXPR;
2645 : : }
2646 : :
2647 : 1610 : tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2648 : 1610 : get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2649 : 1610 : get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2650 : : /* No alias if A's segment ends before B's starts, or B's ends before A's starts (using CMP_CODE chosen above). */
2651 : 1610 : *cond_expr
2652 : 1610 : = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2653 : : fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2654 : : fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2655 : 1610 : if (dump_enabled_p ())
2656 : 246 : dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2657 : : }
2658 : :
2659 : : /* Create a conditional expression that represents the run-time checks for
2660 : : overlapping of address ranges represented by the list of data reference
2661 : : pairs passed in ALIAS_PAIRS. Data references are in LOOP. The resulting
2662 : : *COND_EXPR is the conditional expression to be used in the if statement
2663 : : that controls which version of the loop gets executed at runtime. If *COND_EXPR is already nonnull on entry, the new per-pair conditions are ANDed onto it. */
2664 : :
2665 : : void
2666 : 2744 : create_runtime_alias_checks (class loop *loop,
2667 : : const vec<dr_with_seg_len_pair_t> *alias_pairs,
2668 : : tree * cond_expr)
2669 : : {
2670 : 2744 : tree part_cond_expr;
2671 : :
2672 : 2744 : fold_defer_overflow_warnings ();
2673 : 12355 : for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2674 : : {
2675 : 4123 : gcc_assert (alias_pair.flags);
2676 : 4123 : if (dump_enabled_p ())
2677 : 667 : dump_printf (MSG_NOTE,
2678 : : "create runtime check for data references %T and %T\n",
2679 : 667 : DR_REF (alias_pair.first.dr),
2680 : 667 : DR_REF (alias_pair.second.dr));
2681 : :
2682 : : /* Create the condition expression for this pair of data references and AND it into the accumulated condition. */
2683 : 4123 : create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2684 : 4123 : if (*cond_expr)
2685 : 4076 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2686 : : *cond_expr, part_cond_expr);
2687 : : else
2688 : 47 : *cond_expr = part_cond_expr;
2689 : : }
2690 : 2744 : fold_undefer_and_ignore_overflow_warnings ();
2691 : 2744 : }
2692 : :
2693 : : /* Return true if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2694 : : expressions. The comparison is structural and recursive, after STRIP_NOPS on both. */
2695 : : static bool
2696 : 0 : dr_equal_offsets_p1 (tree offset1, tree offset2)
2697 : : {
2698 : 0 : bool res;
2699 : :
2700 : 0 : STRIP_NOPS (offset1);
2701 : 0 : STRIP_NOPS (offset2);
2702 : : /* The same tree node is trivially equal to itself. */
2703 : 0 : if (offset1 == offset2)
2704 : : return true;
2705 : : /* Distinct nodes only match if they have the same tree code and are unary or binary expressions whose operands match. */
2706 : 0 : if (TREE_CODE (offset1) != TREE_CODE (offset2)
2707 : 0 : || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2708 : : return false;
2709 : :
2710 : 0 : res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2711 : 0 : TREE_OPERAND (offset2, 0));
2712 : : /* Unary expressions have no second operand to compare. */
2713 : 0 : if (!res || !BINARY_CLASS_P (offset1))
2714 : : return res;
2715 : :
2716 : 0 : res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2717 : 0 : TREE_OPERAND (offset2, 1));
2718 : :
2719 : 0 : return res;
2720 : : }
2721 : :
2722 : : /* Return true if the DR_OFFSETs of DRA and DRB are identical expressions, as decided by dr_equal_offsets_p1. */
2723 : : bool
2724 : 0 : dr_equal_offsets_p (struct data_reference *dra,
2725 : : struct data_reference *drb)
2726 : : {
2727 : 0 : tree offset1, offset2;
2728 : :
2729 : 0 : offset1 = DR_OFFSET (dra);
2730 : 0 : offset2 = DR_OFFSET (drb);
2731 : :
2732 : 0 : return dr_equal_offsets_p1 (offset1, offset2);
2733 : : }
2734 : :
2735 : : /* Returns true if FNA == FNB, i.e. both have the same length and every pair of corresponding elements satisfies operand_equal_p. */
2736 : :
2737 : : static bool
2738 : 0 : affine_function_equal_p (affine_fn fna, affine_fn fnb)
2739 : : {
2740 : 0 : unsigned i, n = fna.length ();
2741 : :
2742 : 0 : if (n != fnb.length ())
2743 : : return false;
2744 : :
2745 : 0 : for (i = 0; i < n; i++)
2746 : 0 : if (!operand_equal_p (fna[i], fnb[i], 0))
2747 : : return false;
2748 : :
2749 : : return true;
2750 : : }
2751 : :
2752 : : /* If CF is nontrivial (CF_NONTRIVIAL_P) and all the functions in CF are the same, returns one of them (CF's first function),
2753 : : otherwise returns a default-constructed affine_fn, which callers detect with exists (). */
2754 : :
2755 : : static affine_fn
2756 : 1902754 : common_affine_function (conflict_function *cf)
2757 : : {
2758 : 1902754 : unsigned i;
2759 : 1902754 : affine_fn comm;
2760 : :
2761 : 1902754 : if (!CF_NONTRIVIAL_P (cf))
2762 : 0 : return affine_fn ();
2763 : :
2764 : 1902754 : comm = cf->fns[0];
2765 : : /* Every remaining function must equal the first one. */
2766 : 1902754 : for (i = 1; i < cf->n; i++)
2767 : 0 : if (!affine_function_equal_p (comm, cf->fns[i]))
2768 : 0 : return affine_fn ();
2769 : :
2770 : 1902754 : return comm;
2771 : : }
2772 : :
2773 : : /* Returns the base of the affine function FN, i.e. its element at index 0. FN must have at least one element. */
2774 : :
2775 : : static tree
2776 : 1086969 : affine_function_base (affine_fn fn)
2777 : : {
2778 : 0 : return fn[0];
2779 : : }
2780 : :
2781 : : /* Returns true if FN is a constant, i.e. every element of FN after the base (index 0) is integer zero. */
2782 : :
2783 : : static bool
2784 : 1087173 : affine_function_constant_p (affine_fn fn)
2785 : : {
2786 : 1087173 : unsigned i;
2787 : 1087173 : tree coef;
2788 : : /* Start at index 1: the base itself may be anything. */
2789 : 1133721 : for (i = 1; fn.iterate (i, &coef); i++)
2790 : 46752 : if (!integer_zerop (coef))
2791 : : return false;
2792 : :
2793 : : return true;
2794 : : }
2795 : :
2796 : : /* Returns true if FN is the zero constant function, i.e. its base is integer zero and FN is constant (see affine_function_constant_p). */
2797 : :
2798 : : static bool
2799 : 135796 : affine_function_zero_p (affine_fn fn)
2800 : : {
2801 : 135796 : return (integer_zerop (affine_function_base (fn))
2802 : 135796 : && affine_function_constant_p (fn));
2803 : : }
2804 : :
2805 : : /* Returns the signed integer type (signed_type_for) of whichever of TA and TB has the
2806 : : larger precision. When the precisions are equal, TB is used. */
2807 : :
2808 : : static tree
2809 : 2788609 : signed_type_for_types (tree ta, tree tb)
2810 : : {
2811 : 2788609 : if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2812 : 182 : return signed_type_for (ta);
2813 : : else
2814 : 2788427 : return signed_type_for (tb);
2815 : : }
2816 : :
2817 : : /* Applies operation OP element-wise on affine functions FNA and FNB, and returns the
2818 : : result in a newly created affine_fn. Where one function is shorter than the other, its missing elements are treated as integer zero. */
2819 : :
2820 : : static affine_fn
2821 : 951377 : affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2822 : : {
2823 : 951377 : unsigned i, n, m;
2824 : 951377 : affine_fn ret;
2825 : 951377 : tree coef;
2826 : : /* N is the shorter of the two lengths and M the longer. */
2827 : 2854131 : if (fnb.length () > fna.length ())
2828 : : {
2829 : 0 : n = fna.length ();
2830 : 0 : m = fnb.length ();
2831 : : }
2832 : : else
2833 : : {
2834 : 951377 : n = fnb.length ();
2835 : : m = fna.length ();
2836 : : }
2837 : : /* Combine the first N elements pairwise, in a signed type chosen by signed_type_for_types. */
2838 : 951377 : ret.create (m);
2839 : 1949506 : for (i = 0; i < n; i++)
2840 : : {
2841 : 1996258 : tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2842 : 998129 : TREE_TYPE (fnb[i]));
2843 : 998129 : ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2844 : : }
2845 : : /* At most one of these loops runs: the leftover elements of the longer function are combined with zero, keeping the operand order of OP. */
2846 : 951377 : for (; fna.iterate (i, &coef); i++)
2847 : 0 : ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2848 : : coef, integer_zero_node));
2849 : 951377 : for (; fnb.iterate (i, &coef); i++)
2850 : 0 : ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2851 : : integer_zero_node, coef));
2852 : :
2853 : 951377 : return ret;
2854 : : }
2855 : :
2856 : : /* Returns the sum of affine functions FNA and FNB, computed by affine_fn_op with PLUS_EXPR. */
2857 : :
2858 : : static affine_fn
2859 : 0 : affine_fn_plus (affine_fn fna, affine_fn fnb)
2860 : : {
2861 : 0 : return affine_fn_op (PLUS_EXPR, fna, fnb);
2862 : : }
2863 : :
2864 : : /* Returns the difference FNA - FNB of affine functions FNA and FNB, computed by affine_fn_op with MINUS_EXPR. */
2865 : :
2866 : : static affine_fn
2867 : 951377 : affine_fn_minus (affine_fn fna, affine_fn fnb)
2868 : : {
2869 : 0 : return affine_fn_op (MINUS_EXPR, fna, fnb);
2870 : : }
2871 : :
2872 : : /* Frees affine function FN by calling its release method. */
2873 : :
2874 : : static void
2875 : 3675133 : affine_fn_free (affine_fn fn)
2876 : : {
2877 : 0 : fn.release ();
2878 : 0 : }
2879 : :
2880 : : /* Determine for each subscript in the data dependence relation DDR
2881 : : the distance and store it in SUB_DISTANCE. Nothing is done unless DDR_ARE_DEPENDENT (DDR) is NULL_TREE. */
2882 : :
2883 : : static void
2884 : 3496612 : compute_subscript_distance (struct data_dependence_relation *ddr)
2885 : : {
2886 : 3496612 : conflict_function *cf_a, *cf_b;
2887 : 3496612 : affine_fn fn_a, fn_b, diff;
2888 : :
2889 : 3496612 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2890 : : {
2891 : : unsigned int i;
2892 : :
2893 : 3387870 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2894 : : {
2895 : 951377 : struct subscript *subscript;
2896 : :
2897 : 951377 : subscript = DDR_SUBSCRIPT (ddr, i);
2898 : 951377 : cf_a = SUB_CONFLICTS_IN_A (subscript);
2899 : 951377 : cf_b = SUB_CONFLICTS_IN_B (subscript);
2900 : : /* Each conflict function must reduce to a single common affine function; otherwise give up on this and all later subscripts. */
2901 : 951377 : fn_a = common_affine_function (cf_a);
2902 : 951377 : fn_b = common_affine_function (cf_b);
2903 : 951377 : if (!fn_a.exists () || !fn_b.exists ())
2904 : : {
2905 : 0 : SUB_DISTANCE (subscript) = chrec_dont_know;
2906 : 0 : return;
2907 : : }
2908 : 951377 : diff = affine_fn_minus (fn_a, fn_b);
2909 : : /* The distance is the base of FN_A - FN_B when that difference is constant, and unknown otherwise. */
2910 : 951377 : if (affine_function_constant_p (diff))
2911 : 951173 : SUB_DISTANCE (subscript) = affine_function_base (diff);
2912 : : else
2913 : 204 : SUB_DISTANCE (subscript) = chrec_dont_know;
2914 : :
2915 : 951377 : affine_fn_free (diff);
2916 : : }
2917 : : }
2918 : : }
2919 : :
2920 : : /* Returns the conflict function for "unknown": a conflict_function newly allocated with XCNEW whose N field is NOT_KNOWN. */
2921 : :
2922 : : static conflict_function *
2923 : 17476838 : conflict_fn_not_known (void)
2924 : : {
2925 : 0 : conflict_function *fn = XCNEW (conflict_function);
2926 : 17476838 : fn->n = NOT_KNOWN;
2927 : :
2928 : 17476838 : return fn;
2929 : : }
2930 : :
2931 : : /* Returns the conflict function for "independent": a conflict_function newly allocated with XCNEW whose N field is NO_DEPENDENCE. */
2932 : :
2933 : : static conflict_function *
2934 : 5448120 : conflict_fn_no_dependence (void)
2935 : : {
2936 : 0 : conflict_function *fn = XCNEW (conflict_function);
2937 : 5448120 : fn->n = NO_DEPENDENCE;
2938 : :
2939 : 5448120 : return fn;
2940 : : }
2941 : :
2942 : : /* Returns true if the address of OBJ is invariant in LOOP, i.e. none of the operands checked below contains a symbol defined in LOOP (chrec_contains_symbols_defined_in_loop). */
2943 : :
2944 : : static bool
2945 : 3725240 : object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2946 : : {
2947 : 3773497 : while (handled_component_p (obj))
2948 : : {
2949 : 55330 : if (TREE_CODE (obj) == ARRAY_REF)
2950 : : {
2951 : 17589 : for (int i = 1; i < 4; ++i)
2952 : 14960 : if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2953 : 14960 : loop->num))
2954 : : return false;
2955 : : }
2956 : 45628 : else if (TREE_CODE (obj) == COMPONENT_REF)
2957 : : {
2958 : 26909 : if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2959 : 26909 : loop->num))
2960 : : return false;
2961 : : }
2962 : 48257 : obj = TREE_OPERAND (obj, 0);
2963 : : }
2964 : : /* OBJ is now the innermost object. Unless it is an indirect or MEM_REF dereference there is nothing further to check. */
2965 : 3718167 : if (!INDIRECT_REF_P (obj)
2966 : 3718167 : && TREE_CODE (obj) != MEM_REF)
2967 : : return true;
2968 : : /* For a dereference, the dereferenced operand must itself be free of symbols defined in LOOP. */
2969 : 3697021 : return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2970 : 7394042 : loop->num);
2971 : : }
2972 : :
2973 : : /* Returns false if we can prove that data references A and B do not alias,
2974 : : true otherwise. If LOOP_NEST is null, no cross-iteration aliases are
2975 : : considered. */
2976 : :
2977 : : bool
2978 : 13161477 : dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2979 : : class loop *loop_nest)
2980 : : {
2981 : 13161477 : tree addr_a = DR_BASE_OBJECT (a);
2982 : 13161477 : tree addr_b = DR_BASE_OBJECT (b);
2983 : :
2984 : : /* If we are not processing a loop nest but scalar code we
2985 : : do not need to care about possible cross-iteration dependences
2986 : : and thus can process the full original reference. Do so,
2987 : : similar to how loop invariant motion applies extra offset-based
2988 : : disambiguation. */
2989 : 13161477 : if (!loop_nest)
2990 : : {
2991 : 7098054 : tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
2992 : 7098054 : tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
2993 : : /* With equal base address and offset, the references cannot alias if ranges_maybe_overlap_p rules out an overlap between the ranges given by their DR_INITs and access sizes. */
2994 : 7098054 : if (DR_BASE_ADDRESS (a)
2995 : 7093426 : && DR_BASE_ADDRESS (b)
2996 : 7093100 : && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
2997 : 6255124 : && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
2998 : 6189084 : && poly_int_tree_p (tree_size_a)
2999 : 6189064 : && poly_int_tree_p (tree_size_b)
3000 : 13287118 : && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
3001 : 6189064 : wi::to_poly_widest (tree_size_a),
3002 : 6189064 : wi::to_poly_widest (DR_INIT (b)),
3003 : 6189064 : wi::to_poly_widest (tree_size_b)))
3004 : : {
3005 : 4482492 : gcc_assert (integer_zerop (DR_STEP (a))
3006 : : && integer_zerop (DR_STEP (b)));
3007 : 4482512 : return false;
3008 : : }
3009 : : /* Otherwise form the affine difference of the two full references' offsets (OFF2 - OFF1) and test whether accesses of SIZE1 and SIZE2 can overlap. */
3010 : 10462248 : aff_tree off1, off2;
3011 : : poly_widest_int size1, size2;
3012 : 2615562 : get_inner_reference_aff (DR_REF (a), &off1, &size1);
3013 : 2615562 : get_inner_reference_aff (DR_REF (b), &off2, &size2);
3014 : 2615562 : aff_combination_scale (&off1, -1);
3015 : 2615562 : aff_combination_add (&off2, &off1);
3016 : 2615562 : if (aff_comb_cannot_overlap_p (&off2, size1, size2))
3017 : 20 : return false;
3018 : 2615562 : }
3019 : : /* References in the same dependence clique but with different dependence bases do not alias. */
3020 : 8678965 : if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3021 : 6345669 : && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3022 : : /* For cross-iteration dependences the cliques must be valid for the
3023 : : whole loop, not just individual iterations. */
3024 : 6134627 : && (!loop_nest
3025 : 5921692 : || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3026 : 5032653 : || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3027 : 6013880 : && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3028 : 14528202 : && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3029 : : return false;
3030 : :
3031 : : /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3032 : : do not know the size of the base-object. So we cannot do any
3033 : : offset/overlap based analysis but have to rely on points-to
3034 : : information only. */
3035 : 8533516 : if (TREE_CODE (addr_a) == MEM_REF
3036 : 8533516 : && (DR_UNCONSTRAINED_BASE (a)
3037 : 4717926 : || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3038 : : {
3039 : : /* For true dependences we can apply TBAA. */
3040 : 4097160 : if (flag_strict_aliasing
3041 : 3890595 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3042 : 4226187 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3043 : 129027 : get_alias_set (DR_REF (b))))
3044 : : return false;
3045 : 4081995 : if (TREE_CODE (addr_b) == MEM_REF)
3046 : 4009315 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3047 : 8018630 : TREE_OPERAND (addr_b, 0));
3048 : : else
3049 : 72680 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3050 : 72680 : build_fold_addr_expr (addr_b));
3051 : : }
3052 : 4436356 : else if (TREE_CODE (addr_b) == MEM_REF
3053 : 4436356 : && (DR_UNCONSTRAINED_BASE (b)
3054 : 2125702 : || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3055 : : {
3056 : : /* For true dependences we can apply TBAA. */
3057 : 274012 : if (flag_strict_aliasing
3058 : 206001 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3059 : 336280 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3060 : 62268 : get_alias_set (DR_REF (b))))
3061 : : return false;
3062 : 258771 : if (TREE_CODE (addr_a) == MEM_REF)
3063 : 159617 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3064 : 319234 : TREE_OPERAND (addr_b, 0));
3065 : : else
3066 : 99154 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3067 : 198308 : TREE_OPERAND (addr_b, 0));
3068 : : }
3069 : :
3070 : : /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3071 : : that is being subsetted in the loop nest. Pick the alias query that matches the kind of dependence (write/write, read/write, or any other combination). */
3072 : 4162344 : if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3073 : 2779966 : return refs_output_dependent_p (addr_a, addr_b);
3074 : 1382378 : else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3075 : 389249 : return refs_anti_dependent_p (addr_a, addr_b);
3076 : 993129 : return refs_may_alias_p (addr_a, addr_b);
3077 : : }
3078 : :
3079 : : /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3080 : : if it is meaningful to compare their associated access functions
3081 : : when checking for dependencies. */
3082 : :
3083 : : static bool
3084 : 11079813 : access_fn_components_comparable_p (tree ref_a, tree ref_b)
3085 : : {
3086 : : /* Allow pairs of component refs from the following sets:
3087 : :
3088 : : { REALPART_EXPR, IMAGPART_EXPR }
3089 : : { COMPONENT_REF }
3090 : : { ARRAY_REF }.
3090 : : IMAGPART_EXPR is mapped to REALPART_EXPR below so that the two codes compare as equal. */
3091 : 11079813 : tree_code code_a = TREE_CODE (ref_a);
3092 : 11079813 : tree_code code_b = TREE_CODE (ref_b);
3093 : 11079813 : if (code_a == IMAGPART_EXPR)
3094 : 35472 : code_a = REALPART_EXPR;
3095 : 11079813 : if (code_b == IMAGPART_EXPR)
3096 : 41291 : code_b = REALPART_EXPR;
3097 : 11079813 : if (code_a != code_b)
3098 : : return false;
3099 : :
3100 : 10354045 : if (TREE_CODE (ref_a) == COMPONENT_REF)
3101 : : /* ??? We cannot simply use the type of operand #0 of the refs here as
3102 : : the Fortran compiler smuggles type punning into COMPONENT_REFs.
3103 : : Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3104 : 6997850 : return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3105 : 6997850 : == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3106 : : /* For the remaining codes, the enclosing objects must have compatible types. */
3107 : 3356195 : return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3108 : 6712390 : TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3109 : : }
3110 : :
3111 : : /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3112 : : is true when the main indices of A and B were not comparable so we try again
3113 : : with alternate indices computed on an indirect reference. */
3114 : :
3115 : : struct data_dependence_relation *
3116 : 6081919 : initialize_data_dependence_relation (struct data_dependence_relation *res,
3117 : : vec<loop_p> loop_nest,
3118 : : bool use_alt_indices)
3119 : : {
3120 : 6081919 : struct data_reference *a = DDR_A (res);
3121 : 6081919 : struct data_reference *b = DDR_B (res);
3122 : 6081919 : unsigned int i;
3123 : :
3124 : 6081919 : struct indices *indices_a = &a->indices;
3125 : 6081919 : struct indices *indices_b = &b->indices;
3126 : 6081919 : if (use_alt_indices)
3127 : : {
3128 : 154937 : if (TREE_CODE (DR_REF (a)) != MEM_REF)
3129 : 124075 : indices_a = &a->alt_indices;
3130 : 154937 : if (TREE_CODE (DR_REF (b)) != MEM_REF)
3131 : 133662 : indices_b = &b->alt_indices;
3132 : : }
3133 : 6081919 : unsigned int num_dimensions_a = indices_a->access_fns.length ();
3134 : 6081919 : unsigned int num_dimensions_b = indices_b->access_fns.length ();
3135 : 6081919 : if (num_dimensions_a == 0 || num_dimensions_b == 0)
3136 : : {
3137 : 1835243 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3138 : 1835243 : return res;
3139 : : }
3140 : :
3141 : : /* For unconstrained bases, the root (highest-indexed) subscript
3142 : : describes a variation in the base of the original DR_REF rather
3143 : : than a component access. We have no type that accurately describes
3144 : : the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3145 : : applying this subscript) so limit the search to the last real
3146 : : component access.
3147 : :
3148 : : E.g. for:
3149 : :
3150 : : void
3151 : : f (int a[][8], int b[][8])
3152 : : {
3153 : : for (int i = 0; i < 8; ++i)
3154 : : a[i * 2][0] = b[i][0];
3155 : : }
3156 : :
3157 : : the a and b accesses have a single ARRAY_REF component reference [0]
3158 : : but have two subscripts. */
3159 : 4246676 : if (indices_a->unconstrained_base)
3160 : 1325641 : num_dimensions_a -= 1;
3161 : 4246676 : if (indices_b->unconstrained_base)
3162 : 1321762 : num_dimensions_b -= 1;
3163 : :
3164 : : /* These structures describe sequences of component references in
3165 : : DR_REF (A) and DR_REF (B). Each component reference is tied to a
3166 : : specific access function. */
3167 : 4246676 : struct {
3168 : : /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3169 : : DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3170 : : indices. In C notation, these are the indices of the rightmost
3171 : : component references; e.g. for a sequence .b.c.d, the start
3172 : : index is for .d. */
3173 : : unsigned int start_a;
3174 : : unsigned int start_b;
3175 : :
3176 : : /* The sequence contains LENGTH consecutive access functions from
3177 : : each DR. */
3178 : : unsigned int length;
3179 : :
3180 : : /* The enclosing objects for the A and B sequences respectively,
3181 : : i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3182 : : and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3183 : : tree object_a;
3184 : : tree object_b;
3185 : 4246676 : } full_seq = {}, struct_seq = {};
3186 : :
3187 : : /* Before each iteration of the loop:
3188 : :
3189 : : - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3190 : : - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3191 : 4246676 : unsigned int index_a = 0;
3192 : 4246676 : unsigned int index_b = 0;
3193 : 4246676 : tree ref_a = DR_REF (a);
3194 : 4246676 : tree ref_b = DR_REF (b);
3195 : :
3196 : : /* Now walk the component references from the final DR_REFs back up to
3197 : : the enclosing base objects. Each component reference corresponds
3198 : : to one access function in the DR, with access function 0 being for
3199 : : the final DR_REF and the highest-indexed access function being the
3200 : : one that is applied to the base of the DR.
3201 : :
3202 : : Look for a sequence of component references whose access functions
3203 : : are comparable (see access_fn_components_comparable_p). If more
3204 : : than one such sequence exists, pick the one nearest the base
3205 : : (which is the leftmost sequence in C notation). Store this sequence
3206 : : in FULL_SEQ.
3207 : :
3208 : : For example, if we have:
3209 : :
3210 : : struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3211 : :
3212 : : A: a[0][i].s.c.d
3213 : : B: __real b[0][i].s.e[i].f
3214 : :
3215 : : (where d is the same type as the real component of f) then the access
3216 : : functions would be:
3217 : :
3218 : : 0 1 2 3
3219 : : A: .d .c .s [i]
3220 : :
3221 : : 0 1 2 3 4 5
3222 : : B: __real .f [i] .e .s [i]
3223 : :
3224 : : The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3225 : : and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3226 : : COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3227 : : the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3228 : : so is comparable. The A3/B5 column contains two ARRAY_REFs that
3229 : : index foo[10] arrays, so is again comparable. The sequence is
3230 : : therefore:
3231 : :
3232 : : A: [1, 3] (i.e. [i].s.c)
3233 : : B: [3, 5] (i.e. [i].s.e)
3234 : :
3235 : : Also look for sequences of component references whose access
3236 : : functions are comparable and whose enclosing objects have the same
3237 : : RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3238 : : example, STRUCT_SEQ would be:
3239 : :
3240 : : A: [1, 2] (i.e. s.c)
3241 : : B: [3, 4] (i.e. s.e) */
3242 : 15314635 : while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3243 : : {
3244 : : /* The alternate indices form always has a single dimension
3245 : : with unconstrained base. */
3246 : 11079813 : gcc_assert (!use_alt_indices);
3247 : :
3248 : : /* REF_A and REF_B must be one of the component access types
3249 : : allowed by dr_analyze_indices. */
3250 : 11079813 : gcc_checking_assert (access_fn_component_p (ref_a));
3251 : 11079813 : gcc_checking_assert (access_fn_component_p (ref_b));
3252 : :
3253 : : /* Get the immediately-enclosing objects for REF_A and REF_B,
3254 : : i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3255 : : and DR_ACCESS_FN (B, INDEX_B). */
3256 : 11079813 : tree object_a = TREE_OPERAND (ref_a, 0);
3257 : 11079813 : tree object_b = TREE_OPERAND (ref_b, 0);
3258 : :
3259 : 11079813 : tree type_a = TREE_TYPE (object_a);
3260 : 11079813 : tree type_b = TREE_TYPE (object_b);
3261 : 11079813 : if (access_fn_components_comparable_p (ref_a, ref_b))
3262 : : {
3263 : : /* This pair of component accesses is comparable for dependence
3264 : : analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3265 : : DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3266 : 7979124 : if (full_seq.start_a + full_seq.length != index_a
3267 : 7095971 : || full_seq.start_b + full_seq.length != index_b)
3268 : : {
3269 : : /* The accesses don't extend the current sequence,
3270 : : so start a new one here. */
3271 : 1152202 : full_seq.start_a = index_a;
3272 : 1152202 : full_seq.start_b = index_b;
3273 : 1152202 : full_seq.length = 0;
3274 : : }
3275 : :
3276 : : /* Add this pair of references to the sequence. */
3277 : 7979124 : full_seq.length += 1;
3278 : 7979124 : full_seq.object_a = object_a;
3279 : 7979124 : full_seq.object_b = object_b;
3280 : :
3281 : : /* If the enclosing objects are structures (and thus have the
3282 : : same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3283 : 7979124 : if (TREE_CODE (type_a) == RECORD_TYPE)
3284 : 4638916 : struct_seq = full_seq;
3285 : :
3286 : : /* Move to the next containing reference for both A and B. */
3287 : 7979124 : ref_a = object_a;
3288 : 7979124 : ref_b = object_b;
3289 : 7979124 : index_a += 1;
3290 : 7979124 : index_b += 1;
3291 : 7979124 : continue;
3292 : : }
3293 : :
3294 : : /* Try to approach equal type sizes. */
3295 : 3100689 : if (!COMPLETE_TYPE_P (type_a)
3296 : 3097217 : || !COMPLETE_TYPE_P (type_b)
3297 : 3091088 : || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3298 : 6189798 : || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3299 : : break;
3300 : :
3301 : 3088835 : unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3302 : 3088835 : unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3303 : 3088835 : if (size_a <= size_b)
3304 : : {
3305 : 1602023 : index_a += 1;
3306 : 1602023 : ref_a = object_a;
3307 : : }
3308 : 3088835 : if (size_b <= size_a)
3309 : : {
3310 : 1606294 : index_b += 1;
3311 : 1606294 : ref_b = object_b;
3312 : : }
3313 : : }
3314 : :
3315 : : /* See whether FULL_SEQ ends at the base and whether the two bases
3316 : : are equal. We do not care about TBAA or alignment info so we can
3317 : : use OEP_ADDRESS_OF to avoid false negatives. */
3318 : 4246676 : tree base_a = indices_a->base_object;
3319 : 4246676 : tree base_b = indices_b->base_object;
3320 : 4246676 : bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3321 : 4157090 : && full_seq.start_b + full_seq.length == num_dimensions_b
3322 : 4114741 : && (indices_a->unconstrained_base
3323 : 4114741 : == indices_b->unconstrained_base)
3324 : 4107844 : && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3325 : 3685561 : && (types_compatible_p (TREE_TYPE (base_a),
3326 : 3685561 : TREE_TYPE (base_b))
3327 : 161814 : || (!base_supports_access_fn_components_p (base_a)
3328 : 152452 : && !base_supports_access_fn_components_p (base_b)
3329 : 150967 : && operand_equal_p
3330 : 150967 : (TYPE_SIZE (TREE_TYPE (base_a)),
3331 : 150967 : TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3332 : 7788282 : && (!loop_nest.exists ()
3333 : 3541606 : || (object_address_invariant_in_loop_p
3334 : 3541606 : (loop_nest[0], base_a))));
3335 : :
3336 : : /* If the bases are the same, we can include the base variation too.
3337 : : E.g. the b accesses in:
3338 : :
3339 : : for (int i = 0; i < n; ++i)
3340 : : b[i + 4][0] = b[i][0];
3341 : :
3342 : : have a definite dependence distance of 4, while for:
3343 : :
3344 : : for (int i = 0; i < n; ++i)
3345 : : a[i + 4][0] = b[i][0];
3346 : :
3347 : : the dependence distance depends on the gap between a and b.
3348 : :
3349 : : If the bases are different then we can only rely on the sequence
3350 : : rooted at a structure access, since arrays are allowed to overlap
3351 : : arbitrarily and change shape arbitrarily. E.g. we treat this as
3352 : : valid code:
3353 : :
3354 : : int a[256];
3355 : : ...
3356 : : ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3357 : :
3358 : : where two lvalues with the same int[4][3] type overlap, and where
3359 : : both lvalues are distinct from the object's declared type. */
3360 : 3491377 : if (same_base_p)
3361 : : {
3362 : 3491377 : if (indices_a->unconstrained_base)
3363 : 805131 : full_seq.length += 1;
3364 : : }
3365 : : else
3366 : : full_seq = struct_seq;
3367 : :
3368 : : /* Punt if we didn't find a suitable sequence. */
3369 : 4246676 : if (full_seq.length == 0)
3370 : : {
3371 : 580772 : if (use_alt_indices
3372 : 483049 : || (TREE_CODE (DR_REF (a)) == MEM_REF
3373 : 358521 : && TREE_CODE (DR_REF (b)) == MEM_REF)
3374 : 156770 : || may_be_nonaddressable_p (DR_REF (a))
3375 : 737304 : || may_be_nonaddressable_p (DR_REF (b)))
3376 : : {
3377 : : /* Fully exhausted possibilities. */
3378 : 425835 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3379 : 425835 : return res;
3380 : : }
3381 : :
3382 : : /* Try evaluating both DRs as dereferences of pointers. */
3383 : 154937 : if (!a->alt_indices.base_object
3384 : 57298 : && TREE_CODE (DR_REF (a)) != MEM_REF)
3385 : : {
3386 : 26436 : tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3387 : : build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3388 : : build_int_cst
3389 : 26436 : (reference_alias_ptr_type (DR_REF (a)), 0));
3390 : 79308 : dr_analyze_indices (&a->alt_indices, alt_ref,
3391 : 26436 : loop_preheader_edge (loop_nest[0]),
3392 : : loop_containing_stmt (DR_STMT (a)));
3393 : : }
3394 : 154937 : if (!b->alt_indices.base_object
3395 : 76833 : && TREE_CODE (DR_REF (b)) != MEM_REF)
3396 : : {
3397 : 55558 : tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3398 : : build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3399 : : build_int_cst
3400 : 55558 : (reference_alias_ptr_type (DR_REF (b)), 0));
3401 : 166674 : dr_analyze_indices (&b->alt_indices, alt_ref,
3402 : 55558 : loop_preheader_edge (loop_nest[0]),
3403 : : loop_containing_stmt (DR_STMT (b)));
3404 : : }
3405 : 154937 : return initialize_data_dependence_relation (res, loop_nest, true);
3406 : : }
3407 : :
3408 : 3665904 : if (!same_base_p)
3409 : : {
3410 : : /* Partial overlap is possible for different bases when strict aliasing
3411 : : is not in effect. It's also possible if either base involves a union
3412 : : access; e.g. for:
3413 : :
3414 : : struct s1 { int a[2]; };
3415 : : struct s2 { struct s1 b; int c; };
3416 : : struct s3 { int d; struct s1 e; };
3417 : : union u { struct s2 f; struct s3 g; } *p, *q;
3418 : :
3419 : : the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3420 : : "p->g.e" (base "p->g") and might partially overlap the s1 at
3421 : : "q->g.e" (base "q->g"). */
3422 : 174527 : if (!flag_strict_aliasing
3423 : 162837 : || ref_contains_union_access_p (full_seq.object_a)
3424 : 336215 : || ref_contains_union_access_p (full_seq.object_b))
3425 : : {
3426 : 12839 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3427 : 12839 : return res;
3428 : : }
3429 : :
3430 : 161688 : DDR_COULD_BE_INDEPENDENT_P (res) = true;
3431 : 161688 : if (!loop_nest.exists ()
3432 : 323376 : || (object_address_invariant_in_loop_p (loop_nest[0],
3433 : 161688 : full_seq.object_a)
3434 : 21946 : && object_address_invariant_in_loop_p (loop_nest[0],
3435 : 21946 : full_seq.object_b)))
3436 : : {
3437 : 7101 : DDR_OBJECT_A (res) = full_seq.object_a;
3438 : 7101 : DDR_OBJECT_B (res) = full_seq.object_b;
3439 : : }
3440 : : }
3441 : :
3442 : 3653065 : DDR_AFFINE_P (res) = true;
3443 : 3653065 : DDR_ARE_DEPENDENT (res) = NULL_TREE;
3444 : 3653065 : DDR_SUBSCRIPTS (res).create (full_seq.length);
3445 : 3653065 : DDR_LOOP_NEST (res) = loop_nest;
3446 : 3653065 : DDR_SELF_REFERENCE (res) = false;
3447 : :
3448 : 12360243 : for (i = 0; i < full_seq.length; ++i)
3449 : : {
3450 : 8707178 : struct subscript *subscript;
3451 : :
3452 : 8707178 : subscript = XNEW (struct subscript);
3453 : 8707178 : SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3454 : 8707178 : SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3455 : 8707178 : SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3456 : 8707178 : SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3457 : 8707178 : SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3458 : 8707178 : SUB_DISTANCE (subscript) = chrec_dont_know;
3459 : 8707178 : DDR_SUBSCRIPTS (res).safe_push (subscript);
3460 : : }
3461 : :
3462 : : return res;
3463 : : }
3464 : :
3465 : : /* Initialize a data dependence relation between data accesses A and
3466 : : B. NB_LOOPS is the number of loops surrounding the references: the
3467 : : size of the classic distance/direction vectors. */
3468 : :
3469 : : struct data_dependence_relation *
3470 : 12087899 : initialize_data_dependence_relation (struct data_reference *a,
3471 : : struct data_reference *b,
3472 : : vec<loop_p> loop_nest)
3473 : : {
3474 : 12087899 : data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3475 : 12087899 : DDR_A (res) = a;
3476 : 12087899 : DDR_B (res) = b;
3477 : 12087899 : DDR_LOOP_NEST (res).create (0);
3478 : 12087899 : DDR_SUBSCRIPTS (res).create (0);
3479 : 12087899 : DDR_DIR_VECTS (res).create (0);
3480 : 12087899 : DDR_DIST_VECTS (res).create (0);
3481 : :
3482 : 12087899 : if (a == NULL || b == NULL)
3483 : : {
3484 : 0 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3485 : 0 : return res;
3486 : : }
3487 : :
3488 : : /* If the data references do not alias, then they are independent. */
3489 : 18146372 : if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3490 : : {
3491 : 6160917 : DDR_ARE_DEPENDENT (res) = chrec_known;
3492 : 6160917 : return res;
3493 : : }
3494 : :
3495 : 5926982 : return initialize_data_dependence_relation (res, loop_nest, false);
3496 : : }
3497 : :
3498 : :
3499 : : /* Frees memory used by the conflict function F. */
3500 : :
3501 : : static void
3502 : 25648714 : free_conflict_function (conflict_function *f)
3503 : : {
3504 : 25648714 : unsigned i;
3505 : :
3506 : 25648714 : if (CF_NONTRIVIAL_P (f))
3507 : : {
3508 : 5447512 : for (i = 0; i < f->n; i++)
3509 : 2723756 : affine_fn_free (f->fns[i]);
3510 : : }
3511 : 25648714 : free (f);
3512 : 25648714 : }
3513 : :
3514 : : /* Frees memory used by SUBSCRIPTS. */
3515 : :
3516 : : static void
3517 : 3653065 : free_subscripts (vec<subscript_p> subscripts)
3518 : : {
3519 : 19666373 : for (subscript_p s : subscripts)
3520 : : {
3521 : 8707178 : free_conflict_function (s->conflicting_iterations_in_a);
3522 : 8707178 : free_conflict_function (s->conflicting_iterations_in_b);
3523 : 8707178 : free (s);
3524 : : }
3525 : 3653065 : subscripts.release ();
3526 : 3653065 : }
3527 : :
3528 : : /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3529 : : description. */
3530 : :
3531 : : static inline void
3532 : 2916033 : finalize_ddr_dependent (struct data_dependence_relation *ddr,
3533 : : tree chrec)
3534 : : {
3535 : 2916033 : DDR_ARE_DEPENDENT (ddr) = chrec;
3536 : 2916033 : free_subscripts (DDR_SUBSCRIPTS (ddr));
3537 : 2916033 : DDR_SUBSCRIPTS (ddr).create (0);
3538 : 161979 : }
3539 : :
3540 : : /* The dependence relation DDR cannot be represented by a distance
3541 : : vector. */
3542 : :
3543 : : static inline void
3544 : 1518 : non_affine_dependence_relation (struct data_dependence_relation *ddr)
3545 : : {
3546 : 1518 : if (dump_file && (dump_flags & TDF_DETAILS))
3547 : 35 : fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3548 : :
3549 : 1518 : DDR_AFFINE_P (ddr) = false;
3550 : 1518 : }
3551 : :
3552 : :
3553 : :
3554 : : /* This section contains the classic Banerjee tests. */
3555 : :
3556 : : /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3557 : : variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3558 : :
3559 : : static inline bool
3560 : 2801126 : ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3561 : : {
3562 : 2801126 : return (evolution_function_is_constant_p (chrec_a)
3563 : 4562642 : && evolution_function_is_constant_p (chrec_b));
3564 : : }
3565 : :
3566 : : /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3567 : : variable, i.e., if the SIV (Single Index Variable) test is true. */
3568 : :
3569 : : static bool
3570 : 1040780 : siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3571 : : {
3572 : 2080393 : if ((evolution_function_is_constant_p (chrec_a)
3573 : 1170 : && evolution_function_is_univariate_p (chrec_b))
3574 : 2080393 : || (evolution_function_is_constant_p (chrec_b)
3575 : 998 : && evolution_function_is_univariate_p (chrec_a)))
3576 : 2162 : return true;
3577 : :
3578 : 1038618 : if (evolution_function_is_univariate_p (chrec_a)
3579 : 1038618 : && evolution_function_is_univariate_p (chrec_b))
3580 : : {
3581 : 1010712 : switch (TREE_CODE (chrec_a))
3582 : : {
3583 : 1010712 : case POLYNOMIAL_CHREC:
3584 : 1010712 : switch (TREE_CODE (chrec_b))
3585 : : {
3586 : 1010712 : case POLYNOMIAL_CHREC:
3587 : 1010712 : if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3588 : : return false;
3589 : : /* FALLTHRU */
3590 : :
3591 : : default:
3592 : : return true;
3593 : : }
3594 : :
3595 : : default:
3596 : : return true;
3597 : : }
3598 : : }
3599 : :
3600 : : return false;
3601 : : }
3602 : :
3603 : : /* Creates a conflict function with N dimensions. The affine functions
3604 : : in each dimension follow. */
3605 : :
3606 : : static conflict_function *
3607 : 2723756 : conflict_fn (unsigned n, ...)
3608 : : {
3609 : 2723756 : unsigned i;
3610 : 2723756 : conflict_function *ret = XCNEW (conflict_function);
3611 : 2723756 : va_list ap;
3612 : :
3613 : 2723756 : gcc_assert (n > 0 && n <= MAX_DIM);
3614 : 2723756 : va_start (ap, n);
3615 : :
3616 : 2723756 : ret->n = n;
3617 : 5447512 : for (i = 0; i < n; i++)
3618 : 2723756 : ret->fns[i] = va_arg (ap, affine_fn);
3619 : 2723756 : va_end (ap);
3620 : :
3621 : 2723756 : return ret;
3622 : : }
3623 : :
3624 : : /* Returns constant affine function with value CST. */
3625 : :
3626 : : static affine_fn
3627 : 2629810 : affine_fn_cst (tree cst)
3628 : : {
3629 : 2629810 : affine_fn fn;
3630 : 2629810 : fn.create (1);
3631 : 2629810 : fn.quick_push (cst);
3632 : 2629810 : return fn;
3633 : : }
3634 : :
3635 : : /* Returns affine function with single variable, CST + COEF * x_DIM. */
3636 : :
3637 : : static affine_fn
3638 : 93946 : affine_fn_univar (tree cst, unsigned dim, tree coef)
3639 : : {
3640 : 93946 : affine_fn fn;
3641 : 93946 : fn.create (dim + 1);
3642 : 93946 : unsigned i;
3643 : :
3644 : 93946 : gcc_assert (dim > 0);
3645 : 93946 : fn.quick_push (cst);
3646 : 187892 : for (i = 1; i < dim; i++)
3647 : 0 : fn.quick_push (integer_zero_node);
3648 : 93946 : fn.quick_push (coef);
3649 : 93946 : return fn;
3650 : : }
3651 : :
3652 : : /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3653 : : *OVERLAPS_B are initialized to the functions that describe the
3654 : : relation between the elements accessed twice by CHREC_A and
3655 : : CHREC_B. For k >= 0, the following property is verified:
3656 : :
3657 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3658 : :
3659 : : static void
3660 : 1760346 : analyze_ziv_subscript (tree chrec_a,
3661 : : tree chrec_b,
3662 : : conflict_function **overlaps_a,
3663 : : conflict_function **overlaps_b,
3664 : : tree *last_conflicts)
3665 : : {
3666 : 1760346 : tree type, difference;
3667 : 1760346 : dependence_stats.num_ziv++;
3668 : :
3669 : 1760346 : if (dump_file && (dump_flags & TDF_DETAILS))
3670 : 24444 : fprintf (dump_file, "(analyze_ziv_subscript \n");
3671 : :
3672 : 1760346 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3673 : 1760346 : chrec_a = chrec_convert (type, chrec_a, NULL);
3674 : 1760346 : chrec_b = chrec_convert (type, chrec_b, NULL);
3675 : 1760346 : difference = chrec_fold_minus (type, chrec_a, chrec_b);
3676 : :
3677 : 1760346 : switch (TREE_CODE (difference))
3678 : : {
3679 : 1760346 : case INTEGER_CST:
3680 : 1760346 : if (integer_zerop (difference))
3681 : : {
3682 : : /* The difference is equal to zero: the accessed index
3683 : : overlaps for each iteration in the loop. */
3684 : 40 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3685 : 40 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3686 : 40 : *last_conflicts = chrec_dont_know;
3687 : 40 : dependence_stats.num_ziv_dependent++;
3688 : : }
3689 : : else
3690 : : {
3691 : : /* The accesses do not overlap. */
3692 : 1760306 : *overlaps_a = conflict_fn_no_dependence ();
3693 : 1760306 : *overlaps_b = conflict_fn_no_dependence ();
3694 : 1760306 : *last_conflicts = integer_zero_node;
3695 : 1760306 : dependence_stats.num_ziv_independent++;
3696 : : }
3697 : : break;
3698 : :
3699 : 0 : default:
3700 : : /* We're not sure whether the indexes overlap. For the moment,
3701 : : conservatively answer "don't know". */
3702 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3703 : 0 : fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3704 : :
3705 : 0 : *overlaps_a = conflict_fn_not_known ();
3706 : 0 : *overlaps_b = conflict_fn_not_known ();
3707 : 0 : *last_conflicts = chrec_dont_know;
3708 : 0 : dependence_stats.num_ziv_unimplemented++;
3709 : 0 : break;
3710 : : }
3711 : :
3712 : 1760346 : if (dump_file && (dump_flags & TDF_DETAILS))
3713 : 24444 : fprintf (dump_file, ")\n");
3714 : 1760346 : }
3715 : :
3716 : : /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3717 : : and only if it fits to the int type. If this is not the case, or the
3718 : : bound on the number of iterations of LOOP could not be derived, returns
3719 : : chrec_dont_know. */
3720 : :
3721 : : static tree
3722 : 0 : max_stmt_executions_tree (class loop *loop)
3723 : : {
3724 : 0 : widest_int nit;
3725 : :
3726 : 0 : if (!max_stmt_executions (loop, &nit))
3727 : 0 : return chrec_dont_know;
3728 : :
3729 : 0 : if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3730 : 0 : return chrec_dont_know;
3731 : :
3732 : 0 : return wide_int_to_tree (unsigned_type_node, nit);
3733 : 0 : }
3734 : :
3735 : : /* Determine whether the CHREC is always positive/negative. If the expression
3736 : : cannot be statically analyzed, return false, otherwise set the answer into
3737 : : VALUE. */
3738 : :
3739 : : static bool
3740 : 3482 : chrec_is_positive (tree chrec, bool *value)
3741 : : {
3742 : 3482 : bool value0, value1, value2;
3743 : 3482 : tree end_value, nb_iter;
3744 : :
3745 : 3482 : switch (TREE_CODE (chrec))
3746 : : {
3747 : 0 : case POLYNOMIAL_CHREC:
3748 : 0 : if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3749 : 0 : || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3750 : 0 : return false;
3751 : :
3752 : : /* FIXME -- overflows. */
3753 : 0 : if (value0 == value1)
3754 : : {
3755 : 0 : *value = value0;
3756 : 0 : return true;
3757 : : }
3758 : :
3759 : : /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3760 : : and the proof consists in showing that the sign never
3761 : : changes during the execution of the loop, from 0 to
3762 : : loop->nb_iterations. */
3763 : 0 : if (!evolution_function_is_affine_p (chrec))
3764 : : return false;
3765 : :
3766 : 0 : nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3767 : 0 : if (chrec_contains_undetermined (nb_iter))
3768 : : return false;
3769 : :
3770 : : #if 0
3771 : : /* TODO -- If the test is after the exit, we may decrease the number of
3772 : : iterations by one. */
3773 : : if (after_exit)
3774 : : nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3775 : : #endif
3776 : :
3777 : 0 : end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3778 : :
3779 : 0 : if (!chrec_is_positive (end_value, &value2))
3780 : : return false;
3781 : :
3782 : 0 : *value = value0;
3783 : 0 : return value0 == value1;
3784 : :
3785 : 3482 : case INTEGER_CST:
3786 : 3482 : switch (tree_int_cst_sgn (chrec))
3787 : : {
3788 : 1518 : case -1:
3789 : 1518 : *value = false;
3790 : 1518 : break;
3791 : 1964 : case 1:
3792 : 1964 : *value = true;
3793 : 1964 : break;
3794 : : default:
3795 : : return false;
3796 : : }
3797 : : return true;
3798 : :
3799 : : default:
3800 : : return false;
3801 : : }
3802 : : }
3803 : :
3804 : :
3805 : : /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3806 : : constant, and CHREC_B is an affine function. *OVERLAPS_A and
3807 : : *OVERLAPS_B are initialized to the functions that describe the
3808 : : relation between the elements accessed twice by CHREC_A and
3809 : : CHREC_B. For k >= 0, the following property is verified:
3810 : :
3811 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3812 : :
3813 : : static void
3814 : 2162 : analyze_siv_subscript_cst_affine (tree chrec_a,
3815 : : tree chrec_b,
3816 : : conflict_function **overlaps_a,
3817 : : conflict_function **overlaps_b,
3818 : : tree *last_conflicts)
3819 : : {
3820 : 2162 : bool value0, value1, value2;
3821 : 2162 : tree type, difference, tmp;
3822 : :
3823 : 2162 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3824 : 2162 : chrec_a = chrec_convert (type, chrec_a, NULL);
3825 : 2162 : chrec_b = chrec_convert (type, chrec_b, NULL);
3826 : 2162 : difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3827 : :
3828 : : /* Special case overlap in the first iteration. */
3829 : 2162 : if (integer_zerop (difference))
3830 : : {
3831 : 419 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3832 : 419 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3833 : 419 : *last_conflicts = integer_one_node;
3834 : 419 : return;
3835 : : }
3836 : :
3837 : 1743 : if (!chrec_is_positive (initial_condition (difference), &value0))
3838 : : {
3839 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3840 : 0 : fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3841 : :
3842 : 0 : dependence_stats.num_siv_unimplemented++;
3843 : 0 : *overlaps_a = conflict_fn_not_known ();
3844 : 0 : *overlaps_b = conflict_fn_not_known ();
3845 : 0 : *last_conflicts = chrec_dont_know;
3846 : 0 : return;
3847 : : }
3848 : : else
3849 : : {
3850 : 1743 : if (value0 == false)
3851 : : {
3852 : 1322 : if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3853 : 1322 : || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3854 : : {
3855 : 4 : if (dump_file && (dump_flags & TDF_DETAILS))
3856 : 0 : fprintf (dump_file, "siv test failed: chrec not positive.\n");
3857 : :
3858 : 4 : *overlaps_a = conflict_fn_not_known ();
3859 : 4 : *overlaps_b = conflict_fn_not_known ();
3860 : 4 : *last_conflicts = chrec_dont_know;
3861 : 4 : dependence_stats.num_siv_unimplemented++;
3862 : 4 : return;
3863 : : }
3864 : : else
3865 : : {
3866 : 1318 : if (value1 == true)
3867 : : {
3868 : : /* Example:
3869 : : chrec_a = 12
3870 : : chrec_b = {10, +, 1}
3871 : : */
3872 : :
3873 : 1318 : if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3874 : : {
3875 : 1037 : HOST_WIDE_INT numiter;
3876 : 1037 : class loop *loop = get_chrec_loop (chrec_b);
3877 : :
3878 : 1037 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3879 : 1037 : tmp = fold_build2 (EXACT_DIV_EXPR, type,
3880 : : fold_build1 (ABS_EXPR, type, difference),
3881 : : CHREC_RIGHT (chrec_b));
3882 : 1037 : *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3883 : 1037 : *last_conflicts = integer_one_node;
3884 : :
3885 : :
3886 : : /* Perform weak-zero siv test to see if overlap is
3887 : : outside the loop bounds. */
3888 : 1037 : numiter = max_stmt_executions_int (loop);
3889 : :
3890 : 1037 : if (numiter >= 0
3891 : 1037 : && compare_tree_int (tmp, numiter) > 0)
3892 : : {
3893 : 0 : free_conflict_function (*overlaps_a);
3894 : 0 : free_conflict_function (*overlaps_b);
3895 : 0 : *overlaps_a = conflict_fn_no_dependence ();
3896 : 0 : *overlaps_b = conflict_fn_no_dependence ();
3897 : 0 : *last_conflicts = integer_zero_node;
3898 : 0 : dependence_stats.num_siv_independent++;
3899 : 0 : return;
3900 : : }
3901 : 1037 : dependence_stats.num_siv_dependent++;
3902 : 1037 : return;
3903 : : }
3904 : :
3905 : : /* When the step does not divide the difference, there are
3906 : : no overlaps. */
3907 : : else
3908 : : {
3909 : 281 : *overlaps_a = conflict_fn_no_dependence ();
3910 : 281 : *overlaps_b = conflict_fn_no_dependence ();
3911 : 281 : *last_conflicts = integer_zero_node;
3912 : 281 : dependence_stats.num_siv_independent++;
3913 : 281 : return;
3914 : : }
3915 : : }
3916 : :
3917 : : else
3918 : : {
3919 : : /* Example:
3920 : : chrec_a = 12
3921 : : chrec_b = {10, +, -1}
3922 : :
3923 : : In this case, chrec_a will not overlap with chrec_b. */
3924 : 0 : *overlaps_a = conflict_fn_no_dependence ();
3925 : 0 : *overlaps_b = conflict_fn_no_dependence ();
3926 : 0 : *last_conflicts = integer_zero_node;
3927 : 0 : dependence_stats.num_siv_independent++;
3928 : 0 : return;
3929 : : }
3930 : : }
3931 : : }
3932 : : else
3933 : : {
3934 : 421 : if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3935 : 421 : || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3936 : : {
3937 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3938 : 0 : fprintf (dump_file, "siv test failed: chrec not positive.\n");
3939 : :
3940 : 0 : *overlaps_a = conflict_fn_not_known ();
3941 : 0 : *overlaps_b = conflict_fn_not_known ();
3942 : 0 : *last_conflicts = chrec_dont_know;
3943 : 0 : dependence_stats.num_siv_unimplemented++;
3944 : 0 : return;
3945 : : }
3946 : : else
3947 : : {
3948 : 421 : if (value2 == false)
3949 : : {
3950 : : /* Example:
3951 : : chrec_a = 3
3952 : : chrec_b = {10, +, -1}
3953 : : */
3954 : 196 : if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3955 : : {
3956 : 97 : HOST_WIDE_INT numiter;
3957 : 97 : class loop *loop = get_chrec_loop (chrec_b);
3958 : :
3959 : 97 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3960 : 97 : tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3961 : : CHREC_RIGHT (chrec_b));
3962 : 97 : *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3963 : 97 : *last_conflicts = integer_one_node;
3964 : :
3965 : : /* Perform weak-zero siv test to see if overlap is
3966 : : outside the loop bounds. */
3967 : 97 : numiter = max_stmt_executions_int (loop);
3968 : :
3969 : 97 : if (numiter >= 0
3970 : 97 : && compare_tree_int (tmp, numiter) > 0)
3971 : : {
3972 : 0 : free_conflict_function (*overlaps_a);
3973 : 0 : free_conflict_function (*overlaps_b);
3974 : 0 : *overlaps_a = conflict_fn_no_dependence ();
3975 : 0 : *overlaps_b = conflict_fn_no_dependence ();
3976 : 0 : *last_conflicts = integer_zero_node;
3977 : 0 : dependence_stats.num_siv_independent++;
3978 : 0 : return;
3979 : : }
3980 : 97 : dependence_stats.num_siv_dependent++;
3981 : 97 : return;
3982 : : }
3983 : :
3984 : : /* When the step does not divide the difference, there
3985 : : are no overlaps. */
3986 : : else
3987 : : {
3988 : 99 : *overlaps_a = conflict_fn_no_dependence ();
3989 : 99 : *overlaps_b = conflict_fn_no_dependence ();
3990 : 99 : *last_conflicts = integer_zero_node;
3991 : 99 : dependence_stats.num_siv_independent++;
3992 : 99 : return;
3993 : : }
3994 : : }
3995 : : else
3996 : : {
3997 : : /* Example:
3998 : : chrec_a = 3
3999 : : chrec_b = {4, +, 1}
4000 : :
4001 : : In this case, chrec_a will not overlap with chrec_b. */
4002 : 225 : *overlaps_a = conflict_fn_no_dependence ();
4003 : 225 : *overlaps_b = conflict_fn_no_dependence ();
4004 : 225 : *last_conflicts = integer_zero_node;
4005 : 225 : dependence_stats.num_siv_independent++;
4006 : 225 : return;
4007 : : }
4008 : : }
4009 : : }
4010 : : }
4011 : : }
4012 : :
4013 : : /* Helper recursive function for initializing the matrix A. Returns
4014 : : the initial value of CHREC. */
4015 : :
4016 : : static tree
4017 : 1981560 : initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4018 : : {
4019 : 3963112 : gcc_assert (chrec);
4020 : :
4021 : 3963112 : switch (TREE_CODE (chrec))
4022 : : {
4023 : 1981560 : case POLYNOMIAL_CHREC:
4024 : 1981560 : HOST_WIDE_INT chrec_right;
4025 : 1981560 : if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4026 : 8 : return chrec_dont_know;
4027 : 1981552 : chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4028 : : /* We want to be able to negate without overflow. */
4029 : 1981552 : if (chrec_right == HOST_WIDE_INT_MIN)
4030 : 0 : return chrec_dont_know;
4031 : 1981552 : A[index][0] = mult * chrec_right;
4032 : 1981552 : return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4033 : :
4034 : 0 : case PLUS_EXPR:
4035 : 0 : case MULT_EXPR:
4036 : 0 : case MINUS_EXPR:
4037 : 0 : {
4038 : 0 : tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4039 : 0 : tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4040 : :
4041 : 0 : return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4042 : : }
4043 : :
4044 : 0 : CASE_CONVERT:
4045 : 0 : {
4046 : 0 : tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4047 : 0 : return chrec_convert (chrec_type (chrec), op, NULL);
4048 : : }
4049 : :
4050 : 0 : case BIT_NOT_EXPR:
4051 : 0 : {
4052 : : /* Handle ~X as -1 - X. */
4053 : 0 : tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4054 : 0 : return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4055 : 0 : build_int_cst (TREE_TYPE (chrec), -1), op);
4056 : : }
4057 : :
4058 : : case INTEGER_CST:
4059 : : return chrec;
4060 : :
4061 : 0 : default:
4062 : 0 : gcc_unreachable ();
4063 : : return NULL_TREE;
4064 : : }
4065 : : }
4066 : :
4067 : : #define FLOOR_DIV(x,y) ((x) / (y))
4068 : :
4069 : : /* Solves the special case of the Diophantine equation:
4070 : : | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4071 : :
4072 : : Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4073 : : number of iterations that loops X and Y run. The overlaps will be
4074 : : constructed as evolutions in dimension DIM. */
4075 : :
4076 : : static void
4077 : 56 : compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4078 : : HOST_WIDE_INT step_a,
4079 : : HOST_WIDE_INT step_b,
4080 : : affine_fn *overlaps_a,
4081 : : affine_fn *overlaps_b,
4082 : : tree *last_conflicts, int dim)
4083 : : {
4084 : 56 : if (((step_a > 0 && step_b > 0)
4085 : 10 : || (step_a < 0 && step_b < 0)))
4086 : : {
4087 : 52 : HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4088 : 52 : HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4089 : :
4090 : 52 : gcd_steps_a_b = gcd (step_a, step_b);
4091 : 52 : step_overlaps_a = step_b / gcd_steps_a_b;
4092 : 52 : step_overlaps_b = step_a / gcd_steps_a_b;
4093 : :
4094 : 52 : if (niter > 0)
4095 : : {
4096 : 52 : tau2 = FLOOR_DIV (niter, step_overlaps_a);
4097 : 52 : tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4098 : 52 : last_conflict = tau2;
4099 : 52 : *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4100 : : }
4101 : : else
4102 : 0 : *last_conflicts = chrec_dont_know;
4103 : :
4104 : 52 : *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4105 : : build_int_cst (NULL_TREE,
4106 : : step_overlaps_a));
4107 : 52 : *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4108 : : build_int_cst (NULL_TREE,
4109 : : step_overlaps_b));
4110 : 52 : }
4111 : :
4112 : : else
4113 : : {
4114 : 4 : *overlaps_a = affine_fn_cst (integer_zero_node);
4115 : 4 : *overlaps_b = affine_fn_cst (integer_zero_node);
4116 : 4 : *last_conflicts = integer_zero_node;
4117 : : }
4118 : 56 : }
4119 : :
4120 : : /* Solves the special case of a Diophantine equation where CHREC_A is
4121 : : an affine bivariate function, and CHREC_B is an affine univariate
4122 : : function. For example,
4123 : :
4124 : : | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4125 : :
4126 : : has the following overlapping functions:
4127 : :
4128 : : | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4129 : : | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4130 : : | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4131 : : When at least one pairwise overlap is found, *OVERLAPS_A receives two functions (for x and y) and *OVERLAPS_B one (for z); *LAST_CONFLICTS is set as well.
4132 : : FORNOW: This is a specialized implementation for a case occurring in
4133 : : a common benchmark. TODO: implement the general algorithm. */
4134 : :
4135 : : static void
4136 : 0 : compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4137 : : conflict_function **overlaps_a,
4138 : : conflict_function **overlaps_b,
4139 : : tree *last_conflicts)
4140 : : {
4141 : 0 : bool xz_p, yz_p, xyz_p;
4142 : 0 : HOST_WIDE_INT step_x, step_y, step_z;
4143 : 0 : HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4144 : 0 : affine_fn overlaps_a_xz, overlaps_b_xz;
4145 : 0 : affine_fn overlaps_a_yz, overlaps_b_yz;
4146 : 0 : affine_fn overlaps_a_xyz, overlaps_b_xyz;
4147 : 0 : affine_fn ova1, ova2, ovb;
4148 : 0 : tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4149 : :
4150 : 0 : step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a))); /* Step of the inner (left) chrec of CHREC_A. */
4151 : 0 : step_y = int_cst_value (CHREC_RIGHT (chrec_a)); /* Step of the outer chrec of CHREC_A. */
4152 : 0 : step_z = int_cst_value (CHREC_RIGHT (chrec_b)); /* Step of CHREC_B. */
4153 : :
4154 : 0 : niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4155 : 0 : niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4156 : 0 : niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4157 : :
4158 : 0 : if (niter_x < 0 || niter_y < 0 || niter_z < 0) /* A negative count is treated as a missing iteration count: give up. */
4159 : : {
4160 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4161 : 0 : fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4162 : :
4163 : 0 : *overlaps_a = conflict_fn_not_known ();
4164 : 0 : *overlaps_b = conflict_fn_not_known ();
4165 : 0 : *last_conflicts = chrec_dont_know;
4166 : 0 : return;
4167 : : }
4168 : :
4169 : 0 : niter = MIN (niter_x, niter_z); /* Sub-problem x vs. z, built in dimension 1. */
4170 : 0 : compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4171 : : &overlaps_a_xz,
4172 : : &overlaps_b_xz,
4173 : : &last_conflicts_xz, 1);
4174 : 0 : niter = MIN (niter_y, niter_z); /* Sub-problem y vs. z, built in dimension 2. */
4175 : 0 : compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4176 : : &overlaps_a_yz,
4177 : : &overlaps_b_yz,
4178 : : &last_conflicts_yz, 2);
4179 : 0 : niter = MIN (niter_x, niter_z); /* Sub-problem (x + y) vs. z, built in dimension 3, bounded by the smallest of the three counts. */
4180 : 0 : niter = MIN (niter_y, niter);
4181 : 0 : compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4182 : : &overlaps_a_xyz,
4183 : : &overlaps_b_xyz,
4184 : : &last_conflicts_xyz, 3);
4185 : :
4186 : 0 : xz_p = !integer_zerop (last_conflicts_xz); /* A sub-problem contributes only when integer_zerop rejects its last conflict. */
4187 : 0 : yz_p = !integer_zerop (last_conflicts_yz);
4188 : 0 : xyz_p = !integer_zerop (last_conflicts_xyz);
4189 : :
4190 : 0 : if (xz_p || yz_p || xyz_p)
4191 : : {
4192 : 0 : ova1 = affine_fn_cst (integer_zero_node); /* Accumulators: OVA1 for x, OVA2 for y, OVB for z; all start at constant zero. */
4193 : 0 : ova2 = affine_fn_cst (integer_zero_node);
4194 : 0 : ovb = affine_fn_cst (integer_zero_node);
4195 : 0 : if (xz_p)
4196 : : {
4197 : 0 : affine_fn t0 = ova1; /* Keep the old accumulators so they can be freed once replaced by the sums. */
4198 : 0 : affine_fn t2 = ovb;
4199 : :
4200 : 0 : ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4201 : 0 : ovb = affine_fn_plus (ovb, overlaps_b_xz);
4202 : 0 : affine_fn_free (t0);
4203 : 0 : affine_fn_free (t2);
4204 : 0 : *last_conflicts = last_conflicts_xz; /* May be overwritten by the yz and xyz cases below. */
4205 : : }
4206 : 0 : if (yz_p)
4207 : : {
4208 : 0 : affine_fn t0 = ova2;
4209 : 0 : affine_fn t2 = ovb;
4210 : :
4211 : 0 : ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4212 : 0 : ovb = affine_fn_plus (ovb, overlaps_b_yz);
4213 : 0 : affine_fn_free (t0);
4214 : 0 : affine_fn_free (t2);
4215 : 0 : *last_conflicts = last_conflicts_yz; /* May be overwritten by the xyz case below. */
4216 : : }
4217 : 0 : if (xyz_p)
4218 : : {
4219 : 0 : affine_fn t0 = ova1;
4220 : 0 : affine_fn t2 = ova2;
4221 : 0 : affine_fn t4 = ovb;
4222 : :
4223 : 0 : ova1 = affine_fn_plus (ova1, overlaps_a_xyz); /* The combined x + y overlap is added to both the x and the y accumulator. */
4224 : 0 : ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4225 : 0 : ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4226 : 0 : affine_fn_free (t0);
4227 : 0 : affine_fn_free (t2);
4228 : 0 : affine_fn_free (t4);
4229 : 0 : *last_conflicts = last_conflicts_xyz;
4230 : : }
4231 : 0 : *overlaps_a = conflict_fn (2, ova1, ova2);
4232 : 0 : *overlaps_b = conflict_fn (1, ovb);
4233 : 0 : }
4234 : : else /* No sub-problem contributed: constant-zero overlaps and a zero last conflict. */
4235 : : {
4236 : 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4237 : 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4238 : 0 : *last_conflicts = integer_zero_node;
4239 : : }
4240 : :
4241 : 0 : affine_fn_free (overlaps_a_xz); /* The pairwise results are temporaries and are released here. */
4242 : 0 : affine_fn_free (overlaps_b_xz);
4243 : 0 : affine_fn_free (overlaps_a_yz);
4244 : 0 : affine_fn_free (overlaps_b_yz);
4245 : 0 : affine_fn_free (overlaps_a_xyz);
4246 : 0 : affine_fn_free (overlaps_b_xyz);
4247 : : }
4248 : :
4249 : : /* Copy the first SIZE elements of vector VEC1 into VEC2 (the source is the first argument, the destination the second). */
4250 : :
4251 : : static void
4252 : 2020221 : lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4253 : : int size)
4254 : : {
4255 : 2020221 : memcpy (vec2, vec1, size * sizeof (*vec1)); /* memcpy is used, so VEC1 and VEC2 must not overlap. */
4256 : 0 : }
4257 : :
4258 : : /* Copy the elements of M x N matrix MAT1 to MAT2, one row at a time; MAT2's rows must already exist. */
4259 : :
4260 : : static void
4261 : 990720 : lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4262 : : int m, int n)
4263 : : {
4264 : 990720 : int i;
4265 : :
4266 : 2972160 : for (i = 0; i < m; i++) /* M rows of N elements each. */
4267 : 1981440 : lambda_vector_copy (mat1[i], mat2[i], n);
4268 : 990720 : }
4269 : :
4270 : : /* Store the SIZE x SIZE identity matrix in MAT: ones on the diagonal, zeros elsewhere. */
4271 : :
4272 : : static void
4273 : 990720 : lambda_matrix_id (lambda_matrix mat, int size)
4274 : : {
4275 : 990720 : int i, j;
4276 : :
4277 : 2972160 : for (i = 0; i < size; i++)
4278 : 5944320 : for (j = 0; j < size; j++)
4279 : 5944320 : mat[i][j] = (i == j) ? 1 : 0;
4280 : 990720 : }
4281 : :
4282 : : /* Return the index of the first nonzero element of vector VEC1 at an
4283 : : index in [START, N). We must have START <= N (otherwise START itself is returned).
4284 : : Returns N if every element of VEC1 in that range is zero. */
4285 : :
4286 : : static int
4287 : 990720 : lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4288 : : {
4289 : 990720 : int j = start;
4290 : 990720 : while (j < n && vec1[j] == 0) /* Skip zeros, never reading at or beyond index N. */
4291 : 0 : j++;
4292 : 990720 : return j;
4293 : : }
4294 : :
4295 : : /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4296 : : R2 = R2 + CONST1 * R1. Returns true on success and false if the update had to be abandoned (see below). */
4297 : :
4298 : : static bool
4299 : 1981642 : lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4300 : : lambda_int const1)
4301 : : {
4302 : 1981642 : int i;
4303 : :
4304 : 1981642 : if (const1 == 0) /* Adding zero times a row changes nothing. */
4305 : : return true;
4306 : :
4307 : 4953690 : for (i = 0; i < n; i++)
4308 : : {
4309 : 2972214 : bool ovf;
4310 : 2972214 : lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf); /* Overflow-checked CONST1 * R1[i]. */
4311 : 2972214 : if (ovf)
4312 : 1981642 : return false; /* Columns before I of row R2 have already been updated at this point. */
4313 : 2972214 : lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf); /* Overflow-checked R2[i] + CONST1 * R1[i]. */
4314 : 2972214 : if (ovf || tem2 == HOST_WIDE_INT_MIN) /* HOST_WIDE_INT_MIN is also rejected; presumably because it cannot be negated later. */
4315 : : return false;
4316 : 2972214 : mat[r2][i] = tem2;
4317 : : }
4318 : :
4319 : : return true;
4320 : : }
4321 : :
4322 : : /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4323 : : and store the result in VEC2. VEC1 and VEC2 may be the same vector (each element is read before it is written). */
4324 : :
4325 : : static void
4326 : 985091 : lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4327 : : int size, lambda_int const1)
4328 : : {
4329 : 985091 : int i;
4330 : :
4331 : 985091 : if (const1 == 0)
4332 : 0 : lambda_vector_clear (vec2, size); /* A zero multiplier is delegated to lambda_vector_clear instead of multiplying. */
4333 : : else
4334 : 2955273 : for (i = 0; i < size; i++)
4335 : 1970182 : vec2[i] = const1 * vec1[i]; /* NOTE(review): this multiplication is not checked for overflow. */
4336 : 985091 : }
4337 : :
4338 : : /* Negate vector VEC1 with length SIZE and store it in VEC2, by multiplying every element by -1. */
4339 : :
4340 : : static void
4341 : 985091 : lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4342 : : int size)
4343 : : {
4344 : 0 : lambda_vector_mult_const (vec1, vec2, size, -1);
4345 : 0 : }
4346 : :
4347 : : /* Negate, in place, row R1 of matrix MAT which has N columns. */
4348 : :
4349 : : static void
4350 : 985091 : lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4351 : : {
4352 : 0 : lambda_vector_negate (mat[r1], mat[r1], n); /* Source and destination are the same row. */
4353 : 985091 : }
4354 : :
4355 : : /* Return true if the first SIZE elements of VEC1 and VEC2 are pairwise equal (trivially true when SIZE <= 0). */
4356 : :
4357 : : static bool
4358 : 283480 : lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4359 : : {
4360 : 283480 : int i;
4361 : 284669 : for (i = 0; i < size; i++)
4362 : 284385 : if (vec1[i] != vec2[i]) /* Stop at the first mismatch. */
4363 : : return false;
4364 : : return true;
4365 : : }
4366 : :
4367 : : /* Given an M x N integer matrix A, this function determines an M x
4368 : : M unimodular matrix U, and an M x N echelon matrix S such that
4369 : : "U.A = S". This decomposition is also known as "right Hermite".
4370 : : Returns false as soon as a row operation is rejected by lambda_matrix_row_add (S and U are then left partially reduced), true otherwise.
4371 : : Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4372 : : Restructuring Compilers" Utpal Banerjee. */
4373 : :
4374 : : static bool
4375 : 990720 : lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4376 : : lambda_matrix S, lambda_matrix U)
4377 : : {
4378 : 990720 : int i, j, i0 = 0;
4379 : :
4380 : 990720 : lambda_matrix_copy (A, S, m, n); /* Start from S = A and U = identity; every row operation below is applied to both. */
4381 : 990720 : lambda_matrix_id (U, m);
4382 : :
4383 : 1981440 : for (j = 0; j < n; j++)
4384 : : {
4385 : 1981440 : if (lambda_vector_first_nz (S[j], m, i0) < m) /* NOTE(review): this scans row S[j] over indices [i0, M), not column J; the caller in this file passes N == 1 -- confirm intent. */
4386 : : {
4387 : 990720 : ++i0;
4388 : 1981440 : for (i = m - 1; i >= i0; i--) /* Work upwards from the bottom row, zeroing S[i][j]. */
4389 : : {
4390 : 1981541 : while (S[i][j] != 0)
4391 : : {
4392 : 990821 : lambda_int factor, a, b;
4393 : :
4394 : 990821 : a = S[i-1][j];
4395 : 990821 : b = S[i][j];
4396 : 990821 : gcc_assert (a != HOST_WIDE_INT_MIN); /* Presumably guards the division below against overflow. */
4397 : 990821 : factor = a / b;
4398 : :
4399 : 990821 : if (!lambda_matrix_row_add (S, n, i, i-1, -factor)) /* Row i-1 -= FACTOR * row i, then exchange the two rows: one Euclidean step on column J. */
4400 : : return false;
4401 : 990821 : std::swap (S[i], S[i-1]);
4402 : :
4403 : 990821 : if (!lambda_matrix_row_add (U, m, i, i-1, -factor)) /* Mirror the same operation on U. */
4404 : : return false;
4405 : 990821 : std::swap (U[i], U[i-1]);
4406 : : }
4407 : : }
4408 : : }
4409 : : }
4410 : :
4411 : : return true;
4412 : : }
4413 : :
4414 : : /* Determines the overlapping elements due to accesses CHREC_A and
4415 : : CHREC_B, that are affine functions. This function cannot handle
4416 : : symbolic evolution functions, i.e. when initial conditions are
4417 : : parameters, because it uses lambda matrices of integers. The result is stored in *OVERLAPS_A, *OVERLAPS_B and *LAST_CONFLICTS. */
4418 : :
4419 : : static void
4420 : 990780 : analyze_subscript_affine_affine (tree chrec_a,
4421 : : tree chrec_b,
4422 : : conflict_function **overlaps_a,
4423 : : conflict_function **overlaps_b,
4424 : : tree *last_conflicts)
4425 : : {
4426 : 990780 : unsigned nb_vars_a, nb_vars_b, dim;
4427 : 990780 : lambda_int gamma, gcd_alpha_beta;
4428 : 990780 : lambda_matrix A, U, S;
4429 : 990780 : struct obstack scratch_obstack;
4430 : :
4431 : 990780 : if (eq_evolutions_p (chrec_a, chrec_b)) /* Early exit taken before the obstack is initialized, so it returns directly instead of jumping to the cleanup label. */
4432 : : {
4433 : : /* The accessed index overlaps for each iteration in the
4434 : : loop. */
4435 : 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4436 : 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4437 : 0 : *last_conflicts = chrec_dont_know;
4438 : 0 : return;
4439 : : }
4440 : 990780 : if (dump_file && (dump_flags & TDF_DETAILS))
4441 : 16752 : fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4442 : :
4443 : : /* For determining the initial intersection, we have to solve a
4444 : : Diophantine equation. This is the most time consuming part.
4445 : :
4446 : : For answering to the question: "Is there a dependence?" we have
4447 : : to prove that there exists a solution to the Diophantine
4448 : : equation, and that the solution is in the iteration domain,
4449 : : i.e. the solution is positive or zero, and that the solution
4450 : : happens before the upper bound loop.nb_iterations. Otherwise
4451 : : there is no dependence. This function outputs a description of
4452 : : the iterations that hold the intersections. */
4453 : :
4454 : 990780 : nb_vars_a = nb_vars_in_chrec (chrec_a);
4455 : 990780 : nb_vars_b = nb_vars_in_chrec (chrec_b);
4456 : :
4457 : 990780 : gcc_obstack_init (&scratch_obstack); /* Scratch storage for the matrices; released at end_analyze_subs_aa. */
4458 : :
4459 : 990780 : dim = nb_vars_a + nb_vars_b;
4460 : 990780 : U = lambda_matrix_new (dim, dim, &scratch_obstack);
4461 : 990780 : A = lambda_matrix_new (dim, 1, &scratch_obstack);
4462 : 990780 : S = lambda_matrix_new (dim, 1, &scratch_obstack);
4463 : :
4464 : 990780 : tree init_a = initialize_matrix_A (A, chrec_a, 0, 1); /* Presumably fills A from row 0 with CHREC_A's steps and returns its initial value -- confirm against initialize_matrix_A. */
4465 : 990780 : tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1); /* CHREC_B's entries start at row NB_VARS_A and use multiplier -1. */
4466 : 990780 : if (init_a == chrec_dont_know
4467 : 990776 : || init_b == chrec_dont_know)
4468 : : {
4469 : 4 : if (dump_file && (dump_flags & TDF_DETAILS))
4470 : 0 : fprintf (dump_file, "affine-affine test failed: "
4471 : : "representation issue.\n");
4472 : 4 : *overlaps_a = conflict_fn_not_known ();
4473 : 4 : *overlaps_b = conflict_fn_not_known ();
4474 : 4 : *last_conflicts = chrec_dont_know;
4475 : 4 : goto end_analyze_subs_aa;
4476 : : }
4477 : 990776 : gamma = int_cst_value (init_b) - int_cst_value (init_a); /* Difference of the two initial values. */
4478 : :
4479 : : /* Don't do all the hard work of solving the Diophantine equation
4480 : : when we already know the solution: for example,
4481 : : | {3, +, 1}_1
4482 : : | {3, +, 4}_2
4483 : : | gamma = 3 - 3 = 0.
4484 : : Then the first overlap occurs during the first iterations:
4485 : : | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4486 : : */
4487 : 990776 : if (gamma == 0)
4488 : : {
4489 : 56 : if (nb_vars_a == 1 && nb_vars_b == 1)
4490 : : {
4491 : 56 : HOST_WIDE_INT step_a, step_b;
4492 : 56 : HOST_WIDE_INT niter, niter_a, niter_b;
4493 : 56 : affine_fn ova, ovb;
4494 : :
4495 : 56 : niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4496 : 56 : niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4497 : 56 : niter = MIN (niter_a, niter_b);
4498 : 56 : step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4499 : 56 : step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4500 : :
4501 : 56 : compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4502 : : &ova, &ovb,
4503 : : last_conflicts, 1);
4504 : 56 : *overlaps_a = conflict_fn (1, ova);
4505 : 56 : *overlaps_b = conflict_fn (1, ovb);
4506 : : }
4507 : :
4508 : 0 : else if (nb_vars_a == 2 && nb_vars_b == 1)
4509 : 0 : compute_overlap_steps_for_affine_1_2
4510 : 0 : (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4511 : :
4512 : 0 : else if (nb_vars_a == 1 && nb_vars_b == 2) /* Mirror case: same solver with the roles of A and B exchanged. */
4513 : 0 : compute_overlap_steps_for_affine_1_2
4514 : 0 : (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4515 : :
4516 : : else
4517 : : {
4518 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4519 : 0 : fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4520 : 0 : *overlaps_a = conflict_fn_not_known ();
4521 : 0 : *overlaps_b = conflict_fn_not_known ();
4522 : 0 : *last_conflicts = chrec_dont_know;
4523 : : }
4524 : 56 : goto end_analyze_subs_aa;
4525 : : }
4526 : :
4527 : : /* U.A = S */
4528 : 990720 : if (!lambda_matrix_right_hermite (A, dim, 1, S, U)) /* A false return means the decomposition was abandoned: answer don't know. */
4529 : : {
4530 : 0 : *overlaps_a = conflict_fn_not_known ();
4531 : 0 : *overlaps_b = conflict_fn_not_known ();
4532 : 0 : *last_conflicts = chrec_dont_know;
4533 : 0 : goto end_analyze_subs_aa;
4534 : : }
4535 : :
4536 : 990720 : if (S[0][0] < 0) /* Normalize so that S[0][0], used below as the gcd, is non-negative; row 0 of U is negated to match. */
4537 : : {
4538 : 985091 : S[0][0] *= -1;
4539 : 985091 : lambda_matrix_row_negate (U, dim, 0);
4540 : : }
4541 : 990720 : gcd_alpha_beta = S[0][0];
4542 : :
4543 : : /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4544 : : but that is a quite strange case. Instead of ICEing, answer
4545 : : don't know. */
4546 : 990720 : if (gcd_alpha_beta == 0)
4547 : : {
4548 : 0 : *overlaps_a = conflict_fn_not_known ();
4549 : 0 : *overlaps_b = conflict_fn_not_known ();
4550 : 0 : *last_conflicts = chrec_dont_know;
4551 : 0 : goto end_analyze_subs_aa;
4552 : : }
4553 : :
4554 : : /* The classic "gcd-test". */
4555 : 990720 : if (!int_divides_p (gcd_alpha_beta, gamma))
4556 : : {
4557 : : /* The "gcd-test" has determined that there is no integer
4558 : : solution, i.e. there is no dependence. */
4559 : 890633 : *overlaps_a = conflict_fn_no_dependence ();
4560 : 890633 : *overlaps_b = conflict_fn_no_dependence ();
4561 : 890633 : *last_conflicts = integer_zero_node;
4562 : : }
4563 : :
4564 : : /* Both access functions are univariate. This includes SIV and MIV cases. */
4565 : 100087 : else if (nb_vars_a == 1 && nb_vars_b == 1)
4566 : : {
4567 : : /* Both functions should have the same evolution sign. */
4568 : 100087 : if (((A[0][0] > 0 && -A[1][0] > 0)
4569 : 3247 : || (A[0][0] < 0 && -A[1][0] < 0))) /* A[1][0] is negated here because B's row was filled with multiplier -1. */
4570 : : {
4571 : : /* The solutions are given by:
4572 : : |
4573 : : | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
4574 : : | [u21 u22] [y0]
4575 : :
4576 : : For a given integer t. Using the following variables,
4577 : :
4578 : : | i0 = u11 * gamma / gcd_alpha_beta
4579 : : | j0 = u12 * gamma / gcd_alpha_beta
4580 : : | i1 = u21
4581 : : | j1 = u22
4582 : :
4583 : : the solutions are:
4584 : :
4585 : : | x0 = i0 + i1 * t,
4586 : : | y0 = j0 + j1 * t. */
4587 : 99767 : HOST_WIDE_INT i0, j0, i1, j1;
4588 : :
4589 : 99767 : i0 = U[0][0] * gamma / gcd_alpha_beta; /* NOTE(review): these products are not checked for overflow. */
4590 : 99767 : j0 = U[0][1] * gamma / gcd_alpha_beta;
4591 : 99767 : i1 = U[1][0];
4592 : 99767 : j1 = U[1][1];
4593 : :
4594 : 99767 : if ((i1 == 0 && i0 < 0)
4595 : 99767 : || (j1 == 0 && j0 < 0)) /* A coordinate that does not depend on t and is negative can never enter the iteration domain. */
4596 : : {
4597 : : /* There is no solution.
4598 : : FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4599 : : falls in here, but for the moment we don't look at the
4600 : : upper bound of the iteration domain. */
4601 : 0 : *overlaps_a = conflict_fn_no_dependence ();
4602 : 0 : *overlaps_b = conflict_fn_no_dependence ();
4603 : 0 : *last_conflicts = integer_zero_node;
4604 : 52846 : goto end_analyze_subs_aa;
4605 : : }
4606 : :
4607 : 99767 : if (i1 > 0 && j1 > 0)
4608 : : {
4609 : 99767 : HOST_WIDE_INT niter_a
4610 : 99767 : = max_stmt_executions_int (get_chrec_loop (chrec_a));
4611 : 99767 : HOST_WIDE_INT niter_b
4612 : 99767 : = max_stmt_executions_int (get_chrec_loop (chrec_b));
4613 : 99767 : HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4614 : :
4615 : : /* (X0, Y0) is a solution of the Diophantine equation:
4616 : : "chrec_a (X0) = chrec_b (Y0)". */
4617 : 99767 : HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1), /* Parameter t at which x0 and y0 are evaluated; relies on the rounding of the CEIL macro, which is defined elsewhere. */
4618 : : CEIL (-j0, j1));
4619 : 99767 : HOST_WIDE_INT x0 = i1 * tau1 + i0;
4620 : 99767 : HOST_WIDE_INT y0 = j1 * tau1 + j0;
4621 : :
4622 : : /* (X1, Y1) is the smallest positive solution of the eq
4623 : : "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4624 : : first conflict occurs. */
4625 : 99767 : HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1); /* Step back by as many whole periods (I1, J1) as both coordinates allow. */
4626 : 99767 : HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4627 : 99767 : HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4628 : :
4629 : 99767 : if (niter > 0)
4630 : : {
4631 : : /* If the overlap occurs outside of the bounds of the
4632 : : loop, there is no dependence. */
4633 : 93586 : if (x1 >= niter_a || y1 >= niter_b)
4634 : : {
4635 : 52846 : *overlaps_a = conflict_fn_no_dependence ();
4636 : 52846 : *overlaps_b = conflict_fn_no_dependence ();
4637 : 52846 : *last_conflicts = integer_zero_node;
4638 : 52846 : goto end_analyze_subs_aa;
4639 : : }
4640 : :
4641 : : /* max stmt executions can get quite large, avoid
4642 : : overflows by using wide ints here. */
4643 : 40740 : widest_int tau2
4644 : 81480 : = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4645 : 122220 : wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4646 : 40740 : widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4647 : 40740 : if (wi::min_precision (last_conflict, SIGNED)
4648 : 40740 : <= TYPE_PRECISION (integer_type_node)) /* Only build the constant when it fits the precision of integer_type_node. */
4649 : 38468 : *last_conflicts
4650 : 38468 : = build_int_cst (integer_type_node,
4651 : 38468 : last_conflict.to_shwi ());
4652 : : else
4653 : 2272 : *last_conflicts = chrec_dont_know;
4654 : 40740 : }
4655 : : else
4656 : 6181 : *last_conflicts = chrec_dont_know; /* NITER is not positive: the last conflict is left unknown, but the overlap functions are still built below. */
4657 : :
4658 : 46921 : *overlaps_a
4659 : 46921 : = conflict_fn (1,
4660 : : affine_fn_univar (build_int_cst (NULL_TREE, x1),
4661 : : 1,
4662 : : build_int_cst (NULL_TREE, i1)));
4663 : 46921 : *overlaps_b
4664 : 46921 : = conflict_fn (1,
4665 : : affine_fn_univar (build_int_cst (NULL_TREE, y1),
4666 : : 1,
4667 : : build_int_cst (NULL_TREE, j1)));
4668 : 46921 : }
4669 : : else
4670 : : {
4671 : : /* FIXME: For the moment, the upper bound of the
4672 : : iteration domain for i and j is not checked. */
4673 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4674 : 0 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4675 : 0 : *overlaps_a = conflict_fn_not_known ();
4676 : 0 : *overlaps_b = conflict_fn_not_known ();
4677 : 0 : *last_conflicts = chrec_dont_know;
4678 : : }
4679 : 46921 : }
4680 : : else /* The evolutions are not both strictly increasing or both strictly decreasing: not handled. */
4681 : : {
4682 : 320 : if (dump_file && (dump_flags & TDF_DETAILS))
4683 : 19 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4684 : 320 : *overlaps_a = conflict_fn_not_known ();
4685 : 320 : *overlaps_b = conflict_fn_not_known ();
4686 : 320 : *last_conflicts = chrec_dont_know;
4687 : : }
4688 : : }
4689 : : else /* More than one variable on either side with a nonzero GAMMA: not handled. */
4690 : : {
4691 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4692 : 0 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4693 : 0 : *overlaps_a = conflict_fn_not_known ();
4694 : 0 : *overlaps_b = conflict_fn_not_known ();
4695 : 0 : *last_conflicts = chrec_dont_know;
4696 : : }
4697 : :
4698 : 990780 : end_analyze_subs_aa:
4699 : 990780 : obstack_free (&scratch_obstack, NULL); /* Common exit: release the matrices, then dump the result if requested. */
4700 : 990780 : if (dump_file && (dump_flags & TDF_DETAILS))
4701 : : {
4702 : 16752 : fprintf (dump_file, " (overlaps_a = ");
4703 : 16752 : dump_conflict_function (dump_file, *overlaps_a);
4704 : 16752 : fprintf (dump_file, ")\n (overlaps_b = ");
4705 : 16752 : dump_conflict_function (dump_file, *overlaps_b);
4706 : 16752 : fprintf (dump_file, "))\n");
4707 : : }
4708 : : }
4709 : :
4710 : : /* Returns true when analyze_subscript_affine_affine can be used for
4711 : : determining the dependence relation between chrec_a and chrec_b,
4712 : : that contain symbols. This function modifies chrec_a and chrec_b
4713 : : such that the analysis result is the same, and such that they don't
4714 : : contain symbols, and then can safely be passed to the analyzer.
4715 : : When false is returned, *CHREC_A and *CHREC_B are left unchanged.
4716 : : Example: The analysis of the following tuples of evolutions produce
4717 : : the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4718 : : vs. {0, +, 1}_1
4719 : :
4720 : : {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4721 : : {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4722 : : */
4723 : :
4724 : : static bool
4725 : 55607 : can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4726 : : {
4727 : 55607 : tree diff, type, left_a, left_b, right_b;
4728 : :
4729 : 55607 : if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4730 : 55607 : || chrec_contains_symbols (CHREC_RIGHT (*chrec_b))) /* Symbolic steps are rejected. */
4731 : : /* FIXME: For the moment not handled. Might be refined later. */
4732 : 14801 : return false;
4733 : :
4734 : 40806 : type = chrec_type (*chrec_a);
4735 : 40806 : left_a = CHREC_LEFT (*chrec_a);
4736 : 40806 : left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL); /* B's initial value is converted to A's type before subtracting. */
4737 : 40806 : diff = chrec_fold_minus (type, left_a, left_b);
4738 : :
4739 : 81612 : if (!evolution_function_is_constant_p (diff)) /* The rewrite is only done when the difference of the initial values is a constant. */
4740 : 5129 : return false;
4741 : :
4742 : 35677 : if (dump_file && (dump_flags & TDF_DETAILS))
4743 : 104 : fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4744 : :
4745 : 35677 : *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a), /* A becomes {DIFF, +, step_a} ... */
4746 : 35677 : diff, CHREC_RIGHT (*chrec_a));
4747 : 35677 : right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4748 : 35677 : *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b), /* ... and B becomes {0, +, step_b}, both in A's type. */
4749 : 35677 : build_int_cst (type, 0),
4750 : : right_b);
4751 : 35677 : return true;
4752 : : }
4753 : :
4754 : : /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4755 : : *OVERLAPS_B are initialized to the functions that describe the
4756 : : relation between the elements accessed twice by CHREC_A and
4757 : : CHREC_B. For k >= 0, the following property is verified:
4758 : :
4759 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). LOOP_NEST_NUM is the loop number the affine checks are made against; dependence_stats is updated. */
4760 : :
4761 : : static void
4762 : 1012808 : analyze_siv_subscript (tree chrec_a,
4763 : : tree chrec_b,
4764 : : conflict_function **overlaps_a,
4765 : : conflict_function **overlaps_b,
4766 : : tree *last_conflicts,
4767 : : int loop_nest_num)
4768 : : {
4769 : 1012808 : dependence_stats.num_siv++;
4770 : :
4771 : 1012808 : if (dump_file && (dump_flags & TDF_DETAILS))
4772 : 19591 : fprintf (dump_file, "(analyze_siv_subscript \n");
4773 : :
4774 : 1012808 : if (evolution_function_is_constant_p (chrec_a)
4775 : 1012808 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) /* Case 1: A is constant, B is affine. */
4776 : 1167 : analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4777 : : overlaps_a, overlaps_b, last_conflicts);
4778 : :
4779 : 1011641 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4780 : 2023282 : && evolution_function_is_constant_p (chrec_b)) /* Case 2: the mirror image; arguments and outputs are swapped. */
4781 : 995 : analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4782 : : overlaps_b, overlaps_a, last_conflicts);
4783 : :
4784 : 1010646 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4785 : 1010646 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) /* Case 3: both are affine. */
4786 : : {
4787 : 1010646 : if (!chrec_contains_symbols (chrec_a)
4788 : 1010646 : && !chrec_contains_symbols (chrec_b)) /* No symbols: the integer-matrix solver can be used directly. */
4789 : : {
4790 : 955039 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4791 : : overlaps_a, overlaps_b,
4792 : : last_conflicts);
4793 : :
4794 : 955039 : if (CF_NOT_KNOWN_P (*overlaps_a)
4795 : 954727 : || CF_NOT_KNOWN_P (*overlaps_b))
4796 : 312 : dependence_stats.num_siv_unimplemented++;
4797 : 954727 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4798 : 45941 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4799 : 908786 : dependence_stats.num_siv_independent++;
4800 : : else
4801 : 45941 : dependence_stats.num_siv_dependent++;
4802 : : }
4803 : 55607 : else if (can_use_analyze_subscript_affine_affine (&chrec_a, /* Symbols present: try to rewrite the local copies of the chrecs into a symbol-free pair. */
4804 : : &chrec_b))
4805 : : {
4806 : 35677 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4807 : : overlaps_a, overlaps_b,
4808 : : last_conflicts);
4809 : :
4810 : 35677 : if (CF_NOT_KNOWN_P (*overlaps_a)
4811 : 35669 : || CF_NOT_KNOWN_P (*overlaps_b))
4812 : 8 : dependence_stats.num_siv_unimplemented++;
4813 : 35669 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4814 : 986 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4815 : 34683 : dependence_stats.num_siv_independent++;
4816 : : else
4817 : 986 : dependence_stats.num_siv_dependent++;
4818 : : }
4819 : : else
4820 : 19930 : goto siv_subscript_dontknow; /* Jumps into the final else block below to share the don't-know handling. */
4821 : : }
4822 : :
4823 : : else
4824 : : {
4825 : 19930 : siv_subscript_dontknow:;
4826 : 19930 : if (dump_file && (dump_flags & TDF_DETAILS))
4827 : 2789 : fprintf (dump_file, " siv test failed: unimplemented");
4828 : 19930 : *overlaps_a = conflict_fn_not_known ();
4829 : 19930 : *overlaps_b = conflict_fn_not_known ();
4830 : 19930 : *last_conflicts = chrec_dont_know;
4831 : 19930 : dependence_stats.num_siv_unimplemented++;
4832 : : }
4833 : :
4834 : 1012808 : if (dump_file && (dump_flags & TDF_DETAILS))
4835 : 19591 : fprintf (dump_file, ")\n");
4836 : 1012808 : }
4837 : :
4838 : : /* Returns false if we can prove that the greatest common divisor of the steps
4839 : : of CHREC does not divide CST, true otherwise (including when CST or a step does not fit a signed HOST_WIDE_INT). */
4840 : :
4841 : : static bool
4842 : 20660 : gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4843 : : {
4844 : 20660 : HOST_WIDE_INT cd = 0, val;
4845 : 20660 : tree step;
4846 : :
4847 : 20660 : if (!tree_fits_shwi_p (cst)) /* Cannot reason about CST: conservatively answer "may divide". */
4848 : : return true;
4849 : 20660 : val = tree_to_shwi (cst);
4850 : :
4851 : 61834 : while (TREE_CODE (chrec) == POLYNOMIAL_CHREC) /* Walk the nested chrecs through CHREC_LEFT, folding each step into the running gcd CD. */
4852 : : {
4853 : 41318 : step = CHREC_RIGHT (chrec);
4854 : 41318 : if (!tree_fits_shwi_p (step))
4855 : : return true;
4856 : 41174 : cd = gcd (cd, tree_to_shwi (step));
4857 : 41174 : chrec = CHREC_LEFT (chrec);
4858 : : }
4859 : :
4860 : 20516 : return val % cd == 0; /* NOTE(review): CD is still 0 here if CHREC is not a POLYNOMIAL_CHREC (and presumably if every step is 0), which would make this a modulo by zero -- confirm callers exclude that. */
4861 : : }
4862 : :
4863 : : /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4864 : : LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4865 : : functions that describe the relation between the elements accessed
4866 : : twice by CHREC_A and CHREC_B. For k >= 0, the following property
4867 : : is verified:
4868 : :
4869 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). *LAST_CONFLICTS is set as well and dependence_stats is updated. */
4870 : :
4871 : : static void
4872 : 27972 : analyze_miv_subscript (tree chrec_a,
4873 : : tree chrec_b,
4874 : : conflict_function **overlaps_a,
4875 : : conflict_function **overlaps_b,
4876 : : tree *last_conflicts,
4877 : : class loop *loop_nest)
4878 : : {
4879 : 27972 : tree type, difference;
4880 : :
4881 : 27972 : dependence_stats.num_miv++;
4882 : 27972 : if (dump_file && (dump_flags & TDF_DETAILS))
4883 : 27 : fprintf (dump_file, "(analyze_miv_subscript \n");
4884 : :
4885 : 27972 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); /* Bring both chrecs to one common signed type before subtracting them. */
4886 : 27972 : chrec_a = chrec_convert (type, chrec_a, NULL);
4887 : 27972 : chrec_b = chrec_convert (type, chrec_b, NULL);
4888 : 27972 : difference = chrec_fold_minus (type, chrec_a, chrec_b);
4889 : :
4890 : 27972 : if (eq_evolutions_p (chrec_a, chrec_b))
4891 : : {
4892 : : /* Access functions are the same: all the elements are accessed
4893 : : in the same order. */
4894 : 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4895 : 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4896 : 0 : *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4897 : 0 : dependence_stats.num_miv_dependent++;
4898 : : }
4899 : :
4900 : 27972 : else if (evolution_function_is_constant_p (difference)
4901 : 20690 : && evolution_function_is_affine_multivariate_p (chrec_a,
4902 : : loop_nest->num)
4903 : 48632 : && !gcd_of_steps_may_divide_p (chrec_a, difference)) /* gcd test: the constant difference is provably not a multiple of the gcd of A's steps. */
4904 : : {
4905 : : /* testsuite/.../ssa-chrec-33.c
4906 : : {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4907 : :
4908 : : The difference is 1, and all the evolution steps are multiples
4909 : : of 2, consequently there are no overlapping elements. */
4910 : 19670 : *overlaps_a = conflict_fn_no_dependence ();
4911 : 19670 : *overlaps_b = conflict_fn_no_dependence ();
4912 : 19670 : *last_conflicts = integer_zero_node;
4913 : 19670 : dependence_stats.num_miv_independent++;
4914 : : }
4915 : :
4916 : 8302 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4917 : 100 : && !chrec_contains_symbols (chrec_a, loop_nest)
4918 : 91 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4919 : 8366 : && !chrec_contains_symbols (chrec_b, loop_nest)) /* Both are affine in the loop nest and symbol-free: use the affine-affine solver. */
4920 : : {
4921 : : /* testsuite/.../ssa-chrec-35.c
4922 : : {0, +, 1}_2 vs. {0, +, 1}_3
4923 : : the overlapping elements are respectively located at iterations:
4924 : : {0, +, 1}_x and {0, +, 1}_x,
4925 : : in other words, we have the equality:
4926 : : {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4927 : :
4928 : : Other examples:
4929 : : {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4930 : : {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4931 : :
4932 : : {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4933 : : {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4934 : : */
4935 : 64 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4936 : : overlaps_a, overlaps_b, last_conflicts);
4937 : :
4938 : 64 : if (CF_NOT_KNOWN_P (*overlaps_a)
4939 : 60 : || CF_NOT_KNOWN_P (*overlaps_b))
4940 : 4 : dependence_stats.num_miv_unimplemented++;
4941 : 60 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4942 : 50 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4943 : 10 : dependence_stats.num_miv_independent++;
4944 : : else
4945 : 50 : dependence_stats.num_miv_dependent++;
4946 : : }
4947 : :
4948 : : else
4949 : : {
4950 : : /* When the analysis is too difficult, answer "don't know". */
4951 : 8238 : if (dump_file && (dump_flags & TDF_DETAILS))
4952 : 23 : fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4953 : :
4954 : 8238 : *overlaps_a = conflict_fn_not_known ();
4955 : 8238 : *overlaps_b = conflict_fn_not_known ();
4956 : 8238 : *last_conflicts = chrec_dont_know;
4957 : 8238 : dependence_stats.num_miv_unimplemented++;
4958 : : }
4959 : :
4960 : 27972 : if (dump_file && (dump_flags & TDF_DETAILS))
4961 : 27 : fprintf (dump_file, ")\n");
4962 : 27972 : }
4963 : :
4964 : : /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4965 : : with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
4966 : : OVERLAP_ITERATIONS_B are initialized with two functions that
4967 : : describe the iterations that contain conflicting elements.
4968 : :
4969 : : Remark: For an integer k >= 0, the following equality is true:
4970 : :
4971 : : CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4972 : : */
4973 : :
4974 : : static void
4975 : 4117179 : analyze_overlapping_iterations (tree chrec_a,
4976 : : tree chrec_b,
4977 : : conflict_function **overlap_iterations_a,
4978 : : conflict_function **overlap_iterations_b,
4979 : : tree *last_conflicts, class loop *loop_nest)
4980 : : {
4981 : 4117179 : unsigned int lnn = loop_nest->num;
4982 : :
4983 : 4117179 : dependence_stats.num_subscript_tests++;
4984 : :
4985 : 4117179 : if (dump_file && (dump_flags & TDF_DETAILS))
4986 : : {
4987 : 59143 : fprintf (dump_file, "(analyze_overlapping_iterations \n");
4988 : 59143 : fprintf (dump_file, " (chrec_a = ");
4989 : 59143 : print_generic_expr (dump_file, chrec_a);
4990 : 59143 : fprintf (dump_file, ")\n (chrec_b = ");
4991 : 59143 : print_generic_expr (dump_file, chrec_b);
4992 : 59143 : fprintf (dump_file, ")\n");
4993 : : }
4994 : :
4995 : 4117179 : if (chrec_a == NULL_TREE
4996 : 4117179 : || chrec_b == NULL_TREE
4997 : 4117179 : || chrec_contains_undetermined (chrec_a)
4998 : 8234358 : || chrec_contains_undetermined (chrec_b))
4999 : : {
5000 : 0 : dependence_stats.num_subscript_undetermined++;
5001 : :
5002 : 0 : *overlap_iterations_a = conflict_fn_not_known ();
5003 : 0 : *overlap_iterations_b = conflict_fn_not_known ();
5004 : : }
5005 : :
5006 : : /* If they are the same chrec, and are affine, they overlap
5007 : : on every iteration. */
5008 : 4117179 : else if (eq_evolutions_p (chrec_a, chrec_b)
5009 : 4117179 : && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5010 : 745340 : || operand_equal_p (chrec_a, chrec_b, 0)))
5011 : : {
5012 : 1313308 : dependence_stats.num_same_subscript_function++;
5013 : 1313308 : *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
5014 : 1313308 : *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
5015 : 1313308 : *last_conflicts = chrec_dont_know;
5016 : : }
5017 : :
5018 : : /* If they aren't the same, and aren't affine, we can't do anything
5019 : : yet. */
5020 : 2803871 : else if ((chrec_contains_symbols (chrec_a)
5021 : 2740320 : || chrec_contains_symbols (chrec_b))
5022 : 2804713 : && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5023 : 61946 : || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5024 : : {
5025 : 2745 : dependence_stats.num_subscript_undetermined++;
5026 : 2745 : *overlap_iterations_a = conflict_fn_not_known ();
5027 : 2745 : *overlap_iterations_b = conflict_fn_not_known ();
5028 : : }
5029 : :
5030 : 2801126 : else if (ziv_subscript_p (chrec_a, chrec_b))
5031 : 1760346 : analyze_ziv_subscript (chrec_a, chrec_b,
5032 : : overlap_iterations_a, overlap_iterations_b,
5033 : : last_conflicts);
5034 : :
5035 : 1040780 : else if (siv_subscript_p (chrec_a, chrec_b))
5036 : 1012808 : analyze_siv_subscript (chrec_a, chrec_b,
5037 : : overlap_iterations_a, overlap_iterations_b,
5038 : : last_conflicts, lnn);
5039 : :
5040 : : else
5041 : 27972 : analyze_miv_subscript (chrec_a, chrec_b,
5042 : : overlap_iterations_a, overlap_iterations_b,
5043 : : last_conflicts, loop_nest);
5044 : :
5045 : 4117179 : if (dump_file && (dump_flags & TDF_DETAILS))
5046 : : {
5047 : 59143 : fprintf (dump_file, " (overlap_iterations_a = ");
5048 : 59143 : dump_conflict_function (dump_file, *overlap_iterations_a);
5049 : 59143 : fprintf (dump_file, ")\n (overlap_iterations_b = ");
5050 : 59143 : dump_conflict_function (dump_file, *overlap_iterations_b);
5051 : 59143 : fprintf (dump_file, "))\n");
5052 : : }
5053 : 4117179 : }
5054 : :
5055 : : /* Helper function for uniquely inserting distance vectors. */
5056 : :
5057 : : static void
5058 : 876945 : save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5059 : : {
5060 : 1301547 : for (lambda_vector v : DDR_DIST_VECTS (ddr))
5061 : 426072 : if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5062 : : return;
5063 : :
5064 : 876661 : DDR_DIST_VECTS (ddr).safe_push (dist_v);
5065 : : }
5066 : :
5067 : : /* Helper function for uniquely inserting direction vectors. */
5068 : :
5069 : : static void
5070 : 876661 : save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5071 : : {
5072 : 1300411 : for (lambda_vector v : DDR_DIR_VECTS (ddr))
5073 : 424368 : if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5074 : : return;
5075 : :
5076 : 876661 : DDR_DIR_VECTS (ddr).safe_push (dir_v);
5077 : : }
5078 : :
5079 : : /* Add a distance of 1 on all the loops outer than INDEX. If we
5080 : : haven't yet determined a distance for this outer loop, push a new
5081 : : distance vector composed of the previous distance, and a distance
5082 : : of 1 for this outer loop. Example:
5083 : :
5084 : : | loop_1
5085 : : | loop_2
5086 : : | A[10]
5087 : : | endloop_2
5088 : : | endloop_1
5089 : :
5090 : : Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5091 : : save (0, 1), then we have to save (1, 0). */
5092 : :
5093 : : static void
5094 : 16457 : add_outer_distances (struct data_dependence_relation *ddr,
5095 : : lambda_vector dist_v, int index)
5096 : : {
5097 : : /* For each outer loop where init_v is not set, the accesses are
5098 : : in dependence of distance 1 in the loop. */
5099 : 19709 : while (--index >= 0)
5100 : : {
5101 : 6504 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5102 : 3252 : lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5103 : 3252 : save_v[index] = 1;
5104 : 3252 : save_dist_v (ddr, save_v);
5105 : : }
5106 : 16457 : }
5107 : :
5108 : : /* Return false when fail to represent the data dependence as a
5109 : : distance vector. A_INDEX is the index of the first reference
5110 : : (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5111 : : second reference. INIT_B is set to true when a component has been
5112 : : added to the distance vector DIST_V. INDEX_CARRY is then set to
5113 : : the index in DIST_V that carries the dependence. */
5114 : :
5115 : : static bool
5116 : 48000 : build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5117 : : unsigned int a_index, unsigned int b_index,
5118 : : lambda_vector dist_v, bool *init_b,
5119 : : int *index_carry)
5120 : : {
5121 : 48000 : unsigned i;
5122 : 96000 : lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5123 : 48000 : class loop *loop = DDR_LOOP_NEST (ddr)[0];
5124 : :
5125 : 213494 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5126 : : {
5127 : 60265 : tree access_fn_a, access_fn_b;
5128 : 60265 : struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5129 : :
5130 : 60265 : if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5131 : : {
5132 : 204 : non_affine_dependence_relation (ddr);
5133 : 204 : return false;
5134 : : }
5135 : :
5136 : 60061 : access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5137 : 60061 : access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5138 : :
5139 : 60061 : if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5140 : 47128 : && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5141 : : {
5142 : 46611 : HOST_WIDE_INT dist;
5143 : 46611 : int index;
5144 : 46611 : int var_a = CHREC_VARIABLE (access_fn_a);
5145 : 46611 : int var_b = CHREC_VARIABLE (access_fn_b);
5146 : :
5147 : 46611 : if (var_a != var_b
5148 : 46611 : || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5149 : : {
5150 : 34 : non_affine_dependence_relation (ddr);
5151 : 34 : return false;
5152 : : }
5153 : :
5154 : : /* When data references are collected in a loop while data
5155 : : dependences are analyzed in loop nest nested in the loop, we
5156 : : would have more number of access functions than number of
5157 : : loops. Skip access functions of loops not in the loop nest.
5158 : :
5159 : : See PR89725 for more information. */
5160 : 46577 : if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5161 : 2 : continue;
5162 : :
5163 : 46575 : dist = int_cst_value (SUB_DISTANCE (subscript));
5164 : 46575 : index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5165 : 46575 : *index_carry = MIN (index, *index_carry);
5166 : :
5167 : : /* This is the subscript coupling test. If we have already
5168 : : recorded a distance for this loop (a distance coming from
5169 : : another subscript), it should be the same. For example,
5170 : : in the following code, there is no dependence:
5171 : :
5172 : : | loop i = 0, N, 1
5173 : : | T[i+1][i] = ...
5174 : : | ... = T[i][i]
5175 : : | endloop
5176 : : */
5177 : 46575 : if (init_v[index] != 0 && dist_v[index] != dist)
5178 : : {
5179 : 0 : finalize_ddr_dependent (ddr, chrec_known);
5180 : 0 : return false;
5181 : : }
5182 : :
5183 : 46575 : dist_v[index] = dist;
5184 : 46575 : init_v[index] = 1;
5185 : 46575 : *init_b = true;
5186 : 46575 : }
5187 : 13450 : else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5188 : : {
5189 : : /* This can be for example an affine vs. constant dependence
5190 : : (T[i] vs. T[3]) that is not an affine dependence and is
5191 : : not representable as a distance vector. */
5192 : 1280 : non_affine_dependence_relation (ddr);
5193 : 1280 : return false;
5194 : : }
5195 : : else
5196 : 12170 : *init_b = true;
5197 : : }
5198 : :
5199 : : return true;
5200 : : }
5201 : :
5202 : : /* Return true when the DDR contains only invariant access functions wrto. loop
5203 : : number LNUM. */
5204 : :
5205 : : static bool
5206 : 694558 : invariant_access_functions (const struct data_dependence_relation *ddr,
5207 : : int lnum)
5208 : : {
5209 : 2357197 : for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5210 : 832285 : if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5211 : 832285 : || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5212 : 558762 : return false;
5213 : :
5214 : : return true;
5215 : : }
5216 : :
5217 : : /* Helper function for the case where DDR_A and DDR_B are the same
5218 : : multivariate access function with a constant step. For an example
5219 : : see pr34635-1.c. */
5220 : :
5221 : : static void
5222 : 4279 : add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5223 : : {
5224 : 4279 : int x_1, x_2;
5225 : 4279 : tree c_1 = CHREC_LEFT (c_2);
5226 : 4279 : tree c_0 = CHREC_LEFT (c_1);
5227 : 4279 : lambda_vector dist_v;
5228 : 4279 : HOST_WIDE_INT v1, v2, cd;
5229 : :
5230 : : /* Polynomials with more than 2 variables are not handled yet. When
5231 : : the evolution steps are parameters, it is not possible to
5232 : : represent the dependence using classical distance vectors. */
5233 : 4279 : if (TREE_CODE (c_0) != INTEGER_CST
5234 : 2963 : || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5235 : 6670 : || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5236 : : {
5237 : 1896 : DDR_AFFINE_P (ddr) = false;
5238 : 1896 : return;
5239 : : }
5240 : :
5241 : 2383 : x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5242 : 2383 : x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5243 : :
5244 : : /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
5245 : 4766 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5246 : 2383 : v1 = int_cst_value (CHREC_RIGHT (c_1));
5247 : 2383 : v2 = int_cst_value (CHREC_RIGHT (c_2));
5248 : 2383 : cd = gcd (v1, v2);
5249 : 2383 : v1 /= cd;
5250 : 2383 : v2 /= cd;
5251 : :
5252 : 2383 : if (v2 < 0)
5253 : : {
5254 : 9 : v2 = -v2;
5255 : 9 : v1 = -v1;
5256 : : }
5257 : :
5258 : 2383 : dist_v[x_1] = v2;
5259 : 2383 : dist_v[x_2] = -v1;
5260 : 2383 : save_dist_v (ddr, dist_v);
5261 : :
5262 : 2383 : add_outer_distances (ddr, dist_v, x_1);
5263 : : }
5264 : :
5265 : : /* Helper function for the case where DDR_A and DDR_B are the same
5266 : : access functions. */
5267 : :
5268 : : static void
5269 : 18464 : add_other_self_distances (struct data_dependence_relation *ddr)
5270 : : {
5271 : 18464 : lambda_vector dist_v;
5272 : 18464 : unsigned i;
5273 : 18464 : int index_carry = DDR_NB_LOOPS (ddr);
5274 : 18464 : subscript *sub;
5275 : 18464 : class loop *loop = DDR_LOOP_NEST (ddr)[0];
5276 : :
5277 : 39586 : FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5278 : : {
5279 : 25872 : tree access_fun = SUB_ACCESS_FN (sub, 0);
5280 : :
5281 : 25872 : if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5282 : : {
5283 : 18564 : if (!evolution_function_is_univariate_p (access_fun, loop->num))
5284 : : {
5285 : 4750 : if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5286 : : {
5287 : 471 : DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5288 : 471 : return;
5289 : : }
5290 : :
5291 : 4279 : access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5292 : :
5293 : 4279 : if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5294 : 4279 : add_multivariate_self_dist (ddr, access_fun);
5295 : : else
5296 : : /* The evolution step is not constant: it varies in
5297 : : the outer loop, so this cannot be represented by a
5298 : : distance vector. For example in pr34635.c the
5299 : : evolution is {0, +, {0, +, 4}_1}_2. */
5300 : 0 : DDR_AFFINE_P (ddr) = false;
5301 : :
5302 : 4279 : return;
5303 : : }
5304 : :
5305 : : /* When data references are collected in a loop while data
5306 : : dependences are analyzed in loop nest nested in the loop, we
5307 : : would have more number of access functions than number of
5308 : : loops. Skip access functions of loops not in the loop nest.
5309 : :
5310 : : See PR89725 for more information. */
5311 : 13814 : if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5312 : : loop))
5313 : 0 : continue;
5314 : :
5315 : 13814 : index_carry = MIN (index_carry,
5316 : : index_in_loop_nest (CHREC_VARIABLE (access_fun),
5317 : : DDR_LOOP_NEST (ddr)));
5318 : : }
5319 : : }
5320 : :
5321 : 27428 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5322 : 13714 : add_outer_distances (ddr, dist_v, index_carry);
5323 : : }
5324 : :
5325 : : static void
5326 : 135796 : insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5327 : : {
5328 : 271592 : lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5329 : :
5330 : 135796 : dist_v[0] = 1;
5331 : 135796 : save_dist_v (ddr, dist_v);
5332 : 135796 : }
5333 : :
5334 : : /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5335 : : is the case for example when access functions are the same and
5336 : : equal to a constant, as in:
5337 : :
5338 : : | loop_1
5339 : : | A[3] = ...
5340 : : | ... = A[3]
5341 : : | endloop_1
5342 : :
5343 : : in which case the distance vectors are (0) and (1). */
5344 : :
5345 : : static void
5346 : 135796 : add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5347 : : {
5348 : 135796 : unsigned i, j;
5349 : :
5350 : 271592 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5351 : : {
5352 : 135796 : subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5353 : 135796 : conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5354 : 135796 : conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5355 : :
5356 : 135796 : for (j = 0; j < ca->n; j++)
5357 : 135796 : if (affine_function_zero_p (ca->fns[j]))
5358 : : {
5359 : 135796 : insert_innermost_unit_dist_vector (ddr);
5360 : 135796 : return;
5361 : : }
5362 : :
5363 : 0 : for (j = 0; j < cb->n; j++)
5364 : 0 : if (affine_function_zero_p (cb->fns[j]))
5365 : : {
5366 : 0 : insert_innermost_unit_dist_vector (ddr);
5367 : 0 : return;
5368 : : }
5369 : : }
5370 : : }
5371 : :
5372 : : /* Return true when the DDR contains two data references that have the
5373 : : same access functions. */
5374 : :
5375 : : static inline bool
5376 : 737032 : same_access_functions (const struct data_dependence_relation *ddr)
5377 : : {
5378 : 3106879 : for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5379 : 938257 : if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5380 : 938257 : SUB_ACCESS_FN (sub, 1)))
5381 : : return false;
5382 : :
5383 : : return true;
5384 : : }
5385 : :
5386 : : /* Compute the classic per loop distance vector. DDR is the data
5387 : : dependence relation to build a vector from. Return false when fail
5388 : : to represent the data dependence as a distance vector. */
5389 : :
5390 : : static bool
5391 : 3491086 : build_classic_dist_vector (struct data_dependence_relation *ddr,
5392 : : class loop *loop_nest)
5393 : : {
5394 : 3491086 : bool init_b = false;
5395 : 3491086 : int index_carry = DDR_NB_LOOPS (ddr);
5396 : 3491086 : lambda_vector dist_v;
5397 : :
5398 : 3491086 : if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5399 : : return false;
5400 : :
5401 : 737032 : if (same_access_functions (ddr))
5402 : : {
5403 : : /* Save the 0 vector. */
5404 : 1389116 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5405 : 694558 : save_dist_v (ddr, dist_v);
5406 : :
5407 : 694558 : if (invariant_access_functions (ddr, loop_nest->num))
5408 : 135796 : add_distance_for_zero_overlaps (ddr);
5409 : :
5410 : 694558 : if (DDR_NB_LOOPS (ddr) > 1)
5411 : 18464 : add_other_self_distances (ddr);
5412 : :
5413 : 694558 : return true;
5414 : : }
5415 : :
5416 : 84948 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5417 : 42474 : if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5418 : : return false;
5419 : :
5420 : : /* Save the distance vector if we initialized one. */
5421 : 40956 : if (init_b)
5422 : : {
5423 : : /* Verify a basic constraint: classic distance vectors should
5424 : : always be lexicographically positive.
5425 : :
5426 : : Data references are collected in the order of execution of
5427 : : the program, thus for the following loop
5428 : :
5429 : : | for (i = 1; i < 100; i++)
5430 : : | for (j = 1; j < 100; j++)
5431 : : | {
5432 : : | t = T[j+1][i-1]; // A
5433 : : | T[j][i] = t + 2; // B
5434 : : | }
5435 : :
5436 : : references are collected following the direction of the wind:
5437 : : A then B. The data dependence tests are performed also
5438 : : following this order, such that we're looking at the distance
5439 : : separating the elements accessed by A from the elements later
5440 : : accessed by B. But in this example, the distance returned by
5441 : : test_dep (A, B) is lexicographically negative (-1, 1), that
5442 : : means that the access A occurs later than B with respect to
5443 : : the outer loop, ie. we're actually looking upwind. In this
5444 : : case we solve test_dep (B, A) looking downwind to the
5445 : : lexicographically positive solution, that returns the
5446 : : distance vector (1, -1). */
5447 : 81912 : if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5448 : : {
5449 : 5427 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5450 : 5427 : if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5451 : : return false;
5452 : 5427 : compute_subscript_distance (ddr);
5453 : 5427 : if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5454 : : &index_carry))
5455 : : return false;
5456 : 5427 : save_dist_v (ddr, save_v);
5457 : 5427 : DDR_REVERSED_P (ddr) = true;
5458 : :
5459 : : /* In this case there is a dependence forward for all the
5460 : : outer loops:
5461 : :
5462 : : | for (k = 1; k < 100; k++)
5463 : : | for (i = 1; i < 100; i++)
5464 : : | for (j = 1; j < 100; j++)
5465 : : | {
5466 : : | t = T[j+1][i-1]; // A
5467 : : | T[j][i] = t + 2; // B
5468 : : | }
5469 : :
5470 : : the vectors are:
5471 : : (0, 1, -1)
5472 : : (1, 1, -1)
5473 : : (1, -1, 1)
5474 : : */
5475 : 5427 : if (DDR_NB_LOOPS (ddr) > 1)
5476 : : {
5477 : 81 : add_outer_distances (ddr, save_v, index_carry);
5478 : 81 : add_outer_distances (ddr, dist_v, index_carry);
5479 : : }
5480 : : }
5481 : : else
5482 : : {
5483 : 35529 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5484 : 35529 : lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5485 : :
5486 : 35529 : if (DDR_NB_LOOPS (ddr) > 1)
5487 : : {
5488 : 99 : lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5489 : :
5490 : 99 : if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5491 : : return false;
5492 : 99 : compute_subscript_distance (ddr);
5493 : 99 : if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5494 : : &index_carry))
5495 : : return false;
5496 : :
5497 : 99 : save_dist_v (ddr, save_v);
5498 : 99 : add_outer_distances (ddr, dist_v, index_carry);
5499 : 99 : add_outer_distances (ddr, opposite_v, index_carry);
5500 : : }
5501 : : else
5502 : 35430 : save_dist_v (ddr, save_v);
5503 : : }
5504 : : }
5505 : : else
5506 : : {
5507 : : /* There is a distance of 1 on all the outer loops: Example:
5508 : : there is a dependence of distance 1 on loop_1 for the array A.
5509 : :
5510 : : | loop_1
5511 : : | A[5] = ...
5512 : : | endloop
5513 : : */
5514 : 0 : add_outer_distances (ddr, dist_v,
5515 : : lambda_vector_first_nz (dist_v,
5516 : 0 : DDR_NB_LOOPS (ddr), 0));
5517 : : }
5518 : :
5519 : 40956 : if (dump_file && (dump_flags & TDF_DETAILS))
5520 : : {
5521 : 393 : unsigned i;
5522 : :
5523 : 393 : fprintf (dump_file, "(build_classic_dist_vector\n");
5524 : 1985 : for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5525 : : {
5526 : 403 : fprintf (dump_file, " dist_vector = (");
5527 : 403 : print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5528 : 806 : DDR_NB_LOOPS (ddr));
5529 : 403 : fprintf (dump_file, " )\n");
5530 : : }
5531 : 393 : fprintf (dump_file, ")\n");
5532 : : }
5533 : :
5534 : : return true;
5535 : : }
5536 : :
5537 : : /* Return the direction for a given distance.
5538 : : FIXME: Computing dir this way is suboptimal, since dir can catch
5539 : : cases that dist is unable to represent. */
5540 : :
5541 : : static inline enum data_dependence_direction
5542 : 901029 : dir_from_dist (int dist)
5543 : : {
5544 : 901029 : if (dist > 0)
5545 : : return dir_positive;
5546 : 718929 : else if (dist < 0)
5547 : : return dir_negative;
5548 : : else
5549 : 716509 : return dir_equal;
5550 : : }
5551 : :
5552 : : /* Compute the classic per loop direction vector. DDR is the data
5553 : : dependence relation to build a vector from. */
5554 : :
5555 : : static void
5556 : 735514 : build_classic_dir_vector (struct data_dependence_relation *ddr)
5557 : : {
5558 : 735514 : unsigned i, j;
5559 : 735514 : lambda_vector dist_v;
5560 : :
5561 : 1612175 : FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5562 : : {
5563 : 1753322 : lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5564 : :
5565 : 4432041 : for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5566 : 1619958 : dir_v[j] = dir_from_dist (dist_v[j]);
5567 : :
5568 : 876661 : save_dir_v (ddr, dir_v);
5569 : : }
5570 : 735514 : }
5571 : :
5572 : : /* Helper function. Returns true when there is a dependence between the
5573 : : data references. A_INDEX is the index of the first reference (0 for
5574 : : DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
5575 : :
5576 : : static bool
5577 : 3496612 : subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5578 : : unsigned int a_index, unsigned int b_index,
5579 : : class loop *loop_nest)
5580 : : {
5581 : 3496612 : unsigned int i;
5582 : 3496612 : tree last_conflicts;
5583 : 3496612 : struct subscript *subscript;
5584 : 3496612 : tree res = NULL_TREE;
5585 : :
5586 : 4889731 : for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5587 : : {
5588 : 4117179 : conflict_function *overlaps_a, *overlaps_b;
5589 : :
5590 : 4117179 : analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5591 : : SUB_ACCESS_FN (subscript, b_index),
5592 : : &overlaps_a, &overlaps_b,
5593 : : &last_conflicts, loop_nest);
5594 : :
5595 : 4117179 : if (SUB_CONFLICTS_IN_A (subscript))
5596 : 4117179 : free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5597 : 4117179 : if (SUB_CONFLICTS_IN_B (subscript))
5598 : 4117179 : free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5599 : :
5600 : 4117179 : SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5601 : 4117179 : SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5602 : 4117179 : SUB_LAST_CONFLICT (subscript) = last_conflicts;
5603 : :
5604 : : /* If there is any undetermined conflict function we have to
5605 : : give a conservative answer in case we cannot prove that
5606 : : no dependence exists when analyzing another subscript. */
5607 : 4117179 : if (CF_NOT_KNOWN_P (overlaps_a)
5608 : 4085938 : || CF_NOT_KNOWN_P (overlaps_b))
5609 : : {
5610 : 31241 : res = chrec_dont_know;
5611 : 31241 : continue;
5612 : : }
5613 : :
5614 : : /* When there is a subscript with no dependence we can stop. */
5615 : 4085938 : else if (CF_NO_DEPENDENCE_P (overlaps_a)
5616 : 1361878 : || CF_NO_DEPENDENCE_P (overlaps_b))
5617 : : {
5618 : 2724060 : res = chrec_known;
5619 : 2724060 : break;
5620 : : }
5621 : : }
5622 : :
5623 : 3496612 : if (res == NULL_TREE)
5624 : : return true;
5625 : :
5626 : 2754054 : if (res == chrec_known)
5627 : 2724060 : dependence_stats.num_dependence_independent++;
5628 : : else
5629 : 29994 : dependence_stats.num_dependence_undetermined++;
5630 : 2754054 : finalize_ddr_dependent (ddr, res);
5631 : 2754054 : return false;
5632 : : }
5633 : :
5634 : : /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
5635 : :
5636 : : static void
5637 : 3491086 : subscript_dependence_tester (struct data_dependence_relation *ddr,
5638 : : class loop *loop_nest)
5639 : : {
5640 : 3491086 : if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5641 : 737032 : dependence_stats.num_dependence_dependent++;
5642 : :
5643 : 3491086 : compute_subscript_distance (ddr);
5644 : 3491086 : if (build_classic_dist_vector (ddr, loop_nest))
5645 : 735514 : build_classic_dir_vector (ddr);
5646 : 3491086 : }
5647 : :
5648 : : /* Returns true when all the access functions of A are affine or
5649 : : constant with respect to LOOP_NEST. */
5650 : :
5651 : : static bool
5652 : 7146382 : access_functions_are_affine_or_constant_p (const struct data_reference *a,
5653 : : const class loop *loop_nest)
5654 : : {
5655 : 7146382 : vec<tree> fns = DR_ACCESS_FNS (a);
5656 : 41249949 : for (tree t : fns)
5657 : 19972782 : if (!evolution_function_is_invariant_p (t, loop_nest->num)
5658 : 19972782 : && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5659 : : return false;
5660 : :
5661 : : return true;
5662 : : }
5663 : :
5664 : : /* This computes the affine dependence relation between A and B with
5665 : : respect to LOOP_NEST. CHREC_KNOWN is used for representing the
5666 : : independence between two accesses, while CHREC_DONT_KNOW is used
5667 : : for representing the unknown relation.
5668 : :
5669 : : Note that it is possible to stop the computation of the dependence
5670 : : relation the first time we detect a CHREC_KNOWN element for a given
5671 : : subscript. */
5672 : :
5673 : : void
5674 : 6058473 : compute_affine_dependence (struct data_dependence_relation *ddr,
5675 : : class loop *loop_nest)
5676 : : {
5677 : 6058473 : struct data_reference *dra = DDR_A (ddr);
5678 : 6058473 : struct data_reference *drb = DDR_B (ddr);
5679 : :
5680 : 6058473 : if (dump_file && (dump_flags & TDF_DETAILS))
5681 : : {
5682 : 128837 : fprintf (dump_file, "(compute_affine_dependence\n");
5683 : 128837 : fprintf (dump_file, " ref_a: ");
5684 : 128837 : print_generic_expr (dump_file, DR_REF (dra));
5685 : 128837 : fprintf (dump_file, ", stmt_a: ");
5686 : 128837 : print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5687 : 128837 : fprintf (dump_file, " ref_b: ");
5688 : 128837 : print_generic_expr (dump_file, DR_REF (drb));
5689 : 128837 : fprintf (dump_file, ", stmt_b: ");
5690 : 128837 : print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5691 : : }
5692 : :
5693 : : /* Analyze only when the dependence relation is not yet known. */
5694 : 6058473 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5695 : : {
5696 : 3653065 : dependence_stats.num_dependence_tests++;
5697 : :
5698 : 3653065 : if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5699 : 3653065 : && access_functions_are_affine_or_constant_p (drb, loop_nest))
5700 : 3491086 : subscript_dependence_tester (ddr, loop_nest);
5701 : :
5702 : : /* As a last case, if the dependence cannot be determined, or if
5703 : : the dependence is considered too difficult to determine, answer
5704 : : "don't know". */
5705 : : else
5706 : : {
5707 : 161979 : dependence_stats.num_dependence_undetermined++;
5708 : :
5709 : 161979 : if (dump_file && (dump_flags & TDF_DETAILS))
5710 : : {
5711 : 184 : fprintf (dump_file, "Data ref a:\n");
5712 : 184 : dump_data_reference (dump_file, dra);
5713 : 184 : fprintf (dump_file, "Data ref b:\n");
5714 : 184 : dump_data_reference (dump_file, drb);
5715 : 184 : fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5716 : : }
5717 : 161979 : finalize_ddr_dependent (ddr, chrec_dont_know);
5718 : : }
5719 : : }
5720 : :
5721 : 6058473 : if (dump_file && (dump_flags & TDF_DETAILS))
5722 : : {
5723 : 128837 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5724 : 114695 : fprintf (dump_file, ") -> no dependence\n");
5725 : 14142 : else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5726 : 10745 : fprintf (dump_file, ") -> dependence analysis failed\n");
5727 : : else
5728 : 3397 : fprintf (dump_file, ")\n");
5729 : : }
5730 : 6058473 : }
5731 : :
5732 : : /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5733 : : pairs of data references in DATAREFS, in the LOOP_NEST. When
5734 : : COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5735 : : relations. Return false, after pushing a single relation built from
5736 : : NULL references, if DATAREFS has more than param_loop_max_datarefs_for_datadeps elements; otherwise return true. */
5737 : :
5738 : : bool
5739 : 343077 : compute_all_dependences (const vec<data_reference_p> &datarefs,
5740 : : vec<ddr_p> *dependence_relations,
5741 : : const vec<loop_p> &loop_nest,
5742 : : bool compute_self_and_rr)
5743 : : {
5744 : 343077 : struct data_dependence_relation *ddr;
5745 : 343077 : struct data_reference *a, *b;
5746 : 343077 : unsigned int i, j;
5747 : :
5748 : 343077 : if ((int) datarefs.length ()
5749 : 343077 : > param_loop_max_datarefs_for_datadeps)
5750 : : {
5751 : 0 : struct data_dependence_relation *ddr; /* Shadows the function-level DDR. */
5752 : :
5753 : : /* Insert a single relation into dependence_relations:
5754 : : chrec_dont_know. */
5755 : 0 : ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5756 : 0 : dependence_relations->safe_push (ddr);
5757 : 0 : return false;
5758 : : }
5759 : :
5760 : 2565730 : FOR_EACH_VEC_ELT (datarefs, i, a)
5761 : 7034900 : for (j = i + 1; datarefs.iterate (j, &b); j++) /* Visit each pair (I, J) with I < J once. */
5762 : 4812247 : if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5763 : : {
5764 : 4529466 : ddr = initialize_data_dependence_relation (a, b, loop_nest);
5765 : 4529466 : dependence_relations->safe_push (ddr);
5766 : 4529466 : if (loop_nest.exists ()) /* The affine test is skipped when LOOP_NEST does not exist. */
5767 : 4491753 : compute_affine_dependence (ddr, loop_nest[0]);
5768 : : }
5769 : :
5770 : 343077 : if (compute_self_and_rr) /* Also relate every reference to itself. */
5771 : 810744 : FOR_EACH_VEC_ELT (datarefs, i, a)
5772 : : {
5773 : 606212 : ddr = initialize_data_dependence_relation (a, a, loop_nest);
5774 : 606212 : dependence_relations->safe_push (ddr);
5775 : 606212 : if (loop_nest.exists ())
5776 : 606212 : compute_affine_dependence (ddr, loop_nest[0]);
5777 : : }
5778 : :
5779 : : return true;
5780 : : }
5781 : :
5782 : : /* Describes one memory reference found in a statement, as collected by get_references_in_stmt. */
5783 : :
5784 : : struct data_ref_loc
5785 : : {
5786 : : /* The memory reference (a tree). */
5787 : : tree ref;
5788 : :
5789 : : /* True if the memory reference is read; get_references_in_stmt sets it to false for a written reference. */
5790 : : bool is_read;
5791 : :
5792 : : /* True if the data reference is conditional within the containing
5793 : : statement, i.e. if it might not occur even when the statement
5794 : : is executed and runs to completion. */
5795 : : bool is_conditional_in_stmt;
5796 : : };
5797 : :
5798 : :
5799 : : /* Stores the locations of memory references in STMT to REFERENCES. Returns
5800 : : true if STMT clobbers memory, false otherwise. Nothing is stored when STMT has no virtual use or is neither an assignment nor a call. */
5801 : :
5802 : : static bool
5803 : 40133300 : get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5804 : : {
5805 : 40133300 : bool clobbers_memory = false;
5806 : 40133300 : data_ref_loc ref;
5807 : 40133300 : tree op0, op1;
5808 : 40133300 : enum gimple_code stmt_code = gimple_code (stmt);
5809 : :
5810 : : /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5811 : : As we cannot model data-references to not spelled out
5812 : : accesses give up if they may occur. */
5813 : 40133300 : if (stmt_code == GIMPLE_CALL
5814 : 40133300 : && !(gimple_call_flags (stmt) & ECF_CONST))
5815 : : {
5816 : : /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
5817 : 3654973 : if (gimple_call_internal_p (stmt))
5818 : 49605 : switch (gimple_call_internal_fn (stmt))
5819 : : {
5820 : 5763 : case IFN_GOMP_SIMD_LANE:
5821 : 5763 : {
5822 : 5763 : class loop *loop = gimple_bb (stmt)->loop_father;
5823 : 5763 : tree uid = gimple_call_arg (stmt, 0);
5824 : 5763 : gcc_assert (TREE_CODE (uid) == SSA_NAME);
5825 : 5763 : if (loop == NULL
5826 : 5763 : || loop->simduid != SSA_NAME_VAR (uid))
5827 : : clobbers_memory = true; /* LOOP is missing or its simduid does not match argument 0. */
5828 : : break;
5829 : : }
5830 : : case IFN_MASK_LOAD:
5831 : : case IFN_MASK_STORE:
5832 : : break;
5833 : 2825 : case IFN_MASK_CALL:
5834 : 2825 : {
5835 : 2825 : tree orig_fndecl
5836 : 2825 : = gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
5837 : 2825 : if (!orig_fndecl
5838 : 2825 : || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
5839 : : clobbers_memory = true; /* The callee in argument 0 is unknown or not ECF_CONST. */
5840 : : }
5841 : : break;
5842 : : default:
5843 : 3699000 : clobbers_memory = true;
5844 : : break;
5845 : : }
5846 : : else
5847 : : clobbers_memory = true; /* Non-internal call without ECF_CONST. */
5848 : : }
5849 : 36478327 : else if (stmt_code == GIMPLE_ASM
5850 : 36478327 : && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5851 : 9719 : || gimple_vuse (stmt)))
5852 : : clobbers_memory = true;
5853 : :
5854 : 87127424 : if (!gimple_vuse (stmt))
5855 : : return clobbers_memory; /* No virtual use: no references are collected. */
5856 : :
5857 : 17025782 : if (stmt_code == GIMPLE_ASSIGN)
5858 : : {
5859 : 12515256 : tree base;
5860 : 12515256 : op0 = gimple_assign_lhs (stmt);
5861 : 12515256 : op1 = gimple_assign_rhs1 (stmt);
5862 : :
5863 : 12515256 : if (DECL_P (op1)
5864 : 12515256 : || (REFERENCE_CLASS_P (op1)
5865 : 6066684 : && (base = get_base_address (op1))
5866 : 6066684 : && TREE_CODE (base) != SSA_NAME
5867 : 6066666 : && !is_gimple_min_invariant (base)))
5868 : : {
5869 : 6799900 : ref.ref = op1;
5870 : 6799900 : ref.is_read = true;
5871 : 6799900 : ref.is_conditional_in_stmt = false;
5872 : 6799900 : references->safe_push (ref);
5873 : : }
5874 : : }
5875 : 4510526 : else if (stmt_code == GIMPLE_CALL)
5876 : : {
5877 : 3648360 : unsigned i = 0, n;
5878 : 3648360 : tree ptr, type;
5879 : 3648360 : unsigned int align;
5880 : :
5881 : 3648360 : ref.is_read = false;
5882 : 3648360 : if (gimple_call_internal_p (stmt))
5883 : 40994 : switch (gimple_call_internal_fn (stmt))
5884 : : {
5885 : 1806 : case IFN_MASK_LOAD:
5886 : 1806 : if (gimple_call_lhs (stmt) == NULL_TREE)
5887 : : break; /* No lhs: fall back to the generic argument scan below. */
5888 : 1806 : ref.is_read = true;
5889 : : /* FALLTHRU */
5890 : 4481 : case IFN_MASK_STORE:
5891 : 4481 : ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0); /* Zero offset in the type of argument 1. */
5892 : 4481 : align = tree_to_shwi (gimple_call_arg (stmt, 1)); /* Argument 1 also supplies the alignment value. */
5893 : 4481 : if (ref.is_read)
5894 : 1806 : type = TREE_TYPE (gimple_call_lhs (stmt));
5895 : : else
5896 : 2675 : type = TREE_TYPE (gimple_call_arg (stmt, 3)); /* For a store the accessed type comes from argument 3. */
5897 : 4481 : if (TYPE_ALIGN (type) != align)
5898 : 1035 : type = build_aligned_type (type, align);
5899 : 4481 : ref.is_conditional_in_stmt = true;
5900 : 4481 : ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5901 : : ptr);
5902 : 4481 : references->safe_push (ref);
5903 : 4481 : return false; /* A masked load/store is recorded as one conditional MEM_REF and never reported as a clobber. */
5904 : : case IFN_MASK_CALL:
5905 : 3643879 : i = 1; /* Start the argument scan below after argument 0. */
5906 : : gcc_fallthrough ();
5907 : : default:
5908 : : break;
5909 : : }
5910 : :
5911 : 3643879 : op0 = gimple_call_lhs (stmt);
5912 : 3643879 : n = gimple_call_num_args (stmt);
5913 : 14638721 : for (; i < n; i++)
5914 : : {
5915 : 7350963 : op1 = gimple_call_arg (stmt, i);
5916 : :
5917 : 7350963 : if (DECL_P (op1)
5918 : 7350963 : || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5919 : : {
5920 : 446400 : ref.ref = op1;
5921 : 446400 : ref.is_read = true;
5922 : 446400 : ref.is_conditional_in_stmt = false;
5923 : 446400 : references->safe_push (ref);
5924 : : }
5925 : : }
5926 : : }
5927 : : else
5928 : : return clobbers_memory;
5929 : :
5930 : 16159135 : if (op0 /* Record the lhs of the assignment or call as a write, after any reads pushed above. */
5931 : 16159135 : && (DECL_P (op0)
5932 : 13057257 : || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5933 : : {
5934 : 6529977 : ref.ref = op0;
5935 : 6529977 : ref.is_read = false;
5936 : 6529977 : ref.is_conditional_in_stmt = false;
5937 : 6529977 : references->safe_push (ref);
5938 : : }
5939 : : return clobbers_memory;
5940 : : }
5941 : :
5942 : :
5943 : : /* Returns true if any statement in the blocks returned by get_loop_body (LOOP) has a memory reference recognized by get_references_in_stmt. */
5944 : :
5945 : : bool
5946 : 719 : loop_nest_has_data_refs (loop_p loop)
5947 : : {
5948 : 719 : basic_block *bbs = get_loop_body (loop);
5949 : 719 : auto_vec<data_ref_loc, 3> references;
5950 : :
5951 : 965 : for (unsigned i = 0; i < loop->num_nodes; i++)
5952 : : {
5953 : 895 : basic_block bb = bbs[i];
5954 : 895 : gimple_stmt_iterator bsi;
5955 : :
5956 : 3119 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5957 : : {
5958 : 1978 : gimple *stmt = gsi_stmt (bsi);
5959 : 1978 : get_references_in_stmt (stmt, &references); /* The clobber result is ignored; only collected references matter. */
5960 : 1978 : if (references.length ())
5961 : : {
5962 : 649 : free (bbs); /* BBS is freed on every exit path. */
5963 : 649 : return true;
5964 : : }
5965 : : }
5966 : : }
5967 : 70 : free (bbs);
5968 : 70 : return false;
5969 : 719 : }
5970 : :
5971 : : /* Stores the data references in STMT to DATAREFS. If STMT clobbers
5972 : : memory, returns an opt_result failure, otherwise returns opt_result::success. NEST is the outermost
5973 : : loop of the loop nest in which the references should be analyzed; it may be NULL. */
5974 : :
5975 : : opt_result
5976 : 40117380 : find_data_references_in_stmt (class loop *nest, gimple *stmt,
5977 : : vec<data_reference_p> *datarefs)
5978 : : {
5979 : 40117380 : auto_vec<data_ref_loc, 2> references;
5980 : 40117380 : data_reference_p dr;
5981 : :
5982 : 40117380 : if (get_references_in_stmt (stmt, &references))
5983 : 3698995 : return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5984 : : stmt);
5985 : :
5986 : 122336526 : for (const data_ref_loc &ref : references)
5987 : : {
5988 : 13081371 : dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL, /* A NULL NEST passes a NULL edge. */
5989 : 13081371 : loop_containing_stmt (stmt), ref.ref,
5990 : 13081371 : stmt, ref.is_read, ref.is_conditional_in_stmt);
5991 : 13081371 : gcc_assert (dr != NULL);
5992 : 13081371 : datarefs->safe_push (dr);
5993 : : }
5994 : :
5995 : 36418385 : return opt_result::success ();
5996 : 40117380 : }
5997 : :
5998 : : /* Stores the data references in STMT to DATAREFS. If STMT
5999 : : clobbers memory, returns false, otherwise returns true.
6000 : : NEST is the outermost loop of the loop nest in which the references
6001 : : should be instantiated, LOOP is the loop in which the references
6002 : : should be analyzed. Here NEST is passed as an edge, straight through to create_data_ref. */
6003 : :
6004 : : bool
6005 : 13942 : graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
6006 : : vec<data_reference_p> *datarefs)
6007 : : {
6008 : 13942 : auto_vec<data_ref_loc, 2> references;
6009 : 13942 : bool ret = true; /* Never reassigned below. */
6010 : 13942 : data_reference_p dr;
6011 : :
6012 : 13942 : if (get_references_in_stmt (stmt, &references))
6013 : : return false;
6014 : :
6015 : 44614 : for (const data_ref_loc &ref : references)
6016 : : {
6017 : 5606 : dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
6018 : 2803 : ref.is_conditional_in_stmt);
6019 : 2803 : gcc_assert (dr != NULL);
6020 : 2803 : datarefs->safe_push (dr);
6021 : : }
6022 : :
6023 : : return ret;
6024 : 13942 : }
6025 : :
6026 : : /* Search the data references in basic block BB, with LOOP as the nest, and record the information into
6027 : : DATAREFS. Returns chrec_dont_know when a statement cannot be handled (after pushing
6028 : : an XCNEW-allocated data_reference onto DATAREFS), returns NULL_TREE otherwise. */
6029 : :
6030 : : tree
6031 : 1917182 : find_data_references_in_bb (class loop *loop, basic_block bb,
6032 : : vec<data_reference_p> *datarefs)
6033 : : {
6034 : 1917182 : gimple_stmt_iterator bsi;
6035 : :
6036 : 15438914 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6037 : : {
6038 : 11896630 : gimple *stmt = gsi_stmt (bsi);
6039 : :
6040 : 11896630 : if (!find_data_references_in_stmt (loop, stmt, datarefs))
6041 : : {
6042 : 292080 : struct data_reference *res;
6043 : 292080 : res = XCNEW (struct data_reference);
6044 : 292080 : datarefs->safe_push (res); /* Entries pushed for earlier statements stay in DATAREFS. */
6045 : :
6046 : 292080 : return chrec_dont_know;
6047 : : }
6048 : : }
6049 : :
6050 : : return NULL_TREE;
6051 : : }
6052 : :
6053 : : /* Search the data references in LOOP, and record the information into
6054 : : DATAREFS. Blocks are visited in the order given by get_loop_body_in_dom_order. Returns chrec_dont_know as soon as
6055 : : one block fails, returns NULL_TREE otherwise.
6056 : :
6057 : : TODO: This function should be made smarter so that it can handle address
6058 : : arithmetic as if they were array accesses, etc. */
6059 : :
6060 : : tree
6061 : 679531 : find_data_references_in_loop (class loop *loop,
6062 : : vec<data_reference_p> *datarefs)
6063 : : {
6064 : 679531 : basic_block bb, *bbs;
6065 : 679531 : unsigned int i;
6066 : :
6067 : 679531 : bbs = get_loop_body_in_dom_order (loop);
6068 : :
6069 : 2949660 : for (i = 0; i < loop->num_nodes; i++)
6070 : : {
6071 : 1861643 : bb = bbs[i];
6072 : :
6073 : 1861643 : if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6074 : : {
6075 : 271045 : free (bbs); /* BBS is freed on both exit paths. */
6076 : 271045 : return chrec_dont_know;
6077 : : }
6078 : : }
6079 : 408486 : free (bbs);
6080 : :
6081 : 408486 : return NULL_TREE;
6082 : : }
6083 : :
6084 : : /* Return the alignment in bytes that DRB is guaranteed to have at all
6085 : : times: the base alignment, capped by the misalignment, offset alignment and step alignment recorded in DRB. */
6086 : :
6087 : : unsigned int
6088 : 334357 : dr_alignment (innermost_loop_behavior *drb)
6089 : : {
6090 : : /* Get the alignment of BASE_ADDRESS + INIT. */
6091 : 334357 : unsigned int alignment = drb->base_alignment;
6092 : 334357 : unsigned int misalignment = (drb->base_misalignment
6093 : 334357 : + TREE_INT_CST_LOW (drb->init)); /* NOTE(review): presumably INIT is always an INTEGER_CST here -- confirm. */
6094 : 334357 : if (misalignment != 0)
6095 : 144453 : alignment = MIN (alignment, misalignment & -misalignment); /* X & -X isolates the lowest set bit of X. */
6096 : :
6097 : : /* Cap it to the alignment of OFFSET. */
6098 : 334357 : if (!integer_zerop (drb->offset))
6099 : 18459 : alignment = MIN (alignment, drb->offset_alignment);
6100 : :
6101 : : /* Cap it to the alignment of STEP. */
6102 : 334357 : if (!integer_zerop (drb->step))
6103 : 152940 : alignment = MIN (alignment, drb->step_alignment);
6104 : :
6105 : 334357 : return alignment;
6106 : : }
6107 : :
6108 : : /* If BASE is a pointer-typed SSA name, try to find the object that it
6109 : : is based on. Return this object X on success and store the alignment
6110 : : in bytes of BASE - &X in *ALIGNMENT_OUT. Return NULL_TREE on failure, leaving *ALIGNMENT_OUT unwritten. */
6111 : :
6112 : : static tree
6113 : 493298 : get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6114 : : {
6115 : 493298 : if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6116 : : return NULL_TREE;
6117 : :
6118 : 185721 : gimple *def = SSA_NAME_DEF_STMT (base);
6119 : 185721 : base = analyze_scalar_evolution (loop_containing_stmt (def), base); /* BASE now holds the scalar evolution, not the SSA name. */
6120 : :
6121 : : /* Peel chrecs and record the minimum alignment preserved by
6122 : : all steps. */
6123 : 185721 : unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6124 : 378607 : while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6125 : : {
6126 : 7165 : unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6127 : 7165 : alignment = MIN (alignment, step_alignment);
6128 : 7165 : base = CHREC_LEFT (base);
6129 : : }
6130 : :
6131 : : /* Punt if the expression is too complicated to handle. */
6132 : 185721 : if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6133 : : return NULL_TREE;
6134 : :
6135 : : /* The only useful cases are those for which a dereference folds to something
6136 : : other than an INDIRECT_REF. */
6137 : 185677 : tree ref_type = TREE_TYPE (TREE_TYPE (base));
6138 : 185677 : tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6139 : 185677 : if (!ref)
6140 : : return NULL_TREE;
6141 : :
6142 : : /* Analyze the base to which the steps we peeled were applied. */
6143 : 563 : poly_int64 bitsize, bitpos, bytepos;
6144 : 563 : machine_mode mode;
6145 : 563 : int unsignedp, reversep, volatilep;
6146 : 563 : tree offset;
6147 : 563 : base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6148 : : &unsignedp, &reversep, &volatilep);
6149 : 493298 : if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos)) /* Fail unless BITPOS is a whole number of bytes; BYTEPOS receives the quotient. */
6150 : : return NULL_TREE;
6151 : :
6152 : : /* Restrict the alignment to that guaranteed by the offsets. */
6153 : 563 : unsigned int bytepos_alignment = known_alignment (bytepos);
6154 : 563 : if (bytepos_alignment != 0)
6155 : 109 : alignment = MIN (alignment, bytepos_alignment);
6156 : 563 : if (offset)
6157 : : {
6158 : 0 : unsigned int offset_alignment = highest_pow2_factor (offset);
6159 : 0 : alignment = MIN (alignment, offset_alignment);
6160 : : }
6161 : :
6162 : 563 : *alignment_out = alignment;
6163 : 563 : return base;
6164 : : }
6165 : :
6166 : : /* Return the object whose alignment would need to be changed in order
6167 : : to increase the alignment of ADDR. Store the maximum achievable
6168 : : alignment in *MAX_ALIGNMENT. If get_base_for_alignment_1 finds no base, fall back to ADDR itself (minus an outer ADDR_EXPR) with MAX_OFILE_ALIGNMENT / BITS_PER_UNIT. */
6169 : :
6170 : : tree
6171 : 493298 : get_base_for_alignment (tree addr, unsigned int *max_alignment)
6172 : : {
6173 : 493298 : tree base = get_base_for_alignment_1 (addr, max_alignment);
6174 : 493298 : if (base)
6175 : : return base; /* The helper has already written *MAX_ALIGNMENT. */
6176 : :
6177 : 492735 : if (TREE_CODE (addr) == ADDR_EXPR)
6178 : 241279 : addr = TREE_OPERAND (addr, 0); /* Strip the ADDR_EXPR to return the addressed operand. */
6179 : 492735 : *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6180 : 492735 : return addr;
6181 : : }
6182 : :
6183 : : /* Recursive helper for find_loop_nest: push LOOP and then, following the ->inner links, each nested loop onto LOOP_NEST. Return false, without pushing, at the first loop that has a ->next sibling; return true once a loop without an inner loop has been pushed. */
6184 : :
6185 : : static bool
6186 : 116576 : find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6187 : : {
6188 : : /* Inner loops of the nest should not contain siblings. Example:
6189 : : when there are two consecutive loops,
6190 : :
6191 : : | loop_0
6192 : : | loop_1
6193 : : | A[{0, +, 1}_1]
6194 : : | endloop_1
6195 : : | loop_2
6196 : : | A[{0, +, 1}_2]
6197 : : | endloop_2
6198 : : | endloop_0
6199 : :
6200 : : the dependence relation cannot be captured by the distance
6201 : : abstraction. */
6202 : 116576 : if (loop->next)
6203 : : return false;
6204 : :
6205 : 99611 : loop_nest->safe_push (loop);
6206 : 99611 : if (loop->inner)
6207 : 33913 : return find_loop_nest_1 (loop->inner, loop_nest);
6208 : : return true;
6209 : : }
6210 : :
6211 : : /* Return false when the LOOP is not well nested. Otherwise return
6212 : : true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
6213 : : contain the loops from the outermost to the innermost, as they will
6214 : : appear in the classic distance vector. LOOP itself is pushed before its inner loops are checked, so LOOP_NEST can be non-empty even when false is returned. */
6215 : :
6216 : : bool
6217 : 853909 : find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6218 : : {
6219 : 853909 : loop_nest->safe_push (loop); /* Unlike the helper, LOOP's own ->next is not checked here. */
6220 : 853909 : if (loop->inner)
6221 : 82663 : return find_loop_nest_1 (loop->inner, loop_nest);
6222 : : return true;
6223 : : }
6224 : :
6225 : : /* Returns true when the data dependences have been computed, false otherwise.
6226 : : Given a loop nest LOOP, the following vectors are returned:
6227 : : LOOP_NEST receives the loops of the nest, DATAREFS is initialized to all the array elements contained in this loop,
6228 : : DEPENDENCE_RELATIONS contains the relations between the data references.
6229 : : Compute read-read and self relations if
6230 : : COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. The output vectors may already hold entries when false is returned. */
6231 : :
6232 : : bool
6233 : 337970 : compute_data_dependences_for_loop (class loop *loop,
6234 : : bool compute_self_and_read_read_dependences,
6235 : : vec<loop_p> *loop_nest,
6236 : : vec<data_reference_p> *datarefs,
6237 : : vec<ddr_p> *dependence_relations)
6238 : : {
6239 : 337970 : bool res = true;
6240 : :
6241 : 337970 : memset (&dependence_stats, 0, sizeof (dependence_stats)); /* Zero the statistics before this run. */
6242 : :
6243 : : /* If the loop nest is not well formed, or one of the data references
6244 : : is not computable, give up without spending time to compute other
6245 : : dependences. */
6246 : 337970 : if (!loop
6247 : 337970 : || !find_loop_nest (loop, loop_nest)
6248 : 337968 : || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6249 : 542429 : || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6250 : : compute_self_and_read_read_dependences))
6251 : : res = false;
6252 : :
6253 : 337970 : if (dump_file && (dump_flags & TDF_STATS)) /* The statistics are dumped whether or not the analysis succeeded. */
6254 : : {
6255 : 191 : fprintf (dump_file, "Dependence tester statistics:\n");
6256 : :
6257 : 191 : fprintf (dump_file, "Number of dependence tests: %d\n",
6258 : : dependence_stats.num_dependence_tests);
6259 : 191 : fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6260 : : dependence_stats.num_dependence_dependent);
6261 : 191 : fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6262 : : dependence_stats.num_dependence_independent);
6263 : 191 : fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6264 : : dependence_stats.num_dependence_undetermined);
6265 : :
6266 : 191 : fprintf (dump_file, "Number of subscript tests: %d\n",
6267 : : dependence_stats.num_subscript_tests);
6268 : 191 : fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6269 : : dependence_stats.num_subscript_undetermined);
6270 : 191 : fprintf (dump_file, "Number of same subscript function: %d\n",
6271 : : dependence_stats.num_same_subscript_function);
6272 : :
6273 : 191 : fprintf (dump_file, "Number of ziv tests: %d\n",
6274 : : dependence_stats.num_ziv);
6275 : 191 : fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6276 : : dependence_stats.num_ziv_dependent);
6277 : 191 : fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6278 : : dependence_stats.num_ziv_independent);
6279 : 191 : fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6280 : : dependence_stats.num_ziv_unimplemented);
6281 : :
6282 : 191 : fprintf (dump_file, "Number of siv tests: %d\n",
6283 : : dependence_stats.num_siv);
6284 : 191 : fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6285 : : dependence_stats.num_siv_dependent);
6286 : 191 : fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6287 : : dependence_stats.num_siv_independent);
6288 : 191 : fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6289 : : dependence_stats.num_siv_unimplemented);
6290 : :
6291 : 191 : fprintf (dump_file, "Number of miv tests: %d\n",
6292 : : dependence_stats.num_miv);
6293 : 191 : fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6294 : : dependence_stats.num_miv_dependent);
6295 : 191 : fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6296 : : dependence_stats.num_miv_independent);
6297 : 191 : fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6298 : : dependence_stats.num_miv_unimplemented);
6299 : : }
6300 : :
6301 : 337970 : return res;
6302 : : }
6303 : :
6304 : : /* Free the memory used by a data dependence relation DDR: its subscripts (if any), its distance and direction vectors, and DDR itself. A NULL DDR is ignored. */
6305 : :
6306 : : void
6307 : 12087899 : free_dependence_relation (struct data_dependence_relation *ddr)
6308 : : {
6309 : 12087899 : if (ddr == NULL)
6310 : : return;
6311 : :
6312 : 12087899 : if (DDR_SUBSCRIPTS (ddr).exists ())
6313 : 737032 : free_subscripts (DDR_SUBSCRIPTS (ddr));
6314 : 12087899 : DDR_DIST_VECTS (ddr).release ();
6315 : 12087899 : DDR_DIR_VECTS (ddr).release ();
6316 : :
6317 : 12087899 : free (ddr);
6318 : : }
6319 : :
6320 : : /* Free the memory used by the data dependence relations from
6321 : : DEPENDENCE_RELATIONS, then release the vector itself. */
6322 : :
6323 : : void
6324 : 2825922 : free_dependence_relations (vec<ddr_p>& dependence_relations)
6325 : : {
6326 : 8931188 : for (data_dependence_relation *ddr : dependence_relations)
6327 : 5138908 : if (ddr) /* free_dependence_relation also ignores NULL. */
6328 : 5138908 : free_dependence_relation (ddr);
6329 : :
6330 : 2825922 : dependence_relations.release ();
6331 : 2825922 : }
6332 : :
6333 : : /* Free the memory used by the data references from DATAREFS: call free_data_ref on every element, then release the vector. */
6334 : :
6335 : : void
6336 : 3069047 : free_data_refs (vec<data_reference_p>& datarefs)
6337 : : {
6338 : 17679030 : for (data_reference *dr : datarefs)
6339 : 11341495 : free_data_ref (dr);
6340 : 3069047 : datarefs.release ();
6341 : 3069047 : }
6342 : :
6343 : : /* Common routine implementing both dr_direction_indicator and
6344 : : dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
6345 : : to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6346 : : Return the step as the indicator otherwise, or NULL_TREE if DR has no step. */
6347 : :
6348 : : static tree
6349 : 45251 : dr_step_indicator (struct data_reference *dr, int useful_min)
6350 : : {
6351 : 45251 : tree step = DR_STEP (dr);
6352 : 45251 : if (!step)
6353 : : return NULL_TREE;
6354 : 45251 : STRIP_NOPS (step);
6355 : : /* Look for cases where the step is scaled by a positive constant
6356 : : integer, which will often be the access size. If the multiplication
6357 : : doesn't change the sign (due to overflow effects) then we can
6358 : : test the unscaled value instead. */
6359 : 45251 : if (TREE_CODE (step) == MULT_EXPR
6360 : 3837 : && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6361 : 49060 : && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6362 : : {
6363 : 3809 : tree factor = TREE_OPERAND (step, 1);
6364 : 3809 : step = TREE_OPERAND (step, 0); /* Continue with the unscaled operand. */
6365 : :
6366 : : /* Strip widening and truncating conversions as well as nops. */
6367 : 243 : if (CONVERT_EXPR_P (step)
6368 : 3809 : && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6369 : 3566 : step = TREE_OPERAND (step, 0);
6370 : 3809 : tree type = TREE_TYPE (step);
6371 : :
6372 : : /* Get the range of step values that would not cause overflow. */
6373 : 7618 : widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6374 : 3809 : / wi::to_widest (factor));
6375 : 7618 : widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6376 : 3809 : / wi::to_widest (factor));
6377 : :
6378 : : /* Get the range of values that the unconverted step actually has. */
6379 : 3809 : wide_int step_min, step_max;
6380 : 3809 : value_range vr;
6381 : 3809 : if (TREE_CODE (step) != SSA_NAME
6382 : 7562 : || !get_range_query (cfun)->range_of_expr (vr, step)
6383 : 7590 : || vr.undefined_p ())
6384 : : {
6385 : 28 : step_min = wi::to_wide (TYPE_MIN_VALUE (type)); /* No usable range: fall back to the full range of TYPE. */
6386 : 28 : step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6387 : : }
6388 : : else
6389 : : {
6390 : 3781 : step_min = vr.lower_bound ();
6391 : 3781 : step_max = vr.upper_bound ();
6392 : : }
6393 : :
6394 : : /* Check whether the unconverted step has an acceptable range. */
6395 : 3809 : signop sgn = TYPE_SIGN (type);
6396 : 7618 : if (wi::les_p (minv, widest_int::from (step_min, sgn))
6397 : 10328 : && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6398 : : {
6399 : 1355 : if (wi::ge_p (step_min, useful_min, sgn))
6400 : 432 : return ssize_int (useful_min);
6401 : 923 : else if (wi::lt_p (step_max, 0, sgn))
6402 : 0 : return ssize_int (-1);
6403 : : else
6404 : 923 : return fold_convert (ssizetype, step); /* Sign not decided: return the unscaled step, converted to ssizetype. */
6405 : : }
6406 : 3809 : }
6407 : 43896 : return DR_STEP (dr); /* Otherwise return the original step, not the stripped local copy. */
6408 : : }
6409 : :
6410 : : /* Return a value that is negative iff DR has a negative step. Implemented as dr_step_indicator with USEFUL_MIN 0. */
6411 : :
6412 : : tree
6413 : 8805 : dr_direction_indicator (struct data_reference *dr)
6414 : : {
6415 : 8805 : return dr_step_indicator (dr, 0);
6416 : : }
6417 : :
6418 : : /* Return a value that is zero iff DR has a zero step. Implemented as dr_step_indicator with USEFUL_MIN 1. */
6419 : :
6420 : : tree
6421 : 36446 : dr_zero_step_indicator (struct data_reference *dr)
6422 : : {
6423 : 36446 : return dr_step_indicator (dr, 1);
6424 : : }
6425 : :
6426 : : /* Return true if DR is known to have a nonnegative (but possibly zero)
6427 : : step. NOTE(review): dr_direction_indicator can yield NULL_TREE when DR_STEP is null, and that value reaches fold_convert unchecked here -- confirm callers only pass references that have a step. */
6428 : :
6429 : : bool
6430 : 3220 : dr_known_forward_stride_p (struct data_reference *dr)
6431 : : {
6432 : 3220 : tree indicator = dr_direction_indicator (dr);
6433 : 3220 : tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6434 : : fold_convert (ssizetype, indicator),
6435 : : ssize_int (0));
6436 : 3220 : return neg_step_val && integer_zerop (neg_step_val); /* True only when the "indicator < 0" test folded to zero. */
6437 : : }
|