Branch data Line data Source code
1 : : /* Data references and dependences detectors.
2 : : Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 : : Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : /* This pass walks a given loop structure searching for array
22 : : references. The information about the array accesses is recorded
23 : : in DATA_REFERENCE structures.
24 : :
25 : : The basic test for determining the dependences is:
26 : : given two access functions chrec1 and chrec2 to the same array, and
27 : : x and y two vectors from the iteration domain, the same element of
28 : : the array is accessed twice at iterations x and y if and only if:
29 : : | chrec1 (x) == chrec2 (y).
30 : :
31 : : The goals of this analysis are:
32 : :
33 : : - to determine the independence: the relation between two
34 : : independent accesses is qualified with the chrec_known (this
35 : : information allows a loop parallelization),
36 : :
37 : : - when two data references access the same data, to qualify the
38 : : dependence relation with classic dependence representations:
39 : :
40 : : - distance vectors
41 : : - direction vectors
42 : : - loop carried level dependence
43 : : - polyhedron dependence
44 : : or with the chains of recurrences based representation,
45 : :
46 : : - to define a knowledge base for storing the data dependence
47 : : information,
48 : :
49 : : - to define an interface to access this data.
50 : :
51 : :
52 : : Definitions:
53 : :
54 : : - subscript: given two array accesses a subscript is the tuple
55 : : composed of the access functions for a given dimension. Example:
56 : : Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 : : (f1, g1), (f2, g2), (f3, g3).
58 : :
59 : : - Diophantine equation: an equation whose coefficients and
60 : : solutions are integer constants, for example the equation
61 : : | 3*x + 2*y = 1
62 : : has an integer solution x = 1 and y = -1.
63 : :
64 : : References:
65 : :
66 : : - "Advanced Compilation for High Performance Computing" by Randy
67 : : Allen and Ken Kennedy.
68 : : http://citeseer.ist.psu.edu/goff91practical.html
69 : :
70 : : - "Loop Transformations for Restructuring Compilers - The Foundations"
71 : : by Utpal Banerjee.
72 : :
73 : :
74 : : */
75 : :
76 : : #define INCLUDE_ALGORITHM
77 : : #include "config.h"
78 : : #include "system.h"
79 : : #include "coretypes.h"
80 : : #include "backend.h"
81 : : #include "rtl.h"
82 : : #include "tree.h"
83 : : #include "gimple.h"
84 : : #include "gimple-pretty-print.h"
85 : : #include "alias.h"
86 : : #include "fold-const.h"
87 : : #include "expr.h"
88 : : #include "gimple-iterator.h"
89 : : #include "tree-ssa-loop-niter.h"
90 : : #include "tree-ssa-loop.h"
91 : : #include "tree-ssa.h"
92 : : #include "cfgloop.h"
93 : : #include "tree-data-ref.h"
94 : : #include "tree-scalar-evolution.h"
95 : : #include "dumpfile.h"
96 : : #include "tree-affine.h"
97 : : #include "builtins.h"
98 : : #include "tree-eh.h"
99 : : #include "ssa.h"
100 : : #include "internal-fn.h"
101 : : #include "vr-values.h"
102 : : #include "range-op.h"
103 : : #include "tree-ssa-loop-ivopts.h"
104 : : #include "calls.h"
105 : :
/* Counters for the dependence analysis, grouped by the kind of test each
   one is named after.  The object has static storage duration and no
   initializer, so every counter starts out as zero.  */

static struct datadep_stats
{
  /* Whole dependence relations.  */
  int num_dependence_tests, num_dependence_dependent,
      num_dependence_independent, num_dependence_undetermined;

  /* Individual subscripts.  */
  int num_subscript_tests, num_subscript_undetermined,
      num_same_subscript_function;

  /* ZIV tests.  */
  int num_ziv, num_ziv_independent, num_ziv_dependent,
      num_ziv_unimplemented;

  /* SIV tests.  */
  int num_siv, num_siv_independent, num_siv_dependent,
      num_siv_unimplemented;

  /* MIV tests.  */
  int num_miv, num_miv_independent, num_miv_dependent,
      num_miv_unimplemented;
} dependence_stats;
132 : :
/* Forward declaration.  The function is static, so its definition must
   appear elsewhere in this translation unit.  */
static bool
subscript_dependence_tester_1 (struct data_dependence_relation *,
                               unsigned int, unsigned int, class loop *);
136 : : /* Returns true iff A divides B. */
137 : :
138 : : static inline bool
139 : 1850 : tree_fold_divides_p (const_tree a, const_tree b)
140 : : {
141 : 1850 : gcc_assert (TREE_CODE (a) == INTEGER_CST);
142 : 1850 : gcc_assert (TREE_CODE (b) == INTEGER_CST);
143 : 1850 : return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
144 : : }
145 : :
146 : : /* Returns true iff A divides B. */
147 : :
148 : : static inline bool
149 : 863447 : int_divides_p (lambda_int a, lambda_int b)
150 : : {
151 : 863447 : return ((b % a) == 0);
152 : : }
153 : :
154 : : /* Return true if reference REF contains a union access. */
155 : :
156 : : static bool
157 : 371757 : ref_contains_union_access_p (tree ref)
158 : : {
159 : 422891 : while (handled_component_p (ref))
160 : : {
161 : 52283 : ref = TREE_OPERAND (ref, 0);
162 : 104566 : if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
163 : 52283 : || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
164 : : return true;
165 : : }
166 : : return false;
167 : : }
168 : :
169 : :
170 : :
171 : : /* Dump into FILE all the data references from DATAREFS. */
172 : :
173 : : static void
174 : 0 : dump_data_references (FILE *file, vec<data_reference_p> datarefs)
175 : : {
176 : 0 : for (data_reference *dr : datarefs)
177 : 0 : dump_data_reference (file, dr);
178 : 0 : }
179 : :
180 : : /* Unified dump into FILE all the data references from DATAREFS. */
181 : :
182 : : DEBUG_FUNCTION void
183 : 0 : debug (vec<data_reference_p> &ref)
184 : : {
185 : 0 : dump_data_references (stderr, ref);
186 : 0 : }
187 : :
188 : : DEBUG_FUNCTION void
189 : 0 : debug (vec<data_reference_p> *ptr)
190 : : {
191 : 0 : if (ptr)
192 : 0 : debug (*ptr);
193 : : else
194 : 0 : fprintf (stderr, "<nil>\n");
195 : 0 : }
196 : :
197 : :
198 : : /* Dump into STDERR all the data references from DATAREFS. */
199 : :
200 : : DEBUG_FUNCTION void
201 : 0 : debug_data_references (vec<data_reference_p> datarefs)
202 : : {
203 : 0 : dump_data_references (stderr, datarefs);
204 : 0 : }
205 : :
206 : : /* Print to STDERR the data_reference DR. */
207 : :
208 : : DEBUG_FUNCTION void
209 : 0 : debug_data_reference (struct data_reference *dr)
210 : : {
211 : 0 : dump_data_reference (stderr, dr);
212 : 0 : }
213 : :
214 : : /* Dump function for a DATA_REFERENCE structure. */
215 : :
216 : : void
217 : 3446 : dump_data_reference (FILE *outf,
218 : : struct data_reference *dr)
219 : : {
220 : 3446 : unsigned int i;
221 : :
222 : 3446 : fprintf (outf, "#(Data Ref: \n");
223 : 3446 : fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
224 : 3446 : fprintf (outf, "# stmt: ");
225 : 3446 : print_gimple_stmt (outf, DR_STMT (dr), 0);
226 : 3446 : fprintf (outf, "# ref: ");
227 : 3446 : print_generic_stmt (outf, DR_REF (dr));
228 : 3446 : fprintf (outf, "# base_object: ");
229 : 3446 : print_generic_stmt (outf, DR_BASE_OBJECT (dr));
230 : :
231 : 10684 : for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
232 : : {
233 : 3792 : fprintf (outf, "# Access function %d: ", i);
234 : 3792 : print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
235 : : }
236 : 3446 : fprintf (outf, "#)\n");
237 : 3446 : }
238 : :
239 : : /* Unified dump function for a DATA_REFERENCE structure. */
240 : :
241 : : DEBUG_FUNCTION void
242 : 0 : debug (data_reference &ref)
243 : : {
244 : 0 : dump_data_reference (stderr, &ref);
245 : 0 : }
246 : :
247 : : DEBUG_FUNCTION void
248 : 0 : debug (data_reference *ptr)
249 : : {
250 : 0 : if (ptr)
251 : 0 : debug (*ptr);
252 : : else
253 : 0 : fprintf (stderr, "<nil>\n");
254 : 0 : }
255 : :
256 : :
257 : : /* Dumps the affine function described by FN to the file OUTF. */
258 : :
259 : : DEBUG_FUNCTION void
260 : 30858 : dump_affine_function (FILE *outf, affine_fn fn)
261 : : {
262 : 30858 : unsigned i;
263 : 30858 : tree coef;
264 : :
265 : 30858 : print_generic_expr (outf, fn[0], TDF_SLIM);
266 : 64802 : for (i = 1; fn.iterate (i, &coef); i++)
267 : : {
268 : 3086 : fprintf (outf, " + ");
269 : 3086 : print_generic_expr (outf, coef, TDF_SLIM);
270 : 3086 : fprintf (outf, " * x_%u", i);
271 : : }
272 : 30858 : }
273 : :
274 : : /* Dumps the conflict function CF to the file OUTF. */
275 : :
276 : : DEBUG_FUNCTION void
277 : 146038 : dump_conflict_function (FILE *outf, conflict_function *cf)
278 : : {
279 : 146038 : unsigned i;
280 : :
281 : 146038 : if (cf->n == NO_DEPENDENCE)
282 : 109180 : fprintf (outf, "no dependence");
283 : 36858 : else if (cf->n == NOT_KNOWN)
284 : 6000 : fprintf (outf, "not known");
285 : : else
286 : : {
287 : 61716 : for (i = 0; i < cf->n; i++)
288 : : {
289 : 30858 : if (i != 0)
290 : 0 : fprintf (outf, " ");
291 : 30858 : fprintf (outf, "[");
292 : 30858 : dump_affine_function (outf, cf->fns[i]);
293 : 30858 : fprintf (outf, "]");
294 : : }
295 : : }
296 : 146038 : }
297 : :
298 : : /* Dump function for a SUBSCRIPT structure. */
299 : :
300 : : DEBUG_FUNCTION void
301 : 830 : dump_subscript (FILE *outf, struct subscript *subscript)
302 : : {
303 : 830 : conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
304 : :
305 : 830 : fprintf (outf, "\n (subscript \n");
306 : 830 : fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
307 : 830 : dump_conflict_function (outf, cf);
308 : 830 : if (CF_NONTRIVIAL_P (cf))
309 : : {
310 : 830 : tree last_iteration = SUB_LAST_CONFLICT (subscript);
311 : 830 : fprintf (outf, "\n last_conflict: ");
312 : 830 : print_generic_expr (outf, last_iteration);
313 : : }
314 : :
315 : 830 : cf = SUB_CONFLICTS_IN_B (subscript);
316 : 830 : fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
317 : 830 : dump_conflict_function (outf, cf);
318 : 830 : if (CF_NONTRIVIAL_P (cf))
319 : : {
320 : 830 : tree last_iteration = SUB_LAST_CONFLICT (subscript);
321 : 830 : fprintf (outf, "\n last_conflict: ");
322 : 830 : print_generic_expr (outf, last_iteration);
323 : : }
324 : :
325 : 830 : fprintf (outf, "\n (Subscript distance: ");
326 : 830 : print_generic_expr (outf, SUB_DISTANCE (subscript));
327 : 830 : fprintf (outf, " ))\n");
328 : 830 : }
329 : :
330 : : /* Print the classic direction vector DIRV to OUTF. */
331 : :
332 : : DEBUG_FUNCTION void
333 : 769 : print_direction_vector (FILE *outf,
334 : : lambda_vector dirv,
335 : : int length)
336 : : {
337 : 769 : int eq;
338 : :
339 : 1667 : for (eq = 0; eq < length; eq++)
340 : : {
341 : 898 : enum data_dependence_direction dir = ((enum data_dependence_direction)
342 : 898 : dirv[eq]);
343 : :
344 : 898 : switch (dir)
345 : : {
346 : 139 : case dir_positive:
347 : 139 : fprintf (outf, " +");
348 : 139 : break;
349 : 6 : case dir_negative:
350 : 6 : fprintf (outf, " -");
351 : 6 : break;
352 : 753 : case dir_equal:
353 : 753 : fprintf (outf, " =");
354 : 753 : break;
355 : 0 : case dir_positive_or_equal:
356 : 0 : fprintf (outf, " +=");
357 : 0 : break;
358 : 0 : case dir_positive_or_negative:
359 : 0 : fprintf (outf, " +-");
360 : 0 : break;
361 : 0 : case dir_negative_or_equal:
362 : 0 : fprintf (outf, " -=");
363 : 0 : break;
364 : 0 : case dir_star:
365 : 0 : fprintf (outf, " *");
366 : 0 : break;
367 : 0 : default:
368 : 0 : fprintf (outf, "indep");
369 : 0 : break;
370 : : }
371 : : }
372 : 769 : fprintf (outf, "\n");
373 : 769 : }
374 : :
375 : : /* Print a vector of direction vectors. */
376 : :
377 : : DEBUG_FUNCTION void
378 : 0 : print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
379 : : int length)
380 : : {
381 : 0 : for (lambda_vector v : dir_vects)
382 : 0 : print_direction_vector (outf, v, length);
383 : 0 : }
384 : :
385 : : /* Print out a vector VEC of length N to OUTFILE. */
386 : :
387 : : DEBUG_FUNCTION void
388 : 4460 : print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 : : {
390 : 4460 : int i;
391 : :
392 : 9326 : for (i = 0; i < n; i++)
393 : 4866 : fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
394 : 4460 : fprintf (outfile, "\n");
395 : 4460 : }
396 : :
397 : : /* Print a vector of distance vectors. */
398 : :
399 : : DEBUG_FUNCTION void
400 : 0 : print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
401 : : int length)
402 : : {
403 : 0 : for (lambda_vector v : dist_vects)
404 : 0 : print_lambda_vector (outf, v, length);
405 : 0 : }
406 : :
407 : : /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
408 : :
409 : : DEBUG_FUNCTION void
410 : 1567 : dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
411 : : {
412 : 1567 : struct data_reference *dra, *drb;
413 : :
414 : 1567 : fprintf (outf, "(Data Dep: \n");
415 : :
416 : 1567 : if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
417 : : {
418 : 399 : if (ddr)
419 : : {
420 : 399 : dra = DDR_A (ddr);
421 : 399 : drb = DDR_B (ddr);
422 : 399 : if (dra)
423 : 399 : dump_data_reference (outf, dra);
424 : : else
425 : 0 : fprintf (outf, " (nil)\n");
426 : 399 : if (drb)
427 : 399 : dump_data_reference (outf, drb);
428 : : else
429 : 0 : fprintf (outf, " (nil)\n");
430 : : }
431 : 399 : fprintf (outf, " (don't know)\n)\n");
432 : 399 : return;
433 : : }
434 : :
435 : 1168 : dra = DDR_A (ddr);
436 : 1168 : drb = DDR_B (ddr);
437 : 1168 : dump_data_reference (outf, dra);
438 : 1168 : dump_data_reference (outf, drb);
439 : :
440 : 1168 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
441 : 419 : fprintf (outf, " (no dependence)\n");
442 : :
443 : 749 : else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
444 : : {
445 : : unsigned int i;
446 : : class loop *loopi;
447 : :
448 : : subscript *sub;
449 : 1579 : FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
450 : : {
451 : 830 : fprintf (outf, " access_fn_A: ");
452 : 830 : print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
453 : 830 : fprintf (outf, " access_fn_B: ");
454 : 830 : print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
455 : 830 : dump_subscript (outf, sub);
456 : : }
457 : :
458 : 749 : fprintf (outf, " loop nest: (");
459 : 2350 : FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
460 : 852 : fprintf (outf, "%d ", loopi->num);
461 : 749 : fprintf (outf, ")\n");
462 : :
463 : 3780 : for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
464 : : {
465 : 769 : fprintf (outf, " distance_vector: ");
466 : 769 : print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
467 : 1538 : DDR_NB_LOOPS (ddr));
468 : : }
469 : :
470 : 1518 : for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
471 : : {
472 : 769 : fprintf (outf, " direction_vector: ");
473 : 769 : print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
474 : 1538 : DDR_NB_LOOPS (ddr));
475 : : }
476 : : }
477 : :
478 : 1168 : fprintf (outf, ")\n");
479 : : }
480 : :
481 : : /* Debug version. */
482 : :
483 : : DEBUG_FUNCTION void
484 : 0 : debug_data_dependence_relation (const struct data_dependence_relation *ddr)
485 : : {
486 : 0 : dump_data_dependence_relation (stderr, ddr);
487 : 0 : }
488 : :
489 : : /* Dump into FILE all the dependence relations from DDRS. */
490 : :
491 : : DEBUG_FUNCTION void
492 : 304 : dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
493 : : {
494 : 2449 : for (auto ddr : ddrs)
495 : 1567 : dump_data_dependence_relation (file, ddr);
496 : 304 : }
497 : :
498 : : DEBUG_FUNCTION void
499 : 0 : debug (vec<ddr_p> &ref)
500 : : {
501 : 0 : dump_data_dependence_relations (stderr, ref);
502 : 0 : }
503 : :
504 : : DEBUG_FUNCTION void
505 : 0 : debug (vec<ddr_p> *ptr)
506 : : {
507 : 0 : if (ptr)
508 : 0 : debug (*ptr);
509 : : else
510 : 0 : fprintf (stderr, "<nil>\n");
511 : 0 : }
512 : :
513 : :
514 : : /* Dump to STDERR all the dependence relations from DDRS. */
515 : :
516 : : DEBUG_FUNCTION void
517 : 0 : debug_data_dependence_relations (vec<ddr_p> ddrs)
518 : : {
519 : 0 : dump_data_dependence_relations (stderr, ddrs);
520 : 0 : }
521 : :
522 : : /* Dumps the distance and direction vectors in FILE. DDRS contains
523 : : the dependence relations, and VECT_SIZE is the size of the
524 : : dependence vectors, or in other words the number of loops in the
525 : : considered nest. */
526 : :
527 : : DEBUG_FUNCTION void
528 : 0 : dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
529 : : {
530 : 0 : for (data_dependence_relation *ddr : ddrs)
531 : 0 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
532 : : {
533 : 0 : for (lambda_vector v : DDR_DIST_VECTS (ddr))
534 : : {
535 : 0 : fprintf (file, "DISTANCE_V (");
536 : 0 : print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
537 : 0 : fprintf (file, ")\n");
538 : : }
539 : :
540 : 0 : for (lambda_vector v : DDR_DIR_VECTS (ddr))
541 : : {
542 : 0 : fprintf (file, "DIRECTION_V (");
543 : 0 : print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
544 : 0 : fprintf (file, ")\n");
545 : : }
546 : : }
547 : :
548 : 0 : fprintf (file, "\n\n");
549 : 0 : }
550 : :
551 : : /* Dumps the data dependence relations DDRS in FILE. */
552 : :
553 : : DEBUG_FUNCTION void
554 : 0 : dump_ddrs (FILE *file, vec<ddr_p> ddrs)
555 : : {
556 : 0 : for (data_dependence_relation *ddr : ddrs)
557 : 0 : dump_data_dependence_relation (file, ddr);
558 : :
559 : 0 : fprintf (file, "\n\n");
560 : 0 : }
561 : :
562 : : DEBUG_FUNCTION void
563 : 0 : debug_ddrs (vec<ddr_p> ddrs)
564 : : {
565 : 0 : dump_ddrs (stderr, ddrs);
566 : 0 : }
567 : :
568 : : /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
569 : : OP0 CODE OP1, where:
570 : :
571 : : - OP0 CODE OP1 has integral type TYPE
572 : : - the range of OP0 is given by OP0_RANGE and
573 : : - the range of OP1 is given by OP1_RANGE.
574 : :
575 : : Independently of RESULT_RANGE, try to compute:
576 : :
577 : : DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
578 : : - (sizetype) (OP0 CODE OP1)
579 : :
580 : : as a constant and subtract DELTA from the ssizetype constant in *OFF.
581 : : Return true on success, or false if DELTA is not known at compile time.
582 : :
583 : : Truncation and sign changes are known to distribute over CODE, i.e.
584 : :
585 : : (itype) (A CODE B) == (itype) A CODE (itype) B
586 : :
587 : : for any integral type ITYPE whose precision is no greater than the
588 : : precision of A and B. */
589 : :
590 : : static bool
591 : 4097890 : compute_distributive_range (tree type, irange &op0_range,
592 : : tree_code code, irange &op1_range,
593 : : tree *off, irange *result_range)
594 : : {
595 : 4097890 : gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
596 : 4097890 : if (result_range)
597 : : {
598 : 1028633 : range_op_handler op (code);
599 : 1028633 : if (!op.fold_range (*result_range, type, op0_range, op1_range))
600 : 0 : result_range->set_varying (type);
601 : : }
602 : :
603 : : /* The distributive property guarantees that if TYPE is no narrower
604 : : than SIZETYPE,
605 : :
606 : : (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
607 : :
608 : : and so we can treat DELTA as zero. */
609 : 4097890 : if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
610 : : return true;
611 : :
612 : : /* If overflow is undefined, we can assume that:
613 : :
614 : : X == (ssizetype) OP0 CODE (ssizetype) OP1
615 : :
616 : : is within the range of TYPE, i.e.:
617 : :
618 : : X == (ssizetype) (TYPE) X
619 : :
620 : : Distributing the (TYPE) truncation over X gives:
621 : :
622 : : X == (ssizetype) (OP0 CODE OP1)
623 : :
624 : : Casting both sides to sizetype and distributing the sizetype cast
625 : : over X gives:
626 : :
627 : : (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
628 : :
629 : : and so we can treat DELTA as zero. */
630 : 261609 : if (TYPE_OVERFLOW_UNDEFINED (type))
631 : : return true;
632 : :
633 : : /* Compute the range of:
634 : :
635 : : (ssizetype) OP0 CODE (ssizetype) OP1
636 : :
637 : : The distributive property guarantees that this has the same bitpattern as:
638 : :
639 : : (sizetype) OP0 CODE (sizetype) OP1
640 : :
641 : : but its range is more conducive to analysis. */
642 : 96541 : range_cast (op0_range, ssizetype);
643 : 96541 : range_cast (op1_range, ssizetype);
644 : 96541 : int_range_max wide_range;
645 : 96541 : range_op_handler op (code);
646 : 96541 : bool saved_flag_wrapv = flag_wrapv;
647 : 96541 : flag_wrapv = 1;
648 : 96541 : if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
649 : 0 : wide_range.set_varying (ssizetype);;
650 : 96541 : flag_wrapv = saved_flag_wrapv;
651 : 96541 : if (wide_range.num_pairs () != 1
652 : 96541 : || wide_range.varying_p () || wide_range.undefined_p ())
653 : : return false;
654 : :
655 : 96187 : wide_int lb = wide_range.lower_bound ();
656 : 96187 : wide_int ub = wide_range.upper_bound ();
657 : :
658 : : /* Calculate the number of times that each end of the range overflows or
659 : : underflows TYPE. We can only calculate DELTA if the numbers match. */
660 : 96187 : unsigned int precision = TYPE_PRECISION (type);
661 : 96187 : if (!TYPE_UNSIGNED (type))
662 : : {
663 : 204 : wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
664 : 204 : lb -= type_min;
665 : 204 : ub -= type_min;
666 : 204 : }
667 : 96187 : wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
668 : 96187 : lb &= upper_bits;
669 : 96187 : ub &= upper_bits;
670 : 96187 : if (lb != ub)
671 : : return false;
672 : :
673 : : /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
674 : : negative values indicating underflow. The low PRECISION bits of LB
675 : : are clear, so DELTA is therefore LB (== UB). */
676 : 42249 : *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
677 : 42249 : return true;
678 : 96541 : }
679 : :
680 : : /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
681 : : given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and
682 : : FROM_TYPE are integral types. */
683 : :
684 : : static bool
685 : 2591578 : nop_conversion_for_offset_p (tree to_type, tree from_type, irange &range)
686 : : {
687 : 2591578 : gcc_assert (INTEGRAL_TYPE_P (to_type)
688 : : && INTEGRAL_TYPE_P (from_type)
689 : : && !TYPE_OVERFLOW_TRAPS (to_type)
690 : : && !TYPE_OVERFLOW_TRAPS (from_type));
691 : :
692 : : /* Converting to something no narrower than sizetype and then to sizetype
693 : : is equivalent to converting directly to sizetype. */
694 : 2591578 : if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
695 : : return true;
696 : :
697 : : /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */
698 : 82409 : if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
699 : 82409 : && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
700 : : return true;
701 : :
702 : : /* For narrowing conversions, we could in principle test whether
703 : : the bits in FROM_TYPE but not in TO_TYPE have a fixed value
704 : : and apply a constant adjustment.
705 : :
706 : : For other conversions (which involve a sign change) we could
707 : : check that the signs are always equal, and apply a constant
708 : : adjustment if the signs are negative.
709 : :
710 : : However, both cases should be rare. */
711 : 65135 : return range_fits_type_p (&range, TYPE_PRECISION (to_type),
712 : 130270 : TYPE_SIGN (to_type));
713 : : }
714 : :
715 : : static void
716 : : split_constant_offset (tree type, tree *var, tree *off,
717 : : irange *result_range,
718 : : hash_map<tree, std::pair<tree, tree> > &cache,
719 : : unsigned *limit);
720 : :
721 : : /* Helper function for split_constant_offset. If TYPE is a pointer type,
722 : : try to express OP0 CODE OP1 as:
723 : :
724 : : POINTER_PLUS <*VAR, (sizetype) *OFF>
725 : :
726 : : where:
727 : :
728 : : - *VAR has type TYPE
729 : : - *OFF is a constant of type ssizetype.
730 : :
731 : : If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
732 : :
733 : : *VAR + (sizetype) *OFF
734 : :
735 : : where:
736 : :
737 : : - *VAR has type sizetype
738 : : - *OFF is a constant of type ssizetype.
739 : :
740 : : In both cases, OP0 CODE OP1 has type TYPE.
741 : :
742 : : Return true on success. A false return value indicates that we can't
743 : : do better than set *OFF to zero.
744 : :
745 : : When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
746 : : if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
747 : :
748 : : CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
749 : : visited. LIMIT counts down the number of SSA names that we are
750 : : allowed to process before giving up. */
751 : :
752 : : static bool
753 : 53510475 : split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
754 : : tree *var, tree *off, irange *result_range,
755 : : hash_map<tree, std::pair<tree, tree> > &cache,
756 : : unsigned *limit)
757 : : {
758 : 53510475 : tree var0, var1;
759 : 53510475 : tree off0, off1;
760 : 53510475 : int_range_max op0_range, op1_range;
761 : :
762 : 53510475 : *var = NULL_TREE;
763 : 53510475 : *off = NULL_TREE;
764 : :
765 : 53510475 : if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
766 : : return false;
767 : :
768 : 53509896 : if (TREE_CODE (op0) == SSA_NAME
769 : 53509896 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
770 : : return false;
771 : 53509400 : if (op1
772 : 7108042 : && TREE_CODE (op1) == SSA_NAME
773 : 55679094 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
774 : : return false;
775 : :
776 : 53509400 : switch (code)
777 : : {
778 : 16095139 : case INTEGER_CST:
779 : 16095139 : *var = size_int (0);
780 : 16095139 : *off = fold_convert (ssizetype, op0);
781 : 16095139 : if (result_range)
782 : : {
783 : 1111051 : wide_int w = wi::to_wide (op0);
784 : 1111051 : result_range->set (TREE_TYPE (op0), w, w);
785 : 1111051 : }
786 : : return true;
787 : :
788 : 2224450 : case POINTER_PLUS_EXPR:
789 : 2224450 : split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
790 : 2224450 : split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
791 : 2224450 : *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
792 : 2224450 : *off = size_binop (PLUS_EXPR, off0, off1);
793 : 2224450 : return true;
794 : :
795 : 1992428 : case PLUS_EXPR:
796 : 1992428 : case MINUS_EXPR:
797 : 1992428 : split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
798 : 1992428 : split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
799 : 1992428 : *off = size_binop (code, off0, off1);
800 : 1992428 : if (!compute_distributive_range (type, op0_range, code, op1_range,
801 : : off, result_range))
802 : : return false;
803 : 1939337 : *var = fold_build2 (code, sizetype, var0, var1);
804 : 1939337 : return true;
805 : :
806 : 2572208 : case MULT_EXPR:
807 : 2572208 : if (TREE_CODE (op1) != INTEGER_CST)
808 : : return false;
809 : :
810 : 2105462 : split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
811 : 2105462 : op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
812 : 2105462 : *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
813 : 2105462 : if (!compute_distributive_range (type, op0_range, code, op1_range,
814 : : off, result_range))
815 : : return false;
816 : 2104261 : *var = fold_build2 (MULT_EXPR, sizetype, var0,
817 : : fold_convert (sizetype, op1));
818 : 2104261 : return true;
819 : :
820 : 9702973 : case ADDR_EXPR:
821 : 9702973 : {
822 : 9702973 : tree base, poffset;
823 : 9702973 : poly_int64 pbitsize, pbitpos, pbytepos;
824 : 9702973 : machine_mode pmode;
825 : 9702973 : int punsignedp, preversep, pvolatilep;
826 : :
827 : 9702973 : op0 = TREE_OPERAND (op0, 0);
828 : 9702973 : base
829 : 9702973 : = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
830 : : &punsignedp, &preversep, &pvolatilep);
831 : :
832 : 9726163 : if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
833 : : return false;
834 : 9702973 : base = build_fold_addr_expr (base);
835 : 9702973 : off0 = ssize_int (pbytepos);
836 : :
837 : 9702973 : if (poffset)
838 : : {
839 : 70 : split_constant_offset (poffset, &poffset, &off1, nullptr,
840 : : cache, limit);
841 : 70 : off0 = size_binop (PLUS_EXPR, off0, off1);
842 : 70 : base = fold_build_pointer_plus (base, poffset);
843 : : }
844 : :
845 : 9702973 : var0 = fold_convert (type, base);
846 : :
847 : : /* If variable length types are involved, punt, otherwise casts
848 : : might be converted into ARRAY_REFs in gimplify_conversion.
849 : : To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
850 : : possibly no longer appears in current GIMPLE, might resurface.
851 : : This perhaps could run
852 : : if (CONVERT_EXPR_P (var0))
853 : : {
854 : : gimplify_conversion (&var0);
855 : : // Attempt to fill in any within var0 found ARRAY_REF's
856 : : // element size from corresponding op embedded ARRAY_REF,
857 : : // if unsuccessful, just punt.
858 : : } */
859 : 19767571 : while (POINTER_TYPE_P (type))
860 : 10064598 : type = TREE_TYPE (type);
861 : 9702973 : if (int_size_in_bytes (type) < 0)
862 : : return false;
863 : :
864 : 9679783 : *var = var0;
865 : 9679783 : *off = off0;
866 : 9679783 : return true;
867 : : }
868 : :
869 : 14158939 : case SSA_NAME:
870 : 14158939 : {
871 : 14158939 : gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
872 : 14158939 : enum tree_code subcode;
873 : :
874 : 14158939 : if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
875 : : return false;
876 : :
877 : 7796017 : subcode = gimple_assign_rhs_code (def_stmt);
878 : :
879 : : /* We are using a cache to avoid un-CSEing large amounts of code. */
880 : 7796017 : bool use_cache = false;
881 : 7796017 : if (!has_single_use (op0)
882 : 7796017 : && (subcode == POINTER_PLUS_EXPR
883 : 3968331 : || subcode == PLUS_EXPR
884 : : || subcode == MINUS_EXPR
885 : : || subcode == MULT_EXPR
886 : : || subcode == ADDR_EXPR
887 : : || CONVERT_EXPR_CODE_P (subcode)))
888 : : {
889 : 1922786 : use_cache = true;
890 : 1922786 : bool existed;
891 : 1922786 : std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
892 : 1922786 : if (existed)
893 : : {
894 : 30974 : if (integer_zerop (e.second))
895 : 30974 : return false;
896 : 1546 : *var = e.first;
897 : 1546 : *off = e.second;
898 : : /* The caller sets the range in this case. */
899 : 1546 : return true;
900 : : }
901 : 1891812 : e = std::make_pair (op0, ssize_int (0));
902 : : }
903 : :
904 : 7765043 : if (*limit == 0)
905 : : return false;
906 : 7764007 : --*limit;
907 : :
908 : 7764007 : var0 = gimple_assign_rhs1 (def_stmt);
909 : 7764007 : var1 = gimple_assign_rhs2 (def_stmt);
910 : :
911 : 7764007 : bool res = split_constant_offset_1 (type, var0, subcode, var1,
912 : : var, off, nullptr, cache, limit);
913 : 7764007 : if (res && use_cache)
914 : 1678541 : *cache.get (op0) = std::make_pair (*var, *off);
915 : : /* The caller sets the range in this case. */
916 : : return res;
917 : : }
918 : 3646894 : CASE_CONVERT:
919 : 3646894 : {
920 : : /* We can only handle the following conversions:
921 : :
922 : : - Conversions from one pointer type to another pointer type.
923 : :
924 : : - Conversions from one non-trapping integral type to another
925 : : non-trapping integral type. In this case, the recursive
926 : : call makes sure that:
927 : :
928 : : (sizetype) OP0
929 : :
930 : : can be expressed as a sizetype operation involving VAR and OFF,
931 : : and all we need to do is check whether:
932 : :
933 : : (sizetype) OP0 == (sizetype) (TYPE) OP0
934 : :
935 : : - Conversions from a non-trapping sizetype-size integral type to
936 : : a like-sized pointer type. In this case, the recursive call
937 : : makes sure that:
938 : :
939 : : (sizetype) OP0 == *VAR + (sizetype) *OFF
940 : :
941 : : and we can convert that to:
942 : :
943 : : POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
944 : :
945 : : - Conversions from a sizetype-sized pointer type to a like-sized
946 : : non-trapping integral type. In this case, the recursive call
947 : : makes sure that:
948 : :
949 : : OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
950 : :
951 : : where the POINTER_PLUS and *VAR have the same precision as
952 : : TYPE (and the same precision as sizetype). Then:
953 : :
954 : : (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF. */
955 : 3646894 : tree itype = TREE_TYPE (op0);
956 : 3646894 : if ((POINTER_TYPE_P (itype)
957 : 2686299 : || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
958 : 3646543 : && (POINTER_TYPE_P (type)
959 : 2628420 : || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
960 : 7293437 : && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
961 : 131212 : || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
962 : 131212 : && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
963 : : {
964 : 3646532 : if (POINTER_TYPE_P (type))
965 : : {
966 : 1018112 : split_constant_offset (op0, var, off, nullptr, cache, limit);
967 : 1018112 : *var = fold_convert (type, *var);
968 : : }
969 : 2628420 : else if (POINTER_TYPE_P (itype))
970 : : {
971 : 36842 : split_constant_offset (op0, var, off, nullptr, cache, limit);
972 : 36842 : *var = fold_convert (sizetype, *var);
973 : : }
974 : : else
975 : : {
976 : 2591578 : split_constant_offset (op0, var, off, &op0_range,
977 : : cache, limit);
978 : 2591578 : if (!nop_conversion_for_offset_p (type, itype, op0_range))
979 : : return false;
980 : 2542101 : if (result_range)
981 : : {
982 : 1422666 : *result_range = op0_range;
983 : 1422666 : range_cast (*result_range, type);
984 : : }
985 : : }
986 : 3597055 : return true;
987 : : }
988 : : return false;
989 : : }
990 : :
991 : : default:
992 : : return false;
993 : : }
994 : 53510475 : }
995 : :
996 : : /* If EXP has pointer type, try to express it as:
997 : :
998 : : POINTER_PLUS <*VAR, (sizetype) *OFF>
999 : :
1000 : : where:
1001 : :
1002 : : - *VAR has the same type as EXP
1003 : : - *OFF is a constant of type ssizetype.
1004 : :
1005 : : If EXP has an integral type, try to express (sizetype) EXP as:
1006 : :
1007 : : *VAR + (sizetype) *OFF
1008 : :
1009 : : where:
1010 : :
1011 : : - *VAR has type sizetype
1012 : : - *OFF is a constant of type ssizetype.
1013 : :
1014 : : If EXP_RANGE is nonnull, set it to the range of EXP.
1015 : :
1016 : : CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1017 : : visited. LIMIT counts down the number of SSA names that we are
1018 : : allowed to process before giving up. */
1019 : :
1020 : : static void
1021 : 45746540 : split_constant_offset (tree exp, tree *var, tree *off, irange *exp_range,
1022 : : hash_map<tree, std::pair<tree, tree> > &cache,
1023 : : unsigned *limit)
1024 : : {
1025 : 45746540 : tree type = TREE_TYPE (exp), op0, op1;
1026 : 45746540 : enum tree_code code;
1027 : :
1028 : 45746540 : code = TREE_CODE (exp);
1029 : 45746540 : if (exp_range)
1030 : : {
1031 : 8681896 : exp_range->set_varying (type);
1032 : 8681896 : if (code == SSA_NAME)
1033 : : {
1034 : 5058841 : int_range_max vr;
1035 : 10117682 : get_range_query (cfun)->range_of_expr (vr, exp);
1036 : 5058841 : if (vr.undefined_p ())
1037 : 4731 : vr.set_varying (TREE_TYPE (exp));
1038 : 5058841 : tree vr_min, vr_max;
1039 : 5058841 : value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
1040 : 5058841 : wide_int var_min = wi::to_wide (vr_min);
1041 : 5058841 : wide_int var_max = wi::to_wide (vr_max);
1042 : 5058841 : wide_int var_nonzero = get_nonzero_bits (exp);
1043 : 15176523 : vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1044 : : &var_min, &var_max,
1045 : : var_nonzero,
1046 : 5058841 : TYPE_SIGN (type));
1047 : : /* This check for VR_VARYING is here because the old code
1048 : : using get_range_info would return VR_RANGE for the entire
1049 : : domain, instead of VR_VARYING. The new code normalizes
1050 : : full-domain ranges to VR_VARYING. */
1051 : 5058841 : if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1052 : 4927886 : exp_range->set (type, var_min, var_max);
1053 : 5058841 : }
1054 : : }
1055 : :
1056 : 45746540 : if (!tree_is_chrec (exp)
1057 : 45746535 : && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1058 : : {
1059 : 45746468 : extract_ops_from_tree (exp, &code, &op0, &op1);
1060 : 45746468 : if (split_constant_offset_1 (type, op0, code, op1, var, off,
1061 : : exp_range, cache, limit))
1062 : 35641571 : return;
1063 : : }
1064 : :
1065 : 10104969 : *var = exp;
1066 : 10104969 : if (INTEGRAL_TYPE_P (type))
1067 : 3359749 : *var = fold_convert (sizetype, *var);
1068 : 10104969 : *off = ssize_int (0);
1069 : :
1070 : 10104969 : int_range_max r;
1071 : 3057812 : if (exp_range && code != SSA_NAME
1072 : 129224 : && get_range_query (cfun)->range_of_expr (r, exp)
1073 : 10169581 : && !r.undefined_p ())
1074 : 64612 : *exp_range = r;
1075 : 10104969 : }
1076 : :
1077 : : /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same
1078 : : type as EXP while OFF has type ssizetype. */
1079 : :
1080 : : void
1081 : 31560720 : split_constant_offset (tree exp, tree *var, tree *off)
1082 : : {
1083 : 31560720 : unsigned limit = param_ssa_name_def_chain_limit;
1084 : 31560720 : static hash_map<tree, std::pair<tree, tree> > *cache;
1085 : 31560720 : if (!cache)
1086 : 75859 : cache = new hash_map<tree, std::pair<tree, tree> > (37);
1087 : 31560720 : split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1088 : 31560720 : *var = fold_convert (TREE_TYPE (exp), *var);
1089 : 31560720 : cache->empty ();
1090 : 31560720 : }
1091 : :
1092 : : /* Returns the address ADDR of an object in a canonical shape (without nop
1093 : : casts, and with type of pointer to the object). */
1094 : :
1095 : : static tree
1096 : 14768754 : canonicalize_base_object_address (tree addr)
1097 : : {
1098 : 14768754 : tree orig = addr;
1099 : :
1100 : 14768754 : STRIP_NOPS (addr);
1101 : :
1102 : : /* The base address may be obtained by casting from integer, in that case
1103 : : keep the cast. */
1104 : 14768754 : if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1105 : : return orig;
1106 : :
1107 : 14704069 : if (TREE_CODE (addr) != ADDR_EXPR)
1108 : : return addr;
1109 : :
1110 : 9102290 : return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1111 : : }
1112 : :
1113 : : /* Analyze the behavior of memory reference REF within STMT.
1114 : : There are two modes:
1115 : :
1116 : : - BB analysis. In this case we simply split the address into base,
1117 : : init and offset components, without reference to any containing loop.
1118 : : The resulting base and offset are general expressions and they can
1119 : : vary arbitrarily from one iteration of the containing loop to the next.
1120 : : The step is always zero.
1121 : :
1122 : : - loop analysis. In this case we analyze the reference both wrt LOOP
1123 : : and on the basis that the reference occurs (is "used") in LOOP;
1124 : : see the comment above analyze_scalar_evolution_in_loop for more
1125 : : information about this distinction. The base, init, offset and
1126 : : step fields are all invariant in LOOP.
1127 : :
1128 : : Perform BB analysis if LOOP is null, or if LOOP is the function's
1129 : : dummy outermost loop. In other cases perform loop analysis.
1130 : :
1131 : : Return true if the analysis succeeded and store the results in DRB if so.
1132 : : BB analysis can only fail for bitfield or reversed-storage accesses. */
1133 : :
1134 : : opt_result
1135 : 15203150 : dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1136 : : class loop *loop, const gimple *stmt)
1137 : : {
1138 : 15203150 : poly_int64 pbitsize, pbitpos;
1139 : 15203150 : tree base, poffset;
1140 : 15203150 : machine_mode pmode;
1141 : 15203150 : int punsignedp, preversep, pvolatilep;
1142 : 15203150 : affine_iv base_iv, offset_iv;
1143 : 15203150 : tree init, dinit, step;
1144 : 15203150 : bool in_loop = (loop && loop->num);
1145 : :
1146 : 15203150 : if (dump_file && (dump_flags & TDF_DETAILS))
1147 : 64377 : fprintf (dump_file, "analyze_innermost: ");
1148 : :
1149 : 15203150 : base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1150 : : &punsignedp, &preversep, &pvolatilep);
1151 : 15203150 : gcc_assert (base != NULL_TREE);
1152 : :
1153 : 15203150 : poly_int64 pbytepos;
1154 : 15203150 : if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1155 : 39531 : return opt_result::failure_at (stmt,
1156 : : "failed: bit offset alignment.\n");
1157 : :
1158 : 15163619 : if (preversep)
1159 : 417 : return opt_result::failure_at (stmt,
1160 : : "failed: reverse storage order.\n");
1161 : :
1162 : : /* Calculate the alignment and misalignment for the inner reference. */
1163 : 15163202 : unsigned int HOST_WIDE_INT bit_base_misalignment;
1164 : 15163202 : unsigned int bit_base_alignment;
1165 : 15163202 : get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1166 : :
1167 : : /* There are no bitfield references remaining in BASE, so the values
1168 : : we got back must be whole bytes. */
1169 : 15163202 : gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1170 : : && bit_base_misalignment % BITS_PER_UNIT == 0);
1171 : 15163202 : unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1172 : 15163202 : poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1173 : :
1174 : 15163202 : if (TREE_CODE (base) == MEM_REF)
1175 : : {
1176 : 6126199 : if (!integer_zerop (TREE_OPERAND (base, 1)))
1177 : : {
1178 : : /* Subtract MOFF from the base and add it to POFFSET instead.
1179 : : Adjust the misalignment to reflect the amount we subtracted. */
1180 : 987010 : poly_offset_int moff = mem_ref_offset (base);
1181 : 987010 : base_misalignment -= moff.force_shwi ();
1182 : 987010 : tree mofft = wide_int_to_tree (sizetype, moff);
1183 : 987010 : if (!poffset)
1184 : 978573 : poffset = mofft;
1185 : : else
1186 : 8437 : poffset = size_binop (PLUS_EXPR, poffset, mofft);
1187 : : }
1188 : 6126199 : base = TREE_OPERAND (base, 0);
1189 : : }
1190 : : else
1191 : : {
1192 : 9037003 : if (may_be_nonaddressable_p (base))
1193 : 2056 : return opt_result::failure_at (stmt,
1194 : : "failed: base not addressable.\n");
1195 : 9034947 : base = build_fold_addr_expr (base);
1196 : : }
1197 : :
1198 : 15161146 : if (in_loop)
1199 : : {
1200 : 2761221 : if (!simple_iv (loop, loop, base, &base_iv, true))
1201 : 314161 : return opt_result::failure_at
1202 : 314161 : (stmt, "failed: evolution of base is not affine.\n");
1203 : : }
1204 : : else
1205 : : {
1206 : 12399925 : base_iv.base = base;
1207 : 12399925 : base_iv.step = ssize_int (0);
1208 : 12399925 : base_iv.no_overflow = true;
1209 : : }
1210 : :
1211 : 14846985 : if (!poffset)
1212 : : {
1213 : 12370802 : offset_iv.base = ssize_int (0);
1214 : 12370802 : offset_iv.step = ssize_int (0);
1215 : : }
1216 : : else
1217 : : {
1218 : 2476183 : if (!in_loop)
1219 : : {
1220 : 1304549 : offset_iv.base = poffset;
1221 : 1304549 : offset_iv.step = ssize_int (0);
1222 : : }
1223 : 1171634 : else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1224 : 78231 : return opt_result::failure_at
1225 : 78231 : (stmt, "failed: evolution of offset is not affine.\n");
1226 : : }
1227 : :
1228 : 14768754 : init = ssize_int (pbytepos);
1229 : :
1230 : : /* Subtract any constant component from the base and add it to INIT instead.
1231 : : Adjust the misalignment to reflect the amount we subtracted. */
1232 : 14768754 : split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1233 : 14768754 : init = size_binop (PLUS_EXPR, init, dinit);
1234 : 14768754 : base_misalignment -= TREE_INT_CST_LOW (dinit);
1235 : :
1236 : 14768754 : split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1237 : 14768754 : init = size_binop (PLUS_EXPR, init, dinit);
1238 : :
1239 : 14768754 : step = size_binop (PLUS_EXPR,
1240 : : fold_convert (ssizetype, base_iv.step),
1241 : : fold_convert (ssizetype, offset_iv.step));
1242 : :
1243 : 14768754 : base = canonicalize_base_object_address (base_iv.base);
1244 : :
1245 : : /* See if get_pointer_alignment can guarantee a higher alignment than
1246 : : the one we calculated above. */
1247 : 14768754 : unsigned int HOST_WIDE_INT alt_misalignment;
1248 : 14768754 : unsigned int alt_alignment;
1249 : 14768754 : get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1250 : :
1251 : : /* As above, these values must be whole bytes. */
1252 : 14768754 : gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1253 : : && alt_misalignment % BITS_PER_UNIT == 0);
1254 : 14768754 : alt_alignment /= BITS_PER_UNIT;
1255 : 14768754 : alt_misalignment /= BITS_PER_UNIT;
1256 : :
1257 : 14768754 : if (base_alignment < alt_alignment)
1258 : : {
1259 : 123658 : base_alignment = alt_alignment;
1260 : 123658 : base_misalignment = alt_misalignment;
1261 : : }
1262 : :
1263 : 14768754 : drb->base_address = base;
1264 : 14768754 : drb->offset = fold_convert (ssizetype, offset_iv.base);
1265 : 14768754 : drb->init = init;
1266 : 14768754 : drb->step = step;
1267 : 14768754 : if (known_misalignment (base_misalignment, base_alignment,
1268 : : &drb->base_misalignment))
1269 : 14768754 : drb->base_alignment = base_alignment;
1270 : : else
1271 : : {
1272 : : drb->base_alignment = known_alignment (base_misalignment);
1273 : : drb->base_misalignment = 0;
1274 : : }
1275 : 14768754 : drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1276 : 14768754 : drb->step_alignment = highest_pow2_factor (step);
1277 : :
1278 : 14768754 : if (dump_file && (dump_flags & TDF_DETAILS))
1279 : 61189 : fprintf (dump_file, "success.\n");
1280 : :
1281 : 14768754 : return opt_result::success ();
1282 : : }
1283 : :
1284 : : /* Return true if OP is a valid component reference for a DR access
1285 : : function. This accepts a subset of what handled_component_p accepts. */
1286 : :
1287 : : static bool
1288 : 22242676 : access_fn_component_p (tree op)
1289 : : {
1290 : 22242676 : switch (TREE_CODE (op))
1291 : : {
1292 : : case REALPART_EXPR:
1293 : : case IMAGPART_EXPR:
1294 : : case ARRAY_REF:
1295 : : return true;
1296 : :
1297 : 14797059 : case COMPONENT_REF:
1298 : 14797059 : return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1299 : :
1300 : 0 : default:
1301 : 0 : return false;
1302 : : }
1303 : : }
1304 : :
1305 : : /* Returns whether BASE can have a access_fn_component_p with BASE
1306 : : as base. */
1307 : :
1308 : : static bool
1309 : 126984 : base_supports_access_fn_components_p (tree base)
1310 : : {
1311 : 126984 : switch (TREE_CODE (TREE_TYPE (base)))
1312 : : {
1313 : : case COMPLEX_TYPE:
1314 : : case ARRAY_TYPE:
1315 : : case RECORD_TYPE:
1316 : : return true;
1317 : 120409 : default:
1318 : 120409 : return false;
1319 : : }
1320 : : }
1321 : :
1322 : : /* Determines the base object and the list of indices of memory reference
1323 : : DR, analyzed in LOOP and instantiated before NEST. */
1324 : :
1325 : : static void
1326 : 15276224 : dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1327 : : {
1328 : : /* If analyzing a basic-block there are no indices to analyze
1329 : : and thus no access functions. */
1330 : 15276224 : if (!nest)
1331 : : {
1332 : 12441316 : dri->base_object = ref;
1333 : 12441316 : dri->access_fns.create (0);
1334 : 12441316 : return;
1335 : : }
1336 : :
1337 : 2834908 : vec<tree> access_fns = vNULL;
1338 : :
1339 : : /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1340 : : into a two element array with a constant index. The base is
1341 : : then just the immediate underlying object. */
1342 : 2834908 : if (TREE_CODE (ref) == REALPART_EXPR)
1343 : : {
1344 : 39917 : ref = TREE_OPERAND (ref, 0);
1345 : 39917 : access_fns.safe_push (integer_zero_node);
1346 : : }
1347 : 2794991 : else if (TREE_CODE (ref) == IMAGPART_EXPR)
1348 : : {
1349 : 39540 : ref = TREE_OPERAND (ref, 0);
1350 : 39540 : access_fns.safe_push (integer_one_node);
1351 : : }
1352 : :
1353 : : /* Analyze access functions of dimensions we know to be independent.
1354 : : The list of component references handled here should be kept in
1355 : : sync with access_fn_component_p. */
1356 : 5241390 : while (handled_component_p (ref))
1357 : : {
1358 : 2516238 : if (TREE_CODE (ref) == ARRAY_REF)
1359 : : {
1360 : 1238964 : tree op = TREE_OPERAND (ref, 1);
1361 : 1238964 : tree access_fn = analyze_scalar_evolution (loop, op);
1362 : 1238964 : access_fn = instantiate_scev (nest, loop, access_fn);
1363 : 1238964 : access_fns.safe_push (access_fn);
1364 : : }
1365 : 1277274 : else if (TREE_CODE (ref) == COMPONENT_REF
1366 : 1277274 : && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1367 : : {
1368 : : /* For COMPONENT_REFs of records (but not unions!) use the
1369 : : FIELD_DECL offset as constant access function so we can
1370 : : disambiguate a[i].f1 and a[i].f2. */
1371 : 1167518 : tree off = component_ref_field_offset (ref);
1372 : 1167518 : off = size_binop (PLUS_EXPR,
1373 : : size_binop (MULT_EXPR,
1374 : : fold_convert (bitsizetype, off),
1375 : : bitsize_int (BITS_PER_UNIT)),
1376 : : DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1377 : 1167518 : access_fns.safe_push (off);
1378 : : }
1379 : : else
1380 : : /* If we have an unhandled component we could not translate
1381 : : to an access function stop analyzing. We have determined
1382 : : our base object in this case. */
1383 : : break;
1384 : :
1385 : 2406482 : ref = TREE_OPERAND (ref, 0);
1386 : : }
1387 : :
1388 : : /* If the address operand of a MEM_REF base has an evolution in the
1389 : : analyzed nest, add it as an additional independent access-function. */
1390 : 2834908 : if (TREE_CODE (ref) == MEM_REF)
1391 : : {
1392 : 1928570 : tree op = TREE_OPERAND (ref, 0);
1393 : 1928570 : tree access_fn = analyze_scalar_evolution (loop, op);
1394 : 1928570 : access_fn = instantiate_scev (nest, loop, access_fn);
1395 : 1928570 : STRIP_NOPS (access_fn);
1396 : 1928570 : if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1397 : : {
1398 : 939992 : tree memoff = TREE_OPERAND (ref, 1);
1399 : 939992 : tree base = initial_condition (access_fn);
1400 : 939992 : tree orig_type = TREE_TYPE (base);
1401 : 939992 : STRIP_USELESS_TYPE_CONVERSION (base);
1402 : 939992 : tree off;
1403 : 939992 : split_constant_offset (base, &base, &off);
1404 : 939992 : STRIP_USELESS_TYPE_CONVERSION (base);
1405 : : /* Fold the MEM_REF offset into the evolutions initial
1406 : : value to make more bases comparable. */
1407 : 939992 : if (!integer_zerop (memoff))
1408 : : {
1409 : 66550 : off = size_binop (PLUS_EXPR, off,
1410 : : fold_convert (ssizetype, memoff));
1411 : 66550 : memoff = build_int_cst (TREE_TYPE (memoff), 0);
1412 : : }
1413 : : /* Adjust the offset so it is a multiple of the access type
1414 : : size and thus we separate bases that can possibly be used
1415 : : to produce partial overlaps (which the access_fn machinery
1416 : : cannot handle). */
1417 : 939992 : wide_int rem;
1418 : 939992 : if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1419 : 939856 : && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1420 : 1879584 : && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1421 : 939592 : rem = wi::mod_trunc
1422 : 939592 : (wi::to_wide (off),
1423 : 1879184 : wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1424 : 939592 : SIGNED);
1425 : : else
1426 : : /* If we can't compute the remainder simply force the initial
1427 : : condition to zero. */
1428 : 400 : rem = wi::to_wide (off);
1429 : 939992 : off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1430 : 939992 : memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1431 : : /* And finally replace the initial condition. */
1432 : 1879984 : access_fn = chrec_replace_initial_condition
1433 : 939992 : (access_fn, fold_convert (orig_type, off));
1434 : : /* ??? This is still not a suitable base object for
1435 : : dr_may_alias_p - the base object needs to be an
1436 : : access that covers the object as whole. With
1437 : : an evolution in the pointer this cannot be
1438 : : guaranteed.
1439 : : As a band-aid, mark the access so we can special-case
1440 : : it in dr_may_alias_p. */
1441 : 939992 : tree old = ref;
1442 : 939992 : ref = fold_build2_loc (EXPR_LOCATION (ref),
1443 : 939992 : MEM_REF, TREE_TYPE (ref),
1444 : : base, memoff);
1445 : 939992 : MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1446 : 939992 : MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1447 : 939992 : dri->unconstrained_base = true;
1448 : 939992 : access_fns.safe_push (access_fn);
1449 : 939992 : }
1450 : : }
1451 : 906338 : else if (DECL_P (ref))
1452 : : {
1453 : : /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1454 : 796582 : ref = build2 (MEM_REF, TREE_TYPE (ref),
1455 : : build_fold_addr_expr (ref),
1456 : 796582 : build_int_cst (reference_alias_ptr_type (ref), 0));
1457 : : }
1458 : :
1459 : 2834908 : dri->base_object = ref;
1460 : 2834908 : dri->access_fns = access_fns;
1461 : : }
1462 : :
1463 : : /* Extracts the alias analysis information from the memory reference DR. */
1464 : :
1465 : : static void
1466 : 15192093 : dr_analyze_alias (struct data_reference *dr)
1467 : : {
1468 : 15192093 : tree ref = DR_REF (dr);
1469 : 15192093 : tree base = get_base_address (ref), addr;
1470 : :
1471 : 15192093 : if (INDIRECT_REF_P (base)
1472 : 15192093 : || TREE_CODE (base) == MEM_REF)
1473 : : {
1474 : 6134136 : addr = TREE_OPERAND (base, 0);
1475 : 6134136 : if (TREE_CODE (addr) == SSA_NAME)
1476 : 6132992 : DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1477 : : }
1478 : 15192093 : }
1479 : :
1480 : : /* Frees data reference DR. */
1481 : :
1482 : : void
1483 : 15493488 : free_data_ref (data_reference_p dr)
1484 : : {
1485 : 15493488 : DR_ACCESS_FNS (dr).release ();
1486 : 15493488 : if (dr->alt_indices.base_object)
1487 : 84131 : dr->alt_indices.access_fns.release ();
1488 : 15493488 : free (dr);
1489 : 15493488 : }
1490 : :
1491 : : /* Analyze memory reference MEMREF, which is accessed in STMT.
1492 : : The reference is a read if IS_READ is true, otherwise it is a write.
1493 : : IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1494 : : within STMT, i.e. that it might not occur even if STMT is executed
1495 : : and runs to completion.
1496 : :
1497 : : Return the data_reference description of MEMREF. NEST is the outermost
1498 : : loop in which the reference should be instantiated, LOOP is the loop
1499 : : in which the data reference should be analyzed. */
1500 : :
1501 : : struct data_reference *
1502 : 15192093 : create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1503 : : bool is_read, bool is_conditional_in_stmt)
1504 : : {
1505 : 15192093 : struct data_reference *dr;
1506 : :
1507 : 15192093 : if (dump_file && (dump_flags & TDF_DETAILS))
1508 : : {
1509 : 63238 : fprintf (dump_file, "Creating dr for ");
1510 : 63238 : print_generic_expr (dump_file, memref, TDF_SLIM);
1511 : 63238 : fprintf (dump_file, "\n");
1512 : : }
1513 : :
1514 : 15192093 : dr = XCNEW (struct data_reference);
1515 : 15192093 : DR_STMT (dr) = stmt;
1516 : 15192093 : DR_REF (dr) = memref;
1517 : 15192093 : DR_IS_READ (dr) = is_read;
1518 : 15192093 : DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1519 : :
1520 : 27633409 : dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1521 : : nest != NULL ? loop : NULL, stmt);
1522 : 15192093 : dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1523 : 15192093 : dr_analyze_alias (dr);
1524 : :
1525 : 15192093 : if (dump_file && (dump_flags & TDF_DETAILS))
1526 : : {
1527 : 63238 : unsigned i;
1528 : 63238 : fprintf (dump_file, "\tbase_address: ");
1529 : 63238 : print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1530 : 63238 : fprintf (dump_file, "\n\toffset from base address: ");
1531 : 63238 : print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1532 : 63238 : fprintf (dump_file, "\n\tconstant offset from base address: ");
1533 : 63238 : print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1534 : 63238 : fprintf (dump_file, "\n\tstep: ");
1535 : 63238 : print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1536 : 63238 : fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1537 : 63238 : fprintf (dump_file, "\n\tbase misalignment: %d",
1538 : : DR_BASE_MISALIGNMENT (dr));
1539 : 63238 : fprintf (dump_file, "\n\toffset alignment: %d",
1540 : : DR_OFFSET_ALIGNMENT (dr));
1541 : 63238 : fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1542 : 63238 : fprintf (dump_file, "\n\tbase_object: ");
1543 : 63238 : print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1544 : 63238 : fprintf (dump_file, "\n");
1545 : 180847 : for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1546 : : {
1547 : 54371 : fprintf (dump_file, "\tAccess function %d: ", i);
1548 : 54371 : print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1549 : : }
1550 : : }
1551 : :
1552 : 15192093 : return dr;
1553 : : }
1554 : :
1555 : : /* A helper function computes order between two tree expressions T1 and T2.
1556 : : This is used in comparator functions sorting objects based on the order
1557 : : of tree expressions. The function returns -1, 0, or 1. */
1558 : :
1559 : : int
1560 : 375889690 : data_ref_compare_tree (tree t1, tree t2)
1561 : : {
1562 : 375889690 : int i, cmp;
1563 : 375889690 : enum tree_code code;
1564 : 375889690 : char tclass;
1565 : :
1566 : 375889690 : if (t1 == t2)
1567 : : return 0;
1568 : 173305388 : if (t1 == NULL)
1569 : : return -1;
1570 : 173218220 : if (t2 == NULL)
1571 : : return 1;
1572 : :
1573 : 173164785 : STRIP_USELESS_TYPE_CONVERSION (t1);
1574 : 173164785 : STRIP_USELESS_TYPE_CONVERSION (t2);
1575 : 173164785 : if (t1 == t2)
1576 : : return 0;
1577 : :
1578 : 172649171 : if (TREE_CODE (t1) != TREE_CODE (t2)
1579 : 12724955 : && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1580 : 18060644 : return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1581 : :
1582 : 159924216 : code = TREE_CODE (t1);
1583 : 159924216 : switch (code)
1584 : : {
1585 : 46017068 : case INTEGER_CST:
1586 : 46017068 : return tree_int_cst_compare (t1, t2);
1587 : :
1588 : 0 : case STRING_CST:
1589 : 0 : if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1590 : 0 : return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1591 : 0 : return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1592 : 0 : TREE_STRING_LENGTH (t1));
1593 : :
1594 : 13624298 : case SSA_NAME:
1595 : 13624298 : if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1596 : 13624298 : return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1597 : : break;
1598 : :
1599 : 100282850 : default:
1600 : 100282850 : if (POLY_INT_CST_P (t1))
1601 : : return compare_sizes_for_sort (wi::to_poly_widest (t1),
1602 : : wi::to_poly_widest (t2));
1603 : :
1604 : 100282850 : tclass = TREE_CODE_CLASS (code);
1605 : :
1606 : : /* For decls, compare their UIDs. */
1607 : 100282850 : if (tclass == tcc_declaration)
1608 : : {
1609 : 19604138 : if (DECL_UID (t1) != DECL_UID (t2))
1610 : 19603499 : return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1611 : : break;
1612 : : }
1613 : : /* For expressions, compare their operands recursively. */
1614 : 80678712 : else if (IS_EXPR_CODE_CLASS (tclass))
1615 : : {
1616 : 142251210 : for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1617 : : {
1618 : 92559492 : cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1619 : 92559492 : TREE_OPERAND (t2, i));
1620 : 92559492 : if (cmp != 0)
1621 : : return cmp;
1622 : : }
1623 : : }
1624 : : else
1625 : 0 : gcc_unreachable ();
1626 : : }
1627 : :
1628 : : return 0;
1629 : : }
1630 : :
1631 : : /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1632 : : check. */
1633 : :
1634 : : opt_result
1635 : 115816 : runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1636 : : {
1637 : 115816 : if (dump_enabled_p ())
1638 : 7116 : dump_printf (MSG_NOTE,
1639 : : "consider run-time aliasing test between %T and %T\n",
1640 : 7116 : DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1641 : :
1642 : 115816 : if (!speed_p)
1643 : 0 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1644 : : "runtime alias check not supported when"
1645 : : " optimizing for size.\n");
1646 : :
1647 : : /* FORNOW: We don't support versioning with outer-loop in either
1648 : : vectorization or loop distribution. */
1649 : 115816 : if (loop != NULL && loop->inner != NULL)
1650 : 125 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1651 : : "runtime alias check not supported for"
1652 : : " outer loop.\n");
1653 : :
1654 : : /* FORNOW: We don't support handling different address spaces. */
1655 : 115691 : if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
1656 : 115691 : != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
1657 : 4 : return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1658 : : "runtime alias check between different "
1659 : : "address spaces not supported.\n");
1660 : :
1661 : 115687 : return opt_result::success ();
1662 : : }
1663 : :
1664 : : /* Operator == between two dr_with_seg_len objects.
1665 : :
1666 : : This equality operator is used to make sure two data refs
1667 : : are the same one so that we will consider to combine the
1668 : : aliasing checks of those two pairs of data dependent data
1669 : : refs. */
1670 : :
1671 : : static bool
1672 : 108587 : operator == (const dr_with_seg_len& d1,
1673 : : const dr_with_seg_len& d2)
1674 : : {
1675 : 108587 : return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1676 : 108587 : DR_BASE_ADDRESS (d2.dr), 0)
1677 : 82800 : && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1678 : 82282 : && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1679 : 77836 : && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1680 : 77652 : && known_eq (d1.access_size, d2.access_size)
1681 : 182946 : && d1.align == d2.align);
1682 : : }
1683 : :
1684 : : /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1685 : : so that we can combine aliasing checks in one scan. PA_ and PB_ point to the two pairs being compared; the result is the first non-zero data_ref_compare_tree result over the keys compared below, or 0 if all of them compare equal. */
1686 : :
1687 : : static int
1688 : 990529 : comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1689 : : {
1690 : 990529 : const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1691 : 990529 : const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1692 : 990529 : const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1693 : 990529 : const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1694 : :
1695 : : /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1696 : : if a and c have the same basic address and step, and b and d have the same
1697 : : address and step. Therefore, if any a&c or b&d don't have the same address
1698 : : and step, we don't care about the order of those two pairs after sorting. */
1699 : 990529 : int comp_res;
1700 : :
1701 : 990529 : if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1702 : 990529 : DR_BASE_ADDRESS (b1.dr))) != 0)
1703 : : return comp_res;
1704 : 481126 : if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1705 : 481126 : DR_BASE_ADDRESS (b2.dr))) != 0)
1706 : : return comp_res;
1707 : 303339 : if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1708 : 303339 : DR_STEP (b1.dr))) != 0)
1709 : : return comp_res;
1710 : 302723 : if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1711 : 302723 : DR_STEP (b2.dr))) != 0)
1712 : : return comp_res;
1713 : 295160 : if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1714 : 295160 : DR_OFFSET (b1.dr))) != 0)
1715 : : return comp_res;
1716 : 289961 : if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1717 : 289961 : DR_INIT (b1.dr))) != 0)
1718 : : return comp_res;
1719 : 213619 : if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1720 : 213619 : DR_OFFSET (b2.dr))) != 0)
1721 : : return comp_res;
1722 : 211708 : if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1723 : 211708 : DR_INIT (b2.dr))) != 0)
1724 : : return comp_res;
1725 : :
1726 : : return 0;
1727 : : }
1728 : :
1729 : : /* Dump information about ALIAS_PAIR, indenting each line by INDENT. For the segment length, access size and alignment, the second reference's value is printed only when it differs from the first's. */
1730 : :
1731 : : static void
1732 : 1021 : dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1733 : : {
1734 : 2042 : dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
1735 : 1021 : DR_REF (alias_pair->first.dr),
1736 : 1021 : DR_REF (alias_pair->second.dr));
1737 : :
1738 : 1021 : dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1739 : : alias_pair->first.seg_len);
1740 : 1021 : if (!operand_equal_p (alias_pair->first.seg_len,
1741 : 1021 : alias_pair->second.seg_len, 0))
1742 : 253 : dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1743 : :
1744 : 1021 : dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
1745 : 1021 : dump_dec (MSG_NOTE, alias_pair->first.access_size);
1746 : 1021 : if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1747 : : {
1748 : 269 : dump_printf (MSG_NOTE, " vs. ");
1749 : 269 : dump_dec (MSG_NOTE, alias_pair->second.access_size);
1750 : : }
1751 : :
1752 : 1021 : dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
1753 : : alias_pair->first.align);
1754 : 1021 : if (alias_pair->first.align != alias_pair->second.align)
1755 : 72 : dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1756 : :
1757 : 1021 : dump_printf (MSG_NOTE, "\n%sflags: ", indent);
1758 : 1021 : if (alias_pair->flags & DR_ALIAS_RAW)
1759 : 173 : dump_printf (MSG_NOTE, " RAW");
1760 : 1021 : if (alias_pair->flags & DR_ALIAS_WAR)
1761 : 821 : dump_printf (MSG_NOTE, " WAR");
1762 : 1021 : if (alias_pair->flags & DR_ALIAS_WAW)
1763 : 171 : dump_printf (MSG_NOTE, " WAW");
1764 : 1021 : if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1765 : 284 : dump_printf (MSG_NOTE, " ARBITRARY");
1766 : 1021 : if (alias_pair->flags & DR_ALIAS_SWAPPED)
1767 : 0 : dump_printf (MSG_NOTE, " SWAPPED");
1768 : 1021 : if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1769 : 0 : dump_printf (MSG_NOTE, " UNSWAPPED");
1770 : 1021 : if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1771 : 0 : dump_printf (MSG_NOTE, " MIXED_STEPS");
1772 : 1021 : if (alias_pair->flags == 0)
1773 : 0 : dump_printf (MSG_NOTE, " <none>");
1774 : 1021 : dump_printf (MSG_NOTE, "\n");
1775 : 1021 : }
1776 : :
1777 : : /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1778 : : FACTOR is the number of iterations that each data reference is accessed; the corresponding argument is unnamed in the definition below and so is not used by it.
1779 : :
1780 : : Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1781 : : we create an expression:
1782 : :
1783 : : ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1784 : : || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0)
1785 : :
1786 : : for aliasing checks. However, in some cases we can decrease the number
1787 : : of checks by combining two checks into one. For example, suppose we have
1788 : : another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1789 : : condition is satisfied:
1790 : :
1791 : : load_ptr_0 < load_ptr_1 &&
1792 : : load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1793 : :
1794 : : (this condition means, in each iteration of vectorized loop, the accessed
1795 : : memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1796 : : load_ptr_1.)
1797 : :
1798 : : we then can use only the following expression to finish the aliasing checks
1799 : : between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1800 : :
1801 : : ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1802 : : || ((load_ptr_1 + load_segment_length_1) <= store_ptr_0)
1803 : :
1804 : : Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1805 : : basic address. */
1806 : :
1807 : : void
1808 : 14960 : prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1809 : : poly_uint64)
1810 : : {
1811 : 14960 : if (alias_pairs->is_empty ())
1812 : 14960 : return;
1813 : :
1814 : : /* Canonicalize each pair so that the base components are ordered wrt
1815 : : data_ref_compare_tree. This allows the loop below to merge more
1816 : : cases. */
1817 : : unsigned int i;
1818 : : dr_with_seg_len_pair_t *alias_pair;
1819 : 64621 : FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1820 : : {
1821 : 50242 : data_reference_p dr_a = alias_pair->first.dr;
1822 : 50242 : data_reference_p dr_b = alias_pair->second.dr;
1823 : 50242 : int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1824 : : DR_BASE_ADDRESS (dr_b));
1825 : 50242 : if (comp_res == 0)
1826 : 1101 : comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1827 : 1101 : if (comp_res == 0)
1828 : 58 : comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1829 : 50242 : if (comp_res > 0)
1830 : : {
1831 : 16305 : std::swap (alias_pair->first, alias_pair->second);
1832 : 16305 : alias_pair->flags |= DR_ALIAS_SWAPPED;
1833 : : }
1834 : : else
1835 : 33937 : alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1836 : : }
1837 : :
1838 : : /* Sort the collected data ref pairs so that we can scan them once to
1839 : : combine all possible aliasing checks. */
1840 : 14379 : alias_pairs->qsort (comp_dr_with_seg_len_pair);
1841 : :
1842 : : /* Scan the sorted dr pairs and check if we can combine alias checks
1843 : : of two neighboring dr pairs. LAST is the index of the most recent pair that has been kept. */
1844 : : unsigned int last = 0;
1845 : 50242 : for (i = 1; i < alias_pairs->length (); ++i)
1846 : : {
1847 : : /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1848 : 35863 : dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1849 : 35863 : dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1850 : :
1851 : 35863 : dr_with_seg_len *dr_a1 = &alias_pair1->first;
1852 : 35863 : dr_with_seg_len *dr_b1 = &alias_pair1->second;
1853 : 35863 : dr_with_seg_len *dr_a2 = &alias_pair2->first;
1854 : 35863 : dr_with_seg_len *dr_b2 = &alias_pair2->second;
1855 : :
1856 : : /* Remove duplicate data ref pairs, keeping the union of their flags. */
1857 : 35863 : if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1858 : : {
1859 : 16155 : if (dump_enabled_p ())
1860 : 1897 : dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1861 : 1897 : DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1862 : 1897 : DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1863 : 16155 : alias_pair1->flags |= alias_pair2->flags;
1864 : 52018 : continue;
1865 : : }
1866 : :
1867 : : /* Assume that we won't be able to merge the pairs, then correct
1868 : : if we do. */
1869 : 19708 : last += 1;
1870 : 19708 : if (last != i)
1871 : 4582 : (*alias_pairs)[last] = (*alias_pairs)[i];
1872 : :
1873 : 19708 : if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1874 : : {
1875 : : /* We consider the case that DR_B1 and DR_B2 are the same memrefs,
1876 : : and DR_A1 and DR_A2 are two consecutive memrefs. If it is the DR_As that are equal instead, swap the roles of the As and Bs. */
1877 : 17153 : if (*dr_a1 == *dr_a2)
1878 : : {
1879 : 12448 : std::swap (dr_a1, dr_b1);
1880 : 12448 : std::swap (dr_a2, dr_b2);
1881 : : }
1882 : :
1883 : 17153 : poly_int64 init_a1, init_a2;
1884 : : /* Only consider cases in which the distance between the initial
1885 : : DR_A1 and the initial DR_A2 is known at compile time. */
1886 : 31587 : if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1887 : 17153 : DR_BASE_ADDRESS (dr_a2->dr), 0)
1888 : 2986 : || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1889 : 2986 : DR_OFFSET (dr_a2->dr), 0)
1890 : 2719 : || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1891 : 19872 : || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1892 : 14441 : continue;
1893 : :
1894 : : /* Don't combine if we can't tell which one comes first. */
1895 : 2719 : if (!ordered_p (init_a1, init_a2))
1896 : : continue;
1897 : :
1898 : : /* Work out what the segment length would be if we did combine
1899 : : DR_A1 and DR_A2:
1900 : :
1901 : : - If DR_A1 and DR_A2 have equal lengths, that length is
1902 : : also the combined length.
1903 : :
1904 : : - If DR_A1 and DR_A2 both have negative "lengths", the combined
1905 : : length is the lower bound on those lengths.
1906 : :
1907 : : - If DR_A1 and DR_A2 both have positive lengths, the combined
1908 : : length is the upper bound on those lengths.
1909 : :
1910 : : Other cases are unlikely to give a useful combination.
1911 : :
1912 : : The lengths both have sizetype, so the sign is taken from
1913 : : the step instead. */
1914 : 2719 : poly_uint64 new_seg_len = 0;
1915 : 2719 : bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1916 : 2719 : dr_a2->seg_len, 0);
1917 : 2719 : if (new_seg_len_p)
1918 : : {
1919 : 7 : poly_uint64 seg_len_a1, seg_len_a2;
1920 : 7 : if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1921 : 7 : || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1922 : 7 : continue;
1923 : :
1924 : 0 : tree indicator_a = dr_direction_indicator (dr_a1->dr);
1925 : 0 : if (TREE_CODE (indicator_a) != INTEGER_CST)
1926 : 0 : continue;
1927 : :
1928 : 0 : tree indicator_b = dr_direction_indicator (dr_a2->dr);
1929 : 0 : if (TREE_CODE (indicator_b) != INTEGER_CST)
1930 : 0 : continue;
1931 : :
1932 : 0 : int sign_a = tree_int_cst_sgn (indicator_a);
1933 : 0 : int sign_b = tree_int_cst_sgn (indicator_b);
1934 : :
1935 : 0 : if (sign_a <= 0 && sign_b <= 0)
1936 : 0 : new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1937 : 0 : else if (sign_a >= 0 && sign_b >= 0)
1938 : 0 : new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1939 : : else
1940 : 0 : continue;
1941 : : }
1942 : : /* At this point we're committed to merging the refs. */
1943 : :
1944 : : /* Make sure dr_a1 starts left of dr_a2. */
1945 : 2712 : if (maybe_gt (init_a1, init_a2))
1946 : : {
1947 : 0 : std::swap (*dr_a1, *dr_a2);
1948 : 0 : std::swap (init_a1, init_a2);
1949 : : }
1950 : :
1951 : : /* The DR_Bs are equal, so only the DR_As can introduce
1952 : : mixed steps. */
1953 : 2712 : if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1954 : 0 : alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1955 : :
1956 : 2712 : if (new_seg_len_p)
1957 : : {
1958 : 0 : dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1959 : : new_seg_len);
1960 : 0 : dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1961 : : }
1962 : :
1963 : : /* This is always positive due to the swap above. */
1964 : 2712 : poly_uint64 diff = init_a2 - init_a1;
1965 : :
1966 : : /* The new check will start at DR_A1. Make sure that its access
1967 : : size encompasses the initial DR_A2. */
1968 : 2712 : if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1969 : : {
1970 : 1027 : dr_a1->access_size = upper_bound (dr_a1->access_size,
1971 : : diff + dr_a2->access_size);
1972 : 1027 : unsigned int new_align = known_alignment (dr_a1->access_size);
1973 : 1027 : dr_a1->align = MIN (dr_a1->align, new_align);
1974 : : }
1975 : 2712 : if (dump_enabled_p ())
1976 : 761 : dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1977 : 761 : DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1978 : 761 : DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1979 : 2712 : alias_pair1->flags |= alias_pair2->flags;
1980 : 2712 : last -= 1;
1981 : : }
1982 : : }
1983 : 14379 : alias_pairs->truncate (last + 1);
1984 : :
1985 : : /* Try to restore the original dr_with_seg_len order within each
1986 : : dr_with_seg_len_pair_t. If we ended up combining swapped and
1987 : : unswapped pairs into the same check, we have to invalidate any
1988 : : RAW, WAR and WAW information for it. */
1989 : 14379 : if (dump_enabled_p ())
1990 : 757 : dump_printf (MSG_NOTE, "merged alias checks:\n");
1991 : 45754 : FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1992 : : {
1993 : 31375 : unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1994 : 31375 : unsigned int swapped = (alias_pair->flags & swap_mask);
1995 : 31375 : if (swapped == DR_ALIAS_SWAPPED)
1996 : 9455 : std::swap (alias_pair->first, alias_pair->second);
1997 : 21920 : else if (swapped != DR_ALIAS_UNSWAPPED)
1998 : 1573 : alias_pair->flags |= DR_ALIAS_ARBITRARY;
1999 : 31375 : alias_pair->flags &= ~swap_mask;
2000 : 31375 : if (dump_enabled_p ())
2001 : 1021 : dump_alias_pair (alias_pair, " ");
2002 : : }
2003 : : }
2004 : :
2005 : : /* A subroutine of create_intersect_range_checks, with a subset of the
2006 : : same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
2007 : : to optimize cases in which the references form a simple RAW, WAR or
2008 : : WAW dependence. Return true on success, storing the check in *COND_EXPR. */
2009 : :
2010 : : static bool
2011 : 4180 : create_ifn_alias_checks (tree *cond_expr,
2012 : : const dr_with_seg_len_pair_t &alias_pair)
2013 : : {
2014 : 4180 : const dr_with_seg_len& dr_a = alias_pair.first;
2015 : 4180 : const dr_with_seg_len& dr_b = alias_pair.second;
2016 : :
2017 : : /* Check for cases in which:
2018 : :
2019 : : (a) we have a known RAW, WAR or WAW dependence
2020 : : (b) the accesses are well-ordered in both the original and new code
2021 : : (see the comment above the DR_ALIAS_* flags for details); and
2022 : : (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2023 : 4180 : if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2024 : : return false;
2025 : :
2026 : : /* Make sure that both DRs access the same pattern of bytes,
2027 : : with a constant length and step. */
2028 : 2861 : poly_uint64 seg_len;
2029 : 2861 : if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2030 : 2481 : || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2031 : 2477 : || maybe_ne (dr_a.access_size, dr_b.access_size)
2032 : 2437 : || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2033 : 5298 : || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2034 : 439 : return false;
2035 : :
2036 : 2422 : unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2037 : 2422 : tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2038 : 2422 : tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2039 : :
2040 : : /* See whether the target supports what we want to do. WAW checks are
2041 : : equivalent to WAR checks here. Try a length of SEG_LEN plus the step first, then fall back to SEG_LEN plus the access size. */
2042 : 4844 : internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2043 : 2422 : ? IFN_CHECK_RAW_PTRS
2044 : : : IFN_CHECK_WAR_PTRS);
2045 : 2422 : unsigned int align = MIN (dr_a.align, dr_b.align);
2046 : 2422 : poly_uint64 full_length = seg_len + bytes;
2047 : 2422 : if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2048 : : full_length, align))
2049 : : {
2050 : 2422 : full_length = seg_len + dr_a.access_size;
2051 : 2422 : if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2052 : : full_length, align))
2053 : : return false;
2054 : : }
2055 : :
2056 : : /* Commit to using this form of test. */
2057 : 0 : addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2058 : 0 : addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2059 : :
2060 : 0 : addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2061 : 0 : addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2062 : :
2063 : 0 : *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2064 : : ifn, boolean_type_node,
2065 : : 4, addr_a, addr_b,
2066 : 0 : size_int (full_length),
2067 : 0 : size_int (align));
2068 : :
2069 : 0 : if (dump_enabled_p ())
2070 : : {
2071 : 0 : if (ifn == IFN_CHECK_RAW_PTRS)
2072 : 0 : dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2073 : : else
2074 : 0 : dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2075 : : }
2076 : : return true;
2077 : : }
2078 : :
2079 : : /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2080 : : free of aliases, using a condition based on index values instead
2081 : : of a condition based on addresses. Return true on success,
2082 : : storing the condition in *COND_EXPR.
2083 : :
2084 : : This can only be done if the two data references in ALIAS_PAIR access
2085 : : the same array object and the index is the only difference. For example,
2086 : : if the two data references are DR_A and DR_B:
2087 : :
2088 : : DR_A DR_B
2089 : : data-ref arr[i] arr[j]
2090 : : base_object arr arr
2091 : : index {i_0, +, 1}_loop {j_0, +, 1}_loop
2092 : :
2093 : : The addresses and their index are like:
2094 : :
2095 : : |<- ADDR_A ->| |<- ADDR_B ->|
2096 : : ------------------------------------------------------->
2097 : : | | | | | | | | | |
2098 : : ------------------------------------------------------->
2099 : : i_0 ... i_0+4 j_0 ... j_0+4
2100 : :
2101 : : We can create an overlap test based on index rather than address:
2102 : :
2103 : : (unsigned) (i_0 - j_0 + 3) <= 6
2104 : :
2105 : : i.e. the indices are less than 4 apart. The condition stored in *COND_EXPR is the negation of such a test (it is built with GT_EXPR against the limit).
2106 : :
2107 : : Note that the evolution step of the index needs to be considered in the comparison. */
2108 : :
2109 : : static bool
2110 : 4331 : create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2111 : : const dr_with_seg_len_pair_t &alias_pair)
2112 : : {
2113 : 4331 : const dr_with_seg_len &dr_a = alias_pair.first;
2114 : 4331 : const dr_with_seg_len &dr_b = alias_pair.second;
2115 : 4331 : if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2116 : 4331 : || integer_zerop (DR_STEP (dr_a.dr))
2117 : 4087 : || integer_zerop (DR_STEP (dr_b.dr))
2118 : 16472 : || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2119 : 329 : return false;
2120 : :
2121 : 4002 : poly_uint64 seg_len1, seg_len2;
2122 : 4002 : if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2123 : 4002 : || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2124 : 258 : return false;
2125 : :
2126 : 3744 : if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2127 : : return false;
2128 : :
2129 : 3744 : if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2130 : : return false;
2131 : :
2132 : 152 : if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2133 : : return false;
2134 : :
2135 : 152 : gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2136 : :
2137 : 152 : bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2138 : 152 : unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2139 : 152 : if (neg_step)
2140 : : {
2141 : 30 : abs_step = -abs_step;
2142 : 30 : seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2143 : 30 : seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2144 : : }
2145 : :
2146 : : /* Infer the number of iterations with which the memory segment is accessed
2147 : : by DR. In other words, we check whether the memory segment accessed by
2148 : : DR_A in some iterations intersects the memory segment accessed by DR_B
2149 : : in the same number of iterations.
2150 : : Note that segment length is a linear function of the number of iterations with
2151 : : DR_STEP as the coefficient. */
2152 : 152 : poly_uint64 niter_len1, niter_len2;
2153 : 152 : if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2154 : 152 : || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2155 : : return false;
2156 : :
2157 : : /* Divide each access size by the byte step, rounding up. */
2158 : 152 : poly_uint64 niter_access1, niter_access2;
2159 : 152 : if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2160 : : abs_step, &niter_access1)
2161 : 152 : || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2162 : : abs_step, &niter_access2))
2163 : : return false;
2164 : :
2165 : 152 : bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2166 : :
2167 : 152 : int found = -1;
2168 : 311 : for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2169 : : {
2170 : 160 : tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2171 : 160 : tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2172 : : /* Two indices must be the same if they are not scev, or not scev wrt
2173 : : the current loop being vectorized. */
2174 : 160 : if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2175 : 152 : || TREE_CODE (access2) != POLYNOMIAL_CHREC
2176 : 152 : || CHREC_VARIABLE (access1) != (unsigned)loop->num
2177 : 312 : || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2178 : : {
2179 : 8 : if (operand_equal_p (access1, access2, 0))
2180 : 7 : continue;
2181 : :
2182 : : return false;
2183 : : }
2184 : 152 : if (found >= 0)
2185 : : return false;
2186 : 152 : found = i;
2187 : : }
2188 : :
2189 : : /* Ought not to happen in practice, since if all accesses are equal then the
2190 : : alias should be decidable at compile time. */
2191 : 151 : if (found < 0)
2192 : : return false;
2193 : :
2194 : : /* The two indices must have the same step. */
2195 : 151 : tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2196 : 151 : tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2197 : 151 : if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2198 : : return false;
2199 : :
2200 : 151 : tree idx_step = CHREC_RIGHT (access1);
2201 : : /* Index must have const step, otherwise DR_STEP won't be constant. */
2202 : 151 : gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2203 : : /* Index must evaluate in the same direction as DR. */
2204 : 151 : gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2205 : :
2206 : 151 : tree min1 = CHREC_LEFT (access1);
2207 : 151 : tree min2 = CHREC_LEFT (access2);
2208 : 151 : if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2209 : : return false;
2210 : :
2211 : : /* Ideally, alias can be checked against loop's control IV, but we
2212 : : need to prove linear mapping between control IV and reference
2213 : : index. Although that should be true, we check against (array)
2214 : : index of data reference. Like segment length, index length is
2215 : : a linear function of the number of iterations with index_step as
2216 : : the coefficient, i.e, niter_len * idx_step. */
2217 : 151 : offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2218 : : SIGNED);
2219 : 151 : if (neg_step)
2220 : 30 : abs_idx_step = -abs_idx_step;
2221 : 302 : poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2222 : 302 : poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2223 : 151 : poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2224 : 151 : poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2225 : :
2226 : 151 : gcc_assert (known_ge (idx_len1, 0)
2227 : : && known_ge (idx_len2, 0)
2228 : : && known_ge (idx_access1, 0)
2229 : : && known_ge (idx_access2, 0));
2230 : :
2231 : : /* Each access has the following pattern, with lengths measured
2232 : : in units of INDEX:
2233 : :
2234 : : <-- idx_len -->
2235 : : <--- A: -ve step --->
2236 : : +-----+-------+-----+-------+-----+
2237 : : | n-1 | ..... | 0 | ..... | n-1 |
2238 : : +-----+-------+-----+-------+-----+
2239 : : <--- B: +ve step --->
2240 : : <-- idx_len -->
2241 : : |
2242 : : min
2243 : :
2244 : : where "n" is the number of scalar iterations covered by the segment
2245 : : and where each access spans idx_access units.
2246 : :
2247 : : A is the range of bytes accessed when the step is negative,
2248 : : B is the range when the step is positive.
2249 : :
2250 : : When checking for general overlap, we need to test whether
2251 : : the range:
2252 : :
2253 : : [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2254 : :
2255 : : overlaps:
2256 : :
2257 : : [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2258 : :
2259 : : where:
2260 : :
2261 : : low_offsetN = +ve step ? 0 : -idx_lenN;
2262 : : high_offsetN = +ve step ? idx_lenN : 0;
2263 : :
2264 : : This is equivalent to testing whether:
2265 : :
2266 : : min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2267 : : && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2268 : :
2269 : : Converting this into a single test, there is an overlap if:
2270 : :
2271 : : 0 <= min2 - min1 + bias <= limit
2272 : :
2273 : : where bias = high_offset2 + idx_access2 - 1 - low_offset1
2274 : : limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2275 : : + (high_offset2 - low_offset2 + idx_access2 - 1)
2276 : : i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2277 : :
2278 : : Combining the tests requires limit to be computable in an unsigned
2279 : : form of the index type; if it isn't, we fall back to the usual
2280 : : pointer-based checks.
2281 : :
2282 : : We can do better if DR_B is a write and if DR_A and DR_B are
2283 : : well-ordered in both the original and the new code (see the
2284 : : comment above the DR_ALIAS_* flags for details). In this case
2285 : : we know that for each i in [0, n-1], the write performed by
2286 : : access i of DR_B occurs after access numbers j<=i of DR_A in
2287 : : both the original and the new code. Any write or anti
2288 : : dependencies wrt those DR_A accesses are therefore maintained.
2289 : :
2290 : : We just need to make sure that each individual write in DR_B does not
2291 : : overlap any higher-indexed access in DR_A; such DR_A accesses happen
2292 : : after the DR_B access in the original code but happen before it in
2293 : : the new code.
2294 : :
2295 : : We know the steps for both accesses are equal, so by induction, we
2296 : : just need to test whether the first write of DR_B overlaps a later
2297 : : access of DR_A. In other words, we need to move min1 along by
2298 : : one iteration:
2299 : :
2300 : : min1' = min1 + idx_step
2301 : :
2302 : : and use the ranges:
2303 : :
2304 : : [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2305 : :
2306 : : and:
2307 : :
2308 : : [min2, min2 + idx_access2 - 1]
2309 : :
2310 : : where:
2311 : :
2312 : : low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2313 : : high_offset1' = +ve step ? idx_len1 - |idx_step| : 0. */
2314 : 151 : if (waw_or_war_p)
2315 : 120 : idx_len1 -= abs_idx_step;
2316 : :
2317 : 151 : poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2318 : 151 : if (!waw_or_war_p)
2319 : 151 : limit += idx_len2;
2320 : :
2321 : 151 : tree utype = unsigned_type_for (TREE_TYPE (min1));
2322 : 151 : if (!wi::fits_to_tree_p (limit, utype))
2323 : : return false;
2324 : :
2325 : 151 : poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2326 : 151 : poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2327 : 151 : poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2328 : : /* Equivalent to adding IDX_STEP to MIN1. */
2329 : 151 : if (waw_or_war_p)
2330 : 120 : bias -= wi::to_offset (idx_step);
2331 : :
2332 : 151 : tree subject = fold_build2 (MINUS_EXPR, utype,
2333 : : fold_convert (utype, min2),
2334 : : fold_convert (utype, min1));
2335 : 151 : subject = fold_build2 (PLUS_EXPR, utype, subject,
2336 : : wide_int_to_tree (utype, bias));
2337 : 151 : tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2338 : : wide_int_to_tree (utype, limit));
2339 : 151 : if (*cond_expr)
2340 : 0 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2341 : : *cond_expr, part_cond_expr);
2342 : : else
2343 : 151 : *cond_expr = part_cond_expr;
2344 : 151 : if (dump_enabled_p ())
2345 : : {
2346 : 133 : if (waw_or_war_p)
2347 : 103 : dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2348 : : else
2349 : 30 : dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2350 : : }
2351 : : return true;
2352 : : }
2353 : :
2354 : : /* A subroutine of create_intersect_range_checks, with a subset of the
2355 : : same arguments. Try to optimize cases in which the second access
2356 : : is a write and in which some overlap is valid. */
2357 : :
2358 : : static bool
2359 : 4180 : create_waw_or_war_checks (tree *cond_expr,
2360 : : const dr_with_seg_len_pair_t &alias_pair)
2361 : : {
2362 : 4180 : const dr_with_seg_len& dr_a = alias_pair.first;
2363 : 4180 : const dr_with_seg_len& dr_b = alias_pair.second;
2364 : :
2365 : : /* Check for cases in which:
2366 : :
2367 : : (a) DR_B is always a write;
2368 : : (b) the accesses are well-ordered in both the original and new code
2369 : : (see the comment above the DR_ALIAS_* flags for details); and
2370 : : (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2371 : 4180 : if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2372 : : return false;
2373 : :
2374 : : /* Check for equal (but possibly variable) steps. */
2375 : 2818 : tree step = DR_STEP (dr_a.dr);
2376 : 2818 : if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2377 : : return false;
2378 : :
2379 : : /* Make sure that we can operate on sizetype without loss of precision. */
2380 : 2445 : tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2381 : 2445 : if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2382 : : return false;
2383 : :
2384 : : /* All addresses involved are known to have a common alignment ALIGN.
2385 : : We can therefore subtract ALIGN from an exclusive endpoint to get
2386 : : an inclusive endpoint. In the best (and common) case, ALIGN is the
2387 : : same as the access sizes of both DRs, and so subtracting ALIGN
2388 : : cancels out the addition of an access size. */
2389 : 2445 : unsigned int align = MIN (dr_a.align, dr_b.align);
2390 : 2445 : poly_uint64 last_chunk_a = dr_a.access_size - align;
2391 : 2445 : poly_uint64 last_chunk_b = dr_b.access_size - align;
2392 : :
2393 : : /* Get a boolean expression that is true when the step is negative. */
2394 : 2445 : tree indicator = dr_direction_indicator (dr_a.dr);
2395 : 2445 : tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2396 : : fold_convert (ssizetype, indicator),
2397 : : ssize_int (0));
2398 : :
2399 : : /* Get lengths in sizetype. */
2400 : 2445 : tree seg_len_a
2401 : 2445 : = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2402 : 2445 : step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2403 : :
2404 : : /* Each access has the following pattern:
2405 : :
2406 : : <- |seg_len| ->
2407 : : <--- A: -ve step --->
2408 : : +-----+-------+-----+-------+-----+
2409 : : | n-1 | ..... | 0 | ..... | n-1 |
2410 : : +-----+-------+-----+-------+-----+
2411 : : <--- B: +ve step --->
2412 : : <- |seg_len| ->
2413 : : |
2414 : : base address
2415 : :
2416 : : where "n" is the number of scalar iterations covered by the segment.
2417 : :
2418 : : A is the range of bytes accessed when the step is negative,
2419 : : B is the range when the step is positive.
2420 : :
2421 : : We know that DR_B is a write. We also know (from checking that
2422 : : DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2423 : : the write performed by access i of DR_B occurs after access numbers
2424 : : j<=i of DR_A in both the original and the new code. Any write or
2425 : : anti dependencies wrt those DR_A accesses are therefore maintained.
2426 : :
2427 : : We just need to make sure that each individual write in DR_B does not
2428 : : overlap any higher-indexed access in DR_A; such DR_A accesses happen
2429 : : after the DR_B access in the original code but happen before it in
2430 : : the new code.
2431 : :
2432 : : We know the steps for both accesses are equal, so by induction, we
2433 : : just need to test whether the first write of DR_B overlaps a later
2434 : : access of DR_A. In other words, we need to move addr_a along by
2435 : : one iteration:
2436 : :
2437 : : addr_a' = addr_a + step
2438 : :
2439 : : and check whether:
2440 : :
2441 : : [addr_b, addr_b + last_chunk_b]
2442 : :
2443 : : overlaps:
2444 : :
2445 : : [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2446 : :
2447 : : where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2448 : :
2449 : : low_offset_a = +ve step ? 0 : seg_len_a - step
2450 : : high_offset_a = +ve step ? seg_len_a - step : 0
2451 : :
2452 : : This is equivalent to testing whether:
2453 : :
2454 : : addr_a' + low_offset_a <= addr_b + last_chunk_b
2455 : : && addr_b <= addr_a' + high_offset_a + last_chunk_a
2456 : :
2457 : : Converting this into a single test, there is an overlap if:
2458 : :
2459 : : 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2460 : :
2461 : : where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2462 : :
2463 : : If DR_A is performed, limit + |step| - last_chunk_b is known to be
2464 : : less than the size of the object underlying DR_A. We also know
2465 : : that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2466 : : guaranteed at compile time. There can therefore be no overflow if
2467 : : "limit" is calculated in an unsigned type with pointer precision. */
2468 : 2445 : tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2469 : : DR_OFFSET (dr_a.dr));
2470 : 2445 : addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2471 : :
2472 : 2445 : tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2473 : : DR_OFFSET (dr_b.dr));
2474 : 2445 : addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2475 : :
2476 : : /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2477 : 2445 : addr_a = fold_build_pointer_plus (addr_a, step);
2478 : 2445 : tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2479 : : seg_len_a, step);
2480 : 2445 : if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2481 : 0 : seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2482 : :
2483 : 2445 : tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2484 : : seg_len_a_minus_step, size_zero_node);
2485 : 2445 : if (!CONSTANT_CLASS_P (low_offset_a))
2486 : 0 : low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2487 : :
2488 : : /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2489 : : but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2490 : 2445 : tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2491 : : low_offset_a);
2492 : :
2493 : : /* The amount added to addr_b - addr_a'. */
2494 : 2445 : tree bias = fold_build2 (MINUS_EXPR, sizetype,
2495 : : size_int (last_chunk_b), low_offset_a);
2496 : :
2497 : 2445 : tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2498 : 2445 : limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2499 : : size_int (last_chunk_a + last_chunk_b));
2500 : :
2501 : 2445 : tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2502 : 2445 : subject = fold_build2 (PLUS_EXPR, sizetype,
2503 : : fold_convert (sizetype, subject), bias);
2504 : :
2505 : 2445 : *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2506 : 2445 : if (dump_enabled_p ())
2507 : 319 : dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2508 : : return true;
2509 : : }
2510 : :
2511 : : /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2512 : : every address ADDR accessed by D:
2513 : :
2514 : : *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2515 : :
2516 : : In this case, every element accessed by D is aligned to at least
2517 : : ALIGN bytes.
2518 : :
2519 : : If ALIGN is zero then instead set *SEG_MIN_OUT and *SEG_MAX_OUT so that:
2520 : :
2521 : : *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT. */
2522 : :
2523 : : static void
2524 : 3470 : get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2525 : : tree *seg_max_out, HOST_WIDE_INT align)
2526 : : {
2527 : : /* Each access has the following pattern:
2528 : :
2529 : : <- |seg_len| ->
2530 : : <--- A: -ve step --->
2531 : : +-----+-------+-----+-------+-----+
2532 : : | n-1 | ..... | 0 | ..... | n-1 |
2533 : : +-----+-------+-----+-------+-----+
2534 : : <--- B: +ve step --->
2535 : : <- |seg_len| ->
2536 : : |
2537 : : base address
2538 : :
2539 : : where "n" is the number of scalar iterations covered by the segment.
2540 : : (This should be VF for a particular pair if we know that both steps
2541 : : are the same, otherwise it will be the full number of scalar loop
2542 : : iterations.)
2543 : :
2544 : : A is the range of bytes accessed when the step is negative,
2545 : : B is the range when the step is positive.
2546 : :
2547 : : If the access size is "access_size" bytes, the lowest addressed byte is:
2548 : :
2549 : : base + (step < 0 ? seg_len : 0) [LB]
2550 : :
2551 : : and the highest addressed byte is always below:
2552 : :
2553 : : base + (step < 0 ? 0 : seg_len) + access_size [UB]
2554 : :
2555 : : Thus:
2556 : :
2557 : : LB <= ADDR < UB
2558 : :
2559 : : If ALIGN is nonzero, all three values are aligned to at least ALIGN
2560 : : bytes, so:
2561 : :
2562 : : LB <= ADDR <= UB - ALIGN
2563 : :
2564 : : where "- ALIGN" folds naturally with the "+ access_size" and often
2565 : : cancels it out.
2566 : :
2567 : : We don't try to simplify LB and UB beyond this (e.g. by using
2568 : : MIN and MAX based on whether seg_len rather than the stride is
2569 : : negative) because it is possible for the absolute size of the
2570 : : segment to overflow the range of a ssize_t.
2571 : :
2572 : : Keeping the pointer_plus outside of the cond_expr should allow
2573 : : the cond_exprs to be shared with other alias checks. */
2574 : 3470 : tree indicator = dr_direction_indicator (d.dr);
2575 : 3470 : tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2576 : : fold_convert (ssizetype, indicator),
2577 : : ssize_int (0)); /* True when the indicator, as ssizetype, is negative. */
2578 : 3470 : tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2579 : : DR_OFFSET (d.dr));
2580 : 3470 : addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr)); /* Base address + offset + init. */
2581 : 3470 : tree seg_len
2582 : 3470 : = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2583 : :
2584 : 3470 : tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2585 : : seg_len, size_zero_node); /* Offset of [LB] from ADDR_BASE. */
2586 : 3470 : tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2587 : : size_zero_node, seg_len); /* Offset of [UB] before the size adjustment. */
2588 : 3470 : max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2589 : : size_int (d.access_size - align)); /* Adds access_size and subtracts ALIGN. */
2590 : :
2591 : 3470 : *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2592 : 3470 : *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2593 : 3470 : }
2594 : :
2595 : : /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2596 : : storing the condition in *COND_EXPR. The fallback is to generate
2597 : : a test that the two accesses do not overlap:
2598 : :
2599 : : end_a <= start_b || end_b <= start_a. */
2600 : :
2601 : : static void
2602 : 4331 : create_intersect_range_checks (class loop *loop, tree *cond_expr,
2603 : : const dr_with_seg_len_pair_t &alias_pair)
2604 : : {
2605 : 4331 : const dr_with_seg_len& dr_a = alias_pair.first;
2606 : 4331 : const dr_with_seg_len& dr_b = alias_pair.second;
2607 : 4331 : *cond_expr = NULL_TREE; /* Cleared before trying the specialised checks below. */
2608 : 4331 : if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2609 : 2596 : return;
2610 : :
2611 : 4180 : if (create_ifn_alias_checks (cond_expr, alias_pair))
2612 : : return;
2613 : :
2614 : 4180 : if (create_waw_or_war_checks (cond_expr, alias_pair))
2615 : : return;
2616 : :
2617 : 1735 : unsigned HOST_WIDE_INT min_align; /* Passed as ALIGN to get_segment_min_max. */
2618 : 1735 : tree_code cmp_code; /* Comparison used for both halves of the overlap test. */
2619 : : /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2620 : : are equivalent. This is just an optimization heuristic. */
2621 : 1735 : if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2622 : 1631 : && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2623 : : {
2624 : : /* In this case adding access_size to seg_len is likely to give
2625 : : a simple X * step, where X is either the number of scalar
2626 : : iterations or the vectorization factor. We're better off
2627 : : keeping that, rather than subtracting an alignment from it.
2628 : :
2629 : : In this case the maximum values are exclusive and so there is
2630 : : no alias if the maximum of one segment equals the minimum
2631 : : of another. */
2632 : 1535 : min_align = 0;
2633 : 1535 : cmp_code = LE_EXPR;
2634 : : }
2635 : : else
2636 : : {
2637 : : /* Calculate the minimum alignment shared by all four pointers,
2638 : : then arrange for this alignment to be subtracted from the
2639 : : exclusive maximum values to get inclusive maximum values.
2640 : : This "- min_align" is cumulative with a "+ access_size"
2641 : : in the calculation of the maximum values. In the best
2642 : : (and common) case, the two cancel each other out, leaving
2643 : : us with an inclusive bound based only on seg_len. In the
2644 : : worst case we're simply adding a smaller number than before.
2645 : :
2646 : : Because the maximum values are inclusive, there is an alias
2647 : : if the maximum value of one segment is equal to the minimum
2648 : : value of the other. */
2649 : 200 : min_align = std::min (dr_a.align, dr_b.align);
2650 : 200 : min_align = std::min (min_align, known_alignment (dr_a.access_size));
2651 : 200 : min_align = std::min (min_align, known_alignment (dr_b.access_size));
2652 : 200 : cmp_code = LT_EXPR;
2653 : : }
2654 : :
2655 : 1735 : tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2656 : 1735 : get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2657 : 1735 : get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2658 : :
2659 : 1735 : *cond_expr
2660 : 1735 : = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2661 : : fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2662 : : fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min)); /* A ends before B starts, or B ends before A starts. */
2663 : 1735 : if (dump_enabled_p ())
2664 : 267 : dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2665 : : }
2666 : :
2667 : : /* Create a conditional expression that represents the run-time checks for
2668 : : overlapping of address ranges represented by the list of data reference
2669 : : pairs passed in ALIAS_PAIRS. Data references are in LOOP. The per-pair
2670 : : checks are ANDed together, and with any condition already in *COND_EXPR;
2671 : : the result is the condition that selects the loop version at runtime. */
2672 : :
2673 : : void
2674 : 2968 : create_runtime_alias_checks (class loop *loop,
2675 : : const vec<dr_with_seg_len_pair_t> *alias_pairs,
2676 : : tree * cond_expr)
2677 : : {
2678 : 2968 : tree part_cond_expr;
2679 : :
2680 : 2968 : fold_defer_overflow_warnings ();
2681 : 13235 : for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2682 : : {
2683 : 4331 : gcc_assert (alias_pair.flags); /* Every pair must carry a nonzero flags value. */
2684 : 4331 : if (dump_enabled_p ())
2685 : 719 : dump_printf (MSG_NOTE,
2686 : : "create runtime check for data references %T and %T\n",
2687 : 719 : DR_REF (alias_pair.first.dr),
2688 : 719 : DR_REF (alias_pair.second.dr));
2689 : :
2690 : : /* Create the condition expression for this pair of data references. */
2691 : 4331 : create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2692 : 4331 : if (*cond_expr)
2693 : 4271 : *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2694 : : *cond_expr, part_cond_expr); /* Accumulate onto the existing condition. */
2695 : : else
2696 : 60 : *cond_expr = part_cond_expr; /* First condition seen. */
2697 : : }
2698 : 2968 : fold_undefer_and_ignore_overflow_warnings ();
2699 : 2968 : }
2700 : :
2701 : : /* Return true if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2702 : : expressions: the same node after STRIP_NOPS, or unary/binary trees with equal codes and recursively equal operands. */
2703 : : static bool
2704 : 0 : dr_equal_offsets_p1 (tree offset1, tree offset2)
2705 : : {
2706 : 0 : bool res;
2707 : :
2708 : 0 : STRIP_NOPS (offset1);
2709 : 0 : STRIP_NOPS (offset2);
2710 : :
2711 : 0 : if (offset1 == offset2) /* Same tree node. */
2712 : : return true;
2713 : :
2714 : 0 : if (TREE_CODE (offset1) != TREE_CODE (offset2)
2715 : 0 : || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) /* Codes differ, or there are no operands to recurse into. */
2716 : : return false;
2717 : :
2718 : 0 : res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2719 : 0 : TREE_OPERAND (offset2, 0));
2720 : :
2721 : 0 : if (!res || !BINARY_CLASS_P (offset1)) /* Mismatch, or a unary node with only operand 0. */
2722 : : return res;
2723 : :
2724 : 0 : res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2725 : 0 : TREE_OPERAND (offset2, 1));
2726 : :
2727 : 0 : return res;
2728 : : }
2729 : :
2730 : : /* Return true if DR_OFFSET (DRA) and DR_OFFSET (DRB) are identical according to dr_equal_offsets_p1. */
2731 : : bool
2732 : 0 : dr_equal_offsets_p (struct data_reference *dra,
2733 : : struct data_reference *drb)
2734 : : {
2735 : 0 : tree offset1, offset2;
2736 : :
2737 : 0 : offset1 = DR_OFFSET (dra);
2738 : 0 : offset2 = DR_OFFSET (drb);
2739 : :
2740 : 0 : return dr_equal_offsets_p1 (offset1, offset2);
2741 : : }
2742 : :
2743 : : /* Returns true if FNA == FNB, i.e. both have the same length and every pair of elements is operand_equal_p. */
2744 : :
2745 : : static bool
2746 : 0 : affine_function_equal_p (affine_fn fna, affine_fn fnb)
2747 : : {
2748 : 0 : unsigned i, n = fna.length ();
2749 : :
2750 : 0 : if (n != fnb.length ()) /* Different lengths never compare equal. */
2751 : : return false;
2752 : :
2753 : 0 : for (i = 0; i < n; i++)
2754 : 0 : if (!operand_equal_p (fna[i], fnb[i], 0))
2755 : : return false;
2756 : :
2757 : : return true;
2758 : : }
2759 : :
2760 : : /* If all the functions in CF are the same, returns one of them (CF->fns[0]),
2761 : : otherwise, or if CF is not CF_NONTRIVIAL_P, returns an empty affine_fn (). */
2762 : :
2763 : : static affine_fn
2764 : 2080074 : common_affine_function (conflict_function *cf)
2765 : : {
2766 : 2080074 : unsigned i;
2767 : 2080074 : affine_fn comm;
2768 : :
2769 : 2080074 : if (!CF_NONTRIVIAL_P (cf))
2770 : 0 : return affine_fn ();
2771 : :
2772 : 2080074 : comm = cf->fns[0]; /* Candidate that every other function must match. */
2773 : :
2774 : 2080074 : for (i = 1; i < cf->n; i++)
2775 : 0 : if (!affine_function_equal_p (comm, cf->fns[i]))
2776 : 0 : return affine_fn ();
2777 : :
2778 : 2080074 : return comm;
2779 : : }
2780 : :
2781 : : /* Returns the base of the affine function FN, i.e. its element 0. */
2782 : :
2783 : : static tree
2784 : 1195320 : affine_function_base (affine_fn fn)
2785 : : {
2786 : 0 : return fn[0];
2787 : : }
2788 : :
2789 : : /* Returns true if FN is a constant, i.e. every element after the base (element 0) is integer_zerop. */
2790 : :
2791 : : static bool
2792 : 1195624 : affine_function_constant_p (affine_fn fn)
2793 : : {
2794 : 1195624 : unsigned i;
2795 : 1195624 : tree coef;
2796 : :
2797 : 1247469 : for (i = 1; fn.iterate (i, &coef); i++) /* Start at 1: element 0 is the base. */
2798 : 52149 : if (!integer_zerop (coef))
2799 : : return false;
2800 : :
2801 : : return true;
2802 : : }
2803 : :
2804 : : /* Returns true if FN is the zero constant function: a zero base and affine_function_constant_p. */
2805 : :
2806 : : static bool
2807 : 155587 : affine_function_zero_p (affine_fn fn)
2808 : : {
2809 : 155587 : return (integer_zerop (affine_function_base (fn))
2810 : 155587 : && affine_function_constant_p (fn));
2811 : : }
2812 : :
2813 : : /* Returns a signed integer type with the largest precision from TA
2814 : : and TB. When the precisions are equal the result is based on TB. */
2815 : :
2816 : : static tree
2817 : 2872258 : signed_type_for_types (tree ta, tree tb)
2818 : : {
2819 : 2872258 : if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2820 : 157 : return signed_type_for (ta);
2821 : : else
2822 : 2872101 : return signed_type_for (tb);
2823 : : }
2824 : :
2825 : : /* Applies operation OP on affine functions FNA and FNB, and returns the
2826 : : result. Elements that only the longer function has are combined with integer_zero_node. */
2827 : :
2828 : : static affine_fn
2829 : 1040037 : affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2830 : : {
2831 : 1040037 : unsigned i, n, m;
2832 : 1040037 : affine_fn ret;
2833 : 1040037 : tree coef;
2834 : :
2835 : 3120111 : if (fnb.length () > fna.length ()) /* N is the shorter length, M the longer. */
2836 : : {
2837 : 0 : n = fna.length ();
2838 : 0 : m = fnb.length ();
2839 : : }
2840 : : else
2841 : : {
2842 : 1040037 : n = fnb.length ();
2843 : : m = fna.length ();
2844 : : }
2845 : :
2846 : 1040037 : ret.create (m);
2847 : 2132223 : for (i = 0; i < n; i++) /* Elements present in both functions. */
2848 : : {
2849 : 2184372 : tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2850 : 1092186 : TREE_TYPE (fnb[i]));
2851 : 1092186 : ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2852 : : }
2853 : :
2854 : 1040037 : for (; fna.iterate (i, &coef); i++) /* Remaining elements of FNA, if it is the longer one. */
2855 : 0 : ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2856 : : coef, integer_zero_node));
2857 : 1040037 : for (; fnb.iterate (i, &coef); i++) /* Remaining elements of FNB, if it is the longer one. */
2858 : 0 : ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2859 : : integer_zero_node, coef));
2860 : :
2861 : 1040037 : return ret;
2862 : : }
2863 : :
2864 : : /* Returns the sum of affine functions FNA and FNB, computed by affine_fn_op with PLUS_EXPR. */
2865 : :
2866 : : static affine_fn
2867 : 0 : affine_fn_plus (affine_fn fna, affine_fn fnb)
2868 : : {
2869 : 0 : return affine_fn_op (PLUS_EXPR, fna, fnb);
2870 : : }
2871 : :
2872 : : /* Returns the difference of affine functions FNA and FNB (FNA - FNB), computed by affine_fn_op with MINUS_EXPR. */
2873 : :
2874 : : static affine_fn
2875 : 1040037 : affine_fn_minus (affine_fn fna, affine_fn fnb)
2876 : : {
2877 : 0 : return affine_fn_op (MINUS_EXPR, fna, fnb);
2878 : : }
2879 : :
2880 : : /* Frees affine function FN by calling its release () method. */
2881 : :
2882 : : static void
2883 : 3915593 : affine_fn_free (affine_fn fn)
2884 : : {
2885 : 0 : fn.release ();
2886 : 0 : }
2887 : :
2888 : : /* Determine for each subscript in the data dependence relation DDR
2889 : : the distance and store it in SUB_DISTANCE. Does nothing unless DDR_ARE_DEPENDENT (DDR) is NULL_TREE. */
2890 : :
2891 : : static void
2892 : 3422859 : compute_subscript_distance (struct data_dependence_relation *ddr)
2893 : : {
2894 : 3422859 : conflict_function *cf_a, *cf_b;
2895 : 3422859 : affine_fn fn_a, fn_b, diff;
2896 : :
2897 : 3422859 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2898 : : {
2899 : : unsigned int i;
2900 : :
2901 : 4462896 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2902 : : {
2903 : 1040037 : struct subscript *subscript;
2904 : :
2905 : 1040037 : subscript = DDR_SUBSCRIPT (ddr, i);
2906 : 1040037 : cf_a = SUB_CONFLICTS_IN_A (subscript);
2907 : 1040037 : cf_b = SUB_CONFLICTS_IN_B (subscript);
2908 : :
2909 : 1040037 : fn_a = common_affine_function (cf_a);
2910 : 1040037 : fn_b = common_affine_function (cf_b);
2911 : 1040037 : if (!fn_a.exists () || !fn_b.exists ()) /* No single common function on one side. */
2912 : : {
2913 : 0 : SUB_DISTANCE (subscript) = chrec_dont_know;
2914 : 0 : return; /* Note: later subscripts are left unprocessed. */
2915 : : }
2916 : 1040037 : diff = affine_fn_minus (fn_a, fn_b);
2917 : :
2918 : 1040037 : if (affine_function_constant_p (diff)) /* A constant difference is the distance. */
2919 : 1039733 : SUB_DISTANCE (subscript) = affine_function_base (diff);
2920 : : else
2921 : 304 : SUB_DISTANCE (subscript) = chrec_dont_know;
2922 : :
2923 : 1040037 : affine_fn_free (diff); /* DIFF was created by affine_fn_minus above. */
2924 : : }
2925 : : }
2926 : : }
2927 : :
2928 : : /* Returns the conflict function for "unknown": an XCNEW-allocated conflict_function whose N field is NOT_KNOWN. */
2929 : :
2930 : : static conflict_function *
2931 : 17230380 : conflict_fn_not_known (void)
2932 : : {
2933 : 0 : conflict_function *fn = XCNEW (conflict_function);
2934 : 17230380 : fn->n = NOT_KNOWN;
2935 : :
2936 : 17230380 : return fn;
2937 : : }
2938 : :
2939 : : /* Returns the conflict function for "independent": an XCNEW-allocated conflict_function whose N field is NO_DEPENDENCE. */
2940 : :
2941 : : static conflict_function *
2942 : 5164924 : conflict_fn_no_dependence (void)
2943 : : {
2944 : 0 : conflict_function *fn = XCNEW (conflict_function);
2945 : 5164924 : fn->n = NO_DEPENDENCE;
2946 : :
2947 : 5164924 : return fn;
2948 : : }
2949 : :
2950 : : /* Returns true if the address of OBJ is invariant in LOOP, i.e. no operand checked below contains symbols defined in LOOP. */
2951 : :
2952 : : static bool
2953 : 3660539 : object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2954 : : {
2955 : 3709293 : while (handled_component_p (obj)) /* Walk from OBJ towards its innermost operand 0. */
2956 : : {
2957 : 56003 : if (TREE_CODE (obj) == ARRAY_REF)
2958 : : {
2959 : 17857 : for (int i = 1; i < 4; ++i) /* Check operands 1 to 3 of the ARRAY_REF. */
2960 : 15205 : if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2961 : 15205 : loop->num))
2962 : : return false;
2963 : : }
2964 : 46102 : else if (TREE_CODE (obj) == COMPONENT_REF)
2965 : : {
2966 : 27245 : if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2967 : 27245 : loop->num)) /* Only operand 2 of a COMPONENT_REF is checked. */
2968 : : return false;
2969 : : }
2970 : 48754 : obj = TREE_OPERAND (obj, 0);
2971 : : }
2972 : :
2973 : 3653290 : if (!INDIRECT_REF_P (obj)
2974 : 3653290 : && TREE_CODE (obj) != MEM_REF) /* Not a dereference: nothing further to check. */
2975 : : return true;
2976 : :
2977 : 3631660 : return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2978 : 7263320 : loop->num); /* The dereferenced operand must not vary in LOOP. */
2979 : : }
2980 : :
2981 : : /* Returns false if we can prove that data references A and B do not alias,
2982 : : true otherwise. If LOOP_NEST is null no cross-iteration aliases are
2983 : : considered. */
2984 : :
2985 : : bool
2986 : 13340072 : dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2987 : : class loop *loop_nest)
2988 : : {
2989 : 13340072 : tree addr_a = DR_BASE_OBJECT (a);
2990 : 13340072 : tree addr_b = DR_BASE_OBJECT (b);
2991 : :
2992 : : /* If we are not processing a loop nest but scalar code we
2993 : : do not need to care about possible cross-iteration dependences
2994 : : and thus can process the full original reference. Do so,
2995 : : similar to how loop invariant motion applies extra offset-based
2996 : : disambiguation. */
2997 : 13340072 : if (!loop_nest)
2998 : : {
2999 : 7333350 : tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
3000 : 7333350 : tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
3001 : :
3002 : 7333350 : if (DR_BASE_ADDRESS (a)
3003 : 7324122 : && DR_BASE_ADDRESS (b)
3004 : 7323778 : && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
3005 : 6524157 : && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
3006 : 6435014 : && tree_size_a
3007 : 6435014 : && tree_size_b
3008 : 6435005 : && poly_int_tree_p (tree_size_a)
3009 : 6434982 : && poly_int_tree_p (tree_size_b)
3010 : 13768332 : && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
3011 : 6434982 : wi::to_poly_widest (tree_size_a),
3012 : 6434982 : wi::to_poly_widest (DR_INIT (b)),
3013 : 6434982 : wi::to_poly_widest (tree_size_b)))
3014 : : {
3015 : 4635212 : gcc_assert (integer_zerop (DR_STEP (a))
3016 : : && integer_zerop (DR_STEP (b)));
3017 : 4635241 : return false; /* Same base and offset, but the DR_INIT/size ranges cannot overlap. */
3018 : : }
3019 : :
3020 : 10792552 : aff_tree off1, off2;
3021 : : poly_widest_int size1, size2;
3022 : 2698138 : get_inner_reference_aff (DR_REF (a), &off1, &size1);
3023 : 2698138 : get_inner_reference_aff (DR_REF (b), &off2, &size2);
3024 : 2698138 : aff_combination_scale (&off1, -1); /* Negate OFF1 ... */
3025 : 2698138 : aff_combination_add (&off2, &off1); /* ... so that OFF2 becomes the difference of the two offsets. */
3026 : 2698138 : if (aff_comb_cannot_overlap_p (&off2, size1, size2))
3027 : 29 : return false;
3028 : 2698138 : }
3029 : :
3030 : 8704831 : if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3031 : 6311648 : && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3032 : : /* For cross-iteration dependences the cliques must be valid for the
3033 : : whole loop, not just individual iterations. */
3034 : 6080765 : && (!loop_nest
3035 : 5866873 : || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3036 : 5072009 : || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3037 : 5945274 : && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3038 : 14460835 : && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3039 : : return false; /* Same dependence clique but different dependence bases. */
3040 : :
3041 : : /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3042 : : do not know the size of the base-object. So we cannot do any
3043 : : offset/overlap based analysis but have to rely on points-to
3044 : : information only. */
3045 : 8541681 : if (TREE_CODE (addr_a) == MEM_REF
3046 : 8541681 : && (DR_UNCONSTRAINED_BASE (a)
3047 : 4900309 : || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3048 : : {
3049 : : /* For true dependences we can apply TBAA. */
3050 : 3987488 : if (flag_strict_aliasing
3051 : 3817009 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3052 : 4132232 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3053 : 144744 : get_alias_set (DR_REF (b))))
3054 : : return false;
3055 : 3970538 : if (TREE_CODE (addr_b) == MEM_REF)
3056 : 3893272 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3057 : 7786544 : TREE_OPERAND (addr_b, 0));
3058 : : else
3059 : 77266 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3060 : 77266 : build_fold_addr_expr (addr_b));
3061 : : }
3062 : 4554193 : else if (TREE_CODE (addr_b) == MEM_REF
3063 : 4554193 : && (DR_UNCONSTRAINED_BASE (b)
3064 : 2186255 : || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3065 : : {
3066 : : /* For true dependences we can apply TBAA. */
3067 : 278126 : if (flag_strict_aliasing
3068 : 225630 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3069 : 343306 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3070 : 65180 : get_alias_set (DR_REF (b))))
3071 : : return false;
3072 : 263708 : if (TREE_CODE (addr_a) == MEM_REF)
3073 : 152874 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3074 : 305748 : TREE_OPERAND (addr_b, 0));
3075 : : else
3076 : 110834 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3077 : 221668 : TREE_OPERAND (addr_b, 0));
3078 : : }
3079 : : /* If dr_analyze_innermost failed to handle a component we are
3080 : : possibly left with a non-base in which case we didn't analyze
3081 : : a possible evolution of the base when analyzing a loop. */
3082 : 4276067 : else if (loop_nest
3083 : 6026019 : && (handled_component_p (addr_a) || handled_component_p (addr_b)))
3084 : : {
3085 : : /* For true dependences we can apply TBAA. */
3086 : 127763 : if (flag_strict_aliasing
3087 : 126517 : && DR_IS_WRITE (a) && DR_IS_READ (b)
3088 : 148753 : && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3089 : 20990 : get_alias_set (DR_REF (b))))
3090 : : return false;
3091 : 121064 : if (TREE_CODE (addr_a) == MEM_REF)
3092 : 14810 : return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3093 : 14810 : build_fold_addr_expr (addr_b));
3094 : 106254 : else if (TREE_CODE (addr_b) == MEM_REF)
3095 : 23291 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3096 : 46582 : TREE_OPERAND (addr_b, 0));
3097 : : else
3098 : 82963 : return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3099 : 82963 : build_fold_addr_expr (addr_b));
3100 : : }
3101 : :
3102 : : /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3103 : : that is being subsetted in the loop nest. Pick the query that matches the kinds of access. */
3104 : 4148304 : if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3105 : 2777085 : return refs_output_dependent_p (addr_a, addr_b);
3106 : 1371219 : else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3107 : 364338 : return refs_anti_dependent_p (addr_a, addr_b);
3108 : 1006881 : return refs_may_alias_p (addr_a, addr_b);
3109 : : }
3110 : :
3111 : : /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3112 : : if it is meaningful to compare their associated access functions
3113 : : when checking for dependencies. */
3114 : :
3115 : : static bool
3116 : 11121338 : access_fn_components_comparable_p (tree ref_a, tree ref_b)
3117 : : {
3118 : : /* Allow pairs of component refs from the following sets:
3119 : :
3120 : : { REALPART_EXPR, IMAGPART_EXPR }
3121 : : { COMPONENT_REF }
3122 : : { ARRAY_REF }. */
3123 : 11121338 : tree_code code_a = TREE_CODE (ref_a);
3124 : 11121338 : tree_code code_b = TREE_CODE (ref_b);
3125 : 11121338 : if (code_a == IMAGPART_EXPR) /* Treat IMAGPART_EXPR as REALPART_EXPR so the two form one set. */
3126 : 35365 : code_a = REALPART_EXPR;
3127 : 11121338 : if (code_b == IMAGPART_EXPR)
3128 : 41443 : code_b = REALPART_EXPR;
3129 : 11121338 : if (code_a != code_b)
3130 : : return false;
3131 : :
3132 : 10408633 : if (TREE_CODE (ref_a) == COMPONENT_REF)
3133 : : /* ??? We cannot simply use the type of operand #0 of the refs here as
3134 : : the Fortran compiler smuggles type punning into COMPONENT_REFs.
3135 : : Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3136 : 7042220 : return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3137 : 7042220 : == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3138 : :
3139 : 3366413 : return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3140 : 6732826 : TREE_TYPE (TREE_OPERAND (ref_b, 0))); /* Otherwise compare the types of the enclosing objects. */
3141 : : }
3142 : :
3143 : : /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3144 : : is true when the main indices of A and B were not comparable so we try again
3145 : : with alternate indices computed on an indirect reference. */
3146 : :
3147 : : struct data_dependence_relation *
3148 : 5984983 : initialize_data_dependence_relation (struct data_dependence_relation *res,
3149 : : vec<loop_p> loop_nest,
3150 : : bool use_alt_indices)
3151 : : {
3152 : 5984983 : struct data_reference *a = DDR_A (res);
3153 : 5984983 : struct data_reference *b = DDR_B (res);
3154 : 5984983 : unsigned int i;
3155 : :
3156 : 5984983 : struct indices *indices_a = &a->indices;
3157 : 5984983 : struct indices *indices_b = &b->indices;
3158 : 5984983 : if (use_alt_indices)
3159 : : {
3160 : 149505 : if (TREE_CODE (DR_REF (a)) != MEM_REF)
3161 : 117220 : indices_a = &a->alt_indices;
3162 : 149505 : if (TREE_CODE (DR_REF (b)) != MEM_REF)
3163 : 137065 : indices_b = &b->alt_indices;
3164 : : }
3165 : 5984983 : unsigned int num_dimensions_a = indices_a->access_fns.length ();
3166 : 5984983 : unsigned int num_dimensions_b = indices_b->access_fns.length ();
3167 : 5984983 : if (num_dimensions_a == 0 || num_dimensions_b == 0)
3168 : : {
3169 : 1914338 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3170 : 1914338 : return res;
3171 : : }
3172 : :
3173 : : /* For unconstrained bases, the root (highest-indexed) subscript
3174 : : describes a variation in the base of the original DR_REF rather
3175 : : than a component access. We have no type that accurately describes
3176 : : the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3177 : : applying this subscript) so limit the search to the last real
3178 : : component access.
3179 : :
3180 : : E.g. for:
3181 : :
3182 : : void
3183 : : f (int a[][8], int b[][8])
3184 : : {
3185 : : for (int i = 0; i < 8; ++i)
3186 : : a[i * 2][0] = b[i][0];
3187 : : }
3188 : :
3189 : : the a and b accesses have a single ARRAY_REF component reference [0]
3190 : : but have two subscripts. */
3191 : 4070645 : if (indices_a->unconstrained_base)
3192 : 1058668 : num_dimensions_a -= 1;
3193 : 4070645 : if (indices_b->unconstrained_base)
3194 : 1045238 : num_dimensions_b -= 1;
3195 : :
3196 : : /* These structures describe sequences of component references in
3197 : : DR_REF (A) and DR_REF (B). Each component reference is tied to a
3198 : : specific access function. */
3199 : 4070645 : struct {
3200 : : /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3201 : : DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3202 : : indices. In C notation, these are the indices of the rightmost
3203 : : component references; e.g. for a sequence .b.c.d, the start
3204 : : index is for .d. */
3205 : : unsigned int start_a;
3206 : : unsigned int start_b;
3207 : :
3208 : : /* The sequence contains LENGTH consecutive access functions from
3209 : : each DR. */
3210 : : unsigned int length;
3211 : :
3212 : : /* The enclosing objects for the A and B sequences respectively,
3213 : : i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3214 : : and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3215 : : tree object_a;
3216 : : tree object_b;
3217 : 4070645 : } full_seq = {}, struct_seq = {};
3218 : :
3219 : : /* Before each iteration of the loop:
3220 : :
3221 : : - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3222 : : - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3223 : 4070645 : unsigned int index_a = 0;
3224 : 4070645 : unsigned int index_b = 0;
3225 : 4070645 : tree ref_a = DR_REF (a);
3226 : 4070645 : tree ref_b = DR_REF (b);
3227 : :
3228 : : /* Now walk the component references from the final DR_REFs back up to
3229 : : the enclosing base objects. Each component reference corresponds
3230 : : to one access function in the DR, with access function 0 being for
3231 : : the final DR_REF and the highest-indexed access function being the
3232 : : one that is applied to the base of the DR.
3233 : :
3234 : : Look for a sequence of component references whose access functions
3235 : : are comparable (see access_fn_components_comparable_p). If more
3236 : : than one such sequence exists, pick the one nearest the base
3237 : : (which is the leftmost sequence in C notation). Store this sequence
3238 : : in FULL_SEQ.
3239 : :
3240 : : For example, if we have:
3241 : :
3242 : : struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3243 : :
3244 : : A: a[0][i].s.c.d
3245 : : B: __real b[0][i].s.e[i].f
3246 : :
3247 : : (where d is the same type as the real component of f) then the access
3248 : : functions would be:
3249 : :
3250 : : 0 1 2 3
3251 : : A: .d .c .s [i]
3252 : :
3253 : : 0 1 2 3 4 5
3254 : : B: __real .f [i] .e .s [i]
3255 : :
3256 : : The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3257 : : and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3258 : : COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3259 : : the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3260 : : so is comparable. The A3/B5 column contains two ARRAY_REFs that
3261 : : index foo[10] arrays, so is again comparable. The sequence is
3262 : : therefore:
3263 : :
3264 : : A: [1, 3] (i.e. [i].s.c)
3265 : : B: [3, 5] (i.e. [i].s.e)
3266 : :
3267 : : Also look for sequences of component references whose access
3268 : : functions are comparable and whose enclosing objects have the same
3269 : : RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3270 : : example, STRUCT_SEQ would be:
3271 : :
3272 : : A: [1, 2] (i.e. s.c)
3273 : : B: [3, 4] (i.e. s.e) */
3274 : 15179133 : while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3275 : : {
3276 : : /* The alternate indices form always has a single dimension
3277 : : with unconstrained base. */
3278 : 11121338 : gcc_assert (!use_alt_indices);
3279 : :
3280 : : /* REF_A and REF_B must be one of the component access types
3281 : : allowed by dr_analyze_indices. */
3282 : 11121338 : gcc_checking_assert (access_fn_component_p (ref_a));
3283 : 11121338 : gcc_checking_assert (access_fn_component_p (ref_b));
3284 : :
3285 : : /* Get the immediately-enclosing objects for REF_A and REF_B,
3286 : : i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3287 : : and DR_ACCESS_FN (B, INDEX_B). */
3288 : 11121338 : tree object_a = TREE_OPERAND (ref_a, 0);
3289 : 11121338 : tree object_b = TREE_OPERAND (ref_b, 0);
3290 : :
3291 : 11121338 : tree type_a = TREE_TYPE (object_a);
3292 : 11121338 : tree type_b = TREE_TYPE (object_b);
3293 : 11121338 : if (access_fn_components_comparable_p (ref_a, ref_b))
3294 : : {
3295 : : /* This pair of component accesses is comparable for dependence
3296 : : analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3297 : : DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3298 : 8025583 : if (full_seq.start_a + full_seq.length != index_a
3299 : 7145699 : || full_seq.start_b + full_seq.length != index_b)
3300 : : {
3301 : : /* The accesses don't extend the current sequence,
3302 : : so start a new one here. */
3303 : 1136886 : full_seq.start_a = index_a;
3304 : 1136886 : full_seq.start_b = index_b;
3305 : 1136886 : full_seq.length = 0;
3306 : : }
3307 : :
3308 : : /* Add this pair of references to the sequence. */
3309 : 8025583 : full_seq.length += 1;
3310 : 8025583 : full_seq.object_a = object_a;
3311 : 8025583 : full_seq.object_b = object_b;
3312 : :
3313 : : /* If the enclosing objects are structures (and thus have the
3314 : : same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3315 : 8025583 : if (TREE_CODE (type_a) == RECORD_TYPE)
3316 : 4675817 : struct_seq = full_seq;
3317 : :
3318 : : /* Move to the next containing reference for both A and B. */
3319 : 8025583 : ref_a = object_a;
3320 : 8025583 : ref_b = object_b;
3321 : 8025583 : index_a += 1;
3322 : 8025583 : index_b += 1;
3323 : 8025583 : continue;
3324 : : }
3325 : :
3326 : : /* Try to approach equal type sizes. */
3327 : 3095755 : if (!COMPLETE_TYPE_P (type_a)
3328 : 3091116 : || !COMPLETE_TYPE_P (type_b)
3329 : 3084777 : || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3330 : 6178934 : || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3331 : : break;
3332 : :
3333 : 3082905 : unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3334 : 3082905 : unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3335 : 3082905 : if (size_a <= size_b)
3336 : : {
3337 : 1604973 : index_a += 1;
3338 : 1604973 : ref_a = object_a;
3339 : : }
3340 : 3082905 : if (size_b <= size_a)
3341 : : {
3342 : 1598416 : index_b += 1;
3343 : 1598416 : ref_b = object_b;
3344 : : }
3345 : : }
3346 : :
3347 : : /* See whether FULL_SEQ ends at the base and whether the two bases
3348 : : are equal. We do not care about TBAA or alignment info so we can
3349 : : use OEP_ADDRESS_OF to avoid false negatives. */
3350 : 4070645 : tree base_a = indices_a->base_object;
3351 : 4070645 : tree base_b = indices_b->base_object;
3352 : 4070645 : bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3353 : 3982887 : && full_seq.start_b + full_seq.length == num_dimensions_b
3354 : 3939129 : && (indices_a->unconstrained_base
3355 : 3939129 : == indices_b->unconstrained_base)
3356 : 3932173 : && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3357 : 3507158 : && (types_compatible_p (TREE_TYPE (base_a),
3358 : 3507158 : TREE_TYPE (base_b))
3359 : 66082 : || (!base_supports_access_fn_components_p (base_a)
3360 : 60902 : && !base_supports_access_fn_components_p (base_b)
3361 : 59507 : && operand_equal_p
3362 : 59507 : (TYPE_SIZE (TREE_TYPE (base_a)),
3363 : 59507 : TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3364 : 7519638 : && (!loop_nest.exists ()
3365 : 3448993 : || (object_address_invariant_in_loop_p
3366 : 3448993 : (loop_nest[0], base_a))));
3367 : :
3368 : : /* If the bases are the same, we can include the base variation too.
3369 : : E.g. the b accesses in:
3370 : :
3371 : : for (int i = 0; i < n; ++i)
3372 : : b[i + 4][0] = b[i][0];
3373 : :
3374 : : have a definite dependence distance of 4, while for:
3375 : :
3376 : : for (int i = 0; i < n; ++i)
3377 : : a[i + 4][0] = b[i][0];
3378 : :
3379 : : the dependence distance depends on the gap between a and b.
3380 : :
3381 : : If the bases are different then we can only rely on the sequence
3382 : : rooted at a structure access, since arrays are allowed to overlap
3383 : : arbitrarily and change shape arbitrarily. E.g. we treat this as
3384 : : valid code:
3385 : :
3386 : : int a[256];
3387 : : ...
3388 : : ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3389 : :
3390 : : where two lvalues with the same int[4][3] type overlap, and where
3391 : : both lvalues are distinct from the object's declared type. */
3392 : 3393706 : if (same_base_p)
3393 : : {
3394 : 3393706 : if (indices_a->unconstrained_base)
3395 : 634491 : full_seq.length += 1;
3396 : : }
3397 : : else
3398 : : full_seq = struct_seq;
3399 : :
3400 : : /* Punt if we didn't find a suitable sequence. */
3401 : 4070645 : if (full_seq.length == 0)
3402 : : {
3403 : 478823 : if (use_alt_indices
3404 : 395315 : || (TREE_CODE (DR_REF (a)) == MEM_REF
3405 : 277858 : && TREE_CODE (DR_REF (b)) == MEM_REF)
3406 : 151122 : || may_be_nonaddressable_p (DR_REF (a))
3407 : 629809 : || may_be_nonaddressable_p (DR_REF (b)))
3408 : : {
3409 : : /* Fully exhausted possibilities. */
3410 : 329318 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3411 : 329318 : return res;
3412 : : }
3413 : :
3414 : : /* Try evaluating both DRs as dereferences of pointers. */
3415 : 149505 : if (!a->alt_indices.base_object
3416 : 59460 : && TREE_CODE (DR_REF (a)) != MEM_REF)
3417 : : {
3418 : 27175 : tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3419 : : build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3420 : : build_int_cst
3421 : 27175 : (reference_alias_ptr_type (DR_REF (a)), 0));
3422 : 81525 : dr_analyze_indices (&a->alt_indices, alt_ref,
3423 : 27175 : loop_preheader_edge (loop_nest[0]),
3424 : : loop_containing_stmt (DR_STMT (a)));
3425 : : }
3426 : 149505 : if (!b->alt_indices.base_object
3427 : 69396 : && TREE_CODE (DR_REF (b)) != MEM_REF)
3428 : : {
3429 : 56956 : tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3430 : : build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3431 : : build_int_cst
3432 : 56956 : (reference_alias_ptr_type (DR_REF (b)), 0));
3433 : 170868 : dr_analyze_indices (&b->alt_indices, alt_ref,
3434 : 56956 : loop_preheader_edge (loop_nest[0]),
3435 : : loop_containing_stmt (DR_STMT (b)));
3436 : : }
3437 : 149505 : return initialize_data_dependence_relation (res, loop_nest, true);
3438 : : }
3439 : :
3440 : 3591822 : if (!same_base_p)
3441 : : {
3442 : : /* Partial overlap is possible for different bases when strict aliasing
3443 : : is not in effect. It's also possible if either base involves a union
3444 : : access; e.g. for:
3445 : :
3446 : : struct s1 { int a[2]; };
3447 : : struct s2 { struct s1 b; int c; };
3448 : : struct s3 { int d; struct s1 e; };
3449 : : union u { struct s2 f; struct s3 g; } *p, *q;
3450 : :
3451 : : the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3452 : : "p->g.e" (base "p->g") and might partially overlap the s1 at
3453 : : "q->g.e" (base "q->g"). */
3454 : 198116 : if (!flag_strict_aliasing
3455 : 186453 : || ref_contains_union_access_p (full_seq.object_a)
3456 : 383420 : || ref_contains_union_access_p (full_seq.object_b))
3457 : : {
3458 : 12812 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3459 : 12812 : return res;
3460 : : }
3461 : :
3462 : 185304 : DDR_COULD_BE_INDEPENDENT_P (res) = true;
3463 : 185304 : if (!loop_nest.exists ()
3464 : 370608 : || (object_address_invariant_in_loop_p (loop_nest[0],
3465 : 185304 : full_seq.object_a)
3466 : 26242 : && object_address_invariant_in_loop_p (loop_nest[0],
3467 : 26242 : full_seq.object_b)))
3468 : : {
3469 : 9315 : DDR_OBJECT_A (res) = full_seq.object_a;
3470 : 9315 : DDR_OBJECT_B (res) = full_seq.object_b;
3471 : : }
3472 : : }
3473 : :
3474 : 3579010 : DDR_AFFINE_P (res) = true;
3475 : 3579010 : DDR_ARE_DEPENDENT (res) = NULL_TREE;
3476 : 3579010 : DDR_SUBSCRIPTS (res).create (full_seq.length);
3477 : 3579010 : DDR_LOOP_NEST (res) = loop_nest;
3478 : 3579010 : DDR_SELF_REFERENCE (res) = false;
3479 : :
3480 : 12167272 : for (i = 0; i < full_seq.length; ++i)
3481 : : {
3482 : 8588262 : struct subscript *subscript;
3483 : :
3484 : 8588262 : subscript = XNEW (struct subscript);
3485 : 8588262 : SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3486 : 8588262 : SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3487 : 8588262 : SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3488 : 8588262 : SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3489 : 8588262 : SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3490 : 8588262 : SUB_DISTANCE (subscript) = chrec_dont_know;
3491 : 8588262 : DDR_SUBSCRIPTS (res).safe_push (subscript);
3492 : : }
3493 : :
3494 : : return res;
3495 : : }
3496 : :
3497 : : /* Allocate and initialize a data dependence relation between data accesses A and
3498 : : B. LOOP_NEST is the vector of loops surrounding the references; when it does not exist a null loop is passed to dr_may_alias_p. The result is marked chrec_dont_know if A or B is null and
3499 : : chrec_known if the references cannot alias; otherwise the (RES, LOOP_NEST, false) overload completes the initialization. */
3500 : :
3501 : : struct data_dependence_relation *
3502 : 12155278 : initialize_data_dependence_relation (struct data_reference *a,
3503 : : struct data_reference *b,
3504 : : vec<loop_p> loop_nest)
3505 : : {
3506 : 12155278 : data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3507 : 12155278 : DDR_A (res) = a;
3508 : 12155278 : DDR_B (res) = b;
3509 : 12155278 : DDR_LOOP_NEST (res).create (0);
3510 : 12155278 : DDR_SUBSCRIPTS (res).create (0);
3511 : 12155278 : DDR_DIR_VECTS (res).create (0);
3512 : 12155278 : DDR_DIST_VECTS (res).create (0);
3513 : :
3514 : 12155278 : if (a == NULL || b == NULL) /* Nothing to analyze without both references. */
3515 : : {
3516 : 0 : DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3517 : 0 : return res;
3518 : : }
3519 : :
3520 : : /* If the data references do not alias, then they are independent. */
3521 : 18156580 : if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3522 : : {
3523 : 6319800 : DDR_ARE_DEPENDENT (res) = chrec_known;
3524 : 6319800 : return res;
3525 : : }
3526 : :
3527 : 5835478 : return initialize_data_dependence_relation (res, loop_nest, false); /* Start without the alternate indices. */
3528 : : }
3529 : :
3530 : :
3531 : : /* Frees memory used by the conflict function F: each of its F->n affine functions when CF_NONTRIVIAL_P (F) holds, then F itself. */
3532 : :
3533 : : static void
3534 : 25270860 : free_conflict_function (conflict_function *f)
3535 : : {
3536 : 25270860 : unsigned i;
3537 : :
3538 : 25270860 : if (CF_NONTRIVIAL_P (f)) /* Only then are the entries of f->fns released. */
3539 : : {
3540 : 5751112 : for (i = 0; i < f->n; i++)
3541 : 2875556 : affine_fn_free (f->fns[i]);
3542 : : }
3543 : 25270860 : free (f);
3544 : 25270860 : }
3545 : :
3546 : : /* Frees memory used by SUBSCRIPTS: for every subscript both of its conflict functions and the subscript itself, then releases the vector. */
3547 : :
3548 : : static void
3549 : 3579010 : free_subscripts (vec<subscript_p> subscripts) /* NOTE(review): passed by value, so the caller's own handle is not updated here. */
3550 : : {
3551 : 19325292 : for (subscript_p s : subscripts)
3552 : : {
3553 : 8588262 : free_conflict_function (s->conflicting_iterations_in_a);
3554 : 8588262 : free_conflict_function (s->conflicting_iterations_in_b);
3555 : 8588262 : free (s);
3556 : : }
3557 : 3579010 : subscripts.release ();
3558 : 3579010 : }
3559 : :
3560 : : /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3561 : : description: the existing subscripts of DDR are freed and DDR_SUBSCRIPTS is re-created as an empty vector. */
3562 : :
3563 : : static inline void
3564 : 2770490 : finalize_ddr_dependent (struct data_dependence_relation *ddr,
3565 : : tree chrec)
3566 : : {
3567 : 2770490 : DDR_ARE_DEPENDENT (ddr) = chrec;
3568 : 2770490 : free_subscripts (DDR_SUBSCRIPTS (ddr));
3569 : 2770490 : DDR_SUBSCRIPTS (ddr).create (0); /* Re-create the vector after its contents were freed above. */
3570 : 162189 : }
3571 : :
3572 : : /* Record that the dependence relation DDR cannot be represented by a distance
3573 : : vector: clear DDR_AFFINE_P and, when detailed dumping is enabled, say so in the dump file. */
3574 : :
3575 : : static inline void
3576 : 1858 : non_affine_dependence_relation (struct data_dependence_relation *ddr)
3577 : : {
3578 : 1858 : if (dump_file && (dump_flags & TDF_DETAILS))
3579 : 71 : fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3580 : :
3581 : 1858 : DDR_AFFINE_P (ddr) = false;
3582 : 1858 : }
3583 : :
3584 : :
3585 : :
3586 : : /* This section contains the classic Banerjee tests. */
3587 : :
3588 : : /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3589 : : variables, i.e., if the ZIV (Zero Index Variable) test is true: both must satisfy evolution_function_is_constant_p. */
3590 : :
3591 : : static inline bool
3592 : 2661388 : ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3593 : : {
3594 : 2661388 : return (evolution_function_is_constant_p (chrec_a)
3595 : 4414304 : && evolution_function_is_constant_p (chrec_b));
3596 : : }
3597 : :
3598 : : /* Returns true iff the SIV (Single Index Variable) test applies to CHREC_A and CHREC_B: either one of them is constant and the other univariate,
3599 : : or both are univariate and, when both are POLYNOMIAL_CHRECs, they have the same CHREC_VARIABLE. */
3600 : :
3601 : : static bool
3602 : 909898 : siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3603 : : {
3604 : 1818373 : if ((evolution_function_is_constant_p (chrec_a) /* Constant versus univariate, in either order. */
3605 : 1426 : && evolution_function_is_univariate_p (chrec_b))
3606 : 1818373 : || (evolution_function_is_constant_p (chrec_b)
3607 : 1166 : && evolution_function_is_univariate_p (chrec_a)))
3608 : 2586 : return true;
3609 : :
3610 : 907312 : if (evolution_function_is_univariate_p (chrec_a)
3611 : 907312 : && evolution_function_is_univariate_p (chrec_b))
3612 : : {
3613 : 881382 : switch (TREE_CODE (chrec_a))
3614 : : {
3615 : 881382 : case POLYNOMIAL_CHREC:
3616 : 881382 : switch (TREE_CODE (chrec_b))
3617 : : {
3618 : 881382 : case POLYNOMIAL_CHREC:
3619 : 881382 : if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b)) /* The two polynomial chrecs have different variables: not SIV. */
3620 : : return false;
3621 : : /* FALLTHRU */
3622 : :
3623 : : default:
3624 : : return true;
3625 : : }
3626 : :
3627 : : default:
3628 : : return true;
3629 : : }
3630 : : }
3631 : :
3632 : : return false;
3633 : : }
3634 : :
3635 : : /* Creates a conflict function with N dimensions. The affine functions
3636 : : in each dimension follow as N variadic arguments of type affine_fn. N must be greater than zero and at most MAX_DIM. */
3637 : :
3638 : : static conflict_function *
3639 : 2875556 : conflict_fn (unsigned n, ...)
3640 : : {
3641 : 2875556 : unsigned i;
3642 : 2875556 : conflict_function *ret = XCNEW (conflict_function);
3643 : 2875556 : va_list ap;
3644 : :
3645 : 2875556 : gcc_assert (n > 0 && n <= MAX_DIM);
3646 : 2875556 : va_start (ap, n);
3647 : :
3648 : 2875556 : ret->n = n;
3649 : 5751112 : for (i = 0; i < n; i++)
3650 : 2875556 : ret->fns[i] = va_arg (ap, affine_fn); /* Stored as passed, not copied. */
3651 : 2875556 : va_end (ap);
3652 : :
3653 : 2875556 : return ret;
3654 : : }
3655 : :
3656 : : /* Returns constant affine function with value CST, i.e. a newly created one-element vector whose only entry is CST. */
3657 : :
3658 : : static affine_fn
3659 : 2770822 : affine_fn_cst (tree cst)
3660 : : {
3661 : 2770822 : affine_fn fn;
3662 : 2770822 : fn.create (1);
3663 : 2770822 : fn.quick_push (cst);
3664 : 2770822 : return fn;
3665 : : }
3666 : :
3667 : : /* Returns affine function with single variable, CST + COEF * x_DIM. DIM must be greater than zero; the entries for x_1 .. x_(DIM-1) are integer_zero_node. */
3668 : :
3669 : : static affine_fn
3670 : 104734 : affine_fn_univar (tree cst, unsigned dim, tree coef)
3671 : : {
3672 : 104734 : affine_fn fn;
3673 : 104734 : fn.create (dim + 1); /* Room for CST plus one entry per dimension up to DIM. */
3674 : 104734 : unsigned i;
3675 : :
3676 : 104734 : gcc_assert (dim > 0);
3677 : 104734 : fn.quick_push (cst);
3678 : 209468 : for (i = 1; i < dim; i++)
3679 : 0 : fn.quick_push (integer_zero_node);
3680 : 104734 : fn.quick_push (coef);
3681 : 104734 : return fn;
3682 : : }
3683 : :
3684 : : /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3685 : : *OVERLAPS_B are initialized to the functions that describe the
3686 : : relation between the elements accessed twice by CHREC_A and
3687 : : CHREC_B. For k >= 0, the following property is verified:
3688 : :
3689 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). *LAST_CONFLICTS is set to integer_zero_node when the accesses never overlap and to chrec_dont_know otherwise. */
3690 : :
3691 : : static void
3692 : 1751490 : analyze_ziv_subscript (tree chrec_a,
3693 : : tree chrec_b,
3694 : : conflict_function **overlaps_a,
3695 : : conflict_function **overlaps_b,
3696 : : tree *last_conflicts)
3697 : : {
3698 : 1751490 : tree type, difference;
3699 : 1751490 : dependence_stats.num_ziv++;
3700 : :
3701 : 1751490 : if (dump_file && (dump_flags & TDF_DETAILS))
3702 : 20070 : fprintf (dump_file, "(analyze_ziv_subscript \n");
3703 : :
3704 : 1751490 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); /* Both chrecs are converted to this one type before subtracting. */
3705 : 1751490 : chrec_a = chrec_convert (type, chrec_a, NULL);
3706 : 1751490 : chrec_b = chrec_convert (type, chrec_b, NULL);
3707 : 1751490 : difference = chrec_fold_minus (type, chrec_a, chrec_b);
3708 : :
3709 : 1751490 : switch (TREE_CODE (difference))
3710 : : {
3711 : 1751490 : case INTEGER_CST:
3712 : 1751490 : if (integer_zerop (difference))
3713 : : {
3714 : : /* The difference is equal to zero: the accessed index
3715 : : overlaps for each iteration in the loop. */
3716 : 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3717 : 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3718 : 0 : *last_conflicts = chrec_dont_know;
3719 : 0 : dependence_stats.num_ziv_dependent++;
3720 : : }
3721 : : else
3722 : : {
3723 : : /* The accesses do not overlap. */
3724 : 1751490 : *overlaps_a = conflict_fn_no_dependence ();
3725 : 1751490 : *overlaps_b = conflict_fn_no_dependence ();
3726 : 1751490 : *last_conflicts = integer_zero_node;
3727 : 1751490 : dependence_stats.num_ziv_independent++;
3728 : : }
3729 : : break;
3730 : :
3731 : 0 : default:
3732 : : /* We're not sure whether the indexes overlap. For the moment,
3733 : : conservatively answer "don't know". */
3734 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3735 : 0 : fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3736 : :
3737 : 0 : *overlaps_a = conflict_fn_not_known ();
3738 : 0 : *overlaps_b = conflict_fn_not_known ();
3739 : 0 : *last_conflicts = chrec_dont_know;
3740 : 0 : dependence_stats.num_ziv_unimplemented++;
3741 : 0 : break;
3742 : : }
3743 : :
3744 : 1751490 : if (dump_file && (dump_flags & TDF_DETAILS))
3745 : 20070 : fprintf (dump_file, ")\n"); /* Closes the parenthesis opened in the dump at function entry. */
3746 : 1751490 : }
3747 : :
3748 : : /* Similar to max_stmt_executions_int, but returns the bound as a tree of type unsigned_type_node,
3749 : : and only if it fits in that unsigned type. If this is not the case, or the
3750 : : bound on the number of statement executions of LOOP could not be derived (max_stmt_executions fails), returns
3751 : : chrec_dont_know. */
3752 : :
3753 : : static tree
3754 : 0 : max_stmt_executions_tree (class loop *loop)
3755 : : {
3756 : 0 : widest_int nit;
3757 : :
3758 : 0 : if (!max_stmt_executions (loop, &nit))
3759 : 0 : return chrec_dont_know;
3760 : :
3761 : 0 : if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3762 : 0 : return chrec_dont_know;
3763 : :
3764 : 0 : return wide_int_to_tree (unsigned_type_node, nit);
3765 : 0 : }
3766 : :
3767 : : /* Determine whether the CHREC is always positive/negative. If the expression
3768 : : cannot be statically analyzed, return false, otherwise return true and set the answer into
3769 : : VALUE (true for positive, false for negative). A zero INTEGER_CST and any tree code other than POLYNOMIAL_CHREC or INTEGER_CST also return false. */
3770 : :
3771 : : static bool
3772 : 3994 : chrec_is_positive (tree chrec, bool *value)
3773 : : {
3774 : 3994 : bool value0, value1, value2;
3775 : 3994 : tree end_value, nb_iter;
3776 : :
3777 : 3994 : switch (TREE_CODE (chrec))
3778 : : {
3779 : 0 : case POLYNOMIAL_CHREC:
3780 : 0 : if (!chrec_is_positive (CHREC_LEFT (chrec), &value0) /* VALUE0: sign of the left part, VALUE1: sign of the right part. */
3781 : 0 : || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3782 : 0 : return false;
3783 : :
3784 : : /* FIXME -- overflows. */
3785 : 0 : if (value0 == value1)
3786 : : {
3787 : 0 : *value = value0;
3788 : 0 : return true;
3789 : : }
3790 : :
3791 : : /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3792 : : and the proof consists in showing that the sign never
3793 : : changes during the execution of the loop, from 0 to
3794 : : loop->nb_iterations. */
3795 : 0 : if (!evolution_function_is_affine_p (chrec))
3796 : : return false;
3797 : :
3798 : 0 : nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3799 : 0 : if (chrec_contains_undetermined (nb_iter))
3800 : : return false;
3801 : :
3802 : : #if 0
3803 : : /* TODO -- If the test is after the exit, we may decrease the number of
3804 : : iterations by one. */
3805 : : if (after_exit)
3806 : : nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3807 : : #endif
3808 : :
3809 : 0 : end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3810 : :
3811 : 0 : if (!chrec_is_positive (end_value, &value2))
3812 : : return false;
3813 : :
3814 : 0 : *value = value0;
3815 : 0 : return value0 == value1; /* NOTE(review): VALUE0 != VALUE1 is already known here (the equal case returned above) and VALUE2 is never read, so this path always returns false -- confirm whether VALUE2 was intended. */
3816 : :
3817 : 3994 : case INTEGER_CST:
3818 : 3994 : switch (tree_int_cst_sgn (chrec))
3819 : : {
3820 : 1854 : case -1:
3821 : 1854 : *value = false;
3822 : 1854 : break;
3823 : 2140 : case 1:
3824 : 2140 : *value = true;
3825 : 2140 : break;
3826 : : default: /* Sign is neither -1 nor 1 (a zero constant): no answer. */
3827 : : return false;
3828 : : }
3829 : : return true;
3830 : :
3831 : : default:
3832 : : return false;
3833 : : }
3834 : : }
3835 : :
3836 : :
3837 : : /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3838 : : constant, and CHREC_B is an affine function. *OVERLAPS_A and
3839 : : *OVERLAPS_B are initialized to the functions that describe the
3840 : : relation between the elements accessed twice by CHREC_A and
3841 : : CHREC_B. For k >= 0, the following property is verified:
3842 : :
3843 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). *LAST_CONFLICTS is set to integer_one_node when a single overlap is found, integer_zero_node when there is none, and chrec_dont_know when the test cannot decide. */
3844 : :
3845 : : static void
3846 : 2586 : analyze_siv_subscript_cst_affine (tree chrec_a,
3847 : : tree chrec_b,
3848 : : conflict_function **overlaps_a,
3849 : : conflict_function **overlaps_b,
3850 : : tree *last_conflicts)
3851 : : {
3852 : 2586 : bool value0, value1, value2;
3853 : 2586 : tree type, difference, tmp;
3854 : :
3855 : 2586 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3856 : 2586 : chrec_a = chrec_convert (type, chrec_a, NULL);
3857 : 2586 : chrec_b = chrec_convert (type, chrec_b, NULL);
3858 : 2586 : difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a); /* Initial value of CHREC_B minus CHREC_A. */
3859 : :
3860 : : /* Special case overlap in the first iteration. */
3861 : 2586 : if (integer_zerop (difference))
3862 : : {
3863 : 587 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3864 : 587 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3865 : 587 : *last_conflicts = integer_one_node;
3866 : 587 : return;
3867 : : }
3868 : :
3869 : 1999 : if (!chrec_is_positive (initial_condition (difference), &value0)) /* VALUE0 receives the sign of DIFFERENCE. */
3870 : : {
3871 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3872 : 0 : fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3873 : :
3874 : 0 : dependence_stats.num_siv_unimplemented++;
3875 : 0 : *overlaps_a = conflict_fn_not_known ();
3876 : 0 : *overlaps_b = conflict_fn_not_known ();
3877 : 0 : *last_conflicts = chrec_dont_know;
3878 : 0 : return;
3879 : : }
3880 : : else
3881 : : {
3882 : 1999 : if (value0 == false) /* DIFFERENCE is negative. */
3883 : : {
3884 : 1658 : if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3885 : 1658 : || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1)) /* VALUE1 receives the sign of CHREC_B's step. */
3886 : : {
3887 : 4 : if (dump_file && (dump_flags & TDF_DETAILS))
3888 : 0 : fprintf (dump_file, "siv test failed: chrec not positive.\n");
3889 : :
3890 : 4 : *overlaps_a = conflict_fn_not_known ();
3891 : 4 : *overlaps_b = conflict_fn_not_known ();
3892 : 4 : *last_conflicts = chrec_dont_know;
3893 : 4 : dependence_stats.num_siv_unimplemented++;
3894 : 4 : return;
3895 : : }
3896 : : else
3897 : : {
3898 : 1654 : if (value1 == true)
3899 : : {
3900 : : /* Example:
3901 : : chrec_a = 12
3902 : : chrec_b = {10, +, 1}
3903 : : */
3904 : :
3905 : 1654 : if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3906 : : {
3907 : 1373 : HOST_WIDE_INT numiter;
3908 : 1373 : class loop *loop = get_chrec_loop (chrec_b);
3909 : :
3910 : 1373 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3911 : 1373 : tmp = fold_build2 (EXACT_DIV_EXPR, type,
3912 : : fold_build1 (ABS_EXPR, type, difference),
3913 : : CHREC_RIGHT (chrec_b));
3914 : 1373 : *overlaps_b = conflict_fn (1, affine_fn_cst (tmp)); /* TMP = |DIFFERENCE| / step. */
3915 : 1373 : *last_conflicts = integer_one_node;
3916 : :
3917 : :
3918 : : /* Perform weak-zero siv test to see if overlap is
3919 : : outside the loop bounds. */
3920 : 1373 : numiter = max_stmt_executions_int (loop);
3921 : :
3922 : 1373 : if (numiter >= 0 /* A negative NUMITER skips the bound check. */
3923 : 1373 : && compare_tree_int (tmp, numiter) > 0)
3924 : : {
3925 : 0 : free_conflict_function (*overlaps_a);
3926 : 0 : free_conflict_function (*overlaps_b);
3927 : 0 : *overlaps_a = conflict_fn_no_dependence ();
3928 : 0 : *overlaps_b = conflict_fn_no_dependence ();
3929 : 0 : *last_conflicts = integer_zero_node;
3930 : 0 : dependence_stats.num_siv_independent++;
3931 : 0 : return;
3932 : : }
3933 : 1373 : dependence_stats.num_siv_dependent++;
3934 : 1373 : return;
3935 : : }
3936 : :
3937 : : /* When the step does not divide the difference, there are
3938 : : no overlaps. */
3939 : : else
3940 : : {
3941 : 281 : *overlaps_a = conflict_fn_no_dependence ();
3942 : 281 : *overlaps_b = conflict_fn_no_dependence ();
3943 : 281 : *last_conflicts = integer_zero_node;
3944 : 281 : dependence_stats.num_siv_independent++;
3945 : 281 : return;
3946 : : }
3947 : : }
3948 : :
3949 : : else
3950 : : {
3951 : : /* Example:
3952 : : chrec_a = 12
3953 : : chrec_b = {10, +, -1}
3954 : :
3955 : : In this case, chrec_a will not overlap with chrec_b. */
3956 : 0 : *overlaps_a = conflict_fn_no_dependence ();
3957 : 0 : *overlaps_b = conflict_fn_no_dependence ();
3958 : 0 : *last_conflicts = integer_zero_node;
3959 : 0 : dependence_stats.num_siv_independent++;
3960 : 0 : return;
3961 : : }
3962 : : }
3963 : : }
3964 : : else /* DIFFERENCE is positive. */
3965 : : {
3966 : 341 : if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3967 : 341 : || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2)) /* VALUE2 receives the sign of CHREC_B's step. */
3968 : : {
3969 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
3970 : 0 : fprintf (dump_file, "siv test failed: chrec not positive.\n");
3971 : :
3972 : 0 : *overlaps_a = conflict_fn_not_known ();
3973 : 0 : *overlaps_b = conflict_fn_not_known ();
3974 : 0 : *last_conflicts = chrec_dont_know;
3975 : 0 : dependence_stats.num_siv_unimplemented++;
3976 : 0 : return;
3977 : : }
3978 : : else
3979 : : {
3980 : 341 : if (value2 == false)
3981 : : {
3982 : : /* Example:
3983 : : chrec_a = 3
3984 : : chrec_b = {10, +, -1}
3985 : : */
3986 : 196 : if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3987 : : {
3988 : 97 : HOST_WIDE_INT numiter;
3989 : 97 : class loop *loop = get_chrec_loop (chrec_b);
3990 : :
3991 : 97 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3992 : 97 : tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3993 : : CHREC_RIGHT (chrec_b));
3994 : 97 : *overlaps_b = conflict_fn (1, affine_fn_cst (tmp)); /* NOTE(review): unlike the branch above, no ABS_EXPR is applied, so TMP is a positive DIFFERENCE divided by a negative step -- confirm intended. */
3995 : 97 : *last_conflicts = integer_one_node;
3996 : :
3997 : : /* Perform weak-zero siv test to see if overlap is
3998 : : outside the loop bounds. */
3999 : 97 : numiter = max_stmt_executions_int (loop);
4000 : :
4001 : 97 : if (numiter >= 0
4002 : 97 : && compare_tree_int (tmp, numiter) > 0)
4003 : : {
4004 : 0 : free_conflict_function (*overlaps_a);
4005 : 0 : free_conflict_function (*overlaps_b);
4006 : 0 : *overlaps_a = conflict_fn_no_dependence ();
4007 : 0 : *overlaps_b = conflict_fn_no_dependence ();
4008 : 0 : *last_conflicts = integer_zero_node;
4009 : 0 : dependence_stats.num_siv_independent++;
4010 : 0 : return;
4011 : : }
4012 : 97 : dependence_stats.num_siv_dependent++;
4013 : 97 : return;
4014 : : }
4015 : :
4016 : : /* When the step does not divide the difference, there
4017 : : are no overlaps. */
4018 : : else
4019 : : {
4020 : 99 : *overlaps_a = conflict_fn_no_dependence ();
4021 : 99 : *overlaps_b = conflict_fn_no_dependence ();
4022 : 99 : *last_conflicts = integer_zero_node;
4023 : 99 : dependence_stats.num_siv_independent++;
4024 : 99 : return;
4025 : : }
4026 : : }
4027 : : else
4028 : : {
4029 : : /* Example:
4030 : : chrec_a = 3
4031 : : chrec_b = {4, +, 1}
4032 : :
4033 : : In this case, chrec_a will not overlap with chrec_b. */
4034 : 145 : *overlaps_a = conflict_fn_no_dependence ();
4035 : 145 : *overlaps_b = conflict_fn_no_dependence ();
4036 : 145 : *last_conflicts = integer_zero_node;
4037 : 145 : dependence_stats.num_siv_independent++;
4038 : 145 : return;
4039 : : }
4040 : : }
4041 : : }
4042 : : }
4043 : : }
4044 : :
4045 : : /* Helper recursive function for initializing the matrix A. For each POLYNOMIAL_CHREC level it stores MULT times the step in A[INDEX][0] and recurses on CHREC_LEFT with INDEX + 1. Returns
4046 : : the initial value of CHREC, or chrec_dont_know when a step is not a constant fitting a HOST_WIDE_INT or equals HOST_WIDE_INT_MIN. */
4047 : :
4048 : : static tree
4049 : 1727018 : initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4050 : : {
4051 : 3454028 : gcc_assert (chrec);
4052 : :
4053 : 3454028 : switch (TREE_CODE (chrec))
4054 : : {
4055 : 1727018 : case POLYNOMIAL_CHREC:
4056 : 1727018 : HOST_WIDE_INT chrec_right;
4057 : 1727018 : if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4058 : 8 : return chrec_dont_know;
4059 : 1727010 : chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4060 : : /* We want to be able to negate without overflow. */
4061 : 1727010 : if (chrec_right == HOST_WIDE_INT_MIN)
4062 : 0 : return chrec_dont_know;
4063 : 1727010 : A[index][0] = mult * chrec_right;
4064 : 1727010 : return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4065 : :
4066 : 0 : case PLUS_EXPR:
4067 : 0 : case MULT_EXPR:
4068 : 0 : case MINUS_EXPR:
4069 : 0 : {
4070 : 0 : tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); /* Both operands are processed at the same INDEX. */
4071 : 0 : tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4072 : :
4073 : 0 : return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4074 : : }
4075 : :
4076 : 0 : CASE_CONVERT:
4077 : 0 : {
4078 : 0 : tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4079 : 0 : return chrec_convert (chrec_type (chrec), op, NULL);
4080 : : }
4081 : :
4082 : 0 : case BIT_NOT_EXPR:
4083 : 0 : {
4084 : : /* Handle ~X as -1 - X. */
4085 : 0 : tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4086 : 0 : return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4087 : 0 : build_int_cst (TREE_TYPE (chrec), -1), op);
4088 : : }
4089 : :
4090 : : case INTEGER_CST: /* A constant is its own initial value. */
4091 : : return chrec;
4092 : :
4093 : 0 : default:
4094 : 0 : gcc_unreachable ();
4095 : : return NULL_TREE;
4096 : : }
4097 : : }
4098 : :
4099 : : #define FLOOR_DIV(x,y) ((x) / (y)) /* NOTE(review): plain integer division, which truncates toward zero; it only equals a floor division when the quotient is non-negative. */
4100 : :
4101 : : /* Solves the special case of the Diophantine equation:
4102 : : | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4103 : :
4104 : : Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4105 : : number of iterations that loops X and Y run. The overlaps will be
4106 : : constructed as evolutions in dimension DIM. When the steps have the same sign, *LAST_CONFLICTS is the smaller of NITER / (STEP_B / gcd) and NITER / (STEP_A / gcd), or chrec_dont_know if NITER is not positive; otherwise both overlaps are the constant zero and *LAST_CONFLICTS is integer_zero_node. */
4107 : :
4108 : : static void
4109 : 58 : compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4110 : : HOST_WIDE_INT step_a,
4111 : : HOST_WIDE_INT step_b,
4112 : : affine_fn *overlaps_a,
4113 : : affine_fn *overlaps_b,
4114 : : tree *last_conflicts, int dim)
4115 : : {
4116 : 58 : if (((step_a > 0 && step_b > 0) /* Both steps nonzero and of the same sign. */
4117 : 8 : || (step_a < 0 && step_b < 0)))
4118 : : {
4119 : 54 : HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4120 : 54 : HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4121 : :
4122 : 54 : gcd_steps_a_b = gcd (step_a, step_b);
4123 : 54 : step_overlaps_a = step_b / gcd_steps_a_b; /* Each overlap step is the other access's step divided by the gcd. */
4124 : 54 : step_overlaps_b = step_a / gcd_steps_a_b;
4125 : :
4126 : 54 : if (niter > 0)
4127 : : {
4128 : 54 : tau2 = FLOOR_DIV (niter, step_overlaps_a);
4129 : 54 : tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4130 : 54 : last_conflict = tau2;
4131 : 54 : *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4132 : : }
4133 : : else
4134 : 0 : *last_conflicts = chrec_dont_know;
4135 : :
4136 : 54 : *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4137 : 54 : build_int_cst (NULL_TREE,
4138 : : step_overlaps_a));
4139 : 54 : *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4140 : 54 : build_int_cst (NULL_TREE,
4141 : : step_overlaps_b));
4142 : 54 : }
4143 : :
4144 : : else /* Steps of opposite sign, or at least one step is zero. */
4145 : : {
4146 : 4 : *overlaps_a = affine_fn_cst (integer_zero_node);
4147 : 4 : *overlaps_b = affine_fn_cst (integer_zero_node);
4148 : 4 : *last_conflicts = integer_zero_node;
4149 : : }
4150 : 58 : }
4151 : :
4152 : : /* Solves the special case of a Diophantine equation where CHREC_A is
4153 : : an affine bivariate function, and CHREC_B is an affine univariate
4154 : : function. For example,
4155 : :
4156 : : | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4157 : :
4158 : : has the following overlapping functions:
4159 : :
4160 : : | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4161 : : | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4162 : : | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4163 : :
4164 : : FORNOW: This is a specialized implementation for a case occurring in
4165 : : a common benchmark. Implement the general algorithm. */
4166 : :
4167 : : static void
4168 : 0 : compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4169 : : conflict_function **overlaps_a,
4170 : : conflict_function **overlaps_b,
4171 : : tree *last_conflicts)
4172 : : {
4173 : 0 : bool xz_p, yz_p, xyz_p;
4174 : 0 : HOST_WIDE_INT step_x, step_y, step_z;
4175 : 0 : HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4176 : 0 : affine_fn overlaps_a_xz, overlaps_b_xz;
4177 : 0 : affine_fn overlaps_a_yz, overlaps_b_yz;
4178 : 0 : affine_fn overlaps_a_xyz, overlaps_b_xyz;
4179 : 0 : affine_fn ova1, ova2, ovb;
4180 : 0 : tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4181 : :
4182 : 0 : step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4183 : 0 : step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4184 : 0 : step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4185 : :
4186 : 0 : niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4187 : 0 : niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4188 : 0 : niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4189 : :
4190 : 0 : if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4191 : : {
4192 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4193 : 0 : fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4194 : :
4195 : 0 : *overlaps_a = conflict_fn_not_known ();
4196 : 0 : *overlaps_b = conflict_fn_not_known ();
4197 : 0 : *last_conflicts = chrec_dont_know;
4198 : 0 : return;
4199 : : }
4200 : :
4201 : 0 : niter = MIN (niter_x, niter_z);
4202 : 0 : compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4203 : : &overlaps_a_xz,
4204 : : &overlaps_b_xz,
4205 : : &last_conflicts_xz, 1);
4206 : 0 : niter = MIN (niter_y, niter_z);
4207 : 0 : compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4208 : : &overlaps_a_yz,
4209 : : &overlaps_b_yz,
4210 : : &last_conflicts_yz, 2);
4211 : 0 : niter = MIN (niter_x, niter_z);
4212 : 0 : niter = MIN (niter_y, niter);
4213 : 0 : compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4214 : : &overlaps_a_xyz,
4215 : : &overlaps_b_xyz,
4216 : : &last_conflicts_xyz, 3);
4217 : :
4218 : 0 : xz_p = !integer_zerop (last_conflicts_xz);
4219 : 0 : yz_p = !integer_zerop (last_conflicts_yz);
4220 : 0 : xyz_p = !integer_zerop (last_conflicts_xyz);
4221 : :
4222 : 0 : if (xz_p || yz_p || xyz_p)
4223 : : {
4224 : 0 : ova1 = affine_fn_cst (integer_zero_node);
4225 : 0 : ova2 = affine_fn_cst (integer_zero_node);
4226 : 0 : ovb = affine_fn_cst (integer_zero_node);
4227 : 0 : if (xz_p)
4228 : : {
4229 : 0 : affine_fn t0 = ova1;
4230 : 0 : affine_fn t2 = ovb;
4231 : :
4232 : 0 : ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4233 : 0 : ovb = affine_fn_plus (ovb, overlaps_b_xz);
4234 : 0 : affine_fn_free (t0);
4235 : 0 : affine_fn_free (t2);
4236 : 0 : *last_conflicts = last_conflicts_xz;
4237 : : }
4238 : 0 : if (yz_p)
4239 : : {
4240 : 0 : affine_fn t0 = ova2;
4241 : 0 : affine_fn t2 = ovb;
4242 : :
4243 : 0 : ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4244 : 0 : ovb = affine_fn_plus (ovb, overlaps_b_yz);
4245 : 0 : affine_fn_free (t0);
4246 : 0 : affine_fn_free (t2);
4247 : 0 : *last_conflicts = last_conflicts_yz;
4248 : : }
4249 : 0 : if (xyz_p)
4250 : : {
4251 : 0 : affine_fn t0 = ova1;
4252 : 0 : affine_fn t2 = ova2;
4253 : 0 : affine_fn t4 = ovb;
4254 : :
4255 : 0 : ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4256 : 0 : ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4257 : 0 : ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4258 : 0 : affine_fn_free (t0);
4259 : 0 : affine_fn_free (t2);
4260 : 0 : affine_fn_free (t4);
4261 : 0 : *last_conflicts = last_conflicts_xyz;
4262 : : }
4263 : 0 : *overlaps_a = conflict_fn (2, ova1, ova2);
4264 : 0 : *overlaps_b = conflict_fn (1, ovb);
4265 : 0 : }
4266 : : else
4267 : : {
4268 : 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4269 : 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4270 : 0 : *last_conflicts = integer_zero_node;
4271 : : }
4272 : :
4273 : 0 : affine_fn_free (overlaps_a_xz);
4274 : 0 : affine_fn_free (overlaps_b_xz);
4275 : 0 : affine_fn_free (overlaps_a_yz);
4276 : 0 : affine_fn_free (overlaps_b_yz);
4277 : 0 : affine_fn_free (overlaps_a_xyz);
4278 : 0 : affine_fn_free (overlaps_b_xyz);
4279 : : }
4280 : :
4281 : : /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4282 : :
4283 : : static void
4284 : 1769964 : lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4285 : : int size)
4286 : : {
4287 : 1769964 : memcpy (vec2, vec1, size * sizeof (*vec1));
4288 : 0 : }
4289 : :
4290 : : /* Copy the elements of M x N matrix MAT1 to MAT2. */
4291 : :
4292 : : static void
4293 : 863447 : lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4294 : : int m, int n)
4295 : : {
4296 : 863447 : int i;
4297 : :
4298 : 2590341 : for (i = 0; i < m; i++)
4299 : 1726894 : lambda_vector_copy (mat1[i], mat2[i], n);
4300 : 863447 : }
4301 : :
4302 : : /* Store the N x N identity matrix in MAT. */
4303 : :
4304 : : static void
4305 : 863447 : lambda_matrix_id (lambda_matrix mat, int size)
4306 : : {
4307 : 863447 : int i, j;
4308 : :
4309 : 2590341 : for (i = 0; i < size; i++)
4310 : 5180682 : for (j = 0; j < size; j++)
4311 : 5180682 : mat[i][j] = (i == j) ? 1 : 0;
4312 : 863447 : }
4313 : :
4314 : : /* Return the index of the first nonzero element of vector VEC1 between
4315 : : START and N. We must have START <= N.
4316 : : Returns N if VEC1 is the zero vector. */
4317 : :
4318 : : static int
4319 : 863447 : lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4320 : : {
4321 : 863447 : int j = start;
4322 : 863447 : while (j < n && vec1[j] == 0)
4323 : 0 : j++;
4324 : 863447 : return j;
4325 : : }
4326 : :
4327 : : /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4328 : : R2 = R2 + CONST1 * R1. */
4329 : :
4330 : : static bool
4331 : 1727156 : lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4332 : : lambda_int const1)
4333 : : {
4334 : 1727156 : int i;
4335 : :
4336 : 1727156 : if (const1 == 0)
4337 : : return true;
4338 : :
4339 : 4317325 : for (i = 0; i < n; i++)
4340 : : {
4341 : 2590395 : bool ovf;
4342 : 2590395 : lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4343 : 2590395 : if (ovf)
4344 : 1727156 : return false;
4345 : 2590395 : lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4346 : 2590395 : if (ovf || tem2 == HOST_WIDE_INT_MIN)
4347 : : return false;
4348 : 2590395 : mat[r2][i] = tem2;
4349 : : }
4350 : :
4351 : : return true;
4352 : : }
4353 : :
4354 : : /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4355 : : and store the result in VEC2. */
4356 : :
4357 : : static void
4358 : 856262 : lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4359 : : int size, lambda_int const1)
4360 : : {
4361 : 856262 : int i;
4362 : :
4363 : 856262 : if (const1 == 0)
4364 : 0 : lambda_vector_clear (vec2, size);
4365 : : else
4366 : 2568786 : for (i = 0; i < size; i++)
4367 : 1712524 : vec2[i] = const1 * vec1[i];
4368 : 856262 : }
4369 : :
4370 : : /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4371 : :
4372 : : static void
4373 : 856262 : lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4374 : : int size)
4375 : : {
4376 : 0 : lambda_vector_mult_const (vec1, vec2, size, -1);
4377 : 0 : }
4378 : :
4379 : : /* Negate row R1 of matrix MAT which has N columns. */
4380 : :
4381 : : static void
4382 : 856262 : lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4383 : : {
4384 : 0 : lambda_vector_negate (mat[r1], mat[r1], n);
4385 : 856262 : }
4386 : :
4387 : : /* Return true if two vectors are equal. */
4388 : :
4389 : : static bool
4390 : 323058 : lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4391 : : {
4392 : 323058 : int i;
4393 : 324327 : for (i = 0; i < size; i++)
4394 : 324023 : if (vec1[i] != vec2[i])
4395 : : return false;
4396 : : return true;
4397 : : }
4398 : :
4399 : : /* Given an M x N integer matrix A, this function determines an M x
4400 : : M unimodular matrix U, and an M x N echelon matrix S such that
4401 : : "U.A = S". This decomposition is also known as "right Hermite".
4402 : :
4403 : : Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4404 : : Restructuring Compilers" Utpal Banerjee. */
4405 : :
4406 : : static bool
4407 : 863447 : lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4408 : : lambda_matrix S, lambda_matrix U)
4409 : : {
4410 : 863447 : int i, j, i0 = 0;
4411 : :
4412 : 863447 : lambda_matrix_copy (A, S, m, n);
4413 : 863447 : lambda_matrix_id (U, m);
4414 : :
4415 : 1726894 : for (j = 0; j < n; j++)
4416 : : {
4417 : 1726894 : if (lambda_vector_first_nz (S[j], m, i0) < m)
4418 : : {
4419 : 863447 : ++i0;
4420 : 1726894 : for (i = m - 1; i >= i0; i--)
4421 : : {
4422 : 1727025 : while (S[i][j] != 0)
4423 : : {
4424 : 863578 : lambda_int factor, a, b;
4425 : :
4426 : 863578 : a = S[i-1][j];
4427 : 863578 : b = S[i][j];
4428 : 863578 : gcc_assert (a != HOST_WIDE_INT_MIN);
4429 : 863578 : factor = a / b;
4430 : :
4431 : 863578 : if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4432 : : return false;
4433 : 863578 : std::swap (S[i], S[i-1]);
4434 : :
4435 : 863578 : if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4436 : : return false;
4437 : 863578 : std::swap (U[i], U[i-1]);
4438 : : }
4439 : : }
4440 : : }
4441 : : }
4442 : :
4443 : : return true;
4444 : : }
4445 : :
4446 : : /* Determines the overlapping elements due to accesses CHREC_A and
4447 : : CHREC_B, that are affine functions, and sets *OVERLAPS_A, *OVERLAPS_B and *LAST_CONFLICTS accordingly. This function cannot handle
4448 : : symbolic evolution functions, i.e. when initial conditions are
4449 : : parameters, because it uses lambda matrices of integers. */
4450 : :
4451 : : static void
4452 : 863509 : analyze_subscript_affine_affine (tree chrec_a,
4453 : : tree chrec_b,
4454 : : conflict_function **overlaps_a,
4455 : : conflict_function **overlaps_b,
4456 : : tree *last_conflicts)
4457 : : {
4458 : 863509 : unsigned nb_vars_a, nb_vars_b, dim;
4459 : 863509 : lambda_int gamma, gcd_alpha_beta;
4460 : 863509 : lambda_matrix A, U, S;
4461 : 863509 : struct obstack scratch_obstack;
4462 : :
4463 : 863509 : if (eq_evolutions_p (chrec_a, chrec_b))
4464 : : {
4465 : : /* The accessed index overlaps for each iteration in the
4466 : : loop. */
4467 : 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4468 : 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4469 : 0 : *last_conflicts = chrec_dont_know;
4470 : 0 : return;
4471 : : }
4472 : 863509 : if (dump_file && (dump_flags & TDF_DETAILS))
4473 : 17992 : fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4474 : :
4475 : : /* For determining the initial intersection, we have to solve a
4476 : : Diophantine equation. This is the most time consuming part.
4477 : :
4478 : : To answer the question "Is there a dependence?" we have
4479 : : to prove that there exists a solution to the Diophantine
4480 : : equation, and that the solution is in the iteration domain,
4481 : : i.e. the solution is positive or zero, and that the solution
4482 : : happens before the upper bound loop.nb_iterations. Otherwise
4483 : : there is no dependence. This function outputs a description of
4484 : : the iterations that hold the intersections. */
4485 : :
4486 : 863509 : nb_vars_a = nb_vars_in_chrec (chrec_a);
4487 : 863509 : nb_vars_b = nb_vars_in_chrec (chrec_b);
4488 : :
4489 : 863509 : gcc_obstack_init (&scratch_obstack);
4490 : :
4491 : 863509 : dim = nb_vars_a + nb_vars_b;
4492 : 863509 : U = lambda_matrix_new (dim, dim, &scratch_obstack);
4493 : 863509 : A = lambda_matrix_new (dim, 1, &scratch_obstack);
4494 : 863509 : S = lambda_matrix_new (dim, 1, &scratch_obstack);
4495 : :
4496 : 863509 : tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4497 : 863509 : tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1); /* MULT is -1: the steps of CHREC_B are stored negated in A. */
4498 : 863509 : if (init_a == chrec_dont_know
4499 : 863505 : || init_b == chrec_dont_know)
4500 : : {
4501 : 4 : if (dump_file && (dump_flags & TDF_DETAILS))
4502 : 0 : fprintf (dump_file, "affine-affine test failed: "
4503 : : "representation issue.\n");
4504 : 4 : *overlaps_a = conflict_fn_not_known ();
4505 : 4 : *overlaps_b = conflict_fn_not_known ();
4506 : 4 : *last_conflicts = chrec_dont_know;
4507 : 4 : goto end_analyze_subs_aa;
4508 : : }
4509 : 863505 : gamma = int_cst_value (init_b) - int_cst_value (init_a);
4510 : :
4511 : : /* Don't do all the hard work of solving the Diophantine equation
4512 : : when we already know the solution: for example,
4513 : : | {3, +, 1}_1
4514 : : | {3, +, 4}_2
4515 : : | gamma = 3 - 3 = 0.
4516 : : Then the first overlap occurs during the first iterations:
4517 : : | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4518 : : */
4519 : 863505 : if (gamma == 0)
4520 : : {
4521 : 58 : if (nb_vars_a == 1 && nb_vars_b == 1)
4522 : : {
4523 : 58 : HOST_WIDE_INT step_a, step_b;
4524 : 58 : HOST_WIDE_INT niter, niter_a, niter_b;
4525 : 58 : affine_fn ova, ovb;
4526 : :
4527 : 58 : niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4528 : 58 : niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4529 : 58 : niter = MIN (niter_a, niter_b);
4530 : 58 : step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4531 : 58 : step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4532 : :
4533 : 58 : compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4534 : : &ova, &ovb,
4535 : : last_conflicts, 1);
4536 : 58 : *overlaps_a = conflict_fn (1, ova);
4537 : 58 : *overlaps_b = conflict_fn (1, ovb);
4538 : : }
4539 : :
4540 : 0 : else if (nb_vars_a == 2 && nb_vars_b == 1)
4541 : 0 : compute_overlap_steps_for_affine_1_2
4542 : 0 : (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4543 : :
4544 : 0 : else if (nb_vars_a == 1 && nb_vars_b == 2)
4545 : 0 : compute_overlap_steps_for_affine_1_2
4546 : 0 : (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4547 : :
4548 : : else
4549 : : {
4550 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4551 : 0 : fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4552 : 0 : *overlaps_a = conflict_fn_not_known ();
4553 : 0 : *overlaps_b = conflict_fn_not_known ();
4554 : 0 : *last_conflicts = chrec_dont_know;
4555 : : }
4556 : 58 : goto end_analyze_subs_aa;
4557 : : }
4558 : :
4559 : : /* U.A = S. A false return means a row operation overflowed, so answer "don't know". */
4560 : 863447 : if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4561 : : {
4562 : 0 : *overlaps_a = conflict_fn_not_known ();
4563 : 0 : *overlaps_b = conflict_fn_not_known ();
4564 : 0 : *last_conflicts = chrec_dont_know;
4565 : 0 : goto end_analyze_subs_aa;
4566 : : }
4567 : :
4568 : 863447 : if (S[0][0] < 0) /* Make the leading entry non-negative, negating row 0 of U to match. */
4569 : : {
4570 : 856262 : S[0][0] *= -1;
4571 : 856262 : lambda_matrix_row_negate (U, dim, 0);
4572 : : }
4573 : 863447 : gcd_alpha_beta = S[0][0];
4574 : :
4575 : : /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4576 : : but that is a quite strange case. Instead of ICEing, answer
4577 : : don't know. */
4578 : 863447 : if (gcd_alpha_beta == 0)
4579 : : {
4580 : 0 : *overlaps_a = conflict_fn_not_known ();
4581 : 0 : *overlaps_b = conflict_fn_not_known ();
4582 : 0 : *last_conflicts = chrec_dont_know;
4583 : 0 : goto end_analyze_subs_aa;
4584 : : }
4585 : :
4586 : : /* The classic "gcd-test". */
4587 : 863447 : if (!int_divides_p (gcd_alpha_beta, gamma))
4588 : : {
4589 : : /* The "gcd-test" has determined that there is no integer
4590 : : solution, i.e. there is no dependence. */
4591 : 755371 : *overlaps_a = conflict_fn_no_dependence ();
4592 : 755371 : *overlaps_b = conflict_fn_no_dependence ();
4593 : 755371 : *last_conflicts = integer_zero_node;
4594 : : }
4595 : :
4596 : : /* Both access functions are univariate. This includes SIV and MIV cases. */
4597 : 108076 : else if (nb_vars_a == 1 && nb_vars_b == 1)
4598 : : {
4599 : : /* Both functions should have the same evolution sign. */
4600 : 108076 : if (((A[0][0] > 0 && -A[1][0] > 0)
4601 : 3550 : || (A[0][0] < 0 && -A[1][0] < 0)))
4602 : : {
4603 : : /* The solutions are given by:
4604 : : |
4605 : : | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
4606 : : | [u21 u22] [y0]
4607 : :
4608 : : For a given integer t. Using the following variables,
4609 : :
4610 : : | i0 = u11 * gamma / gcd_alpha_beta
4611 : : | j0 = u12 * gamma / gcd_alpha_beta
4612 : : | i1 = u21
4613 : : | j1 = u22
4614 : :
4615 : : the solutions are:
4616 : :
4617 : : | x0 = i0 + i1 * t,
4618 : : | y0 = j0 + j1 * t. */
4619 : 107719 : HOST_WIDE_INT i0, j0, i1, j1;
4620 : :
4621 : 107719 : i0 = U[0][0] * gamma / gcd_alpha_beta;
4622 : 107719 : j0 = U[0][1] * gamma / gcd_alpha_beta;
4623 : 107719 : i1 = U[1][0];
4624 : 107719 : j1 = U[1][1];
4625 : :
4626 : 107719 : if ((i1 == 0 && i0 < 0)
4627 : 107719 : || (j1 == 0 && j0 < 0))
4628 : : {
4629 : : /* There is no solution.
4630 : : FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4631 : : falls in here, but for the moment we don't look at the
4632 : : upper bound of the iteration domain. */
4633 : 0 : *overlaps_a = conflict_fn_no_dependence ();
4634 : 0 : *overlaps_b = conflict_fn_no_dependence ();
4635 : 0 : *last_conflicts = integer_zero_node;
4636 : 55406 : goto end_analyze_subs_aa;
4637 : : }
4638 : :
4639 : 107719 : if (i1 > 0 && j1 > 0)
4640 : : {
4641 : 107719 : HOST_WIDE_INT niter_a
4642 : 107719 : = max_stmt_executions_int (get_chrec_loop (chrec_a));
4643 : 107719 : HOST_WIDE_INT niter_b
4644 : 107719 : = max_stmt_executions_int (get_chrec_loop (chrec_b));
4645 : 107719 : HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4646 : :
4647 : : /* (X0, Y0) is a solution of the Diophantine equation:
4648 : : "chrec_a (X0) = chrec_b (Y0)". */
4649 : 107719 : HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4650 : : CEIL (-j0, j1));
4651 : 107719 : HOST_WIDE_INT x0 = i1 * tau1 + i0;
4652 : 107719 : HOST_WIDE_INT y0 = j1 * tau1 + j0;
4653 : :
4654 : : /* (X1, Y1) is the smallest positive solution of the eq
4655 : : "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4656 : : first conflict occurs. */
4657 : 107719 : HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4658 : 107719 : HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4659 : 107719 : HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4660 : :
4661 : 107719 : if (niter > 0)
4662 : : {
4663 : : /* If the overlap occurs outside of the bounds of the
4664 : : loop, there is no dependence. */
4665 : 101542 : if (x1 >= niter_a || y1 >= niter_b)
4666 : : {
4667 : 55406 : *overlaps_a = conflict_fn_no_dependence ();
4668 : 55406 : *overlaps_b = conflict_fn_no_dependence ();
4669 : 55406 : *last_conflicts = integer_zero_node;
4670 : 55406 : goto end_analyze_subs_aa;
4671 : : }
4672 : :
4673 : : /* max stmt executions can get quite large, avoid
4674 : : overflows by using wide ints here. */
4675 : 46136 : widest_int tau2
4676 : 92272 : = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4677 : 138408 : wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4678 : 46136 : widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4679 : 46136 : if (wi::min_precision (last_conflict, SIGNED)
4680 : 46136 : <= TYPE_PRECISION (integer_type_node))
4681 : 43759 : *last_conflicts
4682 : 43759 : = build_int_cst (integer_type_node,
4683 : 43759 : last_conflict.to_shwi ());
4684 : : else
4685 : 2377 : *last_conflicts = chrec_dont_know;
4686 : 46136 : }
4687 : : else
4688 : 6177 : *last_conflicts = chrec_dont_know;
4689 : :
4690 : 52313 : *overlaps_a
4691 : 52313 : = conflict_fn (1,
4692 : 52313 : affine_fn_univar (build_int_cst (NULL_TREE, x1),
4693 : : 1,
4694 : 52313 : build_int_cst (NULL_TREE, i1)));
4695 : 52313 : *overlaps_b
4696 : 52313 : = conflict_fn (1,
4697 : 52313 : affine_fn_univar (build_int_cst (NULL_TREE, y1),
4698 : : 1,
4699 : 52313 : build_int_cst (NULL_TREE, j1)));
4700 : 52313 : }
4701 : : else
4702 : : {
4703 : : /* FIXME: For the moment, the upper bound of the
4704 : : iteration domain for i and j is not checked. */
4705 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4706 : 0 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4707 : 0 : *overlaps_a = conflict_fn_not_known ();
4708 : 0 : *overlaps_b = conflict_fn_not_known ();
4709 : 0 : *last_conflicts = chrec_dont_know;
4710 : : }
4711 : 52313 : }
4712 : : else
4713 : : {
4714 : 357 : if (dump_file && (dump_flags & TDF_DETAILS))
4715 : 19 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4716 : 357 : *overlaps_a = conflict_fn_not_known ();
4717 : 357 : *overlaps_b = conflict_fn_not_known ();
4718 : 357 : *last_conflicts = chrec_dont_know;
4719 : : }
4720 : : }
4721 : : else
4722 : : {
4723 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4724 : 0 : fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4725 : 0 : *overlaps_a = conflict_fn_not_known ();
4726 : 0 : *overlaps_b = conflict_fn_not_known ();
4727 : 0 : *last_conflicts = chrec_dont_know;
4728 : : }
4729 : :
4730 : 863509 : end_analyze_subs_aa:
4731 : 863509 : obstack_free (&scratch_obstack, NULL);
4732 : 863509 : if (dump_file && (dump_flags & TDF_DETAILS))
4733 : : {
4734 : 17992 : fprintf (dump_file, " (overlaps_a = ");
4735 : 17992 : dump_conflict_function (dump_file, *overlaps_a);
4736 : 17992 : fprintf (dump_file, ")\n (overlaps_b = ");
4737 : 17992 : dump_conflict_function (dump_file, *overlaps_b);
4738 : 17992 : fprintf (dump_file, "))\n");
4739 : : }
4740 : : }
4741 : :
4742 : : /* Returns true when analyze_subscript_affine_affine can be used for
4743 : : determining the dependence relation between chrec_a and chrec_b,
4744 : : that contain symbols. This function modifies chrec_a and chrec_b
4745 : : such that the analysis result is the same, and such that they don't
4746 : : contain symbols, and then can safely be passed to the analyzer.
4747 : :
4748 : : Example: The analysis of the following tuples of evolutions produce
4749 : : the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4750 : : vs. {0, +, 1}_1
4751 : :
4752 : : {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4753 : : {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4754 : : */
4755 : :
4756 : : static bool
4757 : 53828 : can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4758 : : {
4759 : 53828 : tree diff, type, left_a, left_b, right_b;
4760 : :
4761 : 53828 : if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4762 : 53828 : || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4763 : : /* FIXME: For the moment not handled. Might be refined later. */
4764 : 12991 : return false;
4765 : :
4766 : 40837 : type = chrec_type (*chrec_a);
4767 : 40837 : left_a = CHREC_LEFT (*chrec_a);
4768 : 40837 : left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4769 : 40837 : diff = chrec_fold_minus (type, left_a, left_b);
4770 : :
4771 : 81674 : if (!evolution_function_is_constant_p (diff))
4772 : 4880 : return false;
4773 : :
4774 : 35957 : if (dump_file && (dump_flags & TDF_DETAILS))
4775 : 105 : fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4776 : :
4777 : 35957 : *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4778 : 35957 : diff, CHREC_RIGHT (*chrec_a));
4779 : 35957 : right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4780 : 35957 : *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4781 : 35957 : build_int_cst (type, 0),
4782 : : right_b);
4783 : 35957 : return true;
4784 : : }
4785 : :
4786 : : /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4787 : : *OVERLAPS_B are initialized to the functions that describe the
4788 : : relation between the elements accessed twice by CHREC_A and
4789 : : CHREC_B. For k >= 0, the following property is verified:
4790 : :
4791 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4792 : :
4793 : : static void
4794 : 883902 : analyze_siv_subscript (tree chrec_a,
4795 : : tree chrec_b,
4796 : : conflict_function **overlaps_a,
4797 : : conflict_function **overlaps_b,
4798 : : tree *last_conflicts,
4799 : : int loop_nest_num)
4800 : : {
4801 : 883902 : dependence_stats.num_siv++;
4802 : :
4803 : 883902 : if (dump_file && (dump_flags & TDF_DETAILS))
4804 : 21035 : fprintf (dump_file, "(analyze_siv_subscript \n");
4805 : :
4806 : 883902 : if (evolution_function_is_constant_p (chrec_a)
4807 : 883902 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4808 : 1423 : analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4809 : : overlaps_a, overlaps_b, last_conflicts);
4810 : :
4811 : 882479 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4812 : 1764958 : && evolution_function_is_constant_p (chrec_b))
4813 : 1163 : analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4814 : : overlaps_b, overlaps_a, last_conflicts);
4815 : :
4816 : 881316 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4817 : 881316 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4818 : : {
4819 : 881316 : if (!chrec_contains_symbols (chrec_a)
4820 : 881316 : && !chrec_contains_symbols (chrec_b))
4821 : : {
4822 : 827488 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4823 : : overlaps_a, overlaps_b,
4824 : : last_conflicts);
4825 : :
4826 : 827488 : if (CF_NOT_KNOWN_P (*overlaps_a)
4827 : 827139 : || CF_NOT_KNOWN_P (*overlaps_b))
4828 : 349 : dependence_stats.num_siv_unimplemented++;
4829 : 827139 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4830 : 51341 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4831 : 775798 : dependence_stats.num_siv_independent++;
4832 : : else
4833 : 51341 : dependence_stats.num_siv_dependent++;
4834 : : }
4835 : 53828 : else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4836 : : &chrec_b))
4837 : : {
4838 : 35957 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4839 : : overlaps_a, overlaps_b,
4840 : : last_conflicts);
4841 : :
4842 : 35957 : if (CF_NOT_KNOWN_P (*overlaps_a)
4843 : 35949 : || CF_NOT_KNOWN_P (*overlaps_b))
4844 : 8 : dependence_stats.num_siv_unimplemented++;
4845 : 35949 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4846 : 980 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4847 : 34969 : dependence_stats.num_siv_independent++;
4848 : : else
4849 : 980 : dependence_stats.num_siv_dependent++;
4850 : : }
4851 : : else
4852 : 17871 : goto siv_subscript_dontknow;
4853 : : }
4854 : :
4855 : : else
4856 : : {
4857 : 17871 : siv_subscript_dontknow:;
4858 : 17871 : if (dump_file && (dump_flags & TDF_DETAILS))
4859 : 2885 : fprintf (dump_file, " siv test failed: unimplemented");
4860 : 17871 : *overlaps_a = conflict_fn_not_known ();
4861 : 17871 : *overlaps_b = conflict_fn_not_known ();
4862 : 17871 : *last_conflicts = chrec_dont_know;
4863 : 17871 : dependence_stats.num_siv_unimplemented++;
4864 : : }
4865 : :
4866 : 883902 : if (dump_file && (dump_flags & TDF_DETAILS))
4867 : 21035 : fprintf (dump_file, ")\n");
4868 : 883902 : }
4869 : :
4870 : : /* Returns false if we can prove that the greatest common divisor of the steps
4871 : : of CHREC does not divide CST, false otherwise. */
4872 : :
4873 : : static bool
4874 : 20660 : gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4875 : : {
4876 : 20660 : HOST_WIDE_INT cd = 0, val;
4877 : 20660 : tree step;
4878 : :
4879 : 20660 : if (!tree_fits_shwi_p (cst))
4880 : : return true;
4881 : 20660 : val = tree_to_shwi (cst);
4882 : :
4883 : 61834 : while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4884 : : {
4885 : 41318 : step = CHREC_RIGHT (chrec);
4886 : 41318 : if (!tree_fits_shwi_p (step))
4887 : : return true;
4888 : 41174 : cd = gcd (cd, tree_to_shwi (step));
4889 : 41174 : chrec = CHREC_LEFT (chrec);
4890 : : }
4891 : :
4892 : 20516 : return val % cd == 0;
4893 : : }
4894 : :
4895 : : /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4896 : : LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4897 : : functions that describe the relation between the elements accessed
4898 : : twice by CHREC_A and CHREC_B. For k >= 0, the following property
4899 : : is verified:
4900 : :
4901 : : CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4902 : :
4903 : : static void
4904 : 25996 : analyze_miv_subscript (tree chrec_a,
4905 : : tree chrec_b,
4906 : : conflict_function **overlaps_a,
4907 : : conflict_function **overlaps_b,
4908 : : tree *last_conflicts,
4909 : : class loop *loop_nest)
4910 : : {
4911 : 25996 : tree type, difference;
4912 : :
4913 : 25996 : dependence_stats.num_miv++;
4914 : 25996 : if (dump_file && (dump_flags & TDF_DETAILS))
4915 : 27 : fprintf (dump_file, "(analyze_miv_subscript \n");
4916 : :
4917 : 25996 : type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4918 : 25996 : chrec_a = chrec_convert (type, chrec_a, NULL);
4919 : 25996 : chrec_b = chrec_convert (type, chrec_b, NULL);
4920 : 25996 : difference = chrec_fold_minus (type, chrec_a, chrec_b);
4921 : :
4922 : 25996 : if (eq_evolutions_p (chrec_a, chrec_b))
4923 : : {
4924 : : /* Access functions are the same: all the elements are accessed
4925 : : in the same order. */
4926 : 0 : *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4927 : 0 : *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4928 : 0 : *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4929 : 0 : dependence_stats.num_miv_dependent++;
4930 : : }
4931 : :
4932 : 25996 : else if (evolution_function_is_constant_p (difference)
4933 : 20690 : && evolution_function_is_affine_multivariate_p (chrec_a,
4934 : : loop_nest->num)
4935 : 46656 : && !gcd_of_steps_may_divide_p (chrec_a, difference))
4936 : : {
4937 : : /* testsuite/.../ssa-chrec-33.c
4938 : : {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4939 : :
4940 : : The difference is 1, and all the evolution steps are multiples
4941 : : of 2, consequently there are no overlapping elements. */
4942 : 19670 : *overlaps_a = conflict_fn_no_dependence ();
4943 : 19670 : *overlaps_b = conflict_fn_no_dependence ();
4944 : 19670 : *last_conflicts = integer_zero_node;
4945 : 19670 : dependence_stats.num_miv_independent++;
4946 : : }
4947 : :
4948 : 6326 : else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4949 : 100 : && !chrec_contains_symbols (chrec_a, loop_nest)
4950 : 91 : && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4951 : 6390 : && !chrec_contains_symbols (chrec_b, loop_nest))
4952 : : {
4953 : : /* testsuite/.../ssa-chrec-35.c
4954 : : {0, +, 1}_2 vs. {0, +, 1}_3
4955 : : the overlapping elements are respectively located at iterations:
4956 : : {0, +, 1}_x and {0, +, 1}_x,
4957 : : in other words, we have the equality:
4958 : : {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4959 : :
4960 : : Other examples:
4961 : : {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4962 : : {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4963 : :
4964 : : {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4965 : : {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4966 : : */
4967 : 64 : analyze_subscript_affine_affine (chrec_a, chrec_b,
4968 : : overlaps_a, overlaps_b, last_conflicts);
4969 : :
4970 : 64 : if (CF_NOT_KNOWN_P (*overlaps_a)
4971 : 60 : || CF_NOT_KNOWN_P (*overlaps_b))
4972 : 4 : dependence_stats.num_miv_unimplemented++;
4973 : 60 : else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4974 : 50 : || CF_NO_DEPENDENCE_P (*overlaps_b))
4975 : 10 : dependence_stats.num_miv_independent++;
4976 : : else
4977 : 50 : dependence_stats.num_miv_dependent++;
4978 : : }
4979 : :
4980 : : else
4981 : : {
4982 : : /* When the analysis is too difficult, answer "don't know". */
4983 : 6262 : if (dump_file && (dump_flags & TDF_DETAILS))
4984 : 23 : fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4985 : :
4986 : 6262 : *overlaps_a = conflict_fn_not_known ();
4987 : 6262 : *overlaps_b = conflict_fn_not_known ();
4988 : 6262 : *last_conflicts = chrec_dont_know;
4989 : 6262 : dependence_stats.num_miv_unimplemented++;
4990 : : }
4991 : :
4992 : 25996 : if (dump_file && (dump_flags & TDF_DETAILS))
4993 : 27 : fprintf (dump_file, ")\n");
4994 : 25996 : }
4995 : :
4996 : : /* Determines the iterations for which CHREC_A is equal to CHREC_B
4997 : : with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
4998 : : OVERLAP_ITERATIONS_B are initialized with two functions that
4999 : : describe the iterations that contain conflicting elements.
5000 : :
5001 : : Remark: For an integer k >= 0, the following equality is true:
5002 : :
5003 : : CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
5004 : : */
5005 : :
5006 : : static void
5007 : 4047168 : analyze_overlapping_iterations (tree chrec_a,
5008 : : tree chrec_b,
5009 : : conflict_function **overlap_iterations_a,
5010 : : conflict_function **overlap_iterations_b,
5011 : : tree *last_conflicts, class loop *loop_nest)
5012 : : {
5013 : 4047168 : unsigned int lnn = loop_nest->num;
5014 : :
5015 : 4047168 : dependence_stats.num_subscript_tests++;
5016 : :
5017 : 4047168 : if (dump_file && (dump_flags & TDF_DETAILS))
5018 : : {
5019 : 54197 : fprintf (dump_file, "(analyze_overlapping_iterations \n");
5020 : 54197 : fprintf (dump_file, " (chrec_a = ");
5021 : 54197 : print_generic_expr (dump_file, chrec_a);
5022 : 54197 : fprintf (dump_file, ")\n (chrec_b = ");
5023 : 54197 : print_generic_expr (dump_file, chrec_b);
5024 : 54197 : fprintf (dump_file, ")\n");
5025 : : }
5026 : :
5027 : 4047168 : if (chrec_a == NULL_TREE
5028 : 4047168 : || chrec_b == NULL_TREE
5029 : 4047168 : || chrec_contains_undetermined (chrec_a)
5030 : 8094336 : || chrec_contains_undetermined (chrec_b))
5031 : : {
5032 : 0 : dependence_stats.num_subscript_undetermined++;
5033 : :
5034 : 0 : *overlap_iterations_a = conflict_fn_not_known ();
5035 : 0 : *overlap_iterations_b = conflict_fn_not_known ();
5036 : : }
5037 : :
5038 : : /* If they are the same chrec, and are either affine or compare equal
5039 : : with operand_equal_p, they overlap on every iteration. */
5040 : 4047168 : else if (eq_evolutions_p (chrec_a, chrec_b)
5041 : 4047168 : && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5042 : 769481 : || operand_equal_p (chrec_a, chrec_b, 0)))
5043 : : {
5044 : 1383350 : dependence_stats.num_same_subscript_function++;
5045 : 1383350 : *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
5046 : 1383350 : *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
5047 : 1383350 : *last_conflicts = chrec_dont_know;
5048 : : }
5049 : :
5050 : : /* If either contains symbols and they aren't both affine, we can't
5051 : : do anything yet. */
5052 : 2663818 : else if ((chrec_contains_symbols (chrec_a)
5053 : 2602332 : || chrec_contains_symbols (chrec_b))
5054 : 2664639 : && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5055 : 60153 : || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5056 : : {
5057 : 2430 : dependence_stats.num_subscript_undetermined++;
5058 : 2430 : *overlap_iterations_a = conflict_fn_not_known ();
5059 : 2430 : *overlap_iterations_b = conflict_fn_not_known ();
5060 : : }
5061 : :
5062 : 2661388 : else if (ziv_subscript_p (chrec_a, chrec_b))
5063 : 1751490 : analyze_ziv_subscript (chrec_a, chrec_b,
5064 : : overlap_iterations_a, overlap_iterations_b,
5065 : : last_conflicts);
5066 : :
5067 : 909898 : else if (siv_subscript_p (chrec_a, chrec_b))
5068 : 883902 : analyze_siv_subscript (chrec_a, chrec_b,
5069 : : overlap_iterations_a, overlap_iterations_b,
5070 : : last_conflicts, lnn);
5071 : :
5072 : : else
5073 : 25996 : analyze_miv_subscript (chrec_a, chrec_b,
5074 : : overlap_iterations_a, overlap_iterations_b,
5075 : : last_conflicts, loop_nest);
5076 : :
5077 : 4047168 : if (dump_file && (dump_flags & TDF_DETAILS))
5078 : : {
5079 : 54197 : fprintf (dump_file, " (overlap_iterations_a = ");
5080 : 54197 : dump_conflict_function (dump_file, *overlap_iterations_a);
5081 : 54197 : fprintf (dump_file, ")\n (overlap_iterations_b = ");
5082 : 54197 : dump_conflict_function (dump_file, *overlap_iterations_b);
5083 : 54197 : fprintf (dump_file, "))\n");
5084 : : }
5085 : 4047168 : }
5086 : :
5087 : : /* Helper function for uniquely inserting distance vectors. */
5088 : :
5089 : : static void
5090 : 967862 : save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5091 : : {
5092 : 1451791 : for (lambda_vector v : DDR_DIST_VECTS (ddr))
5093 : 485499 : if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5094 : : return;
5095 : :
5096 : 967558 : DDR_DIST_VECTS (ddr).safe_push (dist_v);
5097 : : }
5098 : :
5099 : : /* Helper function for uniquely inserting direction vectors. */
5100 : :
5101 : : static void
5102 : 967558 : save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5103 : : {
5104 : 1450575 : for (lambda_vector v : DDR_DIR_VECTS (ddr))
5105 : 483675 : if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5106 : : return;
5107 : :
5108 : 967558 : DDR_DIR_VECTS (ddr).safe_push (dir_v);
5109 : : }
5110 : :
5111 : : /* Add a distance of 1 on all the loops outer than INDEX. If we
5112 : : haven't yet determined a distance for this outer loop, push a new
5113 : : distance vector composed of the previous distance, and a distance
5114 : : of 1 for this outer loop. Example:
5115 : :
5116 : : | loop_1
5117 : : | loop_2
5118 : : | A[10]
5119 : : | endloop_2
5120 : : | endloop_1
5121 : :
5122 : : Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5123 : : save (0, 1), then we have to save (1, 0). */
5124 : :
5125 : : static void
5126 : 16616 : add_outer_distances (struct data_dependence_relation *ddr,
5127 : : lambda_vector dist_v, int index)
5128 : : {
5129 : : /* For each outer loop where init_v is not set, the accesses are
5130 : : in dependence of distance 1 in the loop. */
5131 : 19910 : while (--index >= 0)
5132 : : {
5133 : 6588 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5134 : 3294 : lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5135 : 3294 : save_v[index] = 1;
5136 : 3294 : save_dist_v (ddr, save_v);
5137 : : }
5138 : 16616 : }
5139 : :
5140 : : /* Return false when failing to represent the data dependence as a
5141 : : distance vector. A_INDEX is the index of the first reference
5142 : : (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5143 : : second reference. INIT_B is set to true when a component has been
5144 : : added to the distance vector DIST_V. INDEX_CARRY is then set to
5145 : : the index in DIST_V that carries the dependence. */
5146 : :
5147 : : static bool
5148 : 53606 : build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5149 : : unsigned int a_index, unsigned int b_index,
5150 : : lambda_vector dist_v, bool *init_b,
5151 : : int *index_carry)
5152 : : {
5153 : 53606 : unsigned i;
5154 : 107212 : lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5155 : 53606 : class loop *loop = DDR_LOOP_NEST (ddr)[0];
5156 : :
5157 : 120170 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5158 : : {
5159 : 68422 : tree access_fn_a, access_fn_b;
5160 : 68422 : struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5161 : :
5162 : 68422 : if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5163 : : {
5164 : 304 : non_affine_dependence_relation (ddr);
5165 : 304 : return false;
5166 : : }
5167 : :
5168 : 68118 : access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5169 : 68118 : access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5170 : :
5171 : 68118 : if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5172 : 52495 : && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5173 : : {
5174 : 51930 : HOST_WIDE_INT dist;
5175 : 51930 : int index;
5176 : 51930 : int var_a = CHREC_VARIABLE (access_fn_a);
5177 : 51930 : int var_b = CHREC_VARIABLE (access_fn_b);
5178 : :
5179 : 51930 : if (var_a != var_b
5180 : 51930 : || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5181 : : {
5182 : 34 : non_affine_dependence_relation (ddr);
5183 : 34 : return false;
5184 : : }
5185 : :
5186 : : /* When data references are collected in a loop while data
5187 : : dependences are analyzed in a loop nest nested in the loop, we
5188 : : would have more access functions than loops. Skip the access
5189 : : functions of loops not in the loop nest.
5190 : :
5191 : : See PR89725 for more information. */
5192 : 51896 : if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5193 : 2 : continue;
5194 : :
5195 : 51894 : dist = int_cst_value (SUB_DISTANCE (subscript));
5196 : 51894 : index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5197 : 51894 : *index_carry = MIN (index, *index_carry);
5198 : :
5199 : : /* This is the subscript coupling test. If we have already
5200 : : recorded a distance for this loop (a distance coming from
5201 : : another subscript), it should be the same. For example,
5202 : : in the following code, there is no dependence:
5203 : :
5204 : : | loop i = 0, N, 1
5205 : : | T[i+1][i] = ...
5206 : : | ... = T[i][i]
5207 : : | endloop
5208 : : */
5209 : 51894 : if (init_v[index] != 0 && dist_v[index] != dist)
5210 : : {
5211 : 0 : finalize_ddr_dependent (ddr, chrec_known);
5212 : 0 : return false;
5213 : : }
5214 : :
5215 : 51894 : dist_v[index] = dist;
5216 : 51894 : init_v[index] = 1;
5217 : 51894 : *init_b = true;
5218 : 51894 : }
5219 : 16188 : else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5220 : : {
5221 : : /* This can be for example an affine vs. constant dependence
5222 : : (T[i] vs. T[3]) that is not an affine dependence and is
5223 : : not representable as a distance vector. */
5224 : 1520 : non_affine_dependence_relation (ddr);
5225 : 1520 : return false;
5226 : : }
5227 : : }
5228 : :
5229 : : return true;
5230 : : }
5231 : :
5232 : : /* Return true when the DDR contains only access functions that are
5233 : : invariant with respect to loop number LNUM. */
5234 : :
5235 : : static bool
5236 : 760952 : invariant_access_functions (const struct data_dependence_relation *ddr,
5237 : : int lnum)
5238 : : {
5239 : 2581971 : for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5240 : 904480 : if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5241 : 904480 : || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5242 : 605365 : return false;
5243 : :
5244 : : return true;
5245 : : }
5246 : :
5247 : : /* Helper function for the case where DDR_A and DDR_B are the same
5248 : : multivariate access function with a constant step. For an example
5249 : : see pr34635-1.c. */
5250 : :
5251 : : static void
5252 : 4284 : add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5253 : : {
5254 : 4284 : int x_1, x_2;
5255 : 4284 : tree c_1 = CHREC_LEFT (c_2);
5256 : 4284 : tree c_0 = CHREC_LEFT (c_1);
5257 : 4284 : lambda_vector dist_v;
5258 : 4284 : HOST_WIDE_INT v1, v2, cd;
5259 : :
5260 : : /* Polynomials with more than 2 variables are not handled yet. When
5261 : : the evolution steps are parameters, it is not possible to
5262 : : represent the dependence using classical distance vectors. */
5263 : 4284 : if (TREE_CODE (c_0) != INTEGER_CST
5264 : 2944 : || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5265 : 6611 : || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5266 : : {
5267 : 1965 : DDR_AFFINE_P (ddr) = false;
5268 : 1965 : return;
5269 : : }
5270 : :
5271 : 2319 : x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5272 : 2319 : x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5273 : :
5274 : : /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
5275 : 4638 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5276 : 2319 : v1 = int_cst_value (CHREC_RIGHT (c_1));
5277 : 2319 : v2 = int_cst_value (CHREC_RIGHT (c_2));
5278 : 2319 : cd = gcd (v1, v2);
5279 : 2319 : v1 /= cd;
5280 : 2319 : v2 /= cd;
5281 : :
5282 : 2319 : if (v2 < 0)
5283 : : {
5284 : 9 : v2 = -v2;
5285 : 9 : v1 = -v1;
5286 : : }
5287 : :
5288 : 2319 : dist_v[x_1] = v2;
5289 : 2319 : dist_v[x_2] = -v1;
5290 : 2319 : save_dist_v (ddr, dist_v);
5291 : :
5292 : 2319 : add_outer_distances (ddr, dist_v, x_1);
5293 : : }
5294 : :
5295 : : /* Helper function for the case where DDR_A and DDR_B are the same
5296 : : access functions. */
5297 : :
5298 : : static void
5299 : 18658 : add_other_self_distances (struct data_dependence_relation *ddr)
5300 : : {
5301 : 18658 : lambda_vector dist_v;
5302 : 18658 : unsigned i;
5303 : 18658 : int index_carry = DDR_NB_LOOPS (ddr);
5304 : 18658 : subscript *sub;
5305 : 18658 : class loop *loop = DDR_LOOP_NEST (ddr)[0];
5306 : :
5307 : 39974 : FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5308 : : {
5309 : 26057 : tree access_fun = SUB_ACCESS_FN (sub, 0);
5310 : :
5311 : 26057 : if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5312 : : {
5313 : 18735 : if (!evolution_function_is_univariate_p (access_fun, loop->num))
5314 : : {
5315 : 4741 : if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5316 : : {
5317 : 457 : DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5318 : 457 : return;
5319 : : }
5320 : :
5321 : 4284 : access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5322 : :
5323 : 4284 : if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5324 : 4284 : add_multivariate_self_dist (ddr, access_fun);
5325 : : else
5326 : : /* The evolution step is not constant: it varies in
5327 : : the outer loop, so this cannot be represented by a
5328 : : distance vector. For example in pr34635.c the
5329 : : evolution is {0, +, {0, +, 4}_1}_2. */
5330 : 0 : DDR_AFFINE_P (ddr) = false;
5331 : :
5332 : 4284 : return;
5333 : : }
5334 : :
5335 : : /* When data references are collected in a loop while data
5336 : : dependences are analyzed in a loop nest nested in the loop, we
5337 : : would have more access functions than loops. Skip the access
5338 : : functions of loops not in the loop nest.
5339 : :
5340 : : See PR89725 for more information. */
5341 : 13994 : if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5342 : : loop))
5343 : 0 : continue;
5344 : :
5345 : 13994 : index_carry = MIN (index_carry,
5346 : : index_in_loop_nest (CHREC_VARIABLE (access_fun),
5347 : : DDR_LOOP_NEST (ddr)));
5348 : : }
5349 : : }
5350 : :
5351 : 27834 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5352 : 13917 : add_outer_distances (ddr, dist_v, index_carry);
5353 : : }
5354 : :
5355 : : static void
5356 : 155587 : insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5357 : : {
5358 : 311174 : lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5359 : :
5360 : 155587 : dist_v[0] = 1;
5361 : 155587 : save_dist_v (ddr, dist_v);
5362 : 155587 : }
5363 : :
5364 : : /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5365 : : is the case for example when access functions are the same and
5366 : : equal to a constant, as in:
5367 : :
5368 : : | loop_1
5369 : : | A[3] = ...
5370 : : | ... = A[3]
5371 : : | endloop_1
5372 : :
5373 : : in which case the distance vectors are (0) and (1). */
5374 : :
5375 : : static void
5376 : 155587 : add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5377 : : {
5378 : 155587 : unsigned i, j;
5379 : :
5380 : 155587 : for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5381 : : {
5382 : 155587 : subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5383 : 155587 : conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5384 : 155587 : conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5385 : :
5386 : 155587 : for (j = 0; j < ca->n; j++)
5387 : 155587 : if (affine_function_zero_p (ca->fns[j]))
5388 : : {
5389 : 155587 : insert_innermost_unit_dist_vector (ddr);
5390 : 155587 : return;
5391 : : }
5392 : :
5393 : 0 : for (j = 0; j < cb->n; j++)
5394 : 0 : if (affine_function_zero_p (cb->fns[j]))
5395 : : {
5396 : 0 : insert_innermost_unit_dist_vector (ddr);
5397 : 0 : return;
5398 : : }
5399 : : }
5400 : : }
5401 : :
5402 : : /* Return true when the DDR contains two data references that have the
5403 : : same access functions. */
5404 : :
5405 : : static inline bool
5406 : 808520 : same_access_functions (const struct data_dependence_relation *ddr)
5407 : : {
5408 : 3402490 : for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5409 : 1024498 : if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5410 : 1024498 : SUB_ACCESS_FN (sub, 1)))
5411 : : return false;
5412 : :
5413 : : return true;
5414 : : }
5415 : :
5416 : : /* Compute the classic per loop distance vector. DDR is the data
5417 : : dependence relation to build a vector from. Return false when failing
5418 : : to represent the data dependence as a distance vector. */
5419 : :
5420 : : static bool
5421 : 3416821 : build_classic_dist_vector (struct data_dependence_relation *ddr,
5422 : : class loop *loop_nest)
5423 : : {
5424 : 3416821 : bool init_b = false;
5425 : 3416821 : int index_carry = DDR_NB_LOOPS (ddr);
5426 : 3416821 : lambda_vector dist_v;
5427 : :
5428 : 3416821 : if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5429 : : return false;
5430 : :
5431 : 808520 : if (same_access_functions (ddr))
5432 : : {
5433 : : /* Save the 0 vector. */
5434 : 1521904 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5435 : 760952 : save_dist_v (ddr, dist_v);
5436 : :
5437 : 760952 : if (invariant_access_functions (ddr, loop_nest->num))
5438 : 155587 : add_distance_for_zero_overlaps (ddr);
5439 : :
5440 : 760952 : if (DDR_NB_LOOPS (ddr) > 1)
5441 : 18658 : add_other_self_distances (ddr);
5442 : :
5443 : 760952 : return true;
5444 : : }
5445 : :
5446 : 95136 : dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5447 : 47568 : if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5448 : : return false;
5449 : :
5450 : : /* Save the distance vector if we initialized one. */
5451 : 45710 : if (init_b)
5452 : : {
5453 : : /* Verify a basic constraint: classic distance vectors should
5454 : : always be lexicographically positive.
5455 : :
5456 : : Data references are collected in the order of execution of
5457 : : the program, thus for the following loop
5458 : :
5459 : : | for (i = 1; i < 100; i++)
5460 : : | for (j = 1; j < 100; j++)
5461 : : | {
5462 : : | t = T[j+1][i-1]; // A
5463 : : | T[j][i] = t + 2; // B
5464 : : | }
5465 : :
5466 : : references are collected following the direction of the wind:
5467 : : A then B. The data dependence tests are performed also
5468 : : following this order, such that we're looking at the distance
5469 : : separating the elements accessed by A from the elements later
5470 : : accessed by B. But in this example, the distance returned by
5471 : : test_dep (A, B) is lexicographically negative (-1, 1), that
5472 : : means that the access A occurs later than B with respect to
5473 : : the outer loop, i.e. we're actually looking upwind. In this
5474 : : case we solve test_dep (B, A) looking downwind to the
5475 : : lexicographically positive solution, that returns the
5476 : : distance vector (1, -1). */
5477 : 91420 : if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5478 : : {
5479 : 5934 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5480 : 5934 : if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5481 : : return false;
5482 : 5934 : compute_subscript_distance (ddr);
5483 : 5934 : if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5484 : : &index_carry))
5485 : : return false;
5486 : 5934 : save_dist_v (ddr, save_v);
5487 : 5934 : DDR_REVERSED_P (ddr) = true;
5488 : :
5489 : : /* In this case there is a dependence forward for all the
5490 : : outer loops:
5491 : :
5492 : : | for (k = 1; k < 100; k++)
5493 : : | for (i = 1; i < 100; i++)
5494 : : | for (j = 1; j < 100; j++)
5495 : : | {
5496 : : | t = T[j+1][i-1]; // A
5497 : : | T[j][i] = t + 2; // B
5498 : : | }
5499 : :
5500 : : the vectors are:
5501 : : (0, 1, -1)
5502 : : (1, 1, -1)
5503 : : (1, -1, 1)
5504 : : */
5505 : 5934 : if (DDR_NB_LOOPS (ddr) > 1)
5506 : : {
5507 : 86 : add_outer_distances (ddr, save_v, index_carry);
5508 : 86 : add_outer_distances (ddr, dist_v, index_carry);
5509 : : }
5510 : : }
5511 : : else
5512 : : {
5513 : 39776 : lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5514 : 39776 : lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5515 : :
5516 : 39776 : if (DDR_NB_LOOPS (ddr) > 1)
5517 : : {
5518 : 104 : lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5519 : :
5520 : 104 : if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5521 : : return false;
5522 : 104 : compute_subscript_distance (ddr);
5523 : 104 : if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5524 : : &index_carry))
5525 : : return false;
5526 : :
5527 : 104 : save_dist_v (ddr, save_v);
5528 : 104 : add_outer_distances (ddr, dist_v, index_carry);
5529 : 104 : add_outer_distances (ddr, opposite_v, index_carry);
5530 : : }
5531 : : else
5532 : 39672 : save_dist_v (ddr, save_v);
5533 : : }
5534 : : }
5535 : : else
5536 : : {
5537 : : /* There is a distance of 1 on all the outer loops: Example:
5538 : : there is a dependence of distance 1 on loop_1 for the array A.
5539 : :
5540 : : | loop_1
5541 : : | A[5] = ...
5542 : : | endloop
5543 : : */
5544 : 0 : add_outer_distances (ddr, dist_v,
5545 : : lambda_vector_first_nz (dist_v,
5546 : 0 : DDR_NB_LOOPS (ddr), 0));
5547 : : }
5548 : :
5549 : : return true;
5550 : : }
5551 : :
5552 : : /* Return the direction for a given distance.
5553 : : FIXME: Computing dir this way is suboptimal, since dir can catch
5554 : : cases that dist is unable to represent. */
5555 : :
5556 : : static inline enum data_dependence_direction
5557 : 992108 : dir_from_dist (int dist)
5558 : : {
5559 : 992108 : if (dist > 0)
5560 : : return dir_positive;
5561 : 785474 : else if (dist < 0)
5562 : : return dir_negative;
5563 : : else
5564 : 783118 : return dir_equal;
5565 : : }
5566 : :
5567 : : /* Compute the classic per loop direction vector. DDR is the data
5568 : : dependence relation to build a vector from. */
5569 : :
5570 : : static void
5571 : 806662 : build_classic_dir_vector (struct data_dependence_relation *ddr)
5572 : : {
5573 : 806662 : unsigned i, j;
5574 : 806662 : lambda_vector dist_v;
5575 : :
5576 : 1774220 : FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5577 : : {
5578 : 1935116 : lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5579 : :
5580 : 2927224 : for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5581 : 1777582 : dir_v[j] = dir_from_dist (dist_v[j]);
5582 : :
5583 : 967558 : save_dir_v (ddr, dir_v);
5584 : : }
5585 : 806662 : }
5586 : :
5587 : : /* Helper function. Returns true when there is a dependence between the
5588 : : data references. A_INDEX is the index of the first reference (0 for
5589 : : DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
5590 : :
5591 : : static bool
5592 : 3422859 : subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5593 : : unsigned int a_index, unsigned int b_index,
5594 : : class loop *loop_nest)
5595 : : {
5596 : 3422859 : unsigned int i;
5597 : 3422859 : tree last_conflicts;
5598 : 3422859 : struct subscript *subscript;
5599 : 3422859 : tree res = NULL_TREE;
5600 : :
5601 : 4887565 : for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5602 : : {
5603 : 4047168 : conflict_function *overlaps_a, *overlaps_b;
5604 : :
5605 : 4047168 : analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5606 : : SUB_ACCESS_FN (subscript, b_index),
5607 : : &overlaps_a, &overlaps_b,
5608 : : &last_conflicts, loop_nest);
5609 : :
5610 : 4047168 : if (SUB_CONFLICTS_IN_A (subscript))
5611 : 4047168 : free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5612 : 4047168 : if (SUB_CONFLICTS_IN_B (subscript))
5613 : 4047168 : free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5614 : :
5615 : 4047168 : SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5616 : 4047168 : SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5617 : 4047168 : SUB_LAST_CONFLICT (subscript) = last_conflicts;
5618 : :
5619 : : /* If there is any undetermined conflict function we have to
5620 : : give a conservative answer in case we cannot prove that
5621 : : no dependence exists when analyzing another subscript. */
5622 : 4047168 : if (CF_NOT_KNOWN_P (overlaps_a)
5623 : 4020240 : || CF_NOT_KNOWN_P (overlaps_b))
5624 : : {
5625 : 26928 : res = chrec_dont_know;
5626 : 26928 : continue;
5627 : : }
5628 : :
5629 : : /* When there is a subscript with no dependence we can stop. */
5630 : 4020240 : else if (CF_NO_DEPENDENCE_P (overlaps_a)
5631 : 1437778 : || CF_NO_DEPENDENCE_P (overlaps_b))
5632 : : {
5633 : 2582462 : res = chrec_known;
5634 : 2582462 : break;
5635 : : }
5636 : : }
5637 : :
5638 : 3422859 : if (res == NULL_TREE)
5639 : : return true;
5640 : :
5641 : 2608301 : if (res == chrec_known)
5642 : 2582462 : dependence_stats.num_dependence_independent++;
5643 : : else
5644 : 25839 : dependence_stats.num_dependence_undetermined++;
5645 : 2608301 : finalize_ddr_dependent (ddr, res);
5646 : 2608301 : return false;
5647 : : }
5648 : :
5649 : : /* Computes the conflicting iterations in LOOP_NEST, and initializes DDR. */
5650 : :
5651 : : static void
5652 : 3416821 : subscript_dependence_tester (struct data_dependence_relation *ddr,
5653 : : class loop *loop_nest)
5654 : : {
5655 : 3416821 : if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5656 : 808520 : dependence_stats.num_dependence_dependent++;
5657 : :
5658 : 3416821 : compute_subscript_distance (ddr);
5659 : 3416821 : if (build_classic_dist_vector (ddr, loop_nest))
5660 : : {
5661 : 806662 : if (dump_file && (dump_flags & TDF_DETAILS))
5662 : : {
5663 : 3616 : unsigned i;
5664 : :
5665 : 3616 : fprintf (dump_file, "(build_classic_dist_vector\n");
5666 : 10923 : for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5667 : : {
5668 : 3691 : fprintf (dump_file, " dist_vector = (");
5669 : 3691 : print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5670 : 7382 : DDR_NB_LOOPS (ddr));
5671 : 3691 : fprintf (dump_file, " )\n");
5672 : : }
5673 : 3616 : fprintf (dump_file, ")\n");
5674 : : }
5675 : :
5676 : 806662 : build_classic_dir_vector (ddr);
5677 : : }
5678 : 3416821 : }
5679 : :
5680 : : /* Returns true when all the access functions of A are affine or
5681 : : constant with respect to LOOP_NEST. */
5682 : :
5683 : : static bool
5684 : 6998275 : access_functions_are_affine_or_constant_p (const struct data_reference *a,
5685 : : const class loop *loop_nest)
5686 : : {
5687 : 6998275 : vec<tree> fns = DR_ACCESS_FNS (a);
5688 : 40561086 : for (tree t : fns)
5689 : 19728450 : if (!evolution_function_is_invariant_p (t, loop_nest->num)
5690 : 19728450 : && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5691 : : return false;
5692 : :
5693 : : return true;
5694 : : }
5695 : :
5696 : : /* This computes the affine dependence relation between A and B with
5697 : : respect to LOOP_NEST. CHREC_KNOWN is used for representing the
5698 : : independence between two accesses, while CHREC_DONT_KNOW is used
5699 : : for representing the unknown relation.
5700 : :
5701 : : Note that it is possible to stop the computation of the dependence
5702 : : relation the first time we detect a CHREC_KNOWN element for a given
5703 : : subscript. */
5704 : :
5705 : : void
5706 : 6001302 : compute_affine_dependence (struct data_dependence_relation *ddr,
5707 : : class loop *loop_nest)
5708 : : {
5709 : 6001302 : struct data_reference *dra = DDR_A (ddr);
5710 : 6001302 : struct data_reference *drb = DDR_B (ddr);
5711 : :
5712 : 6001302 : if (dump_file && (dump_flags & TDF_DETAILS))
5713 : : {
5714 : 126673 : fprintf (dump_file, "(compute_affine_dependence\n");
5715 : 126673 : fprintf (dump_file, " ref_a: ");
5716 : 126673 : print_generic_expr (dump_file, DR_REF (dra));
5717 : 126673 : fprintf (dump_file, ", stmt_a: ");
5718 : 126673 : print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5719 : 126673 : fprintf (dump_file, " ref_b: ");
5720 : 126673 : print_generic_expr (dump_file, DR_REF (drb));
5721 : 126673 : fprintf (dump_file, ", stmt_b: ");
5722 : 126673 : print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5723 : : }
5724 : :
5725 : : /* Analyze only when the dependence relation is not yet known. */
5726 : 6001302 : if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5727 : : {
5728 : 3579010 : dependence_stats.num_dependence_tests++;
5729 : :
5730 : 3579010 : if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5731 : 3579010 : && access_functions_are_affine_or_constant_p (drb, loop_nest))
5732 : 3416821 : subscript_dependence_tester (ddr, loop_nest);
5733 : :
5734 : : /* As a last case, if the dependence cannot be determined, or if
5735 : : the dependence is considered too difficult to determine, answer
5736 : : "don't know". */
5737 : : else
5738 : : {
5739 : 162189 : dependence_stats.num_dependence_undetermined++;
5740 : :
5741 : 162189 : if (dump_file && (dump_flags & TDF_DETAILS))
5742 : : {
5743 : 156 : fprintf (dump_file, "Data ref a:\n");
5744 : 156 : dump_data_reference (dump_file, dra);
5745 : 156 : fprintf (dump_file, "Data ref b:\n");
5746 : 156 : dump_data_reference (dump_file, drb);
5747 : 156 : fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5748 : : }
5749 : 162189 : finalize_ddr_dependent (ddr, chrec_dont_know);
5750 : : }
5751 : : }
5752 : :
5753 : 6001302 : if (dump_file && (dump_flags & TDF_DETAILS))
5754 : : {
5755 : 126673 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5756 : 112165 : fprintf (dump_file, ") -> no dependence\n");
5757 : 14508 : else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5758 : 10823 : fprintf (dump_file, ") -> dependence analysis failed\n");
5759 : : else
5760 : 3685 : fprintf (dump_file, ")\n");
5761 : : }
5762 : 6001302 : }
5763 : :
5764 : : /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5765 : : the data references in DATAREFS, in the LOOP_NEST. When
5766 : : COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5767 : : relations. Return true when successful, i.e. data references number
5768 : : is small enough to be handled. */
5769 : :
5770 : : bool
5771 : 379549 : compute_all_dependences (const vec<data_reference_p> &datarefs,
5772 : : vec<ddr_p> *dependence_relations,
5773 : : const vec<loop_p> &loop_nest,
5774 : : bool compute_self_and_rr)
5775 : : {
5776 : 379549 : struct data_dependence_relation *ddr;
5777 : 379549 : struct data_reference *a, *b;
5778 : 379549 : unsigned int i, j;
5779 : :
5780 : 379549 : if ((int) datarefs.length ()
5781 : 379549 : > param_loop_max_datarefs_for_datadeps)
5782 : : {
5783 : 0 : struct data_dependence_relation *ddr;
5784 : :
5785 : : /* Insert a single relation into dependence_relations:
5786 : : chrec_dont_know. */
5787 : 0 : ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5788 : 0 : dependence_relations->safe_push (ddr);
5789 : 0 : return false;
5790 : : }
5791 : :
5792 : 2824792 : FOR_EACH_VEC_ELT (datarefs, i, a)
5793 : 7188692 : for (j = i + 1; datarefs.iterate (j, &b); j++)
5794 : 4743449 : if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5795 : : {
5796 : 4387276 : ddr = initialize_data_dependence_relation (a, b, loop_nest);
5797 : 4387276 : dependence_relations->safe_push (ddr);
5798 : 4387276 : if (loop_nest.exists ())
5799 : 4349638 : compute_affine_dependence (ddr, loop_nest[0]);
5800 : : }
5801 : :
5802 : 379549 : if (compute_self_and_rr)
5803 : 893081 : FOR_EACH_VEC_ELT (datarefs, i, a)
5804 : : {
5805 : 662529 : ddr = initialize_data_dependence_relation (a, a, loop_nest);
5806 : 662529 : dependence_relations->safe_push (ddr);
5807 : 662529 : if (loop_nest.exists ())
5808 : 662529 : compute_affine_dependence (ddr, loop_nest[0]);
5809 : : }
5810 : :
5811 : : return true;
5812 : : }
5813 : :
5814 : : /* Describes a location of a memory reference, as collected from a statement by get_references_in_stmt. */
5815 : :
5816 : : struct data_ref_loc
5817 : : {
5818 : : /* The memory reference (a tree). */
5819 : : tree ref;
5820 : :
5821 : : /* True if the memory reference is read, false if it is written. */
5822 : : bool is_read;
5823 : :
5824 : : /* True if the data reference is conditional within the containing
5825 : : statement, i.e. if it might not occur even when the statement
5826 : : is executed and runs to completion. In this file it is set for the accesses of IFN_MASK_LOAD and IFN_MASK_STORE calls only. */
5827 : : bool is_conditional_in_stmt;
5828 : : };
5829 : :
5830 : :
5831 : : /* Stores the locations of memory references in STMT to REFERENCES: the RHS of an assignment and call arguments as reads, the LHS as a write, and the access performed by an IFN_MASK_LOAD or IFN_MASK_STORE call as a conditional reference. Returns
5832 : : true if STMT clobbers memory (see the checks on calls and asms at the top of the function), false otherwise. References may have been pushed even when true is returned. */
5833 : :
5834 : : static bool
5835 : 42992023 : get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5836 : : {
5837 : 42992023 : bool clobbers_memory = false;
5838 : 42992023 : data_ref_loc ref;
5839 : 42992023 : tree op0, op1;
5840 : 42992023 : enum gimple_code stmt_code = gimple_code (stmt);
5841 : :
5842 : : /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5843 : : As we cannot model data-references to not spelled out
5844 : : accesses give up if they may occur. */
5845 : 42992023 : if (stmt_code == GIMPLE_CALL
5846 : 42992023 : && !(gimple_call_flags (stmt) & ECF_CONST))
5847 : : {
5848 : : /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
5849 : 3671855 : if (gimple_call_internal_p (stmt))
5850 : 48572 : switch (gimple_call_internal_fn (stmt))
5851 : : {
5852 : 5616 : case IFN_GOMP_SIMD_LANE:
5853 : 5616 : {
5854 : 5616 : class loop *loop = gimple_bb (stmt)->loop_father;
5855 : 5616 : tree uid = gimple_call_arg (stmt, 0);
5856 : 5616 : gcc_assert (TREE_CODE (uid) == SSA_NAME);
5857 : 5616 : if (loop == NULL
5858 : 5616 : || loop->simduid != SSA_NAME_VAR (uid))
5859 : : clobbers_memory = true;
5860 : : break;
5861 : : }
5862 : : case IFN_MASK_LOAD: /* Masked loads and stores are modelled as references further down, so they do not count as clobbers. */
5863 : : case IFN_MASK_STORE:
5864 : : break;
5865 : 2825 : case IFN_MASK_CALL: /* Argument 0 is looked up as the address of the called function; the call clobbers unless that function is known and ECF_CONST. */
5866 : 2825 : {
5867 : 2825 : tree orig_fndecl
5868 : 2825 : = gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
5869 : 2825 : if (!orig_fndecl
5870 : 2825 : || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
5871 : : clobbers_memory = true;
5872 : : }
5873 : : break;
5874 : : default:
5875 : 3709014 : clobbers_memory = true;
5876 : : break;
5877 : : }
5878 : 3623283 : else if (gimple_call_builtin_p (stmt, BUILT_IN_PREFETCH)) /* Prefetch calls are not treated as clobbering memory. */
5879 : : clobbers_memory = false;
5880 : : else
5881 : 3709014 : clobbers_memory = true;
5882 : : }
5883 : 39320168 : else if (stmt_code == GIMPLE_ASM /* An asm clobbers if it is volatile or has a virtual use operand. */
5884 : 39320168 : && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5885 : 8306 : || gimple_vuse (stmt)))
5886 : : clobbers_memory = true;
5887 : :
5888 : 91640875 : if (!gimple_vuse (stmt)) /* STMT has no virtual use operand: no references are recorded. */
5889 : : return clobbers_memory;
5890 : :
5891 : 17367695 : if (stmt_code == GIMPLE_ASSIGN)
5892 : : {
5893 : 12834058 : tree base;
5894 : 12834058 : op0 = gimple_assign_lhs (stmt);
5895 : 12834058 : op1 = gimple_assign_rhs1 (stmt);
5896 : :
5897 : 12834058 : if (DECL_P (op1) /* Record the RHS as a read if it is a decl, or a reference whose base address is neither an SSA name nor a gimple minimal invariant. */
5898 : 12834058 : || (REFERENCE_CLASS_P (op1)
5899 : 6227238 : && (base = get_base_address (op1))
5900 : 6227238 : && TREE_CODE (base) != SSA_NAME
5901 : 6227217 : && !is_gimple_min_invariant (base)))
5902 : : {
5903 : 6969001 : ref.ref = op1;
5904 : 6969001 : ref.is_read = true;
5905 : 6969001 : ref.is_conditional_in_stmt = false;
5906 : 6969001 : references->safe_push (ref);
5907 : : }
5908 : : }
5909 : 4533637 : else if (stmt_code == GIMPLE_CALL)
5910 : : {
5911 : 3665563 : unsigned i = 0, n;
5912 : 3665563 : tree ptr, type;
5913 : 3665563 : unsigned int align;
5914 : :
5915 : 3665563 : ref.is_read = false;
5916 : 3665563 : if (gimple_call_internal_p (stmt))
5917 : 40105 : switch (gimple_call_internal_fn (stmt))
5918 : : {
5919 : 1826 : case IFN_MASK_LOAD:
5920 : 1826 : if (gimple_call_lhs (stmt) == NULL_TREE) /* A masked load without LHS leaves the switch and gets the generic call handling below. */
5921 : : break;
5922 : 1826 : ref.is_read = true;
5923 : : /* FALLTHRU */
5924 : 3281 : case IFN_MASK_STORE:
5925 : 3281 : ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0); /* Zero offset for the MEM_REF, in the type of argument 1. */
5926 : 3281 : align = tree_to_shwi (gimple_call_arg (stmt, 1)); /* The value of argument 1 is used as the alignment of the access. */
5927 : 3281 : if (ref.is_read)
5928 : 1826 : type = TREE_TYPE (gimple_call_lhs (stmt));
5929 : : else
5930 : 1455 : type = TREE_TYPE (gimple_call_arg (stmt, 3)); /* For a store the accessed type is taken from argument 3. */
5931 : 3281 : if (TYPE_ALIGN (type) != align)
5932 : 1057 : type = build_aligned_type (type, align);
5933 : 3281 : ref.is_conditional_in_stmt = true;
5934 : 3281 : ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5935 : : ptr);
5936 : 3281 : references->safe_push (ref);
5937 : 3281 : return false; /* The masked access is the only reference recorded for the call; clobbers_memory was never set for these calls above. */
5938 : : case IFN_MASK_CALL:
5939 : 3662282 : i = 1; /* Skip argument 0, which the clobber check above treats as the address of the called function. */
5940 : : gcc_fallthrough ();
5941 : : default:
5942 : : break;
5943 : : }
5944 : :
5945 : 3662282 : op0 = gimple_call_lhs (stmt);
5946 : 3662282 : n = gimple_call_num_args (stmt);
5947 : 14739496 : for (; i < n; i++) /* Record every remaining argument that is a decl or a reference with a base address as a read. */
5948 : : {
5949 : 7414932 : op1 = gimple_call_arg (stmt, i);
5950 : :
5951 : 7414932 : if (DECL_P (op1)
5952 : 7414932 : || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5953 : : {
5954 : 447933 : ref.ref = op1;
5955 : 447933 : ref.is_read = true;
5956 : 447933 : ref.is_conditional_in_stmt = false;
5957 : 447933 : references->safe_push (ref);
5958 : : }
5959 : : }
5960 : : }
5961 : : else
5962 : : return clobbers_memory;
5963 : :
5964 : 16496340 : if (op0 /* Finally record the LHS of the assignment or call, if it is a decl or a reference with a base address, as a write. */
5965 : 16496340 : && (DECL_P (op0)
5966 : 13503377 : || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5967 : : {
5968 : 6549214 : ref.ref = op0;
5969 : 6549214 : ref.is_read = false;
5970 : 6549214 : ref.is_conditional_in_stmt = false;
5971 : 6549214 : references->safe_push (ref);
5972 : : }
5973 : : return clobbers_memory;
5974 : : }
5975 : :
5976 : :
5977 : : /* Returns true if the loop-nest has any data reference. */
5978 : :
5979 : : bool
5980 : 712 : loop_nest_has_data_refs (loop_p loop)
5981 : : {
5982 : 712 : basic_block *bbs = get_loop_body (loop);
5983 : 712 : auto_vec<data_ref_loc, 3> references;
5984 : :
5985 : 961 : for (unsigned i = 0; i < loop->num_nodes; i++)
5986 : : {
5987 : 891 : basic_block bb = bbs[i];
5988 : 891 : gimple_stmt_iterator bsi;
5989 : :
5990 : 3112 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5991 : : {
5992 : 1972 : gimple *stmt = gsi_stmt (bsi);
5993 : 1972 : get_references_in_stmt (stmt, &references);
5994 : 1972 : if (references.length ())
5995 : : {
5996 : 642 : free (bbs);
5997 : 642 : return true;
5998 : : }
5999 : : }
6000 : : }
6001 : 70 : free (bbs);
6002 : 70 : return false;
6003 : 712 : }
6004 : :
6005 : : /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
6006 : : reference, returns false, otherwise returns true. NEST is the outermost
6007 : : loop of the loop nest in which the references should be analyzed. */
6008 : :
6009 : : opt_result
6010 : 42976175 : find_data_references_in_stmt (class loop *nest, gimple *stmt,
6011 : : vec<data_reference_p> *datarefs)
6012 : : {
6013 : 42976175 : auto_vec<data_ref_loc, 2> references;
6014 : 42976175 : data_reference_p dr;
6015 : :
6016 : 42976175 : if (get_references_in_stmt (stmt, &references))
6017 : 3709009 : return opt_result::failure_at (stmt, "statement clobbers memory: %G",
6018 : : stmt);
6019 : :
6020 : 131076279 : for (const data_ref_loc &ref : references)
6021 : : {
6022 : 13274781 : dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
6023 : 13274781 : loop_containing_stmt (stmt), ref.ref,
6024 : 13274781 : stmt, ref.is_read, ref.is_conditional_in_stmt);
6025 : 13274781 : gcc_assert (dr != NULL);
6026 : 13274781 : datarefs->safe_push (dr);
6027 : : }
6028 : :
6029 : 39267166 : return opt_result::success ();
6030 : 42976175 : }
6031 : :
6032 : : /* Stores the data references in STMT to DATAREFS. If there is an
6033 : : unanalyzable reference, returns false, otherwise returns true.
6034 : : NEST is the outermost loop of the loop nest in which the references
6035 : : should be instantiated, LOOP is the loop in which the references
6036 : : should be analyzed. */
6037 : :
6038 : : bool
6039 : 13876 : graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
6040 : : vec<data_reference_p> *datarefs)
6041 : : {
6042 : 13876 : auto_vec<data_ref_loc, 2> references;
6043 : 13876 : bool ret = true;
6044 : 13876 : data_reference_p dr;
6045 : :
6046 : 13876 : if (get_references_in_stmt (stmt, &references))
6047 : : return false;
6048 : :
6049 : 44399 : for (const data_ref_loc &ref : references)
6050 : : {
6051 : 5572 : dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
6052 : 2786 : ref.is_conditional_in_stmt);
6053 : 2786 : gcc_assert (dr != NULL);
6054 : 2786 : datarefs->safe_push (dr);
6055 : : }
6056 : :
6057 : : return ret;
6058 : 13876 : }
6059 : :
6060 : : /* Search the data references in LOOP, and record the information into
6061 : : DATAREFS. Returns chrec_dont_know when failing to analyze a
6062 : : difficult case, returns NULL_TREE otherwise. */
6063 : :
6064 : : tree
6065 : 2102315 : find_data_references_in_bb (class loop *loop, basic_block bb,
6066 : : vec<data_reference_p> *datarefs)
6067 : : {
6068 : 2102315 : gimple_stmt_iterator bsi;
6069 : :
6070 : 17497673 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6071 : : {
6072 : 13596466 : gimple *stmt = gsi_stmt (bsi);
6073 : :
6074 : 13596466 : if (!find_data_references_in_stmt (loop, stmt, datarefs))
6075 : : {
6076 : 303423 : struct data_reference *res;
6077 : 303423 : res = XCNEW (struct data_reference);
6078 : 303423 : datarefs->safe_push (res);
6079 : :
6080 : 303423 : return chrec_dont_know;
6081 : : }
6082 : : }
6083 : :
6084 : : return NULL_TREE;
6085 : : }
6086 : :
6087 : : /* Search the data references in LOOP, and record the information into
6088 : : DATAREFS. Returns chrec_dont_know when failing to analyze a
6089 : : difficult case, returns NULL_TREE otherwise.
6090 : :
6091 : : TODO: This function should be made smarter so that it can handle address
6092 : : arithmetic as if they were array accesses, etc. */
6093 : :
6094 : : tree
6095 : 734216 : find_data_references_in_loop (class loop *loop,
6096 : : vec<data_reference_p> *datarefs)
6097 : : {
6098 : 734216 : basic_block bb, *bbs;
6099 : 734216 : unsigned int i;
6100 : :
6101 : 734216 : bbs = get_loop_body_in_dom_order (loop);
6102 : :
6103 : 3232236 : for (i = 0; i < loop->num_nodes; i++)
6104 : : {
6105 : 2045031 : bb = bbs[i];
6106 : :
6107 : 2045031 : if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6108 : : {
6109 : 281227 : free (bbs);
6110 : 281227 : return chrec_dont_know;
6111 : : }
6112 : : }
6113 : 452989 : free (bbs);
6114 : :
6115 : 452989 : return NULL_TREE;
6116 : : }
6117 : :
6118 : : /* Return the alignment in bytes that DRB is guaranteed to have at all
6119 : : times. */
6120 : :
6121 : : unsigned int
6122 : 344353 : dr_alignment (innermost_loop_behavior *drb)
6123 : : {
6124 : : /* Get the alignment of BASE_ADDRESS + INIT. */
6125 : 344353 : unsigned int alignment = drb->base_alignment;
6126 : 344353 : unsigned int misalignment = (drb->base_misalignment
6127 : 344353 : + TREE_INT_CST_LOW (drb->init));
6128 : 344353 : if (misalignment != 0)
6129 : 145863 : alignment = MIN (alignment, misalignment & -misalignment);
6130 : :
6131 : : /* Cap it to the alignment of OFFSET. */
6132 : 344353 : if (!integer_zerop (drb->offset))
6133 : 26564 : alignment = MIN (alignment, drb->offset_alignment);
6134 : :
6135 : : /* Cap it to the alignment of STEP. */
6136 : 344353 : if (!integer_zerop (drb->step))
6137 : 183411 : alignment = MIN (alignment, drb->step_alignment);
6138 : :
6139 : 344353 : return alignment;
6140 : : }
6141 : :
6142 : : /* If BASE is a pointer-typed SSA name, try to find the object that it
6143 : : is based on. Return this object X on success and store the alignment
6144 : : in bytes of BASE - &X in *ALIGNMENT_OUT. Return NULL_TREE, leaving *ALIGNMENT_OUT unwritten, if BASE is not such a name or the analysis below gives up. */
6145 : :
6146 : : static tree
6147 : 517396 : get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6148 : : {
6149 : 517396 : if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6150 : : return NULL_TREE;
6151 : :
6152 : 198891 : gimple *def = SSA_NAME_DEF_STMT (base);
6153 : 198891 : base = analyze_scalar_evolution (loop_containing_stmt (def), base); /* From here on BASE is the scalar evolution of the name, analyzed in the loop containing its definition. */
6154 : :
6155 : : /* Peel chrecs and record the minimum alignment preserved by
6156 : : all steps. The starting value is the largest alignment considered, MAX_OFILE_ALIGNMENT expressed in bytes. */
6157 : 198891 : unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6158 : 400875 : while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6159 : : {
6160 : 3093 : unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6161 : 3093 : alignment = MIN (alignment, step_alignment);
6162 : 3093 : base = CHREC_LEFT (base);
6163 : : }
6164 : :
6165 : : /* Punt if the expression is too complicated to handle, i.e. if chrecs remain after peeling or the result is no longer pointer-typed. */
6166 : 198891 : if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6167 : : return NULL_TREE;
6168 : :
6169 : : /* The only useful cases are those for which a dereference folds to something
6170 : : other than an INDIRECT_REF. */
6171 : 198853 : tree ref_type = TREE_TYPE (TREE_TYPE (base));
6172 : 198853 : tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6173 : 198853 : if (!ref)
6174 : : return NULL_TREE;
6175 : :
6176 : : /* Analyze the base to which the steps we peeled were applied. */
6177 : 160 : poly_int64 bitsize, bitpos, bytepos;
6178 : 160 : machine_mode mode;
6179 : 160 : int unsignedp, reversep, volatilep;
6180 : 160 : tree offset;
6181 : 160 : base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6182 : : &unsignedp, &reversep, &volatilep);
6183 : 517396 : if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos)) /* Give up unless the bit position is a whole number of bytes; on success BYTEPOS receives that byte count. */
6184 : : return NULL_TREE;
6185 : :
6186 : : /* Restrict the alignment to that guaranteed by the offsets. A known_alignment result of 0 places no restriction. */
6187 : 160 : unsigned int bytepos_alignment = known_alignment (bytepos);
6188 : 160 : if (bytepos_alignment != 0)
6189 : 74 : alignment = MIN (alignment, bytepos_alignment);
6190 : 160 : if (offset)
6191 : : {
6192 : 0 : unsigned int offset_alignment = highest_pow2_factor (offset);
6193 : 0 : alignment = MIN (alignment, offset_alignment);
6194 : : }
6195 : :
6196 : 160 : *alignment_out = alignment;
6197 : 160 : return base;
6198 : : }
6199 : :
6200 : : /* Return the object whose alignment would need to be changed in order
6201 : : to increase the alignment of ADDR. Store the maximum achievable
6202 : : alignment in *MAX_ALIGNMENT. */
6203 : :
6204 : : tree
6205 : 517396 : get_base_for_alignment (tree addr, unsigned int *max_alignment)
6206 : : {
6207 : 517396 : tree base = get_base_for_alignment_1 (addr, max_alignment);
6208 : 517396 : if (base)
6209 : : return base;
6210 : :
6211 : 517236 : if (TREE_CODE (addr) == ADDR_EXPR)
6212 : 250447 : addr = TREE_OPERAND (addr, 0);
6213 : 517236 : *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6214 : 517236 : return addr;
6215 : : }
6216 : :
6217 : : /* Recursive helper function. */
6218 : :
6219 : : static bool
6220 : 131485 : find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6221 : : {
6222 : : /* Inner loops of the nest should not contain siblings. Example:
6223 : : when there are two consecutive loops,
6224 : :
6225 : : | loop_0
6226 : : | loop_1
6227 : : | A[{0, +, 1}_1]
6228 : : | endloop_1
6229 : : | loop_2
6230 : : | A[{0, +, 1}_2]
6231 : : | endloop_2
6232 : : | endloop_0
6233 : :
6234 : : the dependence relation cannot be captured by the distance
6235 : : abstraction. */
6236 : 131485 : if (loop->next)
6237 : : return false;
6238 : :
6239 : 112239 : loop_nest->safe_push (loop);
6240 : 112239 : if (loop->inner)
6241 : 38555 : return find_loop_nest_1 (loop->inner, loop_nest);
6242 : : return true;
6243 : : }
6244 : :
6245 : : /* Return false when the LOOP is not well nested. Otherwise return
6246 : : true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
6247 : : contain the loops from the outermost to the innermost, as they will
6248 : : appear in the classic distance vector. */
6249 : :
6250 : : bool
6251 : 924088 : find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6252 : : {
6253 : 924088 : loop_nest->safe_push (loop);
6254 : 924088 : if (loop->inner)
6255 : 92930 : return find_loop_nest_1 (loop->inner, loop_nest);
6256 : : return true;
6257 : : }
6258 : :
6259 : : /* Returns true when the data dependences have been computed, false otherwise.
6260 : : Given a loop nest LOOP, the following vectors are returned:
6261 : : DATAREFS is initialized to all the array elements contained in this loop,
6262 : : DEPENDENCE_RELATIONS contains the relations between the data references.
6263 : : Compute read-read and self relations if
6264 : : COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
6265 : :
6266 : : bool
6267 : 368410 : compute_data_dependences_for_loop (class loop *loop,
6268 : : bool compute_self_and_read_read_dependences,
6269 : : vec<loop_p> *loop_nest,
6270 : : vec<data_reference_p> *datarefs,
6271 : : vec<ddr_p> *dependence_relations)
6272 : : {
6273 : 368410 : bool res = true;
6274 : :
6275 : 368410 : memset (&dependence_stats, 0, sizeof (dependence_stats));
6276 : :
6277 : : /* If the loop nest is not well formed, or one of the data references
6278 : : is not computable, give up without spending time to compute other
6279 : : dependences. */
6280 : 368410 : if (!loop
6281 : 368410 : || !find_loop_nest (loop, loop_nest)
6282 : 368408 : || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6283 : 598892 : || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6284 : : compute_self_and_read_read_dependences))
6285 : : res = false;
6286 : :
6287 : 368410 : if (dump_file && (dump_flags & TDF_STATS))
6288 : : {
6289 : 157 : fprintf (dump_file, "Dependence tester statistics:\n");
6290 : :
6291 : 157 : fprintf (dump_file, "Number of dependence tests: %d\n",
6292 : : dependence_stats.num_dependence_tests);
6293 : 157 : fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6294 : : dependence_stats.num_dependence_dependent);
6295 : 157 : fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6296 : : dependence_stats.num_dependence_independent);
6297 : 157 : fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6298 : : dependence_stats.num_dependence_undetermined);
6299 : :
6300 : 157 : fprintf (dump_file, "Number of subscript tests: %d\n",
6301 : : dependence_stats.num_subscript_tests);
6302 : 157 : fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6303 : : dependence_stats.num_subscript_undetermined);
6304 : 157 : fprintf (dump_file, "Number of same subscript function: %d\n",
6305 : : dependence_stats.num_same_subscript_function);
6306 : :
6307 : 157 : fprintf (dump_file, "Number of ziv tests: %d\n",
6308 : : dependence_stats.num_ziv);
6309 : 157 : fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6310 : : dependence_stats.num_ziv_dependent);
6311 : 157 : fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6312 : : dependence_stats.num_ziv_independent);
6313 : 157 : fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6314 : : dependence_stats.num_ziv_unimplemented);
6315 : :
6316 : 157 : fprintf (dump_file, "Number of siv tests: %d\n",
6317 : : dependence_stats.num_siv);
6318 : 157 : fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6319 : : dependence_stats.num_siv_dependent);
6320 : 157 : fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6321 : : dependence_stats.num_siv_independent);
6322 : 157 : fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6323 : : dependence_stats.num_siv_unimplemented);
6324 : :
6325 : 157 : fprintf (dump_file, "Number of miv tests: %d\n",
6326 : : dependence_stats.num_miv);
6327 : 157 : fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6328 : : dependence_stats.num_miv_dependent);
6329 : 157 : fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6330 : : dependence_stats.num_miv_independent);
6331 : 157 : fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6332 : : dependence_stats.num_miv_unimplemented);
6333 : : }
6334 : :
6335 : 368410 : return res;
6336 : : }
6337 : :
6338 : : /* Free the memory used by a data dependence relation DDR. */
6339 : :
6340 : : void
6341 : 12155278 : free_dependence_relation (struct data_dependence_relation *ddr)
6342 : : {
6343 : 12155278 : if (ddr == NULL)
6344 : : return;
6345 : :
6346 : 12155278 : if (DDR_SUBSCRIPTS (ddr).exists ())
6347 : 808520 : free_subscripts (DDR_SUBSCRIPTS (ddr));
6348 : 12155278 : DDR_DIST_VECTS (ddr).release ();
6349 : 12155278 : DDR_DIR_VECTS (ddr).release ();
6350 : :
6351 : 12155278 : free (ddr);
6352 : : }
6353 : :
6354 : : /* Free the memory used by the data dependence relations from
6355 : : DEPENDENCE_RELATIONS. */
6356 : :
6357 : : void
6358 : 2886295 : free_dependence_relations (vec<ddr_p>& dependence_relations)
6359 : : {
6360 : 8988732 : for (data_dependence_relation *ddr : dependence_relations)
6361 : 5053045 : if (ddr)
6362 : 5053045 : free_dependence_relation (ddr);
6363 : :
6364 : 2886295 : dependence_relations.release ();
6365 : 2886295 : }
6366 : :
6367 : : /* Free the memory used by the data references from DATAREFS. */
6368 : :
6369 : : void
6370 : 3145491 : free_data_refs (vec<data_reference_p>& datarefs)
6371 : : {
6372 : 18222713 : for (data_reference *dr : datarefs)
6373 : 11676014 : free_data_ref (dr);
6374 : 3145491 : datarefs.release ();
6375 : 3145491 : }
6376 : :
6377 : : /* Common routine implementing both dr_direction_indicator and
6378 : : dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
6379 : : to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6380 : : Return the step as the indicator otherwise. Return NULL_TREE if DR has no step. The simplifications are only attempted when the step is a multiplication by a positive integer constant. */
6381 : :
6382 : : static tree
6383 : 43830 : dr_step_indicator (struct data_reference *dr, int useful_min)
6384 : : {
6385 : 43830 : tree step = DR_STEP (dr);
6386 : 43830 : if (!step)
6387 : : return NULL_TREE;
6388 : 43830 : STRIP_NOPS (step);
6389 : : /* Look for cases where the step is scaled by a positive constant
6390 : : integer, which will often be the access size. If the multiplication
6391 : : doesn't change the sign (due to overflow effects) then we can
6392 : : test the unscaled value instead. */
6393 : 43830 : if (TREE_CODE (step) == MULT_EXPR
6394 : 3919 : && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6395 : 47721 : && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6396 : : {
6397 : 3891 : tree factor = TREE_OPERAND (step, 1);
6398 : 3891 : step = TREE_OPERAND (step, 0);
6399 : :
6400 : : /* Strip widening and truncating conversions as well as nops. Only one conversion, from an integral type, is stripped. */
6401 : 253 : if (CONVERT_EXPR_P (step)
6402 : 3891 : && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6403 : 3638 : step = TREE_OPERAND (step, 0);
6404 : 3891 : tree type = TREE_TYPE (step);
6405 : :
6406 : : /* Get the range of step values that would not cause overflow, i.e. the ssizetype bounds divided by FACTOR. */
6407 : 7782 : widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6408 : 3891 : / wi::to_widest (factor));
6409 : 7782 : widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6410 : 3891 : / wi::to_widest (factor));
6411 : :
6412 : : /* Get the range of values that the unconverted step actually has. Without a usable range query result the full range of its type is assumed. */
6413 : 3891 : wide_int step_min, step_max;
6414 : 3891 : int_range_max vr;
6415 : 3891 : if (TREE_CODE (step) != SSA_NAME
6416 : 7710 : || !get_range_query (cfun)->range_of_expr (vr, step)
6417 : 7746 : || vr.undefined_p ())
6418 : : {
6419 : 36 : step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6420 : 36 : step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6421 : : }
6422 : : else
6423 : : {
6424 : 3855 : step_min = vr.lower_bound ();
6425 : 3855 : step_max = vr.upper_bound ();
6426 : : }
6427 : :
6428 : : /* Check whether the unconverted step has an acceptable range, i.e. one that lies within [MINV, MAXV]. */
6429 : 3891 : signop sgn = TYPE_SIGN (type);
6430 : 7782 : if (wi::les_p (minv, widest_int::from (step_min, sgn))
6431 : 10632 : && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6432 : : {
6433 : 1416 : if (wi::ge_p (step_min, useful_min, sgn)) /* Every value in the range is >= USEFUL_MIN. */
6434 : 432 : return ssize_int (useful_min);
6435 : 984 : else if (wi::lt_p (step_max, 0, sgn)) /* Every value in the range is negative. */
6436 : 0 : return ssize_int (-1);
6437 : : else
6438 : 984 : return fold_convert (ssizetype, step); /* Sign not known: use the unscaled step, converted to ssizetype. */
6439 : : }
6440 : 3891 : }
6441 : 42414 : return DR_STEP (dr); /* No simplification applied: the recorded step itself is the indicator. */
6442 : : }
6443 : :
6444 : : /* Return a value that is negative iff DR has a negative step. */
6445 : :
6446 : : tree
6447 : 9442 : dr_direction_indicator (struct data_reference *dr)
6448 : : {
6449 : 9442 : return dr_step_indicator (dr, 0);
6450 : : }
6451 : :
6452 : : /* Return a value that is zero iff DR has a zero step. */
6453 : :
6454 : : tree
6455 : 34388 : dr_zero_step_indicator (struct data_reference *dr)
6456 : : {
6457 : 34388 : return dr_step_indicator (dr, 1);
6458 : : }
6459 : :
6460 : : /* Return true if DR is known to have a nonnegative (but possibly zero)
6461 : : step. */
6462 : :
6463 : : bool
6464 : 3527 : dr_known_forward_stride_p (struct data_reference *dr)
6465 : : {
6466 : 3527 : tree indicator = dr_direction_indicator (dr);
6467 : 3527 : tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6468 : : fold_convert (ssizetype, indicator),
6469 : : ssize_int (0));
6470 : 3527 : return neg_step_val && integer_zerop (neg_step_val);
6471 : : }
|