Line data Source code
1 : /* Predictive commoning.
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* This file implements the predictive commoning optimization. Predictive
21 : commoning can be viewed as CSE around a loop, and with some improvements,
22 : as generalized strength reduction-- i.e., reusing values computed in
23 : earlier iterations of a loop in the later ones. So far, the pass only
24 : handles the most useful case, that is, reusing values of memory references.
25 : If you think this is all just a special case of PRE, you are sort of right;
26 : however, concentrating on loops is simpler, and makes it possible to
27 : incorporate data dependence analysis to detect the opportunities, perform
28 : loop unrolling to avoid copies together with renaming immediately,
29 : and if needed, we could also take register pressure into account.
30 :
31 : Let us demonstrate what is done on an example:
32 :
33 : for (i = 0; i < 100; i++)
34 : {
35 : a[i+2] = a[i] + a[i+1];
36 : b[10] = b[10] + i;
37 : c[i] = c[99 - i];
38 : d[i] = d[i + 1];
39 : }
40 :
41 : 1) We find data references in the loop, and split them to mutually
42 : independent groups (i.e., we find components of a data dependence
43 : graph). We ignore read-read dependences whose distance is not constant.
44 : (TODO -- we could also ignore antidependences). In this example, we
45 : find the following groups:
46 :
47 : a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 : b[10]{read}, b[10]{write}
49 : c[99 - i]{read}, c[i]{write}
50 : d[i + 1]{read}, d[i]{write}
51 :
52 : 2) Inside each of the group, we verify several conditions:
53 : a) all the references must differ in indices only, and the indices
54 : must all have the same step
55 : b) the references must dominate loop latch (and thus, they must be
56 : ordered by dominance relation).
57 : c) the distance of the indices must be a small multiple of the step
58 : We are then able to compute the difference of the references (# of
59 : iterations before they point to the same place as the first of them).
60 : Also, in case there are writes in the loop, we split the groups into
61 : chains whose head is the write whose values are used by the reads in
62 : the same chain. The chains are then processed independently,
63 : making the further transformations simpler. Also, the shorter chains
64 : need the same number of registers, but may require lower unrolling
65 : factor in order to get rid of the copies on the loop latch.
66 :
67 : In our example, we get the following chains (the chain for c is invalid).
68 :
69 : a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 : b[10]{read,+0}, b[10]{write,+0}
71 : d[i + 1]{read,+0}, d[i]{write,+1}
72 :
73 : 3) For each read, we determine the read or write whose value it reuses,
74 : together with the distance of this reuse. I.e. we take the last
75 : reference before it with distance 0, or the last of the references
76 : with the smallest positive distance to the read. Then, we remove
77 : the references that are not used in any of these chains, discard the
78 : empty groups, and propagate all the links so that they point to the
79 : single root reference of the chain (adjusting their distance
80 : appropriately). Some extra care needs to be taken for references with
81 : step 0. In our example (the numbers indicate the distance of the
82 : reuse),
83 :
84 : a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 : b[10] --> (*) 1, b[10] (*)
86 :
87 : 4) The chains are combined together if possible. If the corresponding
88 : elements of two chains are always combined together with the same
89 : operator, we remember just the result of this combination, instead
90 : of remembering the values separately. We may need to perform
91 : reassociation to enable combining, for example
92 :
93 : e[i] + f[i+1] + e[i+1] + f[i]
94 :
95 : can be reassociated as
96 :
97 : (e[i] + f[i]) + (e[i+1] + f[i+1])
98 :
99 : and we can combine the chains for e and f into one chain.
100 :
101 : 5) For each root reference (end of the chain) R, let N be maximum distance
102 : of a reference reusing its value. Variables R0 up to RN are created,
103 : together with phi nodes that transfer values from R1 .. RN to
104 : R0 .. R(N-1).
105 : Initial values are loaded to R0..R(N-1) (in case not all references
106 : must necessarily be accessed and they may trap, we may fail here;
107 : TODO sometimes, the loads could be guarded by a check for the number
108 : of iterations). Values loaded/stored in roots are also copied to
109 : RN. Other reads are replaced with the appropriate variable Ri.
110 : Everything is put to SSA form.
111 :
112 : As a small improvement, if R0 is dead after the root (i.e., all uses of
113 : the value with the maximum distance dominate the root), we can avoid
114 : creating RN and use R0 instead of it.
115 :
116 : In our example, we get (only the parts concerning a and b are shown):
117 : for (i = 0; i < 100; i++)
118 : {
119 : f = phi (a[0], s);
120 : s = phi (a[1], f);
121 : x = phi (b[10], x);
122 :
123 : f = f + s;
124 : a[i+2] = f;
125 : x = x + i;
126 : b[10] = x;
127 : }
128 :
129 : 6) Factor F for unrolling is determined as the smallest common multiple of
130 : (N + 1) for each root reference (N for references for that we avoided
131 : creating RN). If F and the loop is small enough, loop is unrolled F
132 : times. The stores to RN (R0) in the copies of the loop body are
133 : periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 : be coalesced and the copies can be eliminated.
135 :
136 : TODO -- copy propagation and other optimizations may change the live
137 : ranges of the temporary registers and prevent them from being coalesced;
138 : this may increase the register pressure.
139 :
140 : In our case, F = 2 and the (main loop of the) result is
141 :
142 : for (i = 0; i < ...; i += 2)
143 : {
144 : f = phi (a[0], f);
145 : s = phi (a[1], s);
146 : x = phi (b[10], x);
147 :
148 : f = f + s;
149 : a[i+2] = f;
150 : x = x + i;
151 : b[10] = x;
152 :
153 : s = s + f;
154 : a[i+3] = s;
155 : x = x + i;
156 : b[10] = x;
157 : }
158 :
159 : Apart from predictive commoning on Load-Load and Store-Load chains, we
160 : also support Store-Store chains -- stores killed by other store can be
161 : eliminated. Given below example:
162 :
163 : for (i = 0; i < n; i++)
164 : {
165 : a[i] = 1;
166 : a[i+2] = 2;
167 : }
168 :
169 : It can be replaced with:
170 :
171 : t0 = a[0];
172 : t1 = a[1];
173 : for (i = 0; i < n; i++)
174 : {
175 : a[i] = 1;
176 : t2 = 2;
177 : t0 = t1;
178 : t1 = t2;
179 : }
180 : a[n] = t0;
181 : a[n+1] = t1;
182 :
183 : If the loop runs more than 1 iterations, it can be further simplified into:
184 :
185 : for (i = 0; i < n; i++)
186 : {
187 : a[i] = 1;
188 : }
189 : a[n] = 2;
190 : a[n+1] = 2;
191 :
192 : The interesting part is this can be viewed either as general store motion
193 : or general dead store elimination in either intra/inter-iterations way.
194 :
195 : With trivial effort, we also support load inside Store-Store chains if the
196 : load is dominated by a store statement in the same iteration of loop. You
197 : can see this as a restricted Store-Mixed-Load-Store chain.
198 :
199 : TODO: For now, we don't support store-store chains in multi-exit loops. We
200 : force to not unroll in case of store-store chain even if other chains might
201 : ask for unroll.
202 :
203 : Predictive commoning can be generalized for arbitrary computations (not
204 : just memory loads), and also nontrivial transfer functions (e.g., replacing
205 : i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
206 :
207 : #include "config.h"
208 : #include "system.h"
209 : #include "coretypes.h"
210 : #include "backend.h"
211 : #include "rtl.h"
212 : #include "tree.h"
213 : #include "gimple.h"
214 : #include "predict.h"
215 : #include "tree-pass.h"
216 : #include "ssa.h"
217 : #include "gimple-pretty-print.h"
218 : #include "alias.h"
219 : #include "fold-const.h"
220 : #include "cfgloop.h"
221 : #include "tree-eh.h"
222 : #include "gimplify.h"
223 : #include "gimple-iterator.h"
224 : #include "gimplify-me.h"
225 : #include "tree-ssa-loop-ivopts.h"
226 : #include "tree-ssa-loop-manip.h"
227 : #include "tree-ssa-loop-niter.h"
228 : #include "tree-ssa-loop.h"
229 : #include "tree-into-ssa.h"
230 : #include "tree-dfa.h"
231 : #include "tree-ssa.h"
232 : #include "tree-data-ref.h"
233 : #include "tree-scalar-evolution.h"
234 : #include "tree-affine.h"
235 : #include "builtins.h"
236 : #include "opts.h"
237 : #include "tree-ssa-address.h"
238 :
239 : /* The maximum number of iterations between the considered memory
240 : references. */
241 :
242 : #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
243 :
244 : /* Data references (or phi nodes that carry data reference values across
245 : loop iterations). */
246 :
247 : typedef class dref_d
248 : {
249 : public:
250 : /* The reference itself. */
251 : struct data_reference *ref;
252 :
253 : /* The statement in that the reference appears. */
254 : gimple *stmt;
255 :
256 : /* In case that STMT is a phi node, this field is set to the SSA name
257 : defined by it in replace_phis_by_defined_names (in order to avoid
258 : pointing to phi node that got reallocated in the meantime). */
259 : tree name_defined_by_phi;
260 :
261 : /* Distance of the reference from the root of the chain (in number of
262 : iterations of the loop). */
263 : unsigned distance;
264 :
265 : /* Number of iterations offset from the first reference in the component. */
266 : widest_int offset;
267 :
268 : /* Number of the reference in a component, in dominance ordering. */
269 : unsigned pos;
270 :
271 : /* True if the memory reference is always accessed when the loop is
272 : entered. */
273 : unsigned always_accessed : 1;
274 : } *dref;
275 :
276 :
277 : /* Type of the chain of the references. */
278 :
279 : enum chain_type
280 : {
281 : /* The addresses of the references in the chain are constant. */
282 : CT_INVARIANT,
283 :
284 : /* There are only loads in the chain. */
285 : CT_LOAD,
286 :
287 : /* Root of the chain is store, the rest are loads. */
288 : CT_STORE_LOAD,
289 :
290 : /* There are only stores in the chain. */
291 : CT_STORE_STORE,
292 :
293 : /* A combination of two chains. */
294 : CT_COMBINATION
295 : };
296 :
297 : /* Chains of data references. */
298 :
299 : typedef struct chain
300 : {
301 71086 : chain (chain_type t) : type (t), op (ERROR_MARK), rslt_type (NULL_TREE),
302 71086 : ch1 (NULL), ch2 (NULL), length (0), init_seq (NULL), fini_seq (NULL),
303 71086 : has_max_use_after (false), all_always_accessed (false), combined (false),
304 71086 : inv_store_elimination (false) {}
305 :
306 : /* Type of the chain. */
307 : enum chain_type type;
308 :
309 : /* For combination chains, the operator and the two chains that are
310 : combined, and the type of the result. */
311 : enum tree_code op;
312 : tree rslt_type;
313 : struct chain *ch1, *ch2;
314 :
315 : /* The references in the chain. */
316 : auto_vec<dref> refs;
317 :
318 : /* The maximum distance of the reference in the chain from the root. */
319 : unsigned length;
320 :
321 : /* The variables used to copy the value throughout iterations. */
322 : auto_vec<tree> vars;
323 :
324 : /* Initializers for the variables. */
325 : auto_vec<tree> inits;
326 :
327 : /* Finalizers for the eliminated stores. */
328 : auto_vec<tree> finis;
329 :
330 : /* gimple stmts intializing the initial variables of the chain. */
331 : gimple_seq init_seq;
332 :
333 : /* gimple stmts finalizing the eliminated stores of the chain. */
334 : gimple_seq fini_seq;
335 :
336 : /* True if there is a use of a variable with the maximal distance
337 : that comes after the root in the loop. */
338 : unsigned has_max_use_after : 1;
339 :
340 : /* True if all the memory references in the chain are always accessed. */
341 : unsigned all_always_accessed : 1;
342 :
343 : /* True if this chain was combined together with some other chain. */
344 : unsigned combined : 1;
345 :
346 : /* True if this is store elimination chain and eliminated stores store
347 : loop invariant value into memory. */
348 : unsigned inv_store_elimination : 1;
349 : } *chain_p;
350 :
351 :
352 : /* Describes the knowledge about the step of the memory references in
353 : the component. */
354 :
355 : enum ref_step_type
356 : {
357 : /* The step is zero. */
358 : RS_INVARIANT,
359 :
360 : /* The step is nonzero. */
361 : RS_NONZERO,
362 :
363 : /* The step may or may not be nonzero. */
364 : RS_ANY
365 : };
366 :
367 : /* Components of the data dependence graph. */
368 :
369 768388 : struct component
370 : {
371 384194 : component (bool es) : comp_step (RS_ANY), eliminate_store_p (es),
372 384194 : next (NULL) {}
373 :
374 : /* The references in the component. */
375 : auto_vec<dref> refs;
376 :
377 : /* What we know about the step of the references in the component. */
378 : enum ref_step_type comp_step;
379 :
380 : /* True if all references in component are stores and we try to do
381 : intra/inter loop iteration dead store elimination. */
382 : bool eliminate_store_p;
383 :
384 : /* Next component in the list. */
385 : struct component *next;
386 : };
387 :
388 : /* A class to encapsulate the global states used for predictive
389 : commoning work on top of one given LOOP. */
390 :
391 : class pcom_worker
392 : {
393 : public:
394 427656 : pcom_worker (loop_p l) : m_loop (l), m_cache (NULL) {}
395 :
396 427656 : ~pcom_worker ()
397 : {
398 427656 : free_data_refs (m_datarefs);
399 427656 : free_dependence_relations (m_dependences);
400 427656 : free_affine_expand_cache (&m_cache);
401 427656 : release_chains ();
402 427656 : }
403 :
404 : pcom_worker (const pcom_worker &) = delete;
405 : pcom_worker &operator= (const pcom_worker &) = delete;
406 :
407 : /* Performs predictive commoning. */
408 : unsigned tree_predictive_commoning_loop (bool allow_unroll_p);
409 :
410 : /* Perform the predictive commoning optimization for chains, make this
411 : public for being called in callback execute_pred_commoning_cbck. */
412 : void execute_pred_commoning (bitmap tmp_vars);
413 :
414 : private:
415 : /* The pointer to the given loop. */
416 : loop_p m_loop;
417 :
418 : /* All data references. */
419 : auto_vec<data_reference_p, 10> m_datarefs;
420 :
421 : /* All data dependences. */
422 : auto_vec<ddr_p, 10> m_dependences;
423 :
424 : /* All chains. */
425 : auto_vec<chain_p> m_chains;
426 :
427 : /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
428 : auto_bitmap m_looparound_phis;
429 :
430 : typedef hash_map<tree, name_expansion *> tree_expand_map_t;
431 : /* Cache used by tree_to_aff_combination_expand. */
432 : tree_expand_map_t *m_cache;
433 :
434 : /* Splits dependence graph to components. */
435 : struct component *split_data_refs_to_components ();
436 :
437 : /* Check the conditions on references inside each of components COMPS,
438 : and remove the unsuitable components from the list. */
439 : struct component *filter_suitable_components (struct component *comps);
440 :
441 : /* Find roots of the values and determine distances in components COMPS,
442 : and separates the references to chains. */
443 : void determine_roots (struct component *comps);
444 :
445 : /* Prepare initializers for chains, and free chains that cannot
446 : be used because the initializers might trap. */
447 : void prepare_initializers ();
448 :
449 : /* Generates finalizer memory reference for chains. Returns true if
450 : finalizer code generation for chains breaks loop closed ssa form. */
451 : bool prepare_finalizers ();
452 :
453 : /* Try to combine the chains. */
454 : void try_combine_chains ();
455 :
456 : /* Frees CHAINS. */
457 : void release_chains ();
458 :
459 : /* Frees a chain CHAIN. */
460 : void release_chain (chain_p chain);
461 :
462 : /* Prepare initializers for CHAIN. Returns false if this is impossible
463 : because one of these initializers may trap, true otherwise. */
464 : bool prepare_initializers_chain (chain_p chain);
465 :
466 : /* Generates finalizer memory references for CHAIN. Returns true
467 : if finalizer code for CHAIN can be generated, otherwise false. */
468 : bool prepare_finalizers_chain (chain_p chain);
469 :
470 : /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
471 : void aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset);
472 :
473 : /* Determines number of iterations of the innermost enclosing loop before
474 : B refers to exactly the same location as A and stores it to OFF. */
475 : bool determine_offset (struct data_reference *a, struct data_reference *b,
476 : poly_widest_int *off);
477 :
478 : /* Returns true if the component COMP satisfies the conditions
479 : described in 2) at the beginning of this file. */
480 : bool suitable_component_p (struct component *comp);
481 :
482 : /* Returns true if REF is a valid initializer for ROOT with given
483 : DISTANCE (in iterations of the innermost enclosing loop). */
484 : bool valid_initializer_p (struct data_reference *ref, unsigned distance,
485 : struct data_reference *root);
486 :
487 : /* Finds looparound phi node of loop that copies the value of REF. */
488 : gphi *find_looparound_phi (dref ref, dref root);
489 :
490 : /* Find roots of the values and determine distances in the component
491 : COMP. The references are redistributed into chains. */
492 : void determine_roots_comp (struct component *comp);
493 :
494 : /* For references in CHAIN that are copied around the loop, add the
495 : results of such copies to the chain. */
496 : void add_looparound_copies (chain_p chain);
497 :
498 : /* Returns the single statement in that NAME is used, excepting
499 : the looparound phi nodes contained in one of the chains. */
500 : gimple *single_nonlooparound_use (tree name);
501 :
502 : /* Remove statement STMT, as well as the chain of assignments in that
503 : it is used. */
504 : void remove_stmt (gimple *stmt);
505 :
506 : /* Perform the predictive commoning optimization for a chain CHAIN. */
507 : void execute_pred_commoning_chain (chain_p chain, bitmap tmp_vars);
508 :
509 : /* Returns the modify statement that uses NAME. */
510 : gimple *find_use_stmt (tree *name);
511 :
512 : /* If the operation used in STMT is associative and commutative, go
513 : through the tree of the same operations and returns its root. */
514 : gimple *find_associative_operation_root (gimple *stmt, unsigned *distance);
515 :
516 : /* Returns the common statement in that NAME1 and NAME2 have a use. */
517 : gimple *find_common_use_stmt (tree *name1, tree *name2);
518 :
519 : /* Checks whether R1 and R2 are combined together using CODE, with the
520 : result in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order
521 : R2 CODE R1 if it is true. */
522 : bool combinable_refs_p (dref r1, dref r2, enum tree_code *code, bool *swap,
523 : tree *rslt_type);
524 :
525 : /* Reassociates the expression in that NAME1 and NAME2 are used so that
526 : they are combined in a single statement, and returns this statement. */
527 : gimple *reassociate_to_the_same_stmt (tree name1, tree name2);
528 :
529 : /* Returns the statement that combines references R1 and R2. */
530 : gimple *stmt_combining_refs (dref r1, dref r2);
531 :
532 : /* Tries to combine chains CH1 and CH2 together. */
533 : chain_p combine_chains (chain_p ch1, chain_p ch2);
534 : };
535 :
536 : /* Dumps data reference REF to FILE. */
537 :
538 : extern void dump_dref (FILE *, dref);
539 : void
540 308 : dump_dref (FILE *file, dref ref)
541 : {
542 308 : if (ref->ref)
543 : {
544 289 : fprintf (file, " ");
545 289 : print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
546 289 : fprintf (file, " (id %u%s)\n", ref->pos,
547 289 : DR_IS_READ (ref->ref) ? "" : ", write");
548 :
549 289 : fprintf (file, " offset ");
550 289 : print_decs (ref->offset, file);
551 289 : fprintf (file, "\n");
552 :
553 289 : fprintf (file, " distance %u\n", ref->distance);
554 : }
555 : else
556 : {
557 19 : if (gimple_code (ref->stmt) == GIMPLE_PHI)
558 1 : fprintf (file, " looparound ref\n");
559 : else
560 18 : fprintf (file, " combination ref\n");
561 19 : fprintf (file, " in statement ");
562 19 : print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
563 19 : fprintf (file, "\n");
564 19 : fprintf (file, " distance %u\n", ref->distance);
565 : }
566 :
567 308 : }
568 :
569 : /* Dumps CHAIN to FILE. */
570 :
571 : extern void dump_chain (FILE *, chain_p);
572 : void
573 58 : dump_chain (FILE *file, chain_p chain)
574 : {
575 58 : dref a;
576 58 : const char *chain_type;
577 58 : unsigned i;
578 58 : tree var;
579 :
580 58 : switch (chain->type)
581 : {
582 : case CT_INVARIANT:
583 : chain_type = "Load motion";
584 : break;
585 :
586 24 : case CT_LOAD:
587 24 : chain_type = "Loads-only";
588 24 : break;
589 :
590 5 : case CT_STORE_LOAD:
591 5 : chain_type = "Store-loads";
592 5 : break;
593 :
594 19 : case CT_STORE_STORE:
595 19 : chain_type = "Store-stores";
596 19 : break;
597 :
598 9 : case CT_COMBINATION:
599 9 : chain_type = "Combination";
600 9 : break;
601 :
602 0 : default:
603 0 : gcc_unreachable ();
604 : }
605 :
606 58 : fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
607 58 : chain->combined ? " (combined)" : "");
608 58 : if (chain->type != CT_INVARIANT)
609 57 : fprintf (file, " max distance %u%s\n", chain->length,
610 57 : chain->has_max_use_after ? "" : ", may reuse first");
611 :
612 58 : if (chain->type == CT_COMBINATION)
613 : {
614 18 : fprintf (file, " equal to %p %s %p in type ",
615 9 : (void *) chain->ch1, op_symbol_code (chain->op),
616 9 : (void *) chain->ch2);
617 9 : print_generic_expr (file, chain->rslt_type, TDF_SLIM);
618 9 : fprintf (file, "\n");
619 : }
620 :
621 58 : if (chain->vars.exists ())
622 : {
623 0 : fprintf (file, " vars");
624 0 : FOR_EACH_VEC_ELT (chain->vars, i, var)
625 : {
626 0 : fprintf (file, " ");
627 0 : print_generic_expr (file, var, TDF_SLIM);
628 : }
629 0 : fprintf (file, "\n");
630 : }
631 :
632 58 : if (chain->inits.exists ())
633 : {
634 41 : fprintf (file, " inits");
635 149 : FOR_EACH_VEC_ELT (chain->inits, i, var)
636 : {
637 67 : fprintf (file, " ");
638 67 : print_generic_expr (file, var, TDF_SLIM);
639 : }
640 41 : fprintf (file, "\n");
641 : }
642 :
643 58 : fprintf (file, " references:\n");
644 251 : FOR_EACH_VEC_ELT (chain->refs, i, a)
645 135 : dump_dref (file, a);
646 :
647 58 : fprintf (file, "\n");
648 58 : }
649 :
650 : /* Dumps CHAINS to FILE. */
651 :
652 : void
653 34 : dump_chains (FILE *file, const vec<chain_p> &chains)
654 : {
655 34 : chain_p chain;
656 34 : unsigned i;
657 :
658 92 : FOR_EACH_VEC_ELT (chains, i, chain)
659 58 : dump_chain (file, chain);
660 34 : }
661 :
662 : /* Dumps COMP to FILE. */
663 :
664 : extern void dump_component (FILE *, struct component *);
665 : void
666 103 : dump_component (FILE *file, struct component *comp)
667 : {
668 103 : dref a;
669 103 : unsigned i;
670 :
671 206 : fprintf (file, "Component%s:\n",
672 103 : comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
673 379 : FOR_EACH_VEC_ELT (comp->refs, i, a)
674 173 : dump_dref (file, a);
675 103 : fprintf (file, "\n");
676 103 : }
677 :
678 : /* Dumps COMPS to FILE. */
679 :
680 : extern void dump_components (FILE *, struct component *);
681 : void
682 54 : dump_components (FILE *file, struct component *comps)
683 : {
684 54 : struct component *comp;
685 :
686 157 : for (comp = comps; comp; comp = comp->next)
687 103 : dump_component (file, comp);
688 54 : }
689 :
690 : /* Frees a chain CHAIN. */
691 :
692 : void
693 105917 : pcom_worker::release_chain (chain_p chain)
694 : {
695 105917 : dref ref;
696 105917 : unsigned i;
697 :
698 105917 : if (chain == NULL)
699 105917 : return;
700 :
701 162250 : FOR_EACH_VEC_ELT (chain->refs, i, ref)
702 91164 : free (ref);
703 :
704 71086 : if (chain->init_seq)
705 0 : gimple_seq_discard (chain->init_seq);
706 :
707 71086 : if (chain->fini_seq)
708 0 : gimple_seq_discard (chain->fini_seq);
709 :
710 71086 : delete chain;
711 : }
712 :
713 : /* Frees CHAINS. */
714 :
715 : void
716 427656 : pcom_worker::release_chains ()
717 : {
718 427656 : unsigned i;
719 427656 : chain_p chain;
720 :
721 455434 : FOR_EACH_VEC_ELT (m_chains, i, chain)
722 27778 : release_chain (chain);
723 427656 : }
724 :
725 : /* Frees list of components COMPS. */
726 :
727 : static void
728 171506 : release_components (struct component *comps)
729 : {
730 171506 : struct component *act, *next;
731 :
732 527598 : for (act = comps; act; act = next)
733 : {
734 356092 : next = act->next;
735 356092 : delete act;
736 : }
737 171506 : }
738 :
739 : /* Finds a root of tree given by FATHERS containing A, and performs path
740 : shortening. */
741 :
742 : static unsigned
743 6983194 : component_of (vec<unsigned> &fathers, unsigned a)
744 : {
745 6983194 : unsigned root, n;
746 :
747 10888024 : for (root = a; root != fathers[root]; root = fathers[root])
748 3904830 : continue;
749 :
750 10888024 : for (; a != root; a = n)
751 : {
752 3904830 : n = fathers[a];
753 3904830 : fathers[a] = root;
754 : }
755 :
756 6983194 : return root;
757 3904830 : }
758 :
759 : /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
760 : components, A and B are components to merge. */
761 :
762 : static void
763 357934 : merge_comps (vec<unsigned> &fathers, vec<unsigned> &sizes,
764 : unsigned a, unsigned b)
765 : {
766 357934 : unsigned ca = component_of (fathers, a);
767 357934 : unsigned cb = component_of (fathers, b);
768 :
769 357934 : if (ca == cb)
770 : return;
771 :
772 357934 : if (sizes[ca] < sizes[cb])
773 : {
774 18211 : sizes[cb] += sizes[ca];
775 18211 : fathers[ca] = cb;
776 : }
777 : else
778 : {
779 339723 : sizes[ca] += sizes[cb];
780 339723 : fathers[cb] = ca;
781 : }
782 : }
783 :
784 : /* Returns true if A is a reference that is suitable for predictive commoning
785 : in the innermost loop that contains it. REF_STEP is set according to the
786 : step of the reference A. */
787 :
788 : static bool
789 1149152 : suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
790 : {
791 1149152 : tree ref = DR_REF (a), step = DR_STEP (a);
792 :
793 1149152 : if (!step
794 1017834 : || TREE_THIS_VOLATILE (ref)
795 1003419 : || !is_gimple_reg_type (TREE_TYPE (ref))
796 2129708 : || tree_could_throw_p (ref))
797 207490 : return false;
798 :
799 941662 : if (integer_zerop (step))
800 55176 : *ref_step = RS_INVARIANT;
801 886486 : else if (integer_nonzerop (step))
802 835236 : *ref_step = RS_NONZERO;
803 : else
804 51250 : *ref_step = RS_ANY;
805 :
806 : return true;
807 : }
808 :
809 : /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
810 :
811 : void
812 219450 : pcom_worker::aff_combination_dr_offset (struct data_reference *dr,
813 : aff_tree *offset)
814 : {
815 219450 : tree type = TREE_TYPE (DR_OFFSET (dr));
816 219450 : aff_tree delta;
817 :
818 219450 : tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, &m_cache);
819 219450 : aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
820 219450 : aff_combination_add (offset, &delta);
821 219450 : }
822 :
823 : /* Determines number of iterations of the innermost enclosing loop before B
824 : refers to exactly the same location as A and stores it to OFF. If A and
825 : B do not have the same step, they never meet, or anything else fails,
826 : returns false, otherwise returns true. Both A and B are assumed to
827 : satisfy suitable_reference_p. */
828 :
829 : bool
830 263732 : pcom_worker::determine_offset (struct data_reference *a,
831 : struct data_reference *b, poly_widest_int *off)
832 : {
833 791196 : aff_tree diff, baseb, step;
834 263732 : tree typea, typeb;
835 :
836 : /* Check that both the references access the location in the same type. */
837 263732 : typea = TREE_TYPE (DR_REF (a));
838 263732 : typeb = TREE_TYPE (DR_REF (b));
839 263732 : if (!useless_type_conversion_p (typeb, typea))
840 : return false;
841 :
842 : /* Check whether the base address and the step of both references is the
843 : same. */
844 238639 : if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
845 238639 : || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
846 120281 : return false;
847 :
848 118358 : if (integer_zerop (DR_STEP (a)))
849 : {
850 : /* If the references have loop invariant address, check that they access
851 : exactly the same location. */
852 8641 : *off = 0;
853 8641 : return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
854 8641 : && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
855 : }
856 :
857 : /* Compare the offsets of the addresses, and check whether the difference
858 : is a multiple of step. */
859 109717 : aff_combination_dr_offset (a, &diff);
860 109717 : aff_combination_dr_offset (b, &baseb);
861 109717 : aff_combination_scale (&baseb, -1);
862 109717 : aff_combination_add (&diff, &baseb);
863 :
864 109717 : tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
865 : &step, &m_cache);
866 109717 : return aff_combination_constant_multiple_p (&diff, &step, off);
867 263732 : }
868 :
869 : /* Returns the last basic block in LOOP for that we are sure that
870 : it is executed whenever the loop is entered. */
871 :
872 : static basic_block
873 256680 : last_always_executed_block (class loop *loop)
874 : {
875 256680 : unsigned i;
876 256680 : auto_vec<edge> exits = get_loop_exit_edges (loop);
877 256680 : edge ex;
878 256680 : basic_block last = loop->latch;
879 :
880 655327 : FOR_EACH_VEC_ELT (exits, i, ex)
881 398647 : last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
882 :
883 256680 : return last;
884 256680 : }
885 :
886 : /* Splits dependence graph on DATAREFS described by DEPENDENCES to
887 : components. */
888 :
889 : struct component *
890 256680 : pcom_worker::split_data_refs_to_components ()
891 : {
892 256680 : unsigned i, n = m_datarefs.length ();
893 256680 : unsigned ca, ia, ib, bad;
894 256680 : struct data_reference *dr, *dra, *drb;
895 256680 : struct data_dependence_relation *ddr;
896 256680 : struct component *comp_list = NULL, *comp;
897 256680 : dref dataref;
898 : /* Don't do store elimination if loop has multiple exit edges. */
899 256680 : bool eliminate_store_p = single_exit (m_loop) != NULL;
900 256680 : basic_block last_always_executed = last_always_executed_block (m_loop);
901 256680 : auto_bitmap no_store_store_comps;
902 256680 : auto_vec<unsigned> comp_father (n + 1);
903 256680 : auto_vec<unsigned> comp_size (n + 1);
904 256680 : comp_father.quick_grow (n + 1);
905 256680 : comp_size.quick_grow (n + 1);
906 :
907 1256061 : FOR_EACH_VEC_ELT (m_datarefs, i, dr)
908 : {
909 742913 : if (!DR_REF (dr))
910 : /* A fake reference for call or asm_expr that may clobber memory;
911 : just fail. */
912 : return NULL;
913 : /* predcom pass isn't prepared to handle calls with data references. */
914 742913 : if (is_gimple_call (DR_STMT (dr)))
915 : return NULL;
916 742701 : dr->aux = (void *) (size_t) i;
917 742701 : comp_father[i] = i;
918 742701 : comp_size[i] = 1;
919 : }
920 :
921 : /* A component reserved for the "bad" data references. */
922 256468 : comp_father[n] = n;
923 256468 : comp_size[n] = 1;
924 :
925 998596 : FOR_EACH_VEC_ELT (m_datarefs, i, dr)
926 : {
927 742128 : enum ref_step_type dummy;
928 :
929 742128 : if (!suitable_reference_p (dr, &dummy))
930 : {
931 207490 : ia = (unsigned) (size_t) dr->aux;
932 207490 : merge_comps (comp_father, comp_size, n, ia);
933 : }
934 : }
935 :
936 9132436 : FOR_EACH_VEC_ELT (m_dependences, i, ddr)
937 : {
938 : poly_widest_int dummy_off;
939 :
940 4437984 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
941 2003773 : continue;
942 :
943 2434211 : dra = DDR_A (ddr);
944 2434211 : drb = DDR_B (ddr);
945 :
946 : /* Don't do store elimination if there is any unknown dependence for
947 : any store data reference. */
948 1462914 : if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
949 2795911 : && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
950 330779 : || DDR_NUM_DIST_VECTS (ddr) == 0))
951 : eliminate_store_p = false;
952 :
953 2434211 : ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
954 2434211 : ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
955 2434211 : if (ia == ib)
956 2034897 : continue;
957 :
958 399314 : bad = component_of (comp_father, n);
959 :
960 : /* If both A and B are reads, we may ignore unsuitable dependences. */
961 399314 : if (DR_IS_READ (dra) && DR_IS_READ (drb))
962 : {
963 161821 : if (ia == bad || ib == bad
964 335907 : || !determine_offset (dra, drb, &dummy_off))
965 168004 : continue;
966 : }
967 : /* If A is read and B write or vice versa and there is unsuitable
968 : dependence, instead of merging both components into a component
969 : that will certainly not pass suitable_component_p, just put the
970 : read into bad component, perhaps at least the write together with
971 : all the other data refs in it's component will be optimizable. */
972 207765 : else if (DR_IS_READ (dra) && ib != bad)
973 : {
974 129181 : if (ia == bad)
975 : {
976 71863 : bitmap_set_bit (no_store_store_comps, ib);
977 71863 : continue;
978 : }
979 57318 : else if (!determine_offset (dra, drb, &dummy_off))
980 : {
981 28104 : bitmap_set_bit (no_store_store_comps, ib);
982 28104 : merge_comps (comp_father, comp_size, bad, ia);
983 28104 : continue;
984 : }
985 : }
986 78584 : else if (DR_IS_READ (drb) && ia != bad)
987 : {
988 22145 : if (ib == bad)
989 : {
990 13342 : bitmap_set_bit (no_store_store_comps, ia);
991 13342 : continue;
992 : }
993 8803 : else if (!determine_offset (dra, drb, &dummy_off))
994 : {
995 4021 : bitmap_set_bit (no_store_store_comps, ia);
996 4021 : merge_comps (comp_father, comp_size, bad, ib);
997 4021 : continue;
998 : }
999 : }
1000 43642 : else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
1001 37440 : && ia != bad && ib != bad
1002 65546 : && !determine_offset (dra, drb, &dummy_off))
1003 : {
1004 4339 : merge_comps (comp_father, comp_size, bad, ia);
1005 4339 : merge_comps (comp_father, comp_size, bad, ib);
1006 4339 : continue;
1007 : }
1008 :
1009 109641 : merge_comps (comp_father, comp_size, ia, ib);
1010 4437984 : }
1011 :
1012 256468 : if (eliminate_store_p)
1013 : {
1014 118886 : tree niters = number_of_latch_executions (m_loop);
1015 :
1016 : /* Don't do store elimination if niters info is unknown because stores
1017 : in the last iteration can't be eliminated and we need to recover it
1018 : after loop. */
1019 118886 : eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
1020 : }
1021 :
1022 256468 : auto_vec<struct component *> comps;
1023 256468 : comps.safe_grow_cleared (n, true);
1024 256468 : bad = component_of (comp_father, n);
1025 1255064 : FOR_EACH_VEC_ELT (m_datarefs, i, dr)
1026 : {
1027 742128 : ia = (unsigned) (size_t) dr->aux;
1028 742128 : ca = component_of (comp_father, ia);
1029 742128 : if (ca == bad)
1030 301534 : continue;
1031 :
1032 440594 : comp = comps[ca];
1033 440594 : if (!comp)
1034 : {
1035 384194 : comp = new component (eliminate_store_p);
1036 384194 : comp->refs.reserve_exact (comp_size[ca]);
1037 384194 : comps[ca] = comp;
1038 : }
1039 :
1040 440594 : dataref = XCNEW (class dref_d);
1041 440594 : dataref->ref = dr;
1042 440594 : dataref->stmt = DR_STMT (dr);
1043 440594 : dataref->offset = 0;
1044 440594 : dataref->distance = 0;
1045 :
1046 440594 : dataref->always_accessed
1047 440594 : = dominated_by_p (CDI_DOMINATORS, last_always_executed,
1048 440594 : gimple_bb (dataref->stmt));
1049 440594 : dataref->pos = comp->refs.length ();
1050 440594 : comp->refs.quick_push (dataref);
1051 : }
1052 :
1053 256468 : if (eliminate_store_p)
1054 : {
1055 98576 : bitmap_iterator bi;
1056 99570 : EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
1057 : {
1058 994 : ca = component_of (comp_father, ia);
1059 994 : if (ca != bad)
1060 894 : comps[ca]->eliminate_store_p = false;
1061 : }
1062 : }
1063 :
1064 998596 : for (i = 0; i < n; i++)
1065 : {
1066 742128 : comp = comps[i];
1067 742128 : if (comp)
1068 : {
1069 384194 : comp->next = comp_list;
1070 384194 : comp_list = comp;
1071 : }
1072 : }
1073 256468 : return comp_list;
1074 513148 : }
1075 :
1076 : /* Returns true if the component COMP satisfies the conditions
1077 : described in 2) at the beginning of this file. */
1078 :
1079 : bool
1080 384194 : pcom_worker::suitable_component_p (struct component *comp)
1081 : {
1082 384194 : unsigned i;
1083 384194 : dref a, first;
1084 384194 : basic_block ba, bp = m_loop->header;
1085 384194 : bool ok, has_write = false;
1086 :
1087 792962 : FOR_EACH_VEC_ELT (comp->refs, i, a)
1088 : {
1089 430084 : ba = gimple_bb (a->stmt);
1090 :
1091 430084 : if (!just_once_each_iteration_p (m_loop, ba))
1092 : return false;
1093 :
1094 408768 : gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
1095 408768 : bp = ba;
1096 :
1097 408768 : if (DR_IS_WRITE (a->ref))
1098 141629 : has_write = true;
1099 : }
1100 :
1101 362878 : first = comp->refs[0];
1102 362878 : ok = suitable_reference_p (first->ref, &comp->comp_step);
1103 362878 : gcc_assert (ok);
1104 362878 : first->offset = 0;
1105 :
1106 769834 : FOR_EACH_VEC_ELT (comp->refs, i, a)
1107 : {
1108 407024 : if (has_write && comp->comp_step == RS_NONZERO)
1109 : {
1110 : /* Punt for non-invariant references where step isn't a multiple
1111 : of reference size. If step is smaller than reference size,
1112 : it overlaps the access in next iteration, if step is larger,
1113 : but not multiple of the access size, there could be overlap
1114 : in some later iteration. There might be more latent issues
1115 : about this in predcom or data reference analysis. If the
1116 : reference is a COMPONENT_REF, also check if step isn't a
1117 : multiple of the containg aggregate size. See PR111683. */
1118 144537 : tree ref = DR_REF (a->ref);
1119 144537 : tree step = DR_STEP (a->ref);
1120 144537 : if (TREE_CODE (ref) == COMPONENT_REF
1121 144537 : && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1122 55 : ref = TREE_OPERAND (ref, 0);
1123 144609 : do
1124 : {
1125 144573 : tree sz = TYPE_SIZE_UNIT (TREE_TYPE (ref));
1126 144573 : if (TREE_CODE (sz) != INTEGER_CST)
1127 68 : return false;
1128 289146 : if (wi::multiple_of_p (wi::to_offset (step),
1129 289146 : wi::to_offset (sz), SIGNED))
1130 : break;
1131 104 : if (TREE_CODE (ref) != COMPONENT_REF)
1132 : return false;
1133 36 : ref = TREE_OPERAND (ref, 0);
1134 36 : }
1135 : while (1);
1136 : }
1137 406956 : if (i == 0)
1138 362810 : continue;
1139 : /* Polynomial offsets are no use, since we need to know the
1140 : gap between iteration numbers at compile time. */
1141 : poly_widest_int offset;
1142 44146 : if (!determine_offset (first->ref, a->ref, &offset)
1143 44146 : || !offset.is_constant (&a->offset))
1144 0 : return false;
1145 :
1146 44146 : enum ref_step_type a_step;
1147 44146 : gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
1148 : && a_step == comp->comp_step);
1149 44146 : }
1150 :
1151 : /* If there is a write inside the component, we must know whether the
1152 : step is nonzero or not -- we would not otherwise be able to recognize
1153 : whether the value accessed by reads comes from the OFFSET-th iteration
1154 : or the previous one. */
1155 362810 : if (has_write && comp->comp_step == RS_ANY)
1156 : return false;
1157 :
1158 : return true;
1159 : }
1160 :
1161 : /* Check the conditions on references inside each of components COMPS,
1162 : and remove the unsuitable components from the list. The new list
1163 : of components is returned. The conditions are described in 2) at
1164 : the beginning of this file. */
1165 :
1166 : struct component *
1167 171506 : pcom_worker::filter_suitable_components (struct component *comps)
1168 : {
1169 171506 : struct component **comp, *act;
1170 :
1171 555700 : for (comp = &comps; *comp; )
1172 : {
1173 384194 : act = *comp;
1174 384194 : if (suitable_component_p (act))
1175 356092 : comp = &act->next;
1176 : else
1177 : {
1178 28102 : dref ref;
1179 28102 : unsigned i;
1180 :
1181 28102 : *comp = act->next;
1182 69630 : FOR_EACH_VEC_ELT (act->refs, i, ref)
1183 41528 : free (ref);
1184 28102 : delete act;
1185 : }
1186 : }
1187 :
1188 171506 : return comps;
1189 : }
1190 :
1191 : /* Compares two drefs A and B by their offset and position. Callback for
1192 : qsort. */
1193 :
1194 : static int
1195 194063 : order_drefs (const void *a, const void *b)
1196 : {
1197 194063 : const dref *const da = (const dref *) a;
1198 194063 : const dref *const db = (const dref *) b;
1199 194063 : int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
1200 :
1201 194063 : if (offcmp != 0)
1202 : return offcmp;
1203 :
1204 101428 : return (*da)->pos - (*db)->pos;
1205 : }
1206 :
1207 : /* Compares two drefs A and B by their position. Callback for qsort. */
1208 :
1209 : static int
1210 48 : order_drefs_by_pos (const void *a, const void *b)
1211 : {
1212 48 : const dref *const da = (const dref *) a;
1213 48 : const dref *const db = (const dref *) b;
1214 :
1215 48 : return (*da)->pos - (*db)->pos;
1216 : }
1217 :
1218 : /* Returns root of the CHAIN. */
1219 :
1220 : static inline dref
1221 101090 : get_chain_root (chain_p chain)
1222 : {
1223 202180 : return chain->refs[0];
1224 : }
1225 :
1226 : /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1227 : exist. */
1228 :
1229 : static inline dref
1230 93 : get_chain_last_write_at (chain_p chain, unsigned distance)
1231 : {
1232 321 : for (unsigned i = chain->refs.length (); i > 0; i--)
1233 216 : if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1234 216 : && distance == chain->refs[i - 1]->distance)
1235 : return chain->refs[i - 1];
1236 :
1237 : return NULL;
1238 : }
1239 :
1240 : /* Given CHAIN, returns the last write ref with the same distance before load
1241 : at index LOAD_IDX, or NULL if it doesn't exist. */
1242 :
1243 : static inline dref
1244 5 : get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1245 : {
1246 5 : gcc_assert (load_idx < chain->refs.length ());
1247 :
1248 5 : unsigned distance = chain->refs[load_idx]->distance;
1249 :
1250 5 : for (unsigned i = load_idx; i > 0; i--)
1251 5 : if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1252 5 : && distance == chain->refs[i - 1]->distance)
1253 : return chain->refs[i - 1];
1254 :
1255 : return NULL;
1256 : }
1257 :
1258 : /* Adds REF to the chain CHAIN. */
1259 :
1260 : static void
1261 17864 : add_ref_to_chain (chain_p chain, dref ref)
1262 : {
1263 17864 : dref root = get_chain_root (chain);
1264 :
1265 17864 : gcc_assert (wi::les_p (root->offset, ref->offset));
1266 17864 : widest_int dist = ref->offset - root->offset;
1267 17864 : gcc_assert (wi::fits_uhwi_p (dist));
1268 :
1269 17864 : chain->refs.safe_push (ref);
1270 :
1271 17864 : ref->distance = dist.to_uhwi ();
1272 :
1273 17864 : if (ref->distance >= chain->length)
1274 : {
1275 17864 : chain->length = ref->distance;
1276 17864 : chain->has_max_use_after = false;
1277 : }
1278 :
1279 : /* Promote this chain to CT_STORE_STORE if it has multiple stores. */
1280 17864 : if (DR_IS_WRITE (ref->ref))
1281 72 : chain->type = CT_STORE_STORE;
1282 :
1283 : /* Don't set the flag for store-store chain since there is no use. */
1284 17864 : if (chain->type != CT_STORE_STORE
1285 17790 : && ref->distance == chain->length
1286 17790 : && ref->pos > root->pos)
1287 7036 : chain->has_max_use_after = true;
1288 :
1289 17864 : chain->all_always_accessed &= ref->always_accessed;
1290 17864 : }
1291 :
1292 : /* Returns the chain for invariant component COMP. */
1293 :
1294 : static chain_p
1295 13285 : make_invariant_chain (struct component *comp)
1296 : {
1297 13285 : chain_p chain = new struct chain (CT_INVARIANT);
1298 13285 : unsigned i;
1299 13285 : dref ref;
1300 :
1301 13285 : chain->all_always_accessed = true;
1302 :
1303 28739 : FOR_EACH_VEC_ELT (comp->refs, i, ref)
1304 : {
1305 15454 : chain->refs.safe_push (ref);
1306 15454 : chain->all_always_accessed &= ref->always_accessed;
1307 : }
1308 :
1309 13285 : chain->inits = vNULL;
1310 13285 : chain->finis = vNULL;
1311 :
1312 13285 : return chain;
1313 : }
1314 :
1315 : /* Make a new chain of type TYPE rooted at REF. */
1316 :
1317 : static chain_p
1318 57772 : make_rooted_chain (dref ref, enum chain_type type)
1319 : {
1320 57772 : chain_p chain = new struct chain (type);
1321 :
1322 57772 : chain->refs.safe_push (ref);
1323 57772 : chain->all_always_accessed = ref->always_accessed;
1324 57772 : ref->distance = 0;
1325 :
1326 57772 : chain->inits = vNULL;
1327 57772 : chain->finis = vNULL;
1328 :
1329 57772 : return chain;
1330 : }
1331 :
1332 : /* Returns true if CHAIN is not trivial. */
1333 :
1334 : static bool
1335 92603 : nontrivial_chain_p (chain_p chain)
1336 : {
1337 57772 : return chain != NULL && chain->refs.length () > 1;
1338 : }
1339 :
1340 : /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1341 : is no such name. */
1342 :
1343 : static tree
1344 90002 : name_for_ref (dref ref)
1345 : {
1346 90002 : tree name;
1347 :
1348 90002 : if (is_gimple_assign (ref->stmt))
1349 : {
1350 90002 : if (!ref->ref || DR_IS_READ (ref->ref))
1351 90002 : name = gimple_assign_lhs (ref->stmt);
1352 : else
1353 0 : name = gimple_assign_rhs1 (ref->stmt);
1354 : }
1355 : else
1356 0 : name = PHI_RESULT (ref->stmt);
1357 :
1358 90002 : return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1359 : }
1360 :
1361 : /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1362 : iterations of the innermost enclosing loop). */
1363 :
1364 : bool
1365 8 : pcom_worker::valid_initializer_p (struct data_reference *ref, unsigned distance,
1366 : struct data_reference *root)
1367 : {
1368 32 : aff_tree diff, base, step;
1369 : poly_widest_int off;
1370 :
1371 : /* Both REF and ROOT must be accessing the same object. */
1372 8 : if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1373 : return false;
1374 :
1375 : /* The initializer is defined outside of loop, hence its address must be
1376 : invariant inside the loop. */
1377 8 : gcc_assert (integer_zerop (DR_STEP (ref)));
1378 :
1379 : /* If the address of the reference is invariant, initializer must access
1380 : exactly the same location. */
1381 8 : if (integer_zerop (DR_STEP (root)))
1382 0 : return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1383 0 : && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1384 :
1385 : /* Verify that this index of REF is equal to the root's index at
1386 : -DISTANCE-th iteration. */
1387 8 : aff_combination_dr_offset (root, &diff);
1388 8 : aff_combination_dr_offset (ref, &base);
1389 8 : aff_combination_scale (&base, -1);
1390 8 : aff_combination_add (&diff, &base);
1391 :
1392 8 : tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1393 : &step, &m_cache);
1394 8 : if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1395 : return false;
1396 :
1397 8 : if (maybe_ne (off, distance))
1398 : return false;
1399 :
1400 : return true;
1401 8 : }
1402 :
1403 : /* Finds looparound phi node of loop that copies the value of REF, and if its
1404 : initial value is correct (equal to initial value of REF shifted by one
1405 : iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1406 : is the root of the current chain. */
1407 :
1408 : gphi *
1409 35249 : pcom_worker::find_looparound_phi (dref ref, dref root)
1410 : {
1411 35249 : tree name, init, init_ref;
1412 35249 : gimple *init_stmt;
1413 35249 : edge latch = loop_latch_edge (m_loop);
1414 35249 : struct data_reference init_dr;
1415 35249 : gphi_iterator psi;
1416 :
1417 35249 : if (is_gimple_assign (ref->stmt))
1418 : {
1419 35248 : if (DR_IS_READ (ref->ref))
1420 32860 : name = gimple_assign_lhs (ref->stmt);
1421 : else
1422 2388 : name = gimple_assign_rhs1 (ref->stmt);
1423 : }
1424 : else
1425 1 : name = PHI_RESULT (ref->stmt);
1426 35249 : if (!name)
1427 : return NULL;
1428 :
1429 35249 : tree entry_vuse = NULL_TREE;
1430 35249 : gphi *phi = NULL;
1431 309765 : for (psi = gsi_start_phis (m_loop->header); !gsi_end_p (psi); gsi_next (&psi))
1432 : {
1433 274528 : gphi *p = psi.phi ();
1434 274528 : if (PHI_ARG_DEF_FROM_EDGE (p, latch) == name)
1435 : phi = p;
1436 549032 : else if (virtual_operand_p (gimple_phi_result (p)))
1437 24551 : entry_vuse = PHI_ARG_DEF_FROM_EDGE (p, loop_preheader_edge (m_loop));
1438 274528 : if (phi && entry_vuse)
1439 : break;
1440 : }
1441 35249 : if (!phi || !entry_vuse)
1442 : return NULL;
1443 :
1444 12 : init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
1445 12 : if (TREE_CODE (init) != SSA_NAME)
1446 : return NULL;
1447 10 : init_stmt = SSA_NAME_DEF_STMT (init);
1448 10 : if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1449 : return NULL;
1450 8 : gcc_assert (gimple_assign_lhs (init_stmt) == init);
1451 :
1452 8 : init_ref = gimple_assign_rhs1 (init_stmt);
1453 8 : if (!REFERENCE_CLASS_P (init_ref)
1454 8 : && !DECL_P (init_ref))
1455 : return NULL;
1456 :
1457 : /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1458 : loop enclosing PHI). */
1459 8 : memset (&init_dr, 0, sizeof (struct data_reference));
1460 8 : DR_REF (&init_dr) = init_ref;
1461 8 : DR_STMT (&init_dr) = phi;
1462 8 : if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, m_loop,
1463 : init_stmt))
1464 : return NULL;
1465 :
1466 8 : if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1467 : return NULL;
1468 :
1469 : /* Make sure nothing clobbers the location we re-use the initial value
1470 : from. */
1471 16 : if (entry_vuse != gimple_vuse (init_stmt))
1472 : {
1473 7 : ao_ref ref;
1474 7 : ao_ref_init (&ref, init_ref);
1475 7 : unsigned limit = param_sccvn_max_alias_queries_per_access;
1476 7 : tree vdef = entry_vuse;
1477 7 : do
1478 : {
1479 7 : gimple *def = SSA_NAME_DEF_STMT (vdef);
1480 7 : if (limit-- == 0 || gimple_code (def) == GIMPLE_PHI)
1481 7 : return NULL;
1482 7 : if (stmt_may_clobber_ref_p_1 (def, &ref))
1483 : /* When the stmt is an assign to init_ref we could in theory
1484 : use its RHS for the initial value of the looparound PHI
1485 : we replace in prepare_initializers_chain, but we have
1486 : no convenient place to store this info at the moment. */
1487 : return NULL;
1488 0 : vdef = gimple_vuse (def);
1489 : }
1490 0 : while (vdef != gimple_vuse (init_stmt));
1491 : }
1492 :
1493 : return phi;
1494 : }
1495 :
1496 : /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1497 :
1498 : static void
1499 1 : insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1500 : {
1501 1 : dref nw = XCNEW (class dref_d), aref;
1502 1 : unsigned i;
1503 :
1504 1 : nw->stmt = phi;
1505 1 : nw->distance = ref->distance + 1;
1506 1 : nw->always_accessed = 1;
1507 :
1508 2 : FOR_EACH_VEC_ELT (chain->refs, i, aref)
1509 2 : if (aref->distance >= nw->distance)
1510 : break;
1511 1 : chain->refs.safe_insert (i, nw);
1512 :
1513 1 : if (nw->distance > chain->length)
1514 : {
1515 0 : chain->length = nw->distance;
1516 0 : chain->has_max_use_after = false;
1517 : }
1518 1 : }
1519 :
1520 : /* For references in CHAIN that are copied around the loop (created previously
1521 : by PRE, or by user), add the results of such copies to the chain. This
1522 : enables us to remove the copies by unrolling, and may need less registers
1523 : (also, it may allow us to combine chains together). */
1524 :
1525 : void
1526 17520 : pcom_worker::add_looparound_copies (chain_p chain)
1527 : {
1528 17520 : unsigned i;
1529 17520 : dref ref, root = get_chain_root (chain);
1530 17520 : gphi *phi;
1531 :
1532 17520 : if (chain->type == CT_STORE_STORE)
1533 17520 : return;
1534 :
1535 52710 : FOR_EACH_VEC_ELT (chain->refs, i, ref)
1536 : {
1537 35249 : phi = find_looparound_phi (ref, root);
1538 35249 : if (!phi)
1539 35248 : continue;
1540 :
1541 1 : bitmap_set_bit (m_looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1542 1 : insert_looparound_copy (chain, ref, phi);
1543 : }
1544 : }
1545 :
1546 : /* Find roots of the values and determine distances in the component COMP.
1547 : The references are redistributed into chains. */
1548 :
1549 : void
1550 356092 : pcom_worker::determine_roots_comp (struct component *comp)
1551 : {
1552 356092 : unsigned i;
1553 356092 : dref a;
1554 356092 : chain_p chain = NULL;
1555 356092 : widest_int last_ofs = 0;
1556 356092 : enum chain_type type;
1557 :
1558 : /* Invariants are handled specially. */
1559 356092 : if (comp->comp_step == RS_INVARIANT)
1560 : {
1561 13285 : chain = make_invariant_chain (comp);
1562 13285 : m_chains.safe_push (chain);
1563 13285 : return;
1564 : }
1565 :
1566 : /* Trivial component. */
1567 342807 : if (comp->refs.length () <= 1)
1568 : {
1569 307976 : if (comp->refs.length () == 1)
1570 : {
1571 307976 : free (comp->refs[0]);
1572 307976 : comp->refs.truncate (0);
1573 : }
1574 307976 : return;
1575 : }
1576 :
1577 34831 : comp->refs.qsort (order_drefs);
1578 :
1579 : /* For Store-Store chain, we only support load if it is dominated by a
1580 : store statement in the same iteration of loop. */
1581 34831 : if (comp->eliminate_store_p)
1582 18706 : for (a = NULL, i = 0; i < comp->refs.length (); i++)
1583 : {
1584 18640 : if (DR_IS_WRITE (comp->refs[i]->ref))
1585 : a = comp->refs[i];
1586 16840 : else if (a == NULL || a->offset != comp->refs[i]->offset)
1587 : {
1588 : /* If there is load that is not dominated by a store in the
1589 : same iteration of loop, clear the flag so no Store-Store
1590 : chain is generated for this component. */
1591 16827 : comp->eliminate_store_p = false;
1592 16827 : break;
1593 : }
1594 : }
1595 :
1596 : /* Determine roots and create chains for components. */
1597 110467 : FOR_EACH_VEC_ELT (comp->refs, i, a)
1598 : {
1599 209044 : if (!chain
1600 40805 : || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
1601 22454 : || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
1602 192077 : || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
1603 : {
1604 57772 : if (nontrivial_chain_p (chain))
1605 : {
1606 901 : add_looparound_copies (chain);
1607 901 : m_chains.safe_push (chain);
1608 : }
1609 : else
1610 56871 : release_chain (chain);
1611 :
1612 : /* Determine type of the chain. If the root reference is a load,
1613 : this can only be a CT_LOAD chain; other chains are intialized
1614 : to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
1615 : new reference is added. */
1616 57772 : type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
1617 57772 : chain = make_rooted_chain (a, type);
1618 57772 : last_ofs = a->offset;
1619 57772 : continue;
1620 : }
1621 :
1622 17864 : add_ref_to_chain (chain, a);
1623 : }
1624 :
1625 34831 : if (nontrivial_chain_p (chain))
1626 : {
1627 16619 : add_looparound_copies (chain);
1628 16619 : m_chains.safe_push (chain);
1629 : }
1630 : else
1631 18212 : release_chain (chain);
1632 356092 : }
1633 :
1634 : /* Find roots of the values and determine distances in components COMPS, and
1635 : separates the references to chains. */
1636 :
1637 : void
1638 171506 : pcom_worker::determine_roots (struct component *comps)
1639 : {
1640 171506 : struct component *comp;
1641 :
1642 527598 : for (comp = comps; comp; comp = comp->next)
1643 356092 : determine_roots_comp (comp);
1644 171506 : }
1645 :
1646 : /* Replace the reference in statement STMT with temporary variable
1647 : NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1648 : the reference in the statement. IN_LHS is true if the reference
1649 : is in the lhs of STMT, false if it is in rhs. */
1650 :
1651 : static void
1652 36682 : replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
1653 : {
1654 36682 : tree val;
1655 36682 : gassign *new_stmt;
1656 36682 : gimple_stmt_iterator bsi, psi;
1657 :
1658 36682 : if (gimple_code (stmt) == GIMPLE_PHI)
1659 : {
1660 1 : gcc_assert (!in_lhs && !set);
1661 :
1662 1 : val = PHI_RESULT (stmt);
1663 1 : bsi = gsi_after_labels (gimple_bb (stmt));
1664 1 : psi = gsi_for_stmt (stmt);
1665 1 : remove_phi_node (&psi, false);
1666 :
1667 : /* Turn the phi node into GIMPLE_ASSIGN. */
1668 1 : new_stmt = gimple_build_assign (val, new_tree);
1669 1 : gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1670 18714 : return;
1671 : }
1672 :
1673 : /* Since the reference is of gimple_reg type, it should only
1674 : appear as lhs or rhs of modify statement. */
1675 36681 : gcc_assert (is_gimple_assign (stmt));
1676 :
1677 36681 : bsi = gsi_for_stmt (stmt);
1678 :
1679 : /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1680 36681 : if (!set)
1681 : {
1682 18712 : gcc_assert (!in_lhs);
1683 18712 : gimple_assign_set_rhs_from_tree (&bsi, new_tree);
1684 18712 : stmt = gsi_stmt (bsi);
1685 18712 : update_stmt (stmt);
1686 18712 : return;
1687 : }
1688 :
1689 17969 : if (in_lhs)
1690 : {
1691 : /* We have statement
1692 :
1693 : OLD = VAL
1694 :
1695 : If OLD is a memory reference, then VAL is gimple_val, and we transform
1696 : this to
1697 :
1698 : OLD = VAL
1699 : NEW = VAL
1700 :
1701 : Otherwise, we are replacing a combination chain,
1702 : VAL is the expression that performs the combination, and OLD is an
1703 : SSA name. In this case, we transform the assignment to
1704 :
1705 : OLD = VAL
1706 : NEW = OLD
1707 :
1708 : */
1709 :
1710 3868 : val = gimple_assign_lhs (stmt);
1711 3868 : if (TREE_CODE (val) != SSA_NAME)
1712 : {
1713 3852 : val = gimple_assign_rhs1 (stmt);
1714 3852 : gcc_assert (gimple_assign_single_p (stmt));
1715 3852 : if (TREE_CLOBBER_P (val))
1716 0 : val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1717 : else
1718 3852 : gcc_assert (gimple_assign_copy_p (stmt));
1719 : }
1720 : }
1721 : else
1722 : {
1723 : /* VAL = OLD
1724 :
1725 : is transformed to
1726 :
1727 : VAL = OLD
1728 : NEW = VAL */
1729 :
1730 14101 : val = gimple_assign_lhs (stmt);
1731 : }
1732 :
1733 17969 : new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1734 17969 : gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1735 : }
1736 :
1737 : /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1738 : of the loop it was analyzed in. Append init stmts to STMTS. */
1739 :
1740 : static tree
1741 25515 : ref_at_iteration (data_reference_p dr, int iter,
1742 : gimple_seq *stmts, tree niters = NULL_TREE)
1743 : {
1744 25515 : tree off = DR_OFFSET (dr);
1745 25515 : tree coff = DR_INIT (dr);
1746 25515 : tree ref = DR_REF (dr);
1747 25515 : enum tree_code ref_code = ERROR_MARK;
1748 25515 : tree ref_type = NULL_TREE;
1749 25515 : tree ref_op1 = NULL_TREE;
1750 25515 : tree ref_op2 = NULL_TREE;
1751 25515 : tree new_offset;
1752 :
1753 25515 : if (iter != 0)
1754 : {
1755 25478 : new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1756 25478 : if (TREE_CODE (new_offset) == INTEGER_CST)
1757 25472 : coff = size_binop (PLUS_EXPR, coff, new_offset);
1758 : else
1759 6 : off = size_binop (PLUS_EXPR, off, new_offset);
1760 : }
1761 :
1762 25515 : if (niters != NULL_TREE)
1763 : {
1764 65 : niters = fold_convert (ssizetype, niters);
1765 65 : new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1766 65 : if (TREE_CODE (niters) == INTEGER_CST)
1767 39 : coff = size_binop (PLUS_EXPR, coff, new_offset);
1768 : else
1769 26 : off = size_binop (PLUS_EXPR, off, new_offset);
1770 : }
1771 :
1772 : /* While data-ref analysis punts on bit offsets it still handles
1773 : bitfield accesses at byte boundaries. Cope with that. Note that
1774 : if the bitfield object also starts at a byte-boundary we can simply
1775 : replicate the COMPONENT_REF, but we have to subtract the component's
1776 : byte-offset from the MEM_REF address first.
1777 : Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1778 : start at offset zero. */
1779 25515 : if (TREE_CODE (ref) == COMPONENT_REF
1780 25515 : && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1781 : {
1782 96 : unsigned HOST_WIDE_INT boff;
1783 96 : tree field = TREE_OPERAND (ref, 1);
1784 96 : tree offset = component_ref_field_offset (ref);
1785 96 : ref_type = TREE_TYPE (ref);
1786 96 : boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1787 : /* This can occur in Ada. See the comment in get_bit_range. */
1788 96 : if (boff % BITS_PER_UNIT != 0
1789 96 : || !tree_fits_uhwi_p (offset))
1790 : {
1791 0 : ref_code = BIT_FIELD_REF;
1792 0 : ref_op1 = DECL_SIZE (field);
1793 0 : ref_op2 = bitsize_zero_node;
1794 : }
1795 : else
1796 : {
1797 96 : boff >>= LOG2_BITS_PER_UNIT;
1798 96 : boff += tree_to_uhwi (offset);
1799 96 : coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1800 96 : ref_code = COMPONENT_REF;
1801 96 : ref_op1 = field;
1802 96 : ref_op2 = TREE_OPERAND (ref, 2);
1803 96 : ref = TREE_OPERAND (ref, 0);
1804 : }
1805 : }
1806 : /* We may not associate the constant offset across the pointer plus
1807 : expression because that might form a pointer to before the object
1808 : then. But for some cases we can retain that to allow tree_could_trap_p
1809 : to return false - see gcc.dg/tree-ssa/predcom-1.c */
1810 25515 : tree addr, alias_ptr;
1811 25515 : if (integer_zerop (off)
1812 25515 : && TREE_CODE (DR_BASE_ADDRESS (dr)) != POINTER_PLUS_EXPR)
1813 : {
1814 24364 : alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1815 24364 : addr = DR_BASE_ADDRESS (dr);
1816 : }
1817 : else
1818 : {
1819 1151 : alias_ptr = build_zero_cst (reference_alias_ptr_type (ref));
1820 1151 : off = size_binop (PLUS_EXPR, off, coff);
1821 1151 : addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1822 : }
1823 25515 : addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1824 : is_gimple_mem_ref_addr, NULL_TREE);
1825 25515 : tree type = build_aligned_type (TREE_TYPE (ref),
1826 : get_object_alignment (ref));
1827 25515 : ref = build2 (MEM_REF, type, addr, alias_ptr);
1828 25515 : copy_ref_info (ref, DR_REF (dr));
1829 25515 : if (ref_type)
1830 96 : ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1831 25515 : return ref;
1832 : }
1833 :
1834 : /* Get the initialization expression for the INDEX-th temporary variable
1835 : of CHAIN. */
1836 :
1837 : static tree
1838 11267 : get_init_expr (chain_p chain, unsigned index)
1839 : {
1840 11267 : if (chain->type == CT_COMBINATION)
1841 : {
1842 43 : tree e1 = get_init_expr (chain->ch1, index);
1843 43 : tree e2 = get_init_expr (chain->ch2, index);
1844 :
1845 43 : return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1846 : }
1847 : else
1848 11224 : return chain->inits[index];
1849 : }
1850 :
1851 : /* Returns a new temporary variable used for the I-th variable carrying
1852 : value of REF. The variable's uid is marked in TMP_VARS. */
1853 :
1854 : static tree
1855 19778 : predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1856 : {
1857 19778 : tree type = TREE_TYPE (ref);
1858 : /* We never access the components of the temporary variable in predictive
1859 : commoning. */
1860 19778 : tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1861 19778 : bitmap_set_bit (tmp_vars, DECL_UID (var));
1862 19778 : return var;
1863 : }
1864 :
1865 : /* Creates the variables for CHAIN, as well as phi nodes for them and
1866 : initialization on entry to LOOP. Uids of the newly created
1867 : temporary variables are marked in TMP_VARS. */
1868 :
1869 : static void
1870 16461 : initialize_root_vars (class loop *loop, chain_p chain, bitmap tmp_vars)
1871 : {
1872 16461 : unsigned i;
1873 16461 : unsigned n = chain->length;
1874 16461 : dref root = get_chain_root (chain);
1875 16461 : bool reuse_first = !chain->has_max_use_after;
1876 16461 : tree ref, init, var, next;
1877 16461 : gphi *phi;
1878 16461 : gimple_seq stmts;
1879 16461 : edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1880 :
1881 : /* If N == 0, then all the references are within the single iteration. And
1882 : since this is an nonempty chain, reuse_first cannot be true. */
1883 16461 : gcc_assert (n > 0 || !reuse_first);
1884 :
1885 16461 : chain->vars.create (n + 1);
1886 :
1887 16461 : if (chain->type == CT_COMBINATION)
1888 16 : ref = gimple_assign_lhs (root->stmt);
1889 : else
1890 16445 : ref = DR_REF (root->ref);
1891 :
1892 49158 : for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1893 : {
1894 17878 : var = predcom_tmp_var (ref, i, tmp_vars);
1895 17878 : chain->vars.quick_push (var);
1896 : }
1897 16461 : if (reuse_first)
1898 9725 : chain->vars.quick_push (chain->vars[0]);
1899 :
1900 44064 : FOR_EACH_VEC_ELT (chain->vars, i, var)
1901 27603 : chain->vars[i] = make_ssa_name (var);
1902 :
1903 27603 : for (i = 0; i < n; i++)
1904 : {
1905 11142 : var = chain->vars[i];
1906 11142 : next = chain->vars[i + 1];
1907 11142 : init = get_init_expr (chain, i);
1908 :
1909 11142 : init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1910 11142 : if (stmts)
1911 11141 : gsi_insert_seq_on_edge_immediate (entry, stmts);
1912 :
1913 11142 : phi = create_phi_node (var, loop->header);
1914 11142 : add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1915 11142 : add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1916 : }
1917 16461 : }
1918 :
1919 : /* For inter-iteration store elimination CHAIN in LOOP, returns true if
1920 : all stores to be eliminated store loop invariant values into memory.
1921 : In this case, we can use these invariant values directly after LOOP. */
1922 :
1923 : static bool
1924 37 : is_inv_store_elimination_chain (class loop *loop, chain_p chain)
1925 : {
1926 37 : if (chain->length == 0 || chain->type != CT_STORE_STORE)
1927 : return false;
1928 :
1929 37 : gcc_assert (!chain->has_max_use_after);
1930 :
1931 : /* If loop iterates for unknown times or fewer times than chain->length,
1932 : we still need to setup root variable and propagate it with PHI node. */
1933 37 : tree niters = number_of_latch_executions (loop);
1934 37 : if (TREE_CODE (niters) != INTEGER_CST
1935 37 : || wi::leu_p (wi::to_wide (niters), chain->length))
1936 11 : return false;
1937 :
1938 : /* Check stores in chain for elimination if they only store loop invariant
1939 : values. */
1940 52 : for (unsigned i = 0; i < chain->length; i++)
1941 : {
1942 38 : dref a = get_chain_last_write_at (chain, i);
1943 38 : if (a == NULL)
1944 6 : continue;
1945 :
1946 32 : gimple *def_stmt, *stmt = a->stmt;
1947 32 : if (!gimple_assign_single_p (stmt))
1948 : return false;
1949 :
1950 32 : tree val = gimple_assign_rhs1 (stmt);
1951 32 : if (TREE_CLOBBER_P (val))
1952 : return false;
1953 :
1954 32 : if (CONSTANT_CLASS_P (val))
1955 20 : continue;
1956 :
1957 12 : if (TREE_CODE (val) != SSA_NAME)
1958 : return false;
1959 :
1960 12 : def_stmt = SSA_NAME_DEF_STMT (val);
1961 12 : if (gimple_nop_p (def_stmt))
1962 0 : continue;
1963 :
1964 12 : if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1965 : return false;
1966 : }
1967 : return true;
1968 : }
1969 :
1970 : /* Creates root variables for store elimination CHAIN in which stores for
1971 : elimination only store loop invariant values. In this case, we neither
1972 : need to load root variables before loop nor propagate it with PHI nodes. */
1973 :
1974 : static void
1975 14 : initialize_root_vars_store_elim_1 (chain_p chain)
1976 : {
1977 14 : tree var;
1978 14 : unsigned i, n = chain->length;
1979 :
1980 14 : chain->vars.create (n);
1981 14 : chain->vars.safe_grow_cleared (n, true);
1982 :
1983 : /* Initialize root value for eliminated stores at each distance. */
1984 40 : for (i = 0; i < n; i++)
1985 : {
1986 26 : dref a = get_chain_last_write_at (chain, i);
1987 26 : if (a == NULL)
1988 6 : continue;
1989 :
1990 20 : var = gimple_assign_rhs1 (a->stmt);
1991 20 : chain->vars[a->distance] = var;
1992 : }
1993 :
1994 : /* We don't propagate values with PHI nodes, so manually propagate value
1995 : to bubble positions. */
1996 14 : var = chain->vars[0];
1997 26 : for (i = 1; i < n; i++)
1998 : {
1999 12 : if (chain->vars[i] != NULL_TREE)
2000 : {
2001 6 : var = chain->vars[i];
2002 6 : continue;
2003 : }
2004 6 : chain->vars[i] = var;
2005 : }
2006 :
2007 : /* Revert the vector. */
2008 21 : for (i = 0; i < n / 2; i++)
2009 7 : std::swap (chain->vars[i], chain->vars[n - i - 1]);
2010 14 : }
2011 :
2012 : /* Creates root variables for store elimination CHAIN in which stores for
2013 : elimination store loop variant values. In this case, we may need to
2014 : load root variables before LOOP and propagate it with PHI nodes. Uids
2015 : of the newly created root variables are marked in TMP_VARS. */
2016 :
2017 : static void
2018 23 : initialize_root_vars_store_elim_2 (class loop *loop,
2019 : chain_p chain, bitmap tmp_vars)
2020 : {
2021 23 : unsigned i, n = chain->length;
2022 23 : tree ref, init, var, next, val, phi_result;
2023 23 : gimple *stmt;
2024 23 : gimple_seq stmts;
2025 :
2026 23 : chain->vars.create (n);
2027 :
2028 23 : ref = DR_REF (get_chain_root (chain)->ref);
2029 62 : for (i = 0; i < n; i++)
2030 : {
2031 39 : var = predcom_tmp_var (ref, i, tmp_vars);
2032 39 : chain->vars.quick_push (var);
2033 : }
2034 :
2035 62 : FOR_EACH_VEC_ELT (chain->vars, i, var)
2036 39 : chain->vars[i] = make_ssa_name (var);
2037 :
2038 : /* Root values are either rhs operand of stores to be eliminated, or
2039 : loaded from memory before loop. */
2040 23 : auto_vec<tree> vtemps;
2041 23 : vtemps.safe_grow_cleared (n, true);
2042 62 : for (i = 0; i < n; i++)
2043 : {
2044 39 : init = get_init_expr (chain, i);
2045 39 : if (init == NULL_TREE)
2046 : {
2047 : /* Root value is rhs operand of the store to be eliminated if
2048 : it isn't loaded from memory before loop. */
2049 29 : dref a = get_chain_last_write_at (chain, i);
2050 29 : val = gimple_assign_rhs1 (a->stmt);
2051 29 : if (TREE_CLOBBER_P (val))
2052 : {
2053 0 : val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
2054 0 : gimple_assign_set_rhs1 (a->stmt, val);
2055 : }
2056 :
2057 29 : vtemps[n - i - 1] = val;
2058 : }
2059 : else
2060 : {
2061 10 : edge latch = loop_latch_edge (loop);
2062 10 : edge entry = loop_preheader_edge (loop);
2063 :
2064 : /* Root value is loaded from memory before loop, we also need
2065 : to add PHI nodes to propagate the value across iterations. */
2066 10 : init = force_gimple_operand (init, &stmts, true, NULL_TREE);
2067 10 : if (stmts)
2068 10 : gsi_insert_seq_on_edge_immediate (entry, stmts);
2069 :
2070 10 : next = chain->vars[n - i];
2071 10 : phi_result = copy_ssa_name (next);
2072 10 : gphi *phi = create_phi_node (phi_result, loop->header);
2073 10 : add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
2074 10 : add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
2075 10 : vtemps[n - i - 1] = phi_result;
2076 : }
2077 : }
2078 :
2079 : /* Find the insertion position. */
2080 23 : dref last = get_chain_root (chain);
2081 77 : for (i = 0; i < chain->refs.length (); i++)
2082 : {
2083 54 : if (chain->refs[i]->pos > last->pos)
2084 6 : last = chain->refs[i];
2085 : }
2086 :
2087 23 : gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
2088 :
2089 : /* Insert statements copying root value to root variable. */
2090 85 : for (i = 0; i < n; i++)
2091 : {
2092 39 : var = chain->vars[i];
2093 39 : val = vtemps[i];
2094 39 : stmt = gimple_build_assign (var, val);
2095 39 : gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
2096 : }
2097 23 : }
2098 :
2099 : /* Generates stores for CHAIN's eliminated stores in LOOP's last
2100 : (CHAIN->length - 1) iterations. */
2101 :
2102 : static void
2103 37 : finalize_eliminated_stores (class loop *loop, chain_p chain)
2104 : {
2105 37 : unsigned i, n = chain->length;
2106 :
2107 102 : for (i = 0; i < n; i++)
2108 : {
2109 65 : tree var = chain->vars[i];
2110 65 : tree fini = chain->finis[n - i - 1];
2111 65 : gimple *stmt = gimple_build_assign (fini, var);
2112 :
2113 65 : gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
2114 : }
2115 :
2116 37 : if (chain->fini_seq)
2117 : {
2118 37 : gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
2119 37 : chain->fini_seq = NULL;
2120 : }
2121 37 : }
2122 :
2123 : /* Initializes a variable for load motion for ROOT and prepares phi nodes and
2124 : initialization on entry to LOOP if necessary. The ssa name for the variable
2125 : is stored in VARS. If WRITTEN is true, also a phi node to copy its value
2126 : around the loop is created. Uid of the newly created temporary variable
2127 : is marked in TMP_VARS. INITS is the list containing the (single)
2128 : initializer. */
2129 :
2130 : static void
2131 1861 : initialize_root_vars_lm (class loop *loop, dref root, bool written,
2132 : vec<tree> *vars, const vec<tree> &inits,
2133 : bitmap tmp_vars)
2134 : {
2135 1861 : unsigned i;
2136 1861 : tree ref = DR_REF (root->ref), init, var, next;
2137 1861 : gimple_seq stmts;
2138 1861 : gphi *phi;
2139 1861 : edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
2140 :
2141 : /* Find the initializer for the variable, and check that it cannot
2142 : trap. */
2143 1861 : init = inits[0];
2144 :
2145 2294 : vars->create (written ? 2 : 1);
2146 1861 : var = predcom_tmp_var (ref, 0, tmp_vars);
2147 1861 : vars->quick_push (var);
2148 1861 : if (written)
2149 1428 : vars->quick_push ((*vars)[0]);
2150 :
2151 5150 : FOR_EACH_VEC_ELT (*vars, i, var)
2152 3289 : (*vars)[i] = make_ssa_name (var);
2153 :
2154 1861 : var = (*vars)[0];
2155 :
2156 1861 : init = force_gimple_operand (init, &stmts, written, NULL_TREE);
2157 1861 : if (stmts)
2158 1428 : gsi_insert_seq_on_edge_immediate (entry, stmts);
2159 :
2160 1861 : if (written)
2161 : {
2162 1428 : next = (*vars)[1];
2163 1428 : phi = create_phi_node (var, loop->header);
2164 1428 : add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
2165 1428 : add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
2166 : }
2167 : else
2168 : {
2169 433 : gassign *init_stmt = gimple_build_assign (var, init);
2170 433 : gsi_insert_on_edge_immediate (entry, init_stmt);
2171 : }
2172 1861 : }
2173 :
2174 :
2175 : /* Execute load motion for references in chain CHAIN. Uids of the newly
2176 : created temporary variables are marked in TMP_VARS. */
2177 :
2178 : static void
2179 11200 : execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
2180 : {
2181 11200 : auto_vec<tree> vars;
2182 11200 : dref a;
2183 11200 : unsigned n_writes = 0, ridx, i;
2184 11200 : tree var;
2185 :
2186 11200 : gcc_assert (chain->type == CT_INVARIANT);
2187 11200 : gcc_assert (!chain->combined);
2188 24182 : FOR_EACH_VEC_ELT (chain->refs, i, a)
2189 12982 : if (DR_IS_WRITE (a->ref))
2190 11043 : n_writes++;
2191 :
2192 : /* If there are no reads in the loop, there is nothing to do. */
2193 22400 : if (n_writes == chain->refs.length ())
2194 9339 : return;
2195 :
2196 1861 : initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
2197 : &vars, chain->inits, tmp_vars);
2198 :
2199 1861 : ridx = 0;
2200 7169 : FOR_EACH_VEC_ELT (chain->refs, i, a)
2201 : {
2202 3447 : bool is_read = DR_IS_READ (a->ref);
2203 :
2204 3447 : if (DR_IS_WRITE (a->ref))
2205 : {
2206 1508 : n_writes--;
2207 1508 : if (n_writes)
2208 : {
2209 80 : var = vars[0];
2210 80 : var = make_ssa_name (SSA_NAME_VAR (var));
2211 80 : vars[0] = var;
2212 : }
2213 : else
2214 : ridx = 1;
2215 : }
2216 :
2217 3447 : replace_ref_with (a->stmt, vars[ridx],
2218 3447 : !is_read, !is_read);
2219 : }
2220 11200 : }
2221 :
2222 : /* Returns the single statement in that NAME is used, excepting
2223 : the looparound phi nodes contained in one of the chains. If there is no
2224 : such statement, or more statements, NULL is returned. */
2225 :
2226 : gimple *
2227 48809 : pcom_worker::single_nonlooparound_use (tree name)
2228 : {
2229 48809 : use_operand_p use;
2230 48809 : imm_use_iterator it;
2231 48809 : gimple *stmt, *ret = NULL;
2232 :
2233 147008 : FOR_EACH_IMM_USE_FAST (use, it, name)
2234 : {
2235 51556 : stmt = USE_STMT (use);
2236 :
2237 51556 : if (gimple_code (stmt) == GIMPLE_PHI)
2238 : {
2239 : /* Ignore uses in looparound phi nodes. Uses in other phi nodes
2240 : could not be processed anyway, so just fail for them. */
2241 192 : if (bitmap_bit_p (m_looparound_phis,
2242 96 : SSA_NAME_VERSION (PHI_RESULT (stmt))))
2243 0 : continue;
2244 :
2245 : return NULL;
2246 : }
2247 51460 : else if (is_gimple_debug (stmt))
2248 723 : continue;
2249 50737 : else if (ret != NULL)
2250 : return NULL;
2251 : else
2252 : ret = stmt;
2253 2166 : }
2254 :
2255 46643 : return ret;
2256 : }
2257 :
2258 : /* Remove statement STMT, as well as the chain of assignments in that it is
2259 : used. */
2260 :
2261 : void
2262 160 : pcom_worker::remove_stmt (gimple *stmt)
2263 : {
2264 160 : tree name;
2265 160 : gimple *next;
2266 160 : gimple_stmt_iterator psi;
2267 :
2268 160 : if (gimple_code (stmt) == GIMPLE_PHI)
2269 : {
2270 0 : name = PHI_RESULT (stmt);
2271 0 : next = single_nonlooparound_use (name);
2272 0 : reset_debug_uses (stmt);
2273 0 : psi = gsi_for_stmt (stmt);
2274 0 : remove_phi_node (&psi, true);
2275 :
2276 0 : if (!next
2277 0 : || !gimple_assign_ssa_name_copy_p (next)
2278 0 : || gimple_assign_rhs1 (next) != name)
2279 0 : return;
2280 :
2281 : stmt = next;
2282 : }
2283 :
2284 204 : while (1)
2285 : {
2286 182 : gimple_stmt_iterator bsi;
2287 :
2288 182 : bsi = gsi_for_stmt (stmt);
2289 :
2290 182 : name = gimple_assign_lhs (stmt);
2291 182 : if (TREE_CODE (name) == SSA_NAME)
2292 : {
2293 110 : next = single_nonlooparound_use (name);
2294 110 : reset_debug_uses (stmt);
2295 : }
2296 : else
2297 : {
2298 : /* This is a store to be eliminated. */
2299 144 : gcc_assert (gimple_vdef (stmt) != NULL);
2300 : next = NULL;
2301 : }
2302 :
2303 182 : unlink_stmt_vdef (stmt);
2304 182 : gsi_remove (&bsi, true);
2305 182 : release_defs (stmt);
2306 :
2307 182 : if (!next
2308 64 : || !gimple_assign_ssa_name_copy_p (next)
2309 204 : || gimple_assign_rhs1 (next) != name)
2310 160 : return;
2311 :
2312 22 : stmt = next;
2313 22 : }
2314 : }
2315 :
2316 : /* Perform the predictive commoning optimization for a chain CHAIN.
2317 : Uids of the newly created temporary variables are marked in TMP_VARS.*/
2318 :
2319 : void
2320 16578 : pcom_worker::execute_pred_commoning_chain (chain_p chain,
2321 : bitmap tmp_vars)
2322 : {
2323 16578 : unsigned i;
2324 16578 : dref a;
2325 16578 : tree var;
2326 16578 : bool in_lhs;
2327 :
2328 16578 : if (chain->combined)
2329 : {
2330 : /* For combined chains, just remove the statements that are used to
2331 : compute the values of the expression (except for the root one).
2332 : We delay this until after all chains are processed. */
2333 : }
2334 16520 : else if (chain->type == CT_STORE_STORE)
2335 : {
2336 59 : if (chain->length > 0)
2337 : {
2338 37 : if (chain->inv_store_elimination)
2339 : {
2340 : /* If dead stores in this chain only store loop invariant
2341 : values, we can simply record the invariant value and use
2342 : it directly after loop. */
2343 14 : initialize_root_vars_store_elim_1 (chain);
2344 : }
2345 : else
2346 : {
2347 : /* If dead stores in this chain store loop variant values,
2348 : we need to set up the variables by loading from memory
2349 : before loop and propagating it with PHI nodes. */
2350 23 : initialize_root_vars_store_elim_2 (m_loop, chain, tmp_vars);
2351 : }
2352 :
2353 : /* For inter-iteration store elimination chain, stores at each
2354 : distance in loop's last (chain->length - 1) iterations can't
2355 : be eliminated, because there is no following killing store.
2356 : We need to generate these stores after loop. */
2357 37 : finalize_eliminated_stores (m_loop, chain);
2358 : }
2359 :
2360 59 : bool last_store_p = true;
2361 254 : for (i = chain->refs.length (); i > 0; i--)
2362 : {
2363 136 : a = chain->refs[i - 1];
2364 : /* Preserve the last store of the chain. Eliminate other stores
2365 : which are killed by the last one. */
2366 136 : if (DR_IS_WRITE (a->ref))
2367 : {
2368 131 : if (last_store_p)
2369 : last_store_p = false;
2370 : else
2371 72 : remove_stmt (a->stmt);
2372 :
2373 131 : continue;
2374 : }
2375 :
2376 : /* Any load in Store-Store chain must be dominated by a previous
2377 : store, we replace the load reference with rhs of the store. */
2378 5 : dref b = get_chain_last_write_before_load (chain, i - 1);
2379 5 : gcc_assert (b != NULL);
2380 5 : var = gimple_assign_rhs1 (b->stmt);
2381 5 : replace_ref_with (a->stmt, var, false, false);
2382 : }
2383 : }
2384 : else
2385 : {
2386 : /* For non-combined chains, set up the variables that hold its value. */
2387 16461 : initialize_root_vars (m_loop, chain, tmp_vars);
2388 16461 : a = get_chain_root (chain);
2389 16461 : in_lhs = (chain->type == CT_STORE_LOAD
2390 16461 : || chain->type == CT_COMBINATION);
2391 16461 : replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2392 :
2393 : /* Replace the uses of the original references by these variables. */
2394 49691 : for (i = 1; chain->refs.iterate (i, &a); i++)
2395 : {
2396 16769 : var = chain->vars[chain->length - a->distance];
2397 16769 : replace_ref_with (a->stmt, var, false, false);
2398 : }
2399 : }
2400 16578 : }
2401 :
2402 : /* Determines the unroll factor necessary to remove as many temporary variable
2403 : copies as possible. CHAINS is the list of chains that will be
2404 : optimized. */
2405 :
2406 : static unsigned
2407 3403 : determine_unroll_factor (const vec<chain_p> &chains)
2408 : {
2409 3403 : chain_p chain;
2410 3403 : unsigned factor = 1, af, nfactor, i;
2411 3403 : unsigned max = param_max_unroll_times;
2412 :
2413 7718 : FOR_EACH_VEC_ELT (chains, i, chain)
2414 : {
2415 4342 : if (chain->type == CT_INVARIANT)
2416 1783 : continue;
2417 : /* For now we can't handle unrolling when eliminating stores. */
2418 2559 : else if (chain->type == CT_STORE_STORE)
2419 : return 1;
2420 :
2421 2532 : if (chain->combined)
2422 : {
2423 : /* For combined chains, we can't handle unrolling if we replace
2424 : looparound PHIs. */
2425 : dref a;
2426 : unsigned j;
2427 146 : for (j = 1; chain->refs.iterate (j, &a); j++)
2428 88 : if (gimple_code (a->stmt) == GIMPLE_PHI)
2429 27 : return 1;
2430 58 : continue;
2431 58 : }
2432 :
2433 : /* The best unroll factor for this chain is equal to the number of
2434 : temporary variables that we create for it. */
2435 2474 : af = chain->length;
2436 2474 : if (chain->has_max_use_after)
2437 1446 : af++;
2438 :
2439 2474 : nfactor = factor * af / gcd (factor, af);
2440 2474 : if (nfactor <= max)
2441 4315 : factor = nfactor;
2442 : }
2443 :
2444 : return factor;
2445 : }
2446 :
2447 : /* Perform the predictive commoning optimization for chains.
2448 : Uids of the newly created temporary variables are marked in TMP_VARS. */
2449 :
2450 : void
2451 16705 : pcom_worker::execute_pred_commoning (bitmap tmp_vars)
2452 : {
2453 16705 : chain_p chain;
2454 16705 : unsigned i;
2455 :
2456 44483 : FOR_EACH_VEC_ELT (m_chains, i, chain)
2457 : {
2458 27778 : if (chain->type == CT_INVARIANT)
2459 11200 : execute_load_motion (m_loop, chain, tmp_vars);
2460 : else
2461 16578 : execute_pred_commoning_chain (chain, tmp_vars);
2462 : }
2463 :
2464 44483 : FOR_EACH_VEC_ELT (m_chains, i, chain)
2465 : {
2466 27778 : if (chain->type == CT_INVARIANT)
2467 : ;
2468 16578 : else if (chain->combined)
2469 : {
2470 : /* For combined chains, just remove the statements that are used to
2471 : compute the values of the expression (except for the root one). */
2472 : dref a;
2473 : unsigned j;
2474 27924 : for (j = 1; chain->refs.iterate (j, &a); j++)
2475 88 : remove_stmt (a->stmt);
2476 : }
2477 : }
2478 16705 : }
2479 :
2480 : /* For each reference in CHAINS, if its defining statement is
2481 : phi node, record the ssa name that is defined by it. */
2482 :
2483 : static void
2484 716 : replace_phis_by_defined_names (vec<chain_p> &chains)
2485 : {
2486 716 : chain_p chain;
2487 716 : dref a;
2488 716 : unsigned i, j;
2489 :
2490 2240 : FOR_EACH_VEC_ELT (chains, i, chain)
2491 3107 : FOR_EACH_VEC_ELT (chain->refs, j, a)
2492 : {
2493 1583 : if (gimple_code (a->stmt) == GIMPLE_PHI)
2494 : {
2495 1 : a->name_defined_by_phi = PHI_RESULT (a->stmt);
2496 1 : a->stmt = NULL;
2497 : }
2498 : }
2499 716 : }
2500 :
2501 : /* For each reference in CHAINS, if name_defined_by_phi is not
2502 : NULL, use it to set the stmt field. */
2503 :
2504 : static void
2505 716 : replace_names_by_phis (vec<chain_p> chains)
2506 : {
2507 716 : chain_p chain;
2508 716 : dref a;
2509 716 : unsigned i, j;
2510 :
2511 2240 : FOR_EACH_VEC_ELT (chains, i, chain)
2512 3107 : FOR_EACH_VEC_ELT (chain->refs, j, a)
2513 1583 : if (a->stmt == NULL)
2514 : {
2515 1 : a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2516 1 : gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2517 1 : a->name_defined_by_phi = NULL_TREE;
2518 : }
2519 716 : }
2520 :
2521 : /* Wrapper over execute_pred_commoning, to pass it as a callback
2522 : to tree_transform_and_unroll_loop. */
2523 :
2524 : struct epcc_data
2525 : {
2526 : vec<chain_p> chains;
2527 : bitmap tmp_vars;
2528 : pcom_worker *worker;
2529 : };
2530 :
2531 : static void
2532 716 : execute_pred_commoning_cbck (class loop *loop ATTRIBUTE_UNUSED, void *data)
2533 : {
2534 716 : struct epcc_data *const dta = (struct epcc_data *) data;
2535 716 : pcom_worker *worker = dta->worker;
2536 :
2537 : /* Restore phi nodes that were replaced by ssa names before
2538 : tree_transform_and_unroll_loop (see detailed description in
2539 : tree_predictive_commoning_loop). */
2540 716 : replace_names_by_phis (dta->chains);
2541 716 : worker->execute_pred_commoning (dta->tmp_vars);
2542 716 : }
2543 :
2544 : /* Base NAME and all the names in the chain of phi nodes that use it
2545 : on variable VAR. The phi nodes are recognized by being in the copies of
2546 : the header of the LOOP. */
2547 :
2548 : static void
2549 761 : base_names_in_chain_on (class loop *loop, tree name, tree var)
2550 : {
2551 761 : gimple *stmt, *phi;
2552 761 : imm_use_iterator iter;
2553 :
2554 761 : replace_ssa_name_symbol (name, var);
2555 :
2556 2411 : while (1)
2557 : {
2558 1586 : phi = NULL;
2559 3940 : FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2560 : {
2561 1593 : if (gimple_code (stmt) == GIMPLE_PHI
2562 1593 : && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
2563 : {
2564 : phi = stmt;
2565 : break;
2566 : }
2567 1586 : }
2568 1586 : if (!phi)
2569 761 : return;
2570 :
2571 825 : name = PHI_RESULT (phi);
2572 825 : replace_ssa_name_symbol (name, var);
2573 825 : }
2574 : }
2575 :
2576 : /* Given an unrolled LOOP after predictive commoning, remove the
2577 : register copies arising from phi nodes by changing the base
2578 : variables of SSA names. TMP_VARS is the set of the temporary variables
2579 : for those we want to perform this. */
2580 :
2581 : static void
2582 716 : eliminate_temp_copies (class loop *loop, bitmap tmp_vars)
2583 : {
2584 716 : edge e;
2585 716 : gphi *phi;
2586 716 : gimple *stmt;
2587 716 : tree name, use, var;
2588 716 : gphi_iterator psi;
2589 :
2590 716 : e = loop_latch_edge (loop);
2591 3685 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
2592 : {
2593 2969 : phi = psi.phi ();
2594 2969 : name = PHI_RESULT (phi);
2595 2969 : var = SSA_NAME_VAR (name);
2596 1960 : if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
2597 2208 : continue;
2598 761 : use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2599 761 : gcc_assert (TREE_CODE (use) == SSA_NAME);
2600 :
2601 : /* Base all the ssa names in the ud and du chain of NAME on VAR. */
2602 761 : stmt = SSA_NAME_DEF_STMT (use);
2603 793 : while (gimple_code (stmt) == GIMPLE_PHI
2604 : /* In case we could not unroll the loop enough to eliminate
2605 : all copies, we may reach the loop header before the defining
2606 : statement (in that case, some register copies will be present
2607 : in loop latch in the final code, corresponding to the newly
2608 : created looparound phi nodes). */
2609 793 : && gimple_bb (stmt) != loop->header)
2610 : {
2611 32 : gcc_assert (single_pred_p (gimple_bb (stmt)));
2612 32 : use = PHI_ARG_DEF (stmt, 0);
2613 32 : stmt = SSA_NAME_DEF_STMT (use);
2614 : }
2615 :
2616 761 : base_names_in_chain_on (loop, use, var);
2617 : }
2618 716 : }
2619 :
2620 : /* Returns true if CHAIN is suitable to be combined. */
2621 :
2622 : static bool
2623 101722 : chain_can_be_combined_p (chain_p chain)
2624 : {
2625 101722 : return (!chain->combined
2626 101592 : && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2627 : }
2628 :
2629 : /* Returns the modify statement that uses NAME. Skips over assignment
2630 : statements, NAME is replaced with the actual name used in the returned
2631 : statement. */
2632 :
2633 : gimple *
2634 47842 : pcom_worker::find_use_stmt (tree *name)
2635 : {
2636 48699 : gimple *stmt;
2637 48699 : tree rhs, lhs;
2638 :
2639 : /* Skip over assignments. */
2640 49556 : while (1)
2641 : {
2642 48699 : stmt = single_nonlooparound_use (*name);
2643 48699 : if (!stmt)
2644 : return NULL;
2645 :
2646 46533 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
2647 : return NULL;
2648 :
2649 45207 : lhs = gimple_assign_lhs (stmt);
2650 45207 : if (TREE_CODE (lhs) != SSA_NAME)
2651 : return NULL;
2652 :
2653 45185 : if (gimple_assign_copy_p (stmt))
2654 : {
2655 857 : rhs = gimple_assign_rhs1 (stmt);
2656 857 : if (rhs != *name)
2657 : return NULL;
2658 :
2659 857 : *name = lhs;
2660 : }
2661 44328 : else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2662 : == GIMPLE_BINARY_RHS)
2663 : return stmt;
2664 : else
2665 : return NULL;
2666 : }
2667 : }
2668 :
2669 : /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2670 :
2671 : static bool
2672 897 : may_reassociate_p (tree type, enum tree_code code)
2673 : {
2674 502 : if (FLOAT_TYPE_P (type)
2675 1185 : && !flag_unsafe_math_optimizations)
2676 : return false;
2677 :
2678 569 : return (commutative_tree_code (code)
2679 569 : && associative_tree_code (code));
2680 : }
2681 :
2682 : /* If the operation used in STMT is associative and commutative, go through the
2683 : tree of the same operations and returns its root. Distance to the root
2684 : is stored in DISTANCE. */
2685 :
2686 : gimple *
2687 897 : pcom_worker::find_associative_operation_root (gimple *stmt, unsigned *distance)
2688 : {
2689 897 : tree lhs;
2690 897 : gimple *next;
2691 897 : enum tree_code code = gimple_assign_rhs_code (stmt);
2692 897 : tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2693 897 : unsigned dist = 0;
2694 :
2695 897 : if (!may_reassociate_p (type, code))
2696 : return NULL;
2697 :
2698 2607 : while (1)
2699 : {
2700 1549 : lhs = gimple_assign_lhs (stmt);
2701 1549 : gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2702 :
2703 1549 : next = find_use_stmt (&lhs);
2704 1549 : if (!next
2705 1549 : || gimple_assign_rhs_code (next) != code)
2706 : break;
2707 :
2708 1058 : stmt = next;
2709 1058 : dist++;
2710 : }
2711 :
2712 491 : if (distance)
2713 130 : *distance = dist;
2714 : return stmt;
2715 : }
2716 :
2717 : /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2718 : is no such statement, returns NULL_TREE. In case the operation used on
2719 : NAME1 and NAME2 is associative and commutative, returns the root of the
2720 : tree formed by this operation instead of the statement that uses NAME1 or
2721 : NAME2. */
2722 :
2723 : gimple *
2724 44928 : pcom_worker::find_common_use_stmt (tree *name1, tree *name2)
2725 : {
2726 44928 : gimple *stmt1, *stmt2;
2727 :
2728 44928 : stmt1 = find_use_stmt (name1);
2729 44928 : if (!stmt1)
2730 : return NULL;
2731 :
2732 724 : stmt2 = find_use_stmt (name2);
2733 724 : if (!stmt2)
2734 : return NULL;
2735 :
2736 616 : if (stmt1 == stmt2)
2737 : return stmt1;
2738 :
2739 586 : stmt1 = find_associative_operation_root (stmt1, NULL);
2740 586 : if (!stmt1)
2741 : return NULL;
2742 181 : stmt2 = find_associative_operation_root (stmt2, NULL);
2743 181 : if (!stmt2)
2744 : return NULL;
2745 :
2746 180 : return (stmt1 == stmt2 ? stmt1 : NULL);
2747 : }
2748 :
2749 : /* Checks whether R1 and R2 are combined together using CODE, with the result
2750 : in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2751 : if it is true. If CODE is ERROR_MARK, set these values instead. */
2752 :
2753 : bool
2754 44928 : pcom_worker::combinable_refs_p (dref r1, dref r2,
2755 : enum tree_code *code, bool *swap, tree *rslt_type)
2756 : {
2757 44928 : enum tree_code acode;
2758 44928 : bool aswap;
2759 44928 : tree atype;
2760 44928 : tree name1, name2;
2761 44928 : gimple *stmt;
2762 :
2763 44928 : name1 = name_for_ref (r1);
2764 44928 : name2 = name_for_ref (r2);
2765 44928 : gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2766 :
2767 44928 : stmt = find_common_use_stmt (&name1, &name2);
2768 :
2769 44928 : if (!stmt
2770 : /* A simple post-dominance check - make sure the combination
2771 : is executed under the same condition as the references. */
2772 44928 : || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2773 20 : && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2774 : return false;
2775 :
2776 79 : acode = gimple_assign_rhs_code (stmt);
2777 79 : aswap = (!commutative_tree_code (acode)
2778 79 : && gimple_assign_rhs1 (stmt) != name1);
2779 79 : atype = TREE_TYPE (gimple_assign_lhs (stmt));
2780 :
2781 79 : if (*code == ERROR_MARK)
2782 : {
2783 35 : *code = acode;
2784 35 : *swap = aswap;
2785 35 : *rslt_type = atype;
2786 35 : return true;
2787 : }
2788 :
2789 44 : return (*code == acode
2790 44 : && *swap == aswap
2791 88 : && *rslt_type == atype);
2792 : }
2793 :
2794 : /* Remove OP from the operation on rhs of STMT, and replace STMT with
2795 : an assignment of the remaining operand. */
2796 :
2797 : static void
2798 130 : remove_name_from_operation (gimple *stmt, tree op)
2799 : {
2800 130 : tree other_op;
2801 130 : gimple_stmt_iterator si;
2802 :
2803 130 : gcc_assert (is_gimple_assign (stmt));
2804 :
2805 130 : if (gimple_assign_rhs1 (stmt) == op)
2806 59 : other_op = gimple_assign_rhs2 (stmt);
2807 : else
2808 : other_op = gimple_assign_rhs1 (stmt);
2809 :
2810 130 : si = gsi_for_stmt (stmt);
2811 130 : gimple_assign_set_rhs_from_tree (&si, other_op);
2812 :
2813 : /* We should not have reallocated STMT. */
2814 130 : gcc_assert (gsi_stmt (si) == stmt);
2815 :
2816 130 : update_stmt (stmt);
2817 130 : }
2818 :
2819 : /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2820 : are combined in a single statement, and returns this statement. */
2821 :
2822 : gimple *
2823 65 : pcom_worker::reassociate_to_the_same_stmt (tree name1, tree name2)
2824 : {
2825 65 : gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
2826 65 : gassign *new_stmt, *tmp_stmt;
2827 65 : tree new_name, tmp_name, var, r1, r2;
2828 65 : unsigned dist1, dist2;
2829 65 : enum tree_code code;
2830 65 : tree type = TREE_TYPE (name1);
2831 65 : gimple_stmt_iterator bsi;
2832 :
2833 65 : stmt1 = find_use_stmt (&name1);
2834 65 : stmt2 = find_use_stmt (&name2);
2835 65 : root1 = find_associative_operation_root (stmt1, &dist1);
2836 65 : root2 = find_associative_operation_root (stmt2, &dist2);
2837 65 : code = gimple_assign_rhs_code (stmt1);
2838 :
2839 65 : gcc_assert (root1 && root2 && root1 == root2
2840 : && code == gimple_assign_rhs_code (stmt2));
2841 :
2842 : /* Find the root of the nearest expression in that both NAME1 and NAME2
2843 : are used. */
2844 65 : r1 = name1;
2845 65 : s1 = stmt1;
2846 65 : r2 = name2;
2847 65 : s2 = stmt2;
2848 :
2849 231 : while (dist1 > dist2)
2850 : {
2851 166 : s1 = find_use_stmt (&r1);
2852 166 : r1 = gimple_assign_lhs (s1);
2853 166 : dist1--;
2854 : }
2855 126 : while (dist2 > dist1)
2856 : {
2857 61 : s2 = find_use_stmt (&r2);
2858 61 : r2 = gimple_assign_lhs (s2);
2859 61 : dist2--;
2860 : }
2861 :
2862 134 : while (s1 != s2)
2863 : {
2864 69 : s1 = find_use_stmt (&r1);
2865 69 : r1 = gimple_assign_lhs (s1);
2866 69 : s2 = find_use_stmt (&r2);
2867 69 : r2 = gimple_assign_lhs (s2);
2868 : }
2869 :
2870 : /* Remove NAME1 and NAME2 from the statements in that they are used
2871 : currently. */
2872 65 : remove_name_from_operation (stmt1, name1);
2873 65 : remove_name_from_operation (stmt2, name2);
2874 :
2875 : /* Insert the new statement combining NAME1 and NAME2 before S1, and
2876 : combine it with the rhs of S1. */
2877 65 : var = create_tmp_reg (type, "predreastmp");
2878 65 : new_name = make_ssa_name (var);
2879 65 : new_stmt = gimple_build_assign (new_name, code, name1, name2);
2880 :
2881 65 : var = create_tmp_reg (type, "predreastmp");
2882 65 : tmp_name = make_ssa_name (var);
2883 :
2884 : /* Rhs of S1 may now be either a binary expression with operation
2885 : CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2886 : so that name1 or name2 was removed from it). */
2887 65 : tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2888 : gimple_assign_rhs1 (s1),
2889 : gimple_assign_rhs2 (s1));
2890 :
2891 65 : bsi = gsi_for_stmt (s1);
2892 65 : gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2893 65 : s1 = gsi_stmt (bsi);
2894 65 : update_stmt (s1);
2895 :
2896 65 : gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2897 65 : gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2898 :
2899 65 : return new_stmt;
2900 : }
2901 :
2902 : /* Returns the statement that combines references R1 and R2. In case R1
2903 : and R2 are not used in the same statement, but they are used with an
2904 : associative and commutative operation in the same expression, reassociate
2905 : the expression so that they are used in the same statement. */
2906 :
2907 : gimple *
2908 73 : pcom_worker::stmt_combining_refs (dref r1, dref r2)
2909 : {
2910 73 : gimple *stmt1, *stmt2;
2911 73 : tree name1 = name_for_ref (r1);
2912 73 : tree name2 = name_for_ref (r2);
2913 :
2914 73 : stmt1 = find_use_stmt (&name1);
2915 73 : stmt2 = find_use_stmt (&name2);
2916 73 : if (stmt1 == stmt2)
2917 : return stmt1;
2918 :
2919 65 : return reassociate_to_the_same_stmt (name1, name2);
2920 : }
2921 :
2922 : /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2923 : description of the new chain is returned, otherwise we return NULL. */
2924 :
2925 : chain_p
2926 59237 : pcom_worker::combine_chains (chain_p ch1, chain_p ch2)
2927 : {
2928 59237 : dref r1, r2, nw;
2929 59237 : enum tree_code op = ERROR_MARK;
2930 59237 : bool swap = false;
2931 59237 : chain_p new_chain;
2932 59237 : unsigned i;
2933 59237 : tree rslt_type = NULL_TREE;
2934 :
2935 59237 : if (ch1 == ch2)
2936 : return NULL;
2937 45120 : if (ch1->length != ch2->length)
2938 : return NULL;
2939 :
2940 135258 : if (ch1->refs.length () != ch2->refs.length ())
2941 : return NULL;
2942 :
2943 79 : for (i = 0; (ch1->refs.iterate (i, &r1)
2944 89885 : && ch2->refs.iterate (i, &r2)); i++)
2945 : {
2946 44928 : if (r1->distance != r2->distance)
2947 : return NULL;
2948 :
2949 44928 : if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2950 : return NULL;
2951 : }
2952 :
2953 29 : if (swap)
2954 1 : std::swap (ch1, ch2);
2955 :
2956 29 : new_chain = new struct chain (CT_COMBINATION);
2957 29 : new_chain->op = op;
2958 29 : new_chain->ch1 = ch1;
2959 29 : new_chain->ch2 = ch2;
2960 29 : new_chain->rslt_type = rslt_type;
2961 29 : new_chain->length = ch1->length;
2962 :
2963 102 : for (i = 0; (ch1->refs.iterate (i, &r1)
2964 175 : && ch2->refs.iterate (i, &r2)); i++)
2965 : {
2966 73 : nw = XCNEW (class dref_d);
2967 73 : nw->stmt = stmt_combining_refs (r1, r2);
2968 73 : nw->distance = r1->distance;
2969 :
2970 73 : new_chain->refs.safe_push (nw);
2971 : }
2972 :
2973 29 : ch1->combined = true;
2974 29 : ch2->combined = true;
2975 29 : return new_chain;
2976 : }
2977 :
2978 : /* Recursively update position information of all offspring chains to ROOT
2979 : chain's position information. */
2980 :
2981 : static void
2982 29 : update_pos_for_combined_chains (chain_p root)
2983 : {
2984 29 : chain_p ch1 = root->ch1, ch2 = root->ch2;
2985 29 : dref ref, ref1, ref2;
2986 29 : for (unsigned j = 0; (root->refs.iterate (j, &ref)
2987 73 : && ch1->refs.iterate (j, &ref1)
2988 175 : && ch2->refs.iterate (j, &ref2)); ++j)
2989 73 : ref1->pos = ref2->pos = ref->pos;
2990 :
2991 29 : if (ch1->type == CT_COMBINATION)
2992 13 : update_pos_for_combined_chains (ch1);
2993 29 : if (ch2->type == CT_COMBINATION)
2994 : update_pos_for_combined_chains (ch2);
2995 29 : }
2996 :
2997 : /* Returns true if statement S1 dominates statement S2. */
2998 :
2999 : static bool
3000 12 : pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
3001 : {
3002 12 : basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
3003 :
3004 12 : if (!bb1 || s1 == s2)
3005 : return true;
3006 :
3007 12 : if (bb1 == bb2)
3008 12 : return gimple_uid (s1) < gimple_uid (s2);
3009 :
3010 0 : return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
3011 : }
3012 :
3013 : /* Try to combine the chains. */
3014 :
3015 : void
3016 16705 : pcom_worker::try_combine_chains ()
3017 : {
3018 16705 : unsigned i, j;
3019 16705 : chain_p ch1, ch2, cch;
3020 16705 : auto_vec<chain_p> worklist;
3021 16705 : bool combined_p = false;
3022 :
3023 44454 : FOR_EACH_VEC_ELT (m_chains, i, ch1)
3024 55498 : if (chain_can_be_combined_p (ch1))
3025 14146 : worklist.safe_push (ch1);
3026 :
3027 61760 : while (!worklist.is_empty ())
3028 : {
3029 14175 : ch1 = worklist.pop ();
3030 14175 : if (!chain_can_be_combined_p (ch1))
3031 29 : continue;
3032 :
3033 104795 : FOR_EACH_VEC_ELT (m_chains, j, ch2)
3034 : {
3035 59798 : if (!chain_can_be_combined_p (ch2))
3036 561 : continue;
3037 :
3038 59237 : cch = combine_chains (ch1, ch2);
3039 59237 : if (cch)
3040 : {
3041 29 : worklist.safe_push (cch);
3042 29 : m_chains.safe_push (cch);
3043 29 : combined_p = true;
3044 29 : break;
3045 : }
3046 : }
3047 : }
3048 16705 : if (!combined_p)
3049 16694 : return;
3050 :
3051 : /* Setup UID for all statements in dominance order. */
3052 11 : basic_block *bbs = get_loop_body_in_dom_order (m_loop);
3053 11 : renumber_gimple_stmt_uids_in_blocks (bbs, m_loop->num_nodes);
3054 11 : free (bbs);
3055 :
3056 : /* Re-association in combined chains may generate statements different to
3057 : order of references of the original chain. We need to keep references
3058 : of combined chain in dominance order so that all uses will be inserted
3059 : after definitions. Note:
3060 : A) This is necessary for all combined chains.
3061 : B) This is only necessary for ZERO distance references because other
3062 : references inherit value from loop carried PHIs.
3063 :
3064 : We first update position information for all combined chains. */
3065 11 : dref ref;
3066 90 : for (i = 0; m_chains.iterate (i, &ch1); ++i)
3067 : {
3068 79 : if (ch1->type != CT_COMBINATION || ch1->combined)
3069 63 : continue;
3070 :
3071 55 : for (j = 0; ch1->refs.iterate (j, &ref); ++j)
3072 39 : ref->pos = gimple_uid (ref->stmt);
3073 :
3074 16 : update_pos_for_combined_chains (ch1);
3075 : }
3076 : /* Then sort references according to newly updated position information. */
3077 101 : for (i = 0; m_chains.iterate (i, &ch1); ++i)
3078 : {
3079 79 : if (ch1->type != CT_COMBINATION && !ch1->combined)
3080 5 : continue;
3081 :
3082 : /* Find the first reference with non-ZERO distance. */
3083 74 : if (ch1->length == 0)
3084 86 : j = ch1->refs.length();
3085 : else
3086 : {
3087 124 : for (j = 0; ch1->refs.iterate (j, &ref); ++j)
3088 124 : if (ref->distance != 0)
3089 : break;
3090 : }
3091 :
3092 : /* Sort all ZERO distance references by position. */
3093 74 : qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
3094 :
3095 74 : if (ch1->combined)
3096 58 : continue;
3097 :
3098 : /* For ZERO length chain, has_max_use_after must be true since root
3099 : combined stmt must dominates others. */
3100 16 : if (ch1->length == 0)
3101 : {
3102 4 : ch1->has_max_use_after = true;
3103 4 : continue;
3104 : }
3105 : /* Check if there is use at max distance after root for combined chains
3106 : and set flag accordingly. */
3107 12 : ch1->has_max_use_after = false;
3108 12 : gimple *root_stmt = get_chain_root (ch1)->stmt;
3109 107 : for (j = 1; ch1->refs.iterate (j, &ref); ++j)
3110 : {
3111 19 : if (ref->distance == ch1->length
3112 19 : && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
3113 : {
3114 3 : ch1->has_max_use_after = true;
3115 3 : break;
3116 : }
3117 : }
3118 : }
3119 16705 : }
3120 :
3121 : /* Prepare initializers for store elimination CHAIN in LOOP. Returns false
3122 : if this is impossible because one of these initializers may trap, true
3123 : otherwise. */
3124 :
3125 : static bool
3126 59 : prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
3127 : {
3128 59 : unsigned i, n = chain->length;
3129 :
3130 : /* For now we can't eliminate stores if some of them are conditional
3131 : executed. */
3132 59 : if (!chain->all_always_accessed)
3133 : return false;
3134 :
3135 : /* Nothing to intialize for intra-iteration store elimination. */
3136 59 : if (n == 0 && chain->type == CT_STORE_STORE)
3137 : return true;
3138 :
3139 : /* For store elimination chain, there is nothing to initialize if stores
3140 : to be eliminated only store loop invariant values into memory. */
3141 37 : if (chain->type == CT_STORE_STORE
3142 37 : && is_inv_store_elimination_chain (loop, chain))
3143 : {
3144 14 : chain->inv_store_elimination = true;
3145 14 : return true;
3146 : }
3147 :
3148 23 : chain->inits.create (n);
3149 23 : chain->inits.safe_grow_cleared (n, true);
3150 :
3151 : /* For store eliminatin chain like below:
3152 :
3153 : for (i = 0; i < len; i++)
3154 : {
3155 : a[i] = 1;
3156 : // a[i + 1] = ...
3157 : a[i + 2] = 3;
3158 : }
3159 :
3160 : store to a[i + 1] is missed in loop body, it acts like bubbles. The
3161 : content of a[i + 1] remain the same if the loop iterates fewer times
3162 : than chain->length. We need to set up root variables for such stores
3163 : by loading from memory before loop. Note we only need to load bubble
3164 : elements because loop body is guaranteed to be executed at least once
3165 : after loop's preheader edge. */
3166 23 : auto_vec<bool> bubbles;
3167 23 : bubbles.safe_grow_cleared (n + 1, true);
3168 100 : for (i = 0; i < chain->refs.length (); i++)
3169 54 : bubbles[chain->refs[i]->distance] = true;
3170 :
3171 23 : struct data_reference *dr = get_chain_root (chain)->ref;
3172 62 : for (i = 0; i < n; i++)
3173 : {
3174 39 : if (bubbles[i])
3175 29 : continue;
3176 :
3177 10 : gimple_seq stmts = NULL;
3178 :
3179 10 : tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
3180 10 : if (stmts)
3181 2 : gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
3182 :
3183 10 : chain->inits[i] = init;
3184 : }
3185 :
3186 23 : return true;
3187 23 : }
3188 :
3189 : /* Prepare initializers for CHAIN. Returns false if this is impossible
3190 : because one of these initializers may trap, true otherwise. */
3191 :
3192 : bool
3193 30805 : pcom_worker::prepare_initializers_chain (chain_p chain)
3194 : {
3195 30805 : unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
3196 30805 : struct data_reference *dr = get_chain_root (chain)->ref;
3197 30805 : tree init;
3198 30805 : dref laref;
3199 30805 : edge entry = loop_preheader_edge (m_loop);
3200 :
3201 30805 : if (chain->type == CT_STORE_STORE)
3202 59 : return prepare_initializers_chain_store_elim (m_loop, chain);
3203 :
3204 : /* Find the initializers for the variables, and check that they cannot
3205 : trap. */
3206 30746 : chain->inits.create (n);
3207 86933 : for (i = 0; i < n; i++)
3208 25441 : chain->inits.quick_push (NULL_TREE);
3209 :
3210 : /* If we have replaced some looparound phi nodes, use their initializers
3211 : instead of creating our own. */
3212 81449 : FOR_EACH_VEC_ELT (chain->refs, i, laref)
3213 : {
3214 50703 : if (gimple_code (laref->stmt) != GIMPLE_PHI)
3215 50702 : continue;
3216 :
3217 1 : gcc_assert (laref->distance > 0);
3218 1 : chain->inits[n - laref->distance]
3219 2 : = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
3220 : }
3221 :
3222 53131 : for (i = 0; i < n; i++)
3223 : {
3224 25441 : gimple_seq stmts = NULL;
3225 :
3226 25441 : if (chain->inits[i] != NULL_TREE)
3227 1 : continue;
3228 :
3229 25440 : init = ref_at_iteration (dr, (int) i - n, &stmts);
3230 25440 : if (!chain->all_always_accessed && tree_could_trap_p (init))
3231 : {
3232 3056 : gimple_seq_discard (stmts);
3233 3056 : return false;
3234 : }
3235 :
3236 22384 : if (stmts)
3237 1145 : gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
3238 :
3239 22384 : chain->inits[i] = init;
3240 : }
3241 :
3242 : return true;
3243 : }
3244 :
3245 : /* Prepare initializers for chains, and free chains that cannot
3246 : be used because the initializers might trap. */
3247 :
3248 : void
3249 16705 : pcom_worker::prepare_initializers ()
3250 : {
3251 16705 : chain_p chain;
3252 16705 : unsigned i;
3253 :
3254 47510 : for (i = 0; i < m_chains.length (); )
3255 : {
3256 30805 : chain = m_chains[i];
3257 30805 : if (prepare_initializers_chain (chain))
3258 27749 : i++;
3259 : else
3260 : {
3261 3056 : release_chain (chain);
3262 3056 : m_chains.unordered_remove (i);
3263 : }
3264 : }
3265 16705 : }
3266 :
3267 : /* Generates finalizer memory references for CHAIN. Returns true
3268 : if finalizer code for CHAIN can be generated, otherwise false. */
3269 :
3270 : bool
3271 37 : pcom_worker::prepare_finalizers_chain (chain_p chain)
3272 : {
3273 37 : unsigned i, n = chain->length;
3274 37 : struct data_reference *dr = get_chain_root (chain)->ref;
3275 37 : tree fini, niters = number_of_latch_executions (m_loop);
3276 :
3277 : /* For now we can't eliminate stores if some of them are conditional
3278 : executed. */
3279 37 : if (!chain->all_always_accessed)
3280 : return false;
3281 :
3282 37 : chain->finis.create (n);
3283 139 : for (i = 0; i < n; i++)
3284 65 : chain->finis.quick_push (NULL_TREE);
3285 :
3286 : /* We never use looparound phi node for store elimination chains. */
3287 :
3288 : /* Find the finalizers for the variables, and check that they cannot
3289 : trap. */
3290 102 : for (i = 0; i < n; i++)
3291 : {
3292 65 : gimple_seq stmts = NULL;
3293 65 : gcc_assert (chain->finis[i] == NULL_TREE);
3294 :
3295 65 : if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
3296 : {
3297 11 : niters = unshare_expr (niters);
3298 11 : niters = force_gimple_operand (niters, &stmts, true, NULL);
3299 11 : if (stmts)
3300 : {
3301 11 : gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3302 11 : stmts = NULL;
3303 : }
3304 : }
3305 65 : fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
3306 65 : if (stmts)
3307 26 : gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3308 :
3309 65 : chain->finis[i] = fini;
3310 : }
3311 :
3312 : return true;
3313 : }
3314 :
3315 : /* Generates finalizer memory reference for chains. Returns true if
3316 : finalizer code generation for chains breaks loop closed ssa form. */
3317 :
3318 : bool
3319 16705 : pcom_worker::prepare_finalizers ()
3320 : {
3321 16705 : chain_p chain;
3322 16705 : unsigned i;
3323 16705 : bool loop_closed_ssa = false;
3324 :
3325 44454 : for (i = 0; i < m_chains.length ();)
3326 : {
3327 27749 : chain = m_chains[i];
3328 :
3329 : /* Finalizer is only necessary for inter-iteration store elimination
3330 : chains. */
3331 27749 : if (chain->length == 0 || chain->type != CT_STORE_STORE)
3332 : {
3333 27712 : i++;
3334 27712 : continue;
3335 : }
3336 :
3337 37 : if (prepare_finalizers_chain (chain))
3338 : {
3339 37 : i++;
3340 : /* Be conservative, assume loop closed ssa form is corrupted
3341 : by store-store chain. Though it's not always the case if
3342 : eliminated stores only store loop invariant values into
3343 : memory. */
3344 37 : loop_closed_ssa = true;
3345 : }
3346 : else
3347 : {
3348 0 : release_chain (chain);
3349 0 : m_chains.unordered_remove (i);
3350 : }
3351 : }
3352 16705 : return loop_closed_ssa;
3353 : }
3354 :
3355 : /* Insert all initializing gimple stmts into LOOP's entry edge. */
3356 :
3357 : static void
3358 16705 : insert_init_seqs (class loop *loop, vec<chain_p> &chains)
3359 : {
3360 16705 : unsigned i;
3361 16705 : edge entry = loop_preheader_edge (loop);
3362 :
3363 61188 : for (i = 0; i < chains.length (); ++i)
3364 27778 : if (chains[i]->init_seq)
3365 : {
3366 1112 : gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3367 1112 : chains[i]->init_seq = NULL;
3368 : }
3369 16705 : }
3370 :
3371 : /* Performs predictive commoning for LOOP. Sets bit 1<<1 of return value
3372 : if LOOP was unrolled; Sets bit 1<<2 of return value if loop closed ssa
3373 : form was corrupted. Non-zero return value indicates some changes were
3374 : applied to this loop. */
3375 :
3376 : unsigned
3377 427656 : pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
3378 : {
3379 427656 : struct component *components;
3380 427656 : unsigned unroll_factor = 0;
3381 427656 : class tree_niter_desc desc;
3382 427656 : bool unroll = false, loop_closed_ssa = false;
3383 :
3384 427656 : if (dump_file && (dump_flags & TDF_DETAILS))
3385 56 : fprintf (dump_file, "Processing loop %d\n", m_loop->num);
3386 :
3387 : /* Nothing for predicitive commoning if loop only iterates 1 time. */
3388 427656 : if (get_max_loop_iterations_int (m_loop) == 0)
3389 : {
3390 25076 : if (dump_file && (dump_flags & TDF_DETAILS))
3391 1 : fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3392 :
3393 25076 : return 0;
3394 : }
3395 :
3396 : /* Find the data references and split them into components according to their
3397 : dependence relations. */
3398 402580 : auto_vec<loop_p, 3> loop_nest;
3399 402580 : if (!compute_data_dependences_for_loop (m_loop, true, &loop_nest, &m_datarefs,
3400 : &m_dependences))
3401 : {
3402 145900 : if (dump_file && (dump_flags & TDF_DETAILS))
3403 0 : fprintf (dump_file, "Cannot analyze data dependencies\n");
3404 145900 : return 0;
3405 : }
3406 :
3407 256680 : if (dump_file && (dump_flags & TDF_DETAILS))
3408 55 : dump_data_dependence_relations (dump_file, m_dependences);
3409 :
3410 256680 : components = split_data_refs_to_components ();
3411 :
3412 256680 : loop_nest.release ();
3413 256680 : if (!components)
3414 : return 0;
3415 :
3416 171506 : if (dump_file && (dump_flags & TDF_DETAILS))
3417 : {
3418 54 : fprintf (dump_file, "Initial state:\n\n");
3419 54 : dump_components (dump_file, components);
3420 : }
3421 :
3422 : /* Find the suitable components and split them into chains. */
3423 171506 : components = filter_suitable_components (components);
3424 :
3425 171506 : auto_bitmap tmp_vars;
3426 171506 : determine_roots (components);
3427 171506 : release_components (components);
3428 :
3429 171506 : if (!m_chains.exists ())
3430 : {
3431 154801 : if (dump_file && (dump_flags & TDF_DETAILS))
3432 20 : fprintf (dump_file,
3433 : "Predictive commoning failed: no suitable chains\n");
3434 154801 : return 0;
3435 : }
3436 :
3437 16705 : prepare_initializers ();
3438 16705 : loop_closed_ssa = prepare_finalizers ();
3439 :
3440 : /* Try to combine the chains that are always worked with together. */
3441 16705 : try_combine_chains ();
3442 :
3443 16705 : insert_init_seqs (m_loop, m_chains);
3444 :
3445 16705 : if (dump_file && (dump_flags & TDF_DETAILS))
3446 : {
3447 34 : fprintf (dump_file, "Before commoning:\n\n");
3448 34 : dump_chains (dump_file, m_chains);
3449 : }
3450 :
3451 16705 : if (allow_unroll_p)
3452 : /* Determine the unroll factor, and if the loop should be unrolled, ensure
3453 : that its number of iterations is divisible by the factor. */
3454 3403 : unroll_factor = determine_unroll_factor (m_chains);
3455 :
3456 3403 : if (unroll_factor > 1)
3457 731 : unroll = can_unroll_loop_p (m_loop, unroll_factor, &desc);
3458 :
3459 : /* Execute the predictive commoning transformations, and possibly unroll the
3460 : loop. */
3461 731 : if (unroll)
3462 : {
3463 716 : struct epcc_data dta;
3464 :
3465 716 : if (dump_file && (dump_flags & TDF_DETAILS))
3466 9 : fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
3467 :
3468 716 : dta.tmp_vars = tmp_vars;
3469 716 : dta.chains = m_chains.to_vec_legacy ();
3470 716 : dta.worker = this;
3471 :
3472 : /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3473 : execute_pred_commoning_cbck is called may cause phi nodes to be
3474 : reallocated, which is a problem since CHAINS may point to these
3475 : statements. To fix this, we store the ssa names defined by the
3476 : phi nodes here instead of the phi nodes themselves, and restore
3477 : the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3478 716 : replace_phis_by_defined_names (m_chains);
3479 :
3480 716 : tree_transform_and_unroll_loop (m_loop, unroll_factor, &desc,
3481 : execute_pred_commoning_cbck, &dta);
3482 716 : eliminate_temp_copies (m_loop, tmp_vars);
3483 : }
3484 : else
3485 : {
3486 15989 : if (dump_file && (dump_flags & TDF_DETAILS))
3487 25 : fprintf (dump_file,
3488 : "Executing predictive commoning without unrolling.\n");
3489 15989 : execute_pred_commoning (tmp_vars);
3490 : }
3491 :
3492 49363 : return (unroll ? 2 : 1) | (loop_closed_ssa ? 4 : 1);
3493 574086 : }
3494 :
3495 : /* Runs predictive commoning. */
3496 :
3497 : unsigned
3498 206832 : tree_predictive_commoning (bool allow_unroll_p)
3499 : {
3500 206832 : unsigned ret = 0, changed = 0;
3501 :
3502 206832 : initialize_original_copy_tables ();
3503 1069352 : for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
3504 448856 : if (optimize_loop_for_speed_p (loop))
3505 : {
3506 427656 : pcom_worker w(loop);
3507 427656 : changed |= w.tree_predictive_commoning_loop (allow_unroll_p);
3508 427656 : }
3509 206832 : free_original_copy_tables ();
3510 :
3511 206832 : if (changed > 0)
3512 : {
3513 11482 : ret = TODO_update_ssa_only_virtuals;
3514 :
3515 : /* Some loop(s) got unrolled. */
3516 11482 : if (changed > 1)
3517 : {
3518 670 : scev_reset ();
3519 :
3520 : /* Need to fix up loop closed SSA. */
3521 670 : if (changed >= 4)
3522 36 : rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3523 :
3524 : ret |= TODO_cleanup_cfg;
3525 : }
3526 : }
3527 :
3528 : if (ret != 0)
3529 11482 : cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
3530 :
3531 206832 : return ret;
3532 : }
3533 :
3534 : /* Predictive commoning Pass. */
3535 :
3536 : static unsigned
3537 206832 : run_tree_predictive_commoning (struct function *fun, bool allow_unroll_p)
3538 : {
3539 413664 : if (number_of_loops (fun) <= 1)
3540 : return 0;
3541 :
3542 206832 : return tree_predictive_commoning (allow_unroll_p);
3543 : }
3544 :
3545 : namespace {
3546 :
3547 : const pass_data pass_data_predcom =
3548 : {
3549 : GIMPLE_PASS, /* type */
3550 : "pcom", /* name */
3551 : OPTGROUP_LOOP, /* optinfo_flags */
3552 : TV_PREDCOM, /* tv_id */
3553 : PROP_cfg, /* properties_required */
3554 : 0, /* properties_provided */
3555 : 0, /* properties_destroyed */
3556 : 0, /* todo_flags_start */
3557 : 0, /* todo_flags_finish */
3558 : };
3559 :
3560 : class pass_predcom : public gimple_opt_pass
3561 : {
3562 : public:
3563 285722 : pass_predcom (gcc::context *ctxt)
3564 571444 : : gimple_opt_pass (pass_data_predcom, ctxt)
3565 : {}
3566 :
3567 : /* opt_pass methods: */
3568 : bool
3569 241458 : gate (function *) final override
3570 : {
3571 241458 : if (flag_predictive_commoning != 0)
3572 : return true;
3573 : /* Loop vectorization enables predictive commoning implicitly
3574 : only if predictive commoning isn't set explicitly, and it
3575 : doesn't allow unrolling. */
3576 213380 : if (flag_tree_loop_vectorize
3577 178755 : && !OPTION_SET_P (flag_predictive_commoning))
3578 178755 : return true;
3579 :
3580 : return false;
3581 : }
3582 :
3583 : unsigned int
3584 206832 : execute (function *fun) final override
3585 : {
3586 206832 : bool allow_unroll_p = flag_predictive_commoning != 0;
3587 206832 : return run_tree_predictive_commoning (fun, allow_unroll_p);
3588 : }
3589 :
3590 : }; // class pass_predcom
3591 :
3592 : } // anon namespace
3593 :
3594 : gimple_opt_pass *
3595 285722 : make_pass_predcom (gcc::context *ctxt)
3596 : {
3597 285722 : return new pass_predcom (ctxt);
3598 : }
|