Line data Source code
1 : /* Loop invariant motion.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "tree.h"
25 : #include "gimple.h"
26 : #include "cfghooks.h"
27 : #include "tree-pass.h"
28 : #include "ssa.h"
29 : #include "gimple-pretty-print.h"
30 : #include "fold-const.h"
31 : #include "cfganal.h"
32 : #include "tree-eh.h"
33 : #include "gimple-iterator.h"
34 : #include "tree-cfg.h"
35 : #include "tree-ssa-loop-manip.h"
36 : #include "tree-ssa-loop.h"
37 : #include "tree-into-ssa.h"
38 : #include "cfgloop.h"
39 : #include "tree-affine.h"
40 : #include "tree-ssa-propagate.h"
41 : #include "trans-mem.h"
42 : #include "gimple-fold.h"
43 : #include "tree-scalar-evolution.h"
44 : #include "tree-ssa-loop-niter.h"
45 : #include "alias.h"
46 : #include "builtins.h"
47 : #include "tree-dfa.h"
48 : #include "tree-ssa.h"
49 : #include "dbgcnt.h"
50 : #include "insn-codes.h"
51 : #include "optabs-tree.h"
52 :
53 : /* TODO: Support for predicated code motion. I.e.
54 :
55 : while (1)
56 : {
57 : if (cond)
58 : {
59 : a = inv;
60 : something;
61 : }
62 : }
63 :
64 : Where COND and INV are invariants, but evaluating INV may trap or be
65 : invalid from some other reason if !COND. This may be transformed to
66 :
67 : if (cond)
68 : a = inv;
69 : while (1)
70 : {
71 : if (cond)
72 : something;
73 : } */
74 :
75 : /* The auxiliary data kept for each statement. */
76 :
77 : struct lim_aux_data
78 : {
79 : class loop *max_loop; /* The outermost loop in that the statement
80 : is invariant. */
81 :
82 : class loop *tgt_loop; /* The loop out of that we want to move the
83 : invariant. */
84 :
85 : class loop *always_executed_in;
86 : /* The outermost loop for that we are sure
87 : the statement is executed if the loop
88 : is entered. */
89 :
90 : unsigned cost; /* Cost of the computation performed by the
91 : statement. */
92 :
93 : unsigned ref; /* The simple_mem_ref in this stmt or 0. */
94 :
95 : vec<gimple *> depends; /* Vector of statements that must be also
96 : hoisted out of the loop when this statement
97 : is hoisted; i.e. those that define the
98 : operands of the statement and are inside of
99 : the MAX_LOOP loop. */
100 : };
101 :
102 : /* Maps statements to their lim_aux_data. */
103 :
104 : static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
105 :
106 : /* Description of a memory reference location. */
107 :
108 : struct mem_ref_loc
109 : {
110 : tree *ref; /* The reference itself. */
111 : gimple *stmt; /* The statement in that it occurs. */
112 : };
113 :
114 :
115 : /* Description of a memory reference. */
116 :
117 : class im_mem_ref
118 : {
119 : public:
120 : unsigned id : 30; /* ID assigned to the memory reference
121 : (its index in memory_accesses.refs_list) */
122 : unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */
123 : unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */
124 : hashval_t hash; /* Its hash value. */
125 :
126 : /* The memory access itself and associated caching of alias-oracle
127 : query meta-data. We are using mem.ref == error_mark_node for the
128 : case the reference is represented by its single access stmt
129 : in accesses_in_loop[0]. */
130 : ao_ref mem;
131 :
132 : bitmap stored; /* The set of loops in that this memory location
133 : is stored to. */
134 : bitmap loaded; /* The set of loops in that this memory location
135 : is loaded from. */
136 : vec<mem_ref_loc> accesses_in_loop;
137 : /* The locations of the accesses. */
138 :
139 : /* The following set is computed on demand. */
140 : bitmap_head dep_loop; /* The set of loops in that the memory
141 : reference is {in,}dependent in
142 : different modes. */
143 : };
144 :
145 : static bool in_loop_pipeline;
146 :
147 : /* We use six bits per loop in the ref->dep_loop bitmap to record
148 : the dep_kind x dep_state combinations. */
149 :
150 : enum dep_kind { lim_raw, sm_war, sm_waw };
151 : enum dep_state { dep_unknown, dep_independent, dep_dependent };
152 :
153 : /* coldest outermost loop for given loop. */
154 : vec<class loop *> coldest_outermost_loop;
155 : /* hotter outer loop nearest to given loop. */
156 : vec<class loop *> hotter_than_inner_loop;
157 :
158 : /* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */
159 :
160 : static void
161 2061538 : record_loop_dependence (class loop *loop, im_mem_ref *ref,
162 : dep_kind kind, dep_state state)
163 : {
164 2061538 : gcc_assert (state != dep_unknown);
165 2061538 : unsigned bit = 6 * loop->num + kind * 2 + state == dep_dependent ? 1 : 0;
166 2061538 : bitmap_set_bit (&ref->dep_loop, bit);
167 2061538 : }
168 :
169 : /* Query the loop dependence cache of REF for LOOP, KIND. */
170 :
171 : static dep_state
172 662834 : query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
173 : {
174 662834 : unsigned first_bit = 6 * loop->num + kind * 2;
175 662834 : if (bitmap_bit_p (&ref->dep_loop, first_bit))
176 : return dep_independent;
177 662834 : else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
178 0 : return dep_dependent;
179 : return dep_unknown;
180 : }
181 :
182 : /* Mem_ref hashtable helpers. */
183 :
184 : struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
185 : {
186 : typedef ao_ref *compare_type;
187 : static inline hashval_t hash (const im_mem_ref *);
188 : static inline bool equal (const im_mem_ref *, const ao_ref *);
189 : };
190 :
191 : /* A hash function for class im_mem_ref object OBJ. */
192 :
193 : inline hashval_t
194 10632302 : mem_ref_hasher::hash (const im_mem_ref *mem)
195 : {
196 10632302 : return mem->hash;
197 : }
198 :
199 : /* An equality function for class im_mem_ref object MEM1 with
200 : memory reference OBJ2. */
201 :
202 : inline bool
203 12375741 : mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
204 : {
205 12375741 : if (!obj2->max_size_known_p ())
206 1483259 : return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
207 :
208 10892482 : if (!mem1->ref_decomposed)
209 : return false;
210 :
211 : /* obj2 is now known decomposable and the hash is based on offset,
212 : size and base, matching gather_mem_refs_stmt. */
213 10412621 : bool refs_equal
214 10412621 : = ((TREE_CODE (mem1->mem.base) == MEM_REF
215 4552661 : && TREE_CODE (obj2->base) == MEM_REF
216 3020350 : && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
217 3020350 : TREE_OPERAND (obj2->base, 0), 0)
218 1828629 : && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT
219 : + mem1->mem.offset,
220 : mem_ref_offset (obj2->base) * BITS_PER_UNIT
221 : + obj2->offset))
222 14538522 : || (operand_equal_p (mem1->mem.base, obj2->base, 0)
223 787048 : && known_eq (mem1->mem.offset, obj2->offset)));
224 :
225 9532446 : return (refs_equal
226 880175 : && known_eq (mem1->mem.size, obj2->size)
227 877479 : && known_eq (mem1->mem.max_size, obj2->max_size)
228 877479 : && mem1->mem.volatile_p == obj2->volatile_p
229 877463 : && (mem1->mem.ref_alias_set == obj2->ref_alias_set
230 : /* We are not canonicalizing alias-sets but for the
231 : special-case we didn't canonicalize yet and the
232 : incoming ref is a alias-set zero MEM we pick
233 : the correct one already. */
234 15099 : || (!mem1->ref_canonical
235 14210 : && (TREE_CODE (obj2->ref) == MEM_REF
236 14210 : || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
237 6758 : && obj2->ref_alias_set == 0)
238 : /* Likewise if there's a canonical ref with alias-set zero. */
239 12420 : || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
240 865071 : && types_compatible_p (TREE_TYPE (mem1->mem.ref),
241 865071 : TREE_TYPE (obj2->ref)));
242 : }
243 :
244 :
245 : /* Description of memory accesses in loops. */
246 :
247 : static struct
248 : {
249 : /* The hash table of memory references accessed in loops. */
250 : hash_table<mem_ref_hasher> *refs;
251 :
252 : /* The list of memory references. */
253 : vec<im_mem_ref *> refs_list;
254 :
255 : /* The set of memory references accessed in each loop. */
256 : vec<bitmap_head> refs_loaded_in_loop;
257 :
258 : /* The set of memory references stored in each loop. */
259 : vec<bitmap_head> refs_stored_in_loop;
260 :
261 : /* The set of memory references stored in each loop, including subloops . */
262 : vec<bitmap_head> all_refs_stored_in_loop;
263 :
264 : /* Cache for expanding memory addresses. */
265 : hash_map<tree, name_expansion *> *ttae_cache;
266 : } memory_accesses;
267 :
268 : /* Obstack for the bitmaps in the above data structures. */
269 : static bitmap_obstack lim_bitmap_obstack;
270 : static obstack mem_ref_obstack;
271 :
272 : static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
273 : static bool ref_always_stored_p (class loop *, im_mem_ref *);
274 : static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
275 :
276 : /* Minimum cost of an expensive expression. */
277 : #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
278 :
279 : /* The outermost loop for which execution of the header guarantees that the
280 : block will be executed. */
281 : #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
282 : #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
283 :
284 : /* ID of the shared unanalyzable mem. */
285 : #define UNANALYZABLE_MEM_ID 0
286 :
287 : /* Whether the reference was analyzable. */
288 : #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
289 :
290 : static struct lim_aux_data *
291 17452924 : init_lim_data (gimple *stmt)
292 : {
293 17452924 : lim_aux_data *p = XCNEW (struct lim_aux_data);
294 17452924 : lim_aux_data_map->put (stmt, p);
295 :
296 17452924 : return p;
297 : }
298 :
299 : static struct lim_aux_data *
300 92877910 : get_lim_data (gimple *stmt)
301 : {
302 92877910 : lim_aux_data **p = lim_aux_data_map->get (stmt);
303 92877910 : if (!p)
304 : return NULL;
305 :
306 47449847 : return *p;
307 : }
308 :
309 : /* Releases the memory occupied by DATA. */
310 :
311 : static void
312 17452745 : free_lim_aux_data (struct lim_aux_data *data)
313 : {
314 0 : data->depends.release ();
315 17452745 : free (data);
316 0 : }
317 :
318 : static void
319 17452745 : clear_lim_data (gimple *stmt)
320 : {
321 17452745 : lim_aux_data **p = lim_aux_data_map->get (stmt);
322 17452745 : if (!p)
323 : return;
324 :
325 17452745 : free_lim_aux_data (*p);
326 17452745 : *p = NULL;
327 : }
328 :
329 :
330 : /* The possibilities of statement movement. */
331 : enum move_pos
332 : {
333 : MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
334 : MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
335 : become executed -- memory accesses, ... */
336 : MOVE_POSSIBLE /* Unlimited movement. */
337 : };
338 :
339 :
340 : /* If it is possible to hoist the statement STMT unconditionally,
341 : returns MOVE_POSSIBLE.
342 : If it is possible to hoist the statement STMT, but we must avoid making
343 : it executed if it would not be executed in the original program (e.g.
344 : because it may trap), return MOVE_PRESERVE_EXECUTION.
345 : Otherwise return MOVE_IMPOSSIBLE. */
346 :
347 : static enum move_pos
348 45772020 : movement_possibility_1 (gimple *stmt)
349 : {
350 45772020 : tree lhs;
351 45772020 : enum move_pos ret = MOVE_POSSIBLE;
352 :
353 45772020 : if (flag_unswitch_loops
354 45772020 : && gimple_code (stmt) == GIMPLE_COND)
355 : {
356 : /* If we perform unswitching, force the operands of the invariant
357 : condition to be moved out of the loop. */
358 : return MOVE_POSSIBLE;
359 : }
360 :
361 45422469 : if (gimple_code (stmt) == GIMPLE_PHI
362 2589097 : && gimple_phi_num_args (stmt) <= 2
363 4841592 : && !virtual_operand_p (gimple_phi_result (stmt))
364 46871788 : && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
365 : return MOVE_POSSIBLE;
366 :
367 43973420 : if (gimple_get_lhs (stmt) == NULL_TREE)
368 : return MOVE_IMPOSSIBLE;
369 :
370 31042832 : if (gimple_vdef (stmt))
371 : return MOVE_IMPOSSIBLE;
372 :
373 13117644 : if (stmt_ends_bb_p (stmt)
374 11949845 : || gimple_has_volatile_ops (stmt)
375 13063329 : || gimple_has_side_effects (stmt)
376 26175083 : || stmt_could_throw_p (cfun, stmt))
377 458139 : return MOVE_IMPOSSIBLE;
378 :
379 12659505 : if (is_gimple_call (stmt))
380 : {
381 : /* While pure or const call is guaranteed to have no side effects, we
382 : cannot move it arbitrarily. Consider code like
383 :
384 : char *s = something ();
385 :
386 : while (1)
387 : {
388 : if (s)
389 : t = strlen (s);
390 : else
391 : t = 0;
392 : }
393 :
394 : Here the strlen call cannot be moved out of the loop, even though
395 : s is invariant. In addition to possibly creating a call with
396 : invalid arguments, moving out a function call that is not executed
397 : may cause performance regressions in case the call is costly and
398 : not executed at all. */
399 125064 : ret = MOVE_PRESERVE_EXECUTION;
400 125064 : lhs = gimple_call_lhs (stmt);
401 : }
402 12534441 : else if (is_gimple_assign (stmt))
403 11394393 : lhs = gimple_assign_lhs (stmt);
404 : else
405 : return MOVE_IMPOSSIBLE;
406 :
407 11519457 : if (TREE_CODE (lhs) == SSA_NAME
408 11519457 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
409 : return MOVE_IMPOSSIBLE;
410 :
411 11519225 : if (TREE_CODE (lhs) != SSA_NAME
412 11519225 : || gimple_could_trap_p (stmt))
413 2372918 : return MOVE_PRESERVE_EXECUTION;
414 :
415 9146307 : if (is_gimple_assign (stmt))
416 : {
417 9028584 : auto code = gimple_assign_rhs_code (stmt);
418 9028584 : tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
419 : /* For shifts and rotates and possibly out-of-bound shift operands
420 : we currently cannot rewrite them into something unconditionally
421 : well-defined. */
422 9028584 : if ((code == LSHIFT_EXPR
423 : || code == RSHIFT_EXPR
424 : || code == LROTATE_EXPR
425 9028584 : || code == RROTATE_EXPR)
426 9028584 : && (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
427 : /* We cannot use ranges at 'stmt' here. */
428 119958 : || wi::ltu_p (wi::to_wide (gimple_assign_rhs2 (stmt)),
429 8987244 : element_precision (type))))
430 161298 : ret = MOVE_PRESERVE_EXECUTION;
431 : }
432 :
433 : /* Non local loads in a transaction cannot be hoisted out. Well,
434 : unless the load happens on every path out of the loop, but we
435 : don't take this into account yet. */
436 9146307 : if (flag_tm
437 434 : && gimple_in_transaction (stmt)
438 9146410 : && gimple_assign_single_p (stmt))
439 : {
440 21 : tree rhs = gimple_assign_rhs1 (stmt);
441 21 : if (DECL_P (rhs) && is_global_var (rhs))
442 : {
443 18 : if (dump_file)
444 : {
445 2 : fprintf (dump_file, "Cannot hoist conditional load of ");
446 2 : print_generic_expr (dump_file, rhs, TDF_SLIM);
447 2 : fprintf (dump_file, " because it is in a transaction.\n");
448 : }
449 18 : return MOVE_IMPOSSIBLE;
450 : }
451 : }
452 :
453 : return ret;
454 : }
455 :
456 : static enum move_pos
457 45772020 : movement_possibility (gimple *stmt)
458 : {
459 45772020 : enum move_pos pos = movement_possibility_1 (stmt);
460 45772020 : if (pos == MOVE_POSSIBLE)
461 : {
462 10665868 : use_operand_p use_p;
463 10665868 : ssa_op_iter ssa_iter;
464 34658231 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE)
465 13344958 : if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME
466 13344958 : && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p)))
467 18463 : return MOVE_PRESERVE_EXECUTION;
468 : }
469 : return pos;
470 : }
471 :
472 :
473 : /* Compare the profile count inequality of bb and loop's preheader, it is
474 : three-state as stated in profile-count.h, FALSE is returned if inequality
475 : cannot be decided. */
476 : bool
477 13815035 : bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
478 : {
479 13815035 : gcc_assert (bb && loop);
480 13815035 : return bb->count < loop_preheader_edge (loop)->src->count;
481 : }
482 :
483 : /* Check coldest loop between OUTERMOST_LOOP and LOOP by comparing profile
484 : count.
485 : It does three steps check:
486 : 1) Check whether CURR_BB is cold in it's own loop_father, if it is cold, just
487 : return NULL which means it should not be moved out at all;
488 : 2) CURR_BB is NOT cold, check if pre-computed COLDEST_LOOP is outside of
489 : OUTERMOST_LOOP, if it is inside of OUTERMOST_LOOP, return the COLDEST_LOOP;
490 : 3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
491 : hotter loop between OUTERMOST_LOOP and loop in pre-computed
492 : HOTTER_THAN_INNER_LOOP, return it's nested inner loop, otherwise return
493 : OUTERMOST_LOOP.
494 : At last, the coldest_loop is inside of OUTERMOST_LOOP, just return it as
495 : the hoist target. */
496 :
497 : static class loop *
498 12203321 : get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
499 : basic_block curr_bb)
500 : {
501 12203321 : gcc_assert (outermost_loop == loop
502 : || flow_loop_nested_p (outermost_loop, loop));
503 :
504 : /* If bb_colder_than_loop_preheader returns false due to three-state
505 : comparison, OUTERMOST_LOOP is returned finally to preserve the behavior.
506 : Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP. */
507 12203321 : if (curr_bb && bb_colder_than_loop_preheader (curr_bb, loop))
508 : return NULL;
509 :
510 11267716 : class loop *coldest_loop = coldest_outermost_loop[loop->num];
511 22535432 : if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
512 : {
513 247049 : class loop *hotter_loop = hotter_than_inner_loop[loop->num];
514 247049 : if (!hotter_loop
515 253720 : || loop_depth (hotter_loop) < loop_depth (outermost_loop))
516 : return outermost_loop;
517 :
518 : /* hotter_loop is between OUTERMOST_LOOP and LOOP like:
519 : [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
520 : hotter_loop, second_coldest_loop, ..., loop]
521 : return second_coldest_loop to be the hoist target. */
522 23 : class loop *aloop;
523 23 : for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
524 23 : if (aloop == loop || flow_loop_nested_p (aloop, loop))
525 23 : return aloop;
526 : }
527 : return coldest_loop;
528 : }
529 :
530 : /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
531 : loop to that we could move the expression using DEF if it did not have
532 : other operands, i.e. the outermost loop enclosing LOOP in that the value
533 : of DEF is invariant. */
534 :
535 : static class loop *
536 19618298 : outermost_invariant_loop (tree def, class loop *loop)
537 : {
538 19618298 : gimple *def_stmt;
539 19618298 : basic_block def_bb;
540 19618298 : class loop *max_loop;
541 19618298 : struct lim_aux_data *lim_data;
542 :
543 19618298 : if (!def)
544 947433 : return superloop_at_depth (loop, 1);
545 :
546 18670865 : if (TREE_CODE (def) != SSA_NAME)
547 : {
548 3904081 : gcc_assert (is_gimple_min_invariant (def));
549 3904081 : return superloop_at_depth (loop, 1);
550 : }
551 :
552 14766784 : def_stmt = SSA_NAME_DEF_STMT (def);
553 14766784 : def_bb = gimple_bb (def_stmt);
554 14766784 : if (!def_bb)
555 72946 : return superloop_at_depth (loop, 1);
556 :
557 14693838 : max_loop = find_common_loop (loop, def_bb->loop_father);
558 :
559 14693838 : lim_data = get_lim_data (def_stmt);
560 14693838 : if (lim_data != NULL && lim_data->max_loop != NULL)
561 688425 : max_loop = find_common_loop (max_loop,
562 : loop_outer (lim_data->max_loop));
563 14693838 : if (max_loop == loop)
564 : return NULL;
565 1845930 : max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
566 :
567 1845930 : return max_loop;
568 : }
569 :
570 : /* DATA is a structure containing information associated with a statement
571 : inside LOOP. DEF is one of the operands of this statement.
572 :
573 : Find the outermost loop enclosing LOOP in that value of DEF is invariant
574 : and record this in DATA->max_loop field. If DEF itself is defined inside
575 : this loop as well (i.e. we need to hoist it out of the loop if we want
576 : to hoist the statement represented by DATA), record the statement in that
577 : DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
578 : add the cost of the computation of DEF to the DATA->cost.
579 :
580 : If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
581 :
582 : static bool
583 11475820 : add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
584 : bool add_cost)
585 : {
586 11475820 : gimple *def_stmt = SSA_NAME_DEF_STMT (def);
587 11475820 : basic_block def_bb = gimple_bb (def_stmt);
588 11475820 : class loop *max_loop;
589 11475820 : struct lim_aux_data *def_data;
590 :
591 11475820 : if (!def_bb)
592 : return true;
593 :
594 11151084 : max_loop = outermost_invariant_loop (def, loop);
595 11151084 : if (!max_loop)
596 : return false;
597 :
598 1386597 : if (flow_loop_nested_p (data->max_loop, max_loop))
599 418703 : data->max_loop = max_loop;
600 :
601 1386597 : def_data = get_lim_data (def_stmt);
602 1386597 : if (!def_data)
603 : return true;
604 :
605 671514 : if (add_cost
606 : /* Only add the cost if the statement defining DEF is inside LOOP,
607 : i.e. if it is likely that by moving the invariants dependent
608 : on it, we will be able to avoid creating a new register for
609 : it (since it will be only used in these dependent invariants). */
610 650923 : && def_bb->loop_father == loop)
611 456553 : data->cost += def_data->cost;
612 :
613 671514 : data->depends.safe_push (def_stmt);
614 :
615 671514 : return true;
616 : }
617 :
618 : /* Returns an estimate for a cost of statement STMT. The values here
619 : are just ad-hoc constants, similar to costs for inlining. */
620 :
621 : static unsigned
622 769233 : stmt_cost (gimple *stmt)
623 : {
624 : /* Always try to create possibilities for unswitching. */
625 769233 : if (gimple_code (stmt) == GIMPLE_COND
626 769233 : || gimple_code (stmt) == GIMPLE_PHI)
627 12791 : return LIM_EXPENSIVE;
628 :
629 : /* We should be hoisting calls if possible. */
630 756442 : if (is_gimple_call (stmt))
631 : {
632 830 : tree fndecl;
633 :
634 : /* Unless the call is a builtin_constant_p; this always folds to a
635 : constant, so moving it is useless. */
636 830 : fndecl = gimple_call_fndecl (stmt);
637 830 : if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
638 : return 0;
639 :
640 830 : return LIM_EXPENSIVE;
641 : }
642 :
643 : /* Hoisting memory references out should almost surely be a win. */
644 755612 : if (gimple_references_memory_p (stmt))
645 66268 : return LIM_EXPENSIVE;
646 :
647 689344 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
648 : return 1;
649 :
650 689344 : enum tree_code code = gimple_assign_rhs_code (stmt);
651 689344 : switch (code)
652 : {
653 116461 : case MULT_EXPR:
654 116461 : case WIDEN_MULT_EXPR:
655 116461 : case WIDEN_MULT_PLUS_EXPR:
656 116461 : case WIDEN_MULT_MINUS_EXPR:
657 116461 : case DOT_PROD_EXPR:
658 116461 : case TRUNC_DIV_EXPR:
659 116461 : case CEIL_DIV_EXPR:
660 116461 : case FLOOR_DIV_EXPR:
661 116461 : case ROUND_DIV_EXPR:
662 116461 : case EXACT_DIV_EXPR:
663 116461 : case CEIL_MOD_EXPR:
664 116461 : case FLOOR_MOD_EXPR:
665 116461 : case ROUND_MOD_EXPR:
666 116461 : case TRUNC_MOD_EXPR:
667 116461 : case RDIV_EXPR:
668 : /* Division and multiplication are usually expensive. */
669 116461 : return LIM_EXPENSIVE;
670 :
671 4332 : case LSHIFT_EXPR:
672 4332 : case RSHIFT_EXPR:
673 4332 : case WIDEN_LSHIFT_EXPR:
674 4332 : case LROTATE_EXPR:
675 4332 : case RROTATE_EXPR:
676 : /* Shifts and rotates are usually expensive. */
677 4332 : return LIM_EXPENSIVE;
678 :
679 457 : case COND_EXPR:
680 457 : case VEC_COND_EXPR:
681 : /* Conditionals are expensive. */
682 457 : return LIM_EXPENSIVE;
683 :
684 1340 : case CONSTRUCTOR:
685 : /* Make vector construction cost proportional to the number
686 : of elements. */
687 1340 : return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
688 :
689 : case SSA_NAME:
690 : case PAREN_EXPR:
691 : /* Whether or not something is wrapped inside a PAREN_EXPR
692 : should not change move cost. Nor should an intermediate
693 : unpropagated SSA name copy. */
694 : return 0;
695 :
696 527841 : default:
697 : /* Comparisons are usually expensive. */
698 527841 : if (TREE_CODE_CLASS (code) == tcc_comparison)
699 7519 : return LIM_EXPENSIVE;
700 : return 1;
701 : }
702 : }
703 :
704 : /* Finds the outermost loop between OUTER and LOOP in that the memory reference
705 : REF is independent. If REF is not independent in LOOP, NULL is returned
706 : instead. */
707 :
708 : static class loop *
709 744478 : outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
710 : {
711 744478 : class loop *aloop;
712 :
713 744478 : if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
714 : return NULL;
715 :
716 84444 : for (aloop = outer;
717 685444 : aloop != loop;
718 84444 : aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
719 10065 : if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
720 100245 : && ref_indep_loop_p (aloop, ref, lim_raw))
721 : return aloop;
722 :
723 586889 : if (ref_indep_loop_p (loop, ref, lim_raw))
724 : return loop;
725 : else
726 : return NULL;
727 : }
728 :
729 : /* If there is a simple load or store to a memory reference in STMT, returns
730 : the location of the memory reference, and sets IS_STORE according to whether
731 : it is a store or load. Otherwise, returns NULL. */
732 :
733 : static tree *
734 6880958 : simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
735 : {
736 6880958 : tree *lhs, *rhs;
737 :
738 : /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
739 6880958 : if (!gimple_assign_single_p (stmt))
740 : return NULL;
741 :
742 5646892 : lhs = gimple_assign_lhs_ptr (stmt);
743 5646892 : rhs = gimple_assign_rhs1_ptr (stmt);
744 :
745 8805958 : if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
746 : {
747 3159066 : *is_store = false;
748 3159066 : return rhs;
749 : }
750 2487826 : else if (gimple_vdef (stmt)
751 2487826 : && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
752 : {
753 2044705 : *is_store = true;
754 2044705 : return lhs;
755 : }
756 : else
757 443121 : return NULL;
758 : }
759 :
760 : /* From a controlling predicate in DOM determine the arguments from
761 : the PHI node PHI that are chosen if the predicate evaluates to
762 : true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
763 : they are non-NULL. Returns true if the arguments can be determined,
764 : else return false. */
765 :
766 : static bool
767 11744 : extract_true_false_args_from_phi (basic_block dom, gphi *phi,
768 : tree *true_arg_p, tree *false_arg_p)
769 : {
770 11744 : edge te, fe;
771 11744 : if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
772 : &te, &fe))
773 : return false;
774 :
775 10856 : if (true_arg_p)
776 932 : *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
777 10856 : if (false_arg_p)
778 932 : *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
779 :
780 : return true;
781 : }
782 :
783 : /* Determine the outermost loop to that it is possible to hoist a statement
784 : STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
785 : the outermost loop in that the value computed by STMT is invariant.
786 : If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
787 : we preserve the fact whether STMT is executed. It also fills other related
788 : information to LIM_DATA (STMT).
789 :
790 : The function returns false if STMT cannot be hoisted outside of the loop it
791 : is defined in, and true otherwise. */
792 :
793 : static bool
794 12159860 : determine_max_movement (gimple *stmt, bool must_preserve_exec)
795 : {
796 12159860 : basic_block bb = gimple_bb (stmt);
797 12159860 : class loop *loop = bb->loop_father;
798 12159860 : class loop *level;
799 12159860 : struct lim_aux_data *lim_data = get_lim_data (stmt);
800 12159860 : tree val;
801 12159860 : ssa_op_iter iter;
802 :
803 12159860 : if (must_preserve_exec)
804 1506783 : level = ALWAYS_EXECUTED_IN (bb);
805 : else
806 10653077 : level = superloop_at_depth (loop, 1);
807 12159860 : lim_data->max_loop = get_coldest_out_loop (level, loop, bb);
808 12159860 : if (!lim_data->max_loop)
809 : return false;
810 :
811 11226053 : if (gphi *phi = dyn_cast <gphi *> (stmt))
812 : {
813 1398681 : use_operand_p use_p;
814 1398681 : unsigned min_cost = UINT_MAX;
815 1398681 : unsigned total_cost = 0;
816 1398681 : struct lim_aux_data *def_data;
817 :
818 : /* We will end up promoting dependencies to be unconditionally
819 : evaluated. For this reason the PHI cost (and thus the
820 : cost we remove from the loop by doing the invariant motion)
821 : is that of the cheapest PHI argument dependency chain. */
822 1605854 : FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
823 : {
824 1581930 : val = USE_FROM_PTR (use_p);
825 :
826 1581930 : if (TREE_CODE (val) != SSA_NAME)
827 : {
828 : /* Assign const 1 to constants. */
829 129605 : min_cost = MIN (min_cost, 1);
830 129605 : total_cost += 1;
831 129605 : continue;
832 : }
833 1452325 : if (!add_dependency (val, lim_data, loop, false))
834 : return false;
835 :
836 77568 : gimple *def_stmt = SSA_NAME_DEF_STMT (val);
837 77568 : if (gimple_bb (def_stmt)
838 77568 : && gimple_bb (def_stmt)->loop_father == loop)
839 : {
840 1383 : def_data = get_lim_data (def_stmt);
841 1383 : if (def_data)
842 : {
843 1383 : min_cost = MIN (min_cost, def_data->cost);
844 1383 : total_cost += def_data->cost;
845 : }
846 : }
847 : }
848 :
849 23924 : min_cost = MIN (min_cost, total_cost);
850 23924 : lim_data->cost += min_cost;
851 :
852 23924 : if (gimple_phi_num_args (phi) > 1)
853 : {
854 23753 : basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
855 23753 : gimple *cond;
856 47506 : if (gsi_end_p (gsi_last_bb (dom)))
857 : return false;
858 15763 : cond = gsi_stmt (gsi_last_bb (dom));
859 15763 : if (gimple_code (cond) != GIMPLE_COND)
860 : return false;
861 : /* Verify that this is an extended form of a diamond and
862 : the PHI arguments are completely controlled by the
863 : predicate in DOM. */
864 10812 : if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
865 : return false;
866 :
867 : /* Check if one of the dependent statement is a vector compare whether
868 : the target supports it, otherwise it's invalid to hoist it out of
869 : the gcond it belonged to. */
870 9924 : if (VECTOR_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond))))
871 : {
872 5 : tree type = TREE_TYPE (gimple_cond_lhs (cond));
873 5 : auto code = gimple_cond_code (cond);
874 5 : if (!target_supports_op_p (type, code, optab_vector))
875 : return false;
876 : }
877 :
878 : /* Fold in dependencies and cost of the condition. */
879 11647 : FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
880 : {
881 10599 : if (!add_dependency (val, lim_data, loop, false))
882 : return false;
883 1728 : def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
884 1728 : if (def_data)
885 926 : lim_data->cost += def_data->cost;
886 : }
887 :
888 : /* We want to avoid unconditionally executing very expensive
889 : operations. As costs for our dependencies cannot be
890 : negative just claim we are not invariand for this case.
891 : We also are not sure whether the control-flow inside the
892 : loop will vanish. */
893 1048 : if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
894 137 : && !(min_cost != 0
895 137 : && total_cost / min_cost <= 2))
896 : return false;
897 :
898 : /* Assume that the control-flow in the loop will vanish.
899 : ??? We should verify this and not artificially increase
900 : the cost if that is not the case. */
901 932 : lim_data->cost += stmt_cost (stmt);
902 : }
903 :
904 1103 : return true;
905 : }
906 :
907 : /* A stmt that receives abnormal edges cannot be hoisted. */
908 9827372 : if (is_a <gcall *> (stmt)
909 9827372 : && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE))
910 : return false;
911 :
912 11459382 : FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
913 10011874 : if (!add_dependency (val, lim_data, loop, true))
914 : return false;
915 :
916 2883157 : if (gimple_vuse (stmt))
917 : {
918 745500 : im_mem_ref *ref
919 745500 : = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
920 745500 : if (ref
921 745500 : && MEM_ANALYZABLE (ref))
922 : {
923 744478 : lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
924 : loop, ref);
925 744478 : if (!lim_data->max_loop)
926 : return false;
927 : }
928 1022 : else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
929 : return false;
930 : }
931 :
932 768301 : lim_data->cost += stmt_cost (stmt);
933 :
934 768301 : return true;
935 : }
936 :
937 : /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
938 : and that one of the operands of this statement is computed by STMT.
939 : Ensure that STMT (together with all the statements that define its
940 : operands) is hoisted at least out of the loop LEVEL. */
941 :
942 : static void
943 680658 : set_level (gimple *stmt, class loop *orig_loop, class loop *level)
944 : {
945 680658 : class loop *stmt_loop = gimple_bb (stmt)->loop_father;
946 680658 : struct lim_aux_data *lim_data;
947 680658 : gimple *dep_stmt;
948 680658 : unsigned i;
949 :
950 680658 : stmt_loop = find_common_loop (orig_loop, stmt_loop);
951 680658 : lim_data = get_lim_data (stmt);
952 680658 : if (lim_data != NULL && lim_data->tgt_loop != NULL)
953 168242 : stmt_loop = find_common_loop (stmt_loop,
954 : loop_outer (lim_data->tgt_loop));
955 680658 : if (flow_loop_nested_p (stmt_loop, level))
956 680658 : return;
957 :
958 452319 : gcc_assert (level == lim_data->max_loop
959 : || flow_loop_nested_p (lim_data->max_loop, level));
960 :
961 452319 : lim_data->tgt_loop = level;
962 779112 : FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
963 326793 : set_level (dep_stmt, orig_loop, level);
964 : }
965 :
966 : /* Determines an outermost loop from that we want to hoist the statement STMT.
967 : For now we chose the outermost possible loop. TODO -- use profiling
968 : information to set it more sanely. */
969 :
970 : static void
971 291185 : set_profitable_level (gimple *stmt)
972 : {
973 291185 : set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
974 291185 : }
975 :
976 : /* Returns true if STMT is a call that has side effects. */
977 :
978 : static bool
979 69970720 : nonpure_call_p (gimple *stmt)
980 : {
981 0 : if (gimple_code (stmt) != GIMPLE_CALL)
982 : return false;
983 :
984 2231327 : return gimple_has_side_effects (stmt);
985 : }
986 :
987 : /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
988 :
989 : static gimple *
990 35 : rewrite_reciprocal (gimple_stmt_iterator *bsi)
991 : {
992 35 : gassign *stmt, *stmt1, *stmt2;
993 35 : tree name, lhs, type;
994 35 : tree real_one;
995 35 : gimple_stmt_iterator gsi;
996 :
997 35 : stmt = as_a <gassign *> (gsi_stmt (*bsi));
998 35 : lhs = gimple_assign_lhs (stmt);
999 35 : type = TREE_TYPE (lhs);
1000 :
1001 35 : real_one = build_one_cst (type);
1002 :
1003 35 : name = make_temp_ssa_name (type, NULL, "reciptmp");
1004 70 : stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
1005 : gimple_assign_rhs2 (stmt));
1006 35 : stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
1007 : gimple_assign_rhs1 (stmt));
1008 :
1009 : /* Replace division stmt with reciprocal and multiply stmts.
1010 : The multiply stmt is not invariant, so update iterator
1011 : and avoid rescanning. */
1012 35 : gsi = *bsi;
1013 35 : gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1014 35 : gsi_replace (&gsi, stmt2, true);
1015 :
1016 : /* Continue processing with invariant reciprocal statement. */
1017 35 : return stmt1;
1018 : }
1019 :
1020 : /* Check if the pattern at *BSI is a bittest of the form
1021 : (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
1022 :
1023 : static gimple *
1024 9049 : rewrite_bittest (gimple_stmt_iterator *bsi)
1025 : {
1026 9049 : gassign *stmt;
1027 9049 : gimple *stmt1;
1028 9049 : gassign *stmt2;
1029 9049 : gimple *use_stmt;
1030 9049 : gcond *cond_stmt;
1031 9049 : tree lhs, name, t, a, b;
1032 9049 : use_operand_p use;
1033 :
1034 9049 : stmt = as_a <gassign *> (gsi_stmt (*bsi));
1035 9049 : lhs = gimple_assign_lhs (stmt);
1036 :
1037 : /* Verify that the single use of lhs is a comparison against zero. */
1038 9049 : if (TREE_CODE (lhs) != SSA_NAME
1039 9049 : || !single_imm_use (lhs, &use, &use_stmt))
1040 453 : return stmt;
1041 13983 : cond_stmt = dyn_cast <gcond *> (use_stmt);
1042 4963 : if (!cond_stmt)
1043 : return stmt;
1044 4963 : if (gimple_cond_lhs (cond_stmt) != lhs
1045 4787 : || (gimple_cond_code (cond_stmt) != NE_EXPR
1046 2200 : && gimple_cond_code (cond_stmt) != EQ_EXPR)
1047 9750 : || !integer_zerop (gimple_cond_rhs (cond_stmt)))
1048 206 : return stmt;
1049 :
1050 : /* Get at the operands of the shift. The rhs is TMP1 & 1. */
1051 4757 : stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
1052 4757 : if (gimple_code (stmt1) != GIMPLE_ASSIGN)
1053 : return stmt;
1054 :
1055 : /* There is a conversion in between possibly inserted by fold. */
1056 4686 : if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
1057 : {
1058 233 : t = gimple_assign_rhs1 (stmt1);
1059 233 : if (TREE_CODE (t) != SSA_NAME
1060 233 : || !has_single_use (t))
1061 : return stmt;
1062 157 : stmt1 = SSA_NAME_DEF_STMT (t);
1063 157 : if (gimple_code (stmt1) != GIMPLE_ASSIGN)
1064 : return stmt;
1065 : }
1066 :
1067 : /* Verify that B is loop invariant but A is not. Verify that with
1068 : all the stmt walking we are still in the same loop. */
1069 4568 : if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
1070 10892 : || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
1071 : return stmt;
1072 :
1073 3162 : a = gimple_assign_rhs1 (stmt1);
1074 3162 : b = gimple_assign_rhs2 (stmt1);
1075 :
1076 3162 : if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
1077 3191 : && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
1078 : {
1079 29 : gimple_stmt_iterator rsi;
1080 :
1081 : /* 1 << B */
1082 29 : t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
1083 : build_int_cst (TREE_TYPE (a), 1), b);
1084 29 : name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1085 29 : stmt1 = gimple_build_assign (name, t);
1086 :
1087 : /* A & (1 << B) */
1088 29 : t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
1089 29 : name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1090 29 : stmt2 = gimple_build_assign (name, t);
1091 :
1092 : /* Replace the SSA_NAME we compare against zero. Adjust
1093 : the type of zero accordingly. */
1094 29 : SET_USE (use, name);
1095 29 : gimple_cond_set_rhs (cond_stmt,
1096 29 : build_int_cst_type (TREE_TYPE (name),
1097 : 0));
1098 :
1099 : /* Don't use gsi_replace here, none of the new assignments sets
1100 : the variable originally set in stmt. Move bsi to stmt1, and
1101 : then remove the original stmt, so that we get a chance to
1102 : retain debug info for it. */
1103 29 : rsi = *bsi;
1104 29 : gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1105 29 : gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1106 29 : gimple *to_release = gsi_stmt (rsi);
1107 29 : gsi_remove (&rsi, true);
1108 29 : release_defs (to_release);
1109 :
1110 29 : return stmt1;
1111 : }
1112 :
1113 : return stmt;
1114 : }
1115 :
1116 : /* Determine the outermost loops in that statements in basic block BB are
1117 : invariant, and record them to the LIM_DATA associated with the
1118 : statements. */
1119 :
1120 : static void
1121 14117797 : compute_invariantness (basic_block bb)
1122 : {
1123 14117797 : enum move_pos pos;
1124 14117797 : gimple_stmt_iterator bsi;
1125 14117797 : gimple *stmt;
1126 14117797 : bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1127 14117797 : class loop *outermost = ALWAYS_EXECUTED_IN (bb);
1128 14117797 : struct lim_aux_data *lim_data;
1129 :
1130 14117797 : if (!loop_outer (bb->loop_father))
1131 8124109 : return;
1132 :
1133 5993688 : if (dump_file && (dump_flags & TDF_DETAILS))
1134 435 : fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1135 435 : bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1136 :
1137 : /* Look at PHI nodes, but only if there is at most two.
1138 : ??? We could relax this further by post-processing the inserted
1139 : code and transforming adjacent cond-exprs with the same predicate
1140 : to control flow again. */
1141 5993688 : bsi = gsi_start_phis (bb);
1142 5993688 : if (!gsi_end_p (bsi)
1143 5993688 : && ((gsi_next (&bsi), gsi_end_p (bsi))
1144 1357384 : || (gsi_next (&bsi), gsi_end_p (bsi))))
1145 4296624 : for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1146 : {
1147 2589097 : stmt = gsi_stmt (bsi);
1148 :
1149 2589097 : pos = movement_possibility (stmt);
1150 2589097 : if (pos == MOVE_IMPOSSIBLE)
1151 1140048 : continue;
1152 :
1153 1449049 : lim_data = get_lim_data (stmt);
1154 1449049 : if (! lim_data)
1155 1449049 : lim_data = init_lim_data (stmt);
1156 1449049 : lim_data->always_executed_in = outermost;
1157 :
1158 1449049 : if (!determine_max_movement (stmt, false))
1159 : {
1160 1447946 : lim_data->max_loop = NULL;
1161 1447946 : continue;
1162 : }
1163 :
1164 1103 : if (dump_file && (dump_flags & TDF_DETAILS))
1165 : {
1166 2 : print_gimple_stmt (dump_file, stmt, 2);
1167 2 : fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1168 2 : loop_depth (lim_data->max_loop),
1169 : lim_data->cost);
1170 : }
1171 :
1172 1103 : if (lim_data->cost >= LIM_EXPENSIVE)
1173 935 : set_profitable_level (stmt);
1174 : }
1175 :
1176 55170299 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1177 : {
1178 43182923 : stmt = gsi_stmt (bsi);
1179 :
1180 43182923 : pos = movement_possibility (stmt);
1181 43182923 : if (pos == MOVE_IMPOSSIBLE)
1182 : {
1183 32481779 : if (nonpure_call_p (stmt))
1184 : {
1185 : maybe_never = true;
1186 : outermost = NULL;
1187 : }
1188 : /* Make sure to note always_executed_in for stores to make
1189 : store-motion work. */
1190 30150863 : else if (stmt_makes_single_store (stmt))
1191 : {
1192 2476596 : struct lim_aux_data *lim_data = get_lim_data (stmt);
1193 2476596 : if (! lim_data)
1194 0 : lim_data = init_lim_data (stmt);
1195 2476596 : lim_data->always_executed_in = outermost;
1196 : }
1197 31314165 : continue;
1198 31314165 : }
1199 :
1200 11868758 : if (is_gimple_assign (stmt)
1201 11868758 : && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1202 : == GIMPLE_BINARY_RHS))
1203 : {
1204 5733414 : tree op0 = gimple_assign_rhs1 (stmt);
1205 5733414 : tree op1 = gimple_assign_rhs2 (stmt);
1206 11466828 : class loop *ol1 = outermost_invariant_loop (op1,
1207 : loop_containing_stmt (stmt));
1208 :
1209 : /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1210 : to be hoisted out of loop, saving expensive divide. */
1211 5733414 : if (pos == MOVE_POSSIBLE
1212 5162322 : && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1213 980 : && flag_unsafe_math_optimizations
1214 420 : && !flag_trapping_math
1215 420 : && ol1 != NULL
1216 5733452 : && outermost_invariant_loop (op0, ol1) == NULL)
1217 35 : stmt = rewrite_reciprocal (&bsi);
1218 :
1219 : /* If the shift count is invariant, convert (A >> B) & 1 to
1220 : A & (1 << B) allowing the bit mask to be hoisted out of the loop
1221 : saving an expensive shift. */
1222 5733414 : if (pos == MOVE_POSSIBLE
1223 5162322 : && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1224 186771 : && integer_onep (op1)
1225 18506 : && TREE_CODE (op0) == SSA_NAME
1226 5751920 : && has_single_use (op0))
1227 9049 : stmt = rewrite_bittest (&bsi);
1228 : }
1229 :
1230 11868758 : lim_data = get_lim_data (stmt);
1231 11868758 : if (! lim_data)
1232 9080079 : lim_data = init_lim_data (stmt);
1233 11868758 : lim_data->always_executed_in = outermost;
1234 :
1235 11868758 : if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1236 1157947 : continue;
1237 :
1238 10710811 : if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1239 : {
1240 9942510 : lim_data->max_loop = NULL;
1241 9942510 : continue;
1242 : }
1243 :
1244 768301 : if (dump_file && (dump_flags & TDF_DETAILS))
1245 : {
1246 283 : print_gimple_stmt (dump_file, stmt, 2);
1247 283 : fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1248 283 : loop_depth (lim_data->max_loop),
1249 : lim_data->cost);
1250 : }
1251 :
1252 768301 : if (lim_data->cost >= LIM_EXPENSIVE)
1253 290250 : set_profitable_level (stmt);
1254 : /* When we run before PRE and PRE is active hoist all expressions
1255 : to the always executed loop since PRE would do so anyway
1256 : and we can preserve range info while PRE cannot. */
1257 478051 : else if (flag_tree_pre && !in_loop_pipeline
1258 101307 : && outermost)
1259 : {
1260 59761 : class loop *mloop = lim_data->max_loop;
1261 179283 : if (loop_depth (outermost) > loop_depth (mloop))
1262 : {
1263 8477 : mloop = outermost;
1264 8477 : if (dump_file && (dump_flags & TDF_DETAILS))
1265 1 : fprintf (dump_file, " constraining to loop depth %d\n\n\n",
1266 : loop_depth (mloop));
1267 : }
1268 59761 : set_level (stmt, bb->loop_father, mloop);
1269 : }
1270 : }
1271 : }
1272 :
1273 : /* Hoist the statements in basic block BB out of the loops prescribed by
1274 : data stored in LIM_DATA structures associated with each statement. Callback
1275 : for walk_dominator_tree. */
1276 :
1277 : unsigned int
1278 14163521 : move_computations_worker (basic_block bb)
1279 : {
1280 14163521 : class loop *level;
1281 14163521 : unsigned cost = 0;
1282 14163521 : struct lim_aux_data *lim_data;
1283 14163521 : unsigned int todo = 0;
1284 :
1285 14163521 : if (!loop_outer (bb->loop_father))
1286 : return todo;
1287 :
1288 10480223 : for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1289 : {
1290 4480295 : gassign *new_stmt;
1291 4480295 : gphi *stmt = bsi.phi ();
1292 :
1293 4480295 : lim_data = get_lim_data (stmt);
1294 4480295 : if (lim_data == NULL)
1295 : {
1296 3031246 : gsi_next (&bsi);
1297 3031246 : continue;
1298 : }
1299 :
1300 1449049 : cost = lim_data->cost;
1301 1449049 : level = lim_data->tgt_loop;
1302 1449049 : clear_lim_data (stmt);
1303 :
1304 1449049 : if (!level)
1305 : {
1306 1448114 : gsi_next (&bsi);
1307 1448114 : continue;
1308 : }
1309 :
1310 935 : if (dump_file && (dump_flags & TDF_DETAILS))
1311 : {
1312 2 : fprintf (dump_file, "Moving PHI node\n");
1313 2 : print_gimple_stmt (dump_file, stmt, 0);
1314 2 : fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1315 : cost, level->num);
1316 : }
1317 :
1318 935 : if (gimple_phi_num_args (stmt) == 1)
1319 : {
1320 3 : tree arg = PHI_ARG_DEF (stmt, 0);
1321 3 : new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1322 3 : TREE_CODE (arg), arg);
1323 : }
1324 : else
1325 : {
1326 932 : basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1327 932 : gimple *cond = gsi_stmt (gsi_last_bb (dom));
1328 932 : tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1329 : /* Get the PHI arguments corresponding to the true and false
1330 : edges of COND. */
1331 932 : extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1332 932 : gcc_assert (arg0 && arg1);
1333 : /* For `bool_val != 0`, reuse bool_val. */
1334 932 : if (gimple_cond_code (cond) == NE_EXPR
1335 540 : && integer_zerop (gimple_cond_rhs (cond))
1336 1398 : && types_compatible_p (TREE_TYPE (gimple_cond_lhs (cond)),
1337 : boolean_type_node))
1338 : {
1339 53 : t = gimple_cond_lhs (cond);
1340 : }
1341 : else
1342 : {
1343 879 : t = make_ssa_name (boolean_type_node);
1344 879 : new_stmt = gimple_build_assign (t, gimple_cond_code (cond),
1345 : gimple_cond_lhs (cond),
1346 : gimple_cond_rhs (cond));
1347 879 : gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1348 : }
1349 932 : new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1350 : COND_EXPR, t, arg0, arg1);
1351 932 : todo |= TODO_cleanup_cfg;
1352 : }
1353 935 : if (!ALWAYS_EXECUTED_IN (bb)
1354 935 : || (ALWAYS_EXECUTED_IN (bb) != level
1355 92 : && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
1356 436 : reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
1357 935 : gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1358 935 : remove_phi_node (&bsi, false);
1359 : }
1360 :
1361 55248430 : for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1362 : {
1363 43248574 : edge e;
1364 :
1365 43248574 : gimple *stmt = gsi_stmt (bsi);
1366 :
1367 43248574 : lim_data = get_lim_data (stmt);
1368 43248574 : if (lim_data == NULL)
1369 : {
1370 27244878 : gsi_next (&bsi);
1371 27244878 : continue;
1372 : }
1373 :
1374 16003696 : cost = lim_data->cost;
1375 16003696 : level = lim_data->tgt_loop;
1376 16003696 : clear_lim_data (stmt);
1377 :
1378 16003696 : if (!level)
1379 : {
1380 15512582 : gsi_next (&bsi);
1381 15512582 : continue;
1382 : }
1383 :
1384 : /* We do not really want to move conditionals out of the loop; we just
1385 : placed it here to force its operands to be moved if necessary. */
1386 491114 : if (gimple_code (stmt) == GIMPLE_COND)
1387 : {
1388 11859 : gsi_next (&bsi);
1389 11859 : continue;
1390 : }
1391 :
1392 479255 : if (dump_file && (dump_flags & TDF_DETAILS))
1393 : {
1394 353 : fprintf (dump_file, "Moving statement ");
1395 353 : print_gimple_stmt (dump_file, stmt, 0);
1396 353 : fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1397 : cost, level->num);
1398 : }
1399 :
1400 479255 : e = loop_preheader_edge (level);
1401 958510 : gcc_assert (!gimple_vdef (stmt));
1402 958510 : if (gimple_vuse (stmt))
1403 : {
1404 : /* The new VUSE is the one from the virtual PHI in the loop
1405 : header or the one already present. */
1406 84407 : gphi_iterator gsi2;
1407 84407 : for (gsi2 = gsi_start_phis (e->dest);
1408 196888 : !gsi_end_p (gsi2); gsi_next (&gsi2))
1409 : {
1410 179365 : gphi *phi = gsi2.phi ();
1411 358730 : if (virtual_operand_p (gimple_phi_result (phi)))
1412 : {
1413 66884 : SET_USE (gimple_vuse_op (stmt),
1414 : PHI_ARG_DEF_FROM_EDGE (phi, e));
1415 66884 : break;
1416 : }
1417 : }
1418 : }
1419 479255 : gsi_remove (&bsi, false);
1420 479255 : if (gimple_has_lhs (stmt)
1421 479255 : && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1422 437247 : && (!ALWAYS_EXECUTED_IN (bb)
1423 370139 : || !(ALWAYS_EXECUTED_IN (bb) == level
1424 95341 : || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1425 201402 : reset_flow_sensitive_info (gimple_get_lhs (stmt));
1426 : /* In case this is a stmt that is not unconditionally executed
1427 : when the target loop header is executed and the stmt may
1428 : invoke undefined integer or pointer overflow rewrite it to
1429 : unsigned arithmetic. */
1430 479255 : if (gimple_needing_rewrite_undefined (stmt)
1431 479255 : && (!ALWAYS_EXECUTED_IN (bb)
1432 80844 : || !(ALWAYS_EXECUTED_IN (bb) == level
1433 18958 : || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1434 19964 : gsi_insert_seq_on_edge (e, rewrite_to_defined_unconditional (stmt));
1435 : else
1436 459291 : gsi_insert_on_edge (e, stmt);
1437 : }
1438 :
1439 5999928 : return todo;
1440 : }
1441 :
1442 : /* Checks whether the statement defining variable *INDEX can be hoisted
1443 : out of the loop passed in DATA. Callback for for_each_index. */
1444 :
1445 : static bool
1446 1780255 : may_move_till (tree ref, tree *index, void *data)
1447 : {
1448 1780255 : class loop *loop = (class loop *) data, *max_loop;
1449 :
1450 : /* If REF is an array reference, check also that the step and the lower
1451 : bound is invariant in LOOP. */
1452 1780255 : if (TREE_CODE (ref) == ARRAY_REF)
1453 : {
1454 475158 : tree step = TREE_OPERAND (ref, 3);
1455 475158 : tree lbound = TREE_OPERAND (ref, 2);
1456 :
1457 475158 : max_loop = outermost_invariant_loop (step, loop);
1458 475158 : if (!max_loop)
1459 : return false;
1460 :
1461 475158 : max_loop = outermost_invariant_loop (lbound, loop);
1462 475158 : if (!max_loop)
1463 : return false;
1464 : }
1465 :
1466 1780255 : max_loop = outermost_invariant_loop (*index, loop);
1467 1780255 : if (!max_loop)
1468 : return false;
1469 :
1470 : return true;
1471 : }
1472 :
1473 : /* If OP is SSA NAME, force the statement that defines it to be
1474 : moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
1475 :
1476 : static void
1477 38077 : force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1478 : {
1479 38077 : gimple *stmt;
1480 :
1481 38077 : if (!op
1482 38077 : || is_gimple_min_invariant (op))
1483 33542 : return;
1484 :
1485 4535 : gcc_assert (TREE_CODE (op) == SSA_NAME);
1486 :
1487 4535 : stmt = SSA_NAME_DEF_STMT (op);
1488 4535 : if (gimple_nop_p (stmt))
1489 : return;
1490 :
1491 2919 : set_level (stmt, orig_loop, loop);
1492 : }
1493 :
1494 : /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1495 : the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1496 : for_each_index. */
1497 :
1498 : struct fmt_data
1499 : {
1500 : class loop *loop;
1501 : class loop *orig_loop;
1502 : };
1503 :
1504 : static bool
1505 20241 : force_move_till (tree ref, tree *index, void *data)
1506 : {
1507 20241 : struct fmt_data *fmt_data = (struct fmt_data *) data;
1508 :
1509 20241 : if (TREE_CODE (ref) == ARRAY_REF)
1510 : {
1511 8918 : tree step = TREE_OPERAND (ref, 3);
1512 8918 : tree lbound = TREE_OPERAND (ref, 2);
1513 :
1514 8918 : force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1515 8918 : force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1516 : }
1517 :
1518 20241 : force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1519 :
1520 20241 : return true;
1521 : }
1522 :
1523 : /* A function to free the mem_ref object OBJ. */
1524 :
1525 : static void
1526 5197001 : memref_free (class im_mem_ref *mem)
1527 : {
1528 0 : mem->accesses_in_loop.release ();
1529 0 : }
1530 :
1531 : /* Allocates and returns a memory reference description for MEM whose hash
1532 : value is HASH and id is ID. */
1533 :
1534 : static im_mem_ref *
1535 5197001 : mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1536 : {
1537 5197001 : im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1538 5197001 : if (mem)
1539 4270550 : ref->mem = *mem;
1540 : else
1541 926451 : ao_ref_init (&ref->mem, error_mark_node);
1542 5197001 : ref->id = id;
1543 5197001 : ref->ref_canonical = false;
1544 5197001 : ref->ref_decomposed = false;
1545 5197001 : ref->hash = hash;
1546 5197001 : ref->stored = NULL;
1547 5197001 : ref->loaded = NULL;
1548 5197001 : bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1549 5197001 : ref->accesses_in_loop.create (1);
1550 :
1551 5197001 : return ref;
1552 : }
1553 :
1554 : /* Records memory reference location *LOC in LOOP to the memory reference
1555 : description REF. The reference occurs in statement STMT. */
1556 :
1557 : static void
1558 5646892 : record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1559 : {
1560 5646892 : mem_ref_loc aref;
1561 5646892 : aref.stmt = stmt;
1562 5646892 : aref.ref = loc;
1563 0 : ref->accesses_in_loop.safe_push (aref);
1564 0 : }
1565 :
1566 : /* Set the LOOP bit in REF stored bitmap and allocate that if
1567 : necessary. Return whether a bit was changed. */
1568 :
1569 : static bool
1570 4377886 : set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1571 : {
1572 4377886 : if (!ref->stored)
1573 2489647 : ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1574 4377886 : return bitmap_set_bit (ref->stored, loop->num);
1575 : }
1576 :
1577 : /* Marks reference REF as stored in LOOP. */
1578 :
1579 : static void
1580 3645313 : mark_ref_stored (im_mem_ref *ref, class loop *loop)
1581 : {
1582 3645313 : while (loop != current_loops->tree_root
1583 7032813 : && set_ref_stored_in_loop (ref, loop))
1584 3387500 : loop = loop_outer (loop);
1585 3645313 : }
1586 :
1587 : /* Set the LOOP bit in REF loaded bitmap and allocate that if
1588 : necessary. Return whether a bit was changed. */
1589 :
1590 : static bool
1591 6090966 : set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1592 : {
1593 6090966 : if (!ref->loaded)
1594 3331868 : ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1595 6090966 : return bitmap_set_bit (ref->loaded, loop->num);
1596 : }
1597 :
1598 : /* Marks reference REF as loaded in LOOP. */
1599 :
1600 : static void
1601 4836253 : mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1602 : {
1603 4836253 : while (loop != current_loops->tree_root
1604 9642430 : && set_ref_loaded_in_loop (ref, loop))
1605 4806177 : loop = loop_outer (loop);
1606 4836253 : }
1607 :
1608 : /* Gathers memory references in statement STMT in LOOP, storing the
1609 : information about them in the memory_accesses structure. Marks
1610 : the vops accessed through unrecognized statements there as
1611 : well. */
1612 :
1613 : static void
1614 43182859 : gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1615 : {
1616 43182859 : tree *mem = NULL;
1617 43182859 : hashval_t hash;
1618 43182859 : im_mem_ref **slot;
1619 43182859 : im_mem_ref *ref;
1620 43182859 : bool is_stored;
1621 43182859 : unsigned id;
1622 :
1623 58817804 : if (!gimple_vuse (stmt))
1624 : return;
1625 :
1626 6880958 : mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1627 6880958 : if (!mem && is_gimple_assign (stmt))
1628 : {
1629 : /* For aggregate copies record distinct references but use them
1630 : only for disambiguation purposes. */
1631 443121 : id = memory_accesses.refs_list.length ();
1632 443121 : ref = mem_ref_alloc (NULL, 0, id);
1633 443121 : memory_accesses.refs_list.safe_push (ref);
1634 443121 : if (dump_file && (dump_flags & TDF_DETAILS))
1635 : {
1636 24 : fprintf (dump_file, "Unhandled memory reference %u: ", id);
1637 24 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1638 : }
1639 443121 : record_mem_ref_loc (ref, stmt, mem);
1640 886242 : is_stored = gimple_vdef (stmt);
1641 : }
1642 6437837 : else if (!mem)
1643 : {
1644 : /* We use the shared mem_ref for all unanalyzable refs. */
1645 1234066 : id = UNANALYZABLE_MEM_ID;
1646 1234066 : ref = memory_accesses.refs_list[id];
1647 1234066 : if (dump_file && (dump_flags & TDF_DETAILS))
1648 : {
1649 9 : fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1650 9 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1651 : }
1652 2468132 : is_stored = gimple_vdef (stmt);
1653 : }
1654 : else
1655 : {
1656 : /* We are looking for equal refs that might differ in structure
1657 : such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1658 : make sure we can canonicalize the ref in the hashtable if
1659 : non-operand_equal_p refs are found. For the lookup we mark
1660 : the case we want strict equality with aor.max_size == -1. */
1661 5203771 : ao_ref aor;
1662 5203771 : ao_ref_init (&aor, *mem);
1663 5203771 : ao_ref_base (&aor);
1664 5203771 : ao_ref_alias_set (&aor);
1665 5203771 : HOST_WIDE_INT offset, size, max_size;
1666 5203771 : poly_int64 saved_maxsize = aor.max_size, mem_off;
1667 5203771 : tree mem_base;
1668 5203771 : bool ref_decomposed;
1669 5203771 : if (aor.max_size_known_p ()
1670 5025678 : && aor.offset.is_constant (&offset)
1671 5025678 : && aor.size.is_constant (&size)
1672 5025678 : && aor.max_size.is_constant (&max_size)
1673 5025678 : && size == max_size
1674 4417614 : && (size % BITS_PER_UNIT) == 0
1675 : /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1676 : size. Make sure this is consistent with the extraction. */
1677 4406835 : && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1678 4406835 : && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1679 : aor.size)
1680 9610514 : && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1681 : {
1682 4405644 : ref_decomposed = true;
1683 4405644 : tree base = ao_ref_base (&aor);
1684 4405644 : poly_int64 moffset;
1685 4405644 : HOST_WIDE_INT mcoffset;
1686 4405644 : if (TREE_CODE (base) == MEM_REF
1687 1989439 : && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
1688 4405644 : && moffset.is_constant (&mcoffset))
1689 : {
1690 1989439 : hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
1691 1989439 : hash = iterative_hash_host_wide_int (mcoffset, hash);
1692 : }
1693 : else
1694 : {
1695 2416205 : hash = iterative_hash_expr (base, 0);
1696 2416205 : hash = iterative_hash_host_wide_int (offset, hash);
1697 : }
1698 4405644 : hash = iterative_hash_host_wide_int (size, hash);
1699 : }
1700 : else
1701 : {
1702 798127 : ref_decomposed = false;
1703 798127 : hash = iterative_hash_expr (aor.ref, 0);
1704 798127 : aor.max_size = -1;
1705 : }
1706 5203771 : slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1707 5203771 : aor.max_size = saved_maxsize;
1708 5203771 : if (*slot)
1709 : {
1710 933221 : if (!(*slot)->ref_canonical
1711 933221 : && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1712 : {
1713 : /* If we didn't yet canonicalize the hashtable ref (which
1714 : we'll end up using for code insertion) and hit a second
1715 : equal ref that is not structurally equivalent create
1716 : a canonical ref which is a bare MEM_REF. */
1717 49108 : if (TREE_CODE (*mem) == MEM_REF
1718 49108 : || TREE_CODE (*mem) == TARGET_MEM_REF)
1719 : {
1720 14549 : (*slot)->mem.ref = *mem;
1721 14549 : (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1722 : }
1723 : else
1724 : {
1725 34559 : tree ref_alias_type = reference_alias_ptr_type (*mem);
1726 34559 : unsigned int ref_align = get_object_alignment (*mem);
1727 34559 : tree ref_type = TREE_TYPE (*mem);
1728 34559 : tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1729 : unshare_expr (mem_base));
1730 34559 : if (TYPE_ALIGN (ref_type) != ref_align)
1731 8600 : ref_type = build_aligned_type (ref_type, ref_align);
1732 34559 : tree new_ref
1733 34559 : = fold_build2 (MEM_REF, ref_type, tmp,
1734 : build_int_cst (ref_alias_type, mem_off));
1735 34559 : if ((*slot)->mem.volatile_p)
1736 22 : TREE_THIS_VOLATILE (new_ref) = 1;
1737 34559 : (*slot)->mem.ref = new_ref;
1738 : /* Make sure the recorded base and offset are consistent
1739 : with the newly built ref. */
1740 34559 : if (TREE_CODE (TREE_OPERAND (new_ref, 0)) == ADDR_EXPR)
1741 : ;
1742 : else
1743 : {
1744 19696 : (*slot)->mem.base = new_ref;
1745 19696 : (*slot)->mem.offset = 0;
1746 : }
1747 34559 : gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1748 : && is_gimple_mem_ref_addr
1749 : (TREE_OPERAND ((*slot)->mem.ref,
1750 : 0)));
1751 34559 : (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1752 : }
1753 49108 : (*slot)->ref_canonical = true;
1754 : }
1755 933221 : ref = *slot;
1756 933221 : id = ref->id;
1757 : }
1758 : else
1759 : {
1760 4270550 : id = memory_accesses.refs_list.length ();
1761 4270550 : ref = mem_ref_alloc (&aor, hash, id);
1762 4270550 : ref->ref_decomposed = ref_decomposed;
1763 4270550 : memory_accesses.refs_list.safe_push (ref);
1764 4270550 : *slot = ref;
1765 :
1766 4270550 : if (dump_file && (dump_flags & TDF_DETAILS))
1767 : {
1768 499 : fprintf (dump_file, "Memory reference %u: ", id);
1769 499 : print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1770 499 : fprintf (dump_file, "\n");
1771 : }
1772 : }
1773 :
1774 5203771 : record_mem_ref_loc (ref, stmt, mem);
1775 : }
1776 6880958 : if (is_stored)
1777 : {
1778 3645313 : bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1779 3645313 : mark_ref_stored (ref, loop);
1780 : }
1781 : /* A not simple memory op is also a read when it is a write. */
1782 6880958 : if (!is_stored || id == UNANALYZABLE_MEM_ID
1783 2487826 : || ref->mem.ref == error_mark_node)
1784 : {
1785 4836253 : bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1786 4836253 : mark_ref_loaded (ref, loop);
1787 : }
1788 6880958 : init_lim_data (stmt)->ref = ref->id;
1789 6880958 : return;
1790 : }
1791 :
1792 : static unsigned *bb_loop_postorder;
1793 :
1794 : /* qsort sort function to sort blocks after their loop fathers postorder. */
1795 :
1796 : static int
1797 155841677 : sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1798 : void *bb_loop_postorder_)
1799 : {
1800 155841677 : unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1801 155841677 : basic_block bb1 = *(const basic_block *)bb1_;
1802 155841677 : basic_block bb2 = *(const basic_block *)bb2_;
1803 155841677 : class loop *loop1 = bb1->loop_father;
1804 155841677 : class loop *loop2 = bb2->loop_father;
1805 155841677 : if (loop1->num == loop2->num)
1806 72838932 : return bb1->index - bb2->index;
1807 83002745 : return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1808 : }
1809 :
1810 : /* qsort sort function to sort ref locs after their loop fathers postorder. */
1811 :
1812 : static int
1813 933206 : sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1814 : void *bb_loop_postorder_)
1815 : {
1816 933206 : unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1817 933206 : const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1818 933206 : const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1819 933206 : class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1820 933206 : class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1821 933206 : if (loop1->num == loop2->num)
1822 : return 0;
1823 141471 : return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1824 : }
1825 :
1826 : /* Gathers memory references in loops. */
1827 :
1828 : static void
1829 483330 : analyze_memory_references (bool store_motion)
1830 : {
1831 483330 : gimple_stmt_iterator bsi;
1832 483330 : basic_block bb, *bbs;
1833 483330 : class loop *outer;
1834 483330 : unsigned i, n;
1835 :
1836 : /* Collect all basic-blocks in loops and sort them after their
1837 : loops postorder. */
1838 483330 : i = 0;
1839 483330 : bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1840 14601127 : FOR_EACH_BB_FN (bb, cfun)
1841 14117797 : if (bb->loop_father != current_loops->tree_root)
1842 5993688 : bbs[i++] = bb;
1843 483330 : n = i;
1844 483330 : gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1845 : bb_loop_postorder);
1846 :
1847 : /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1848 : That results in better locality for all the bitmaps. It also
1849 : automatically sorts the location list of gathered memory references
1850 : after their loop postorder number allowing to binary-search it. */
1851 6477018 : for (i = 0; i < n; ++i)
1852 : {
1853 5993688 : basic_block bb = bbs[i];
1854 55170235 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1855 43182859 : gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1856 : }
1857 :
1858 : /* Verify the list of gathered memory references is sorted after their
1859 : loop postorder number. */
1860 483330 : if (flag_checking)
1861 : {
1862 : im_mem_ref *ref;
1863 10877260 : FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1864 6130177 : for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1865 933206 : gcc_assert (sort_locs_in_loop_postorder_cmp
1866 : (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1867 : bb_loop_postorder) <= 0);
1868 : }
1869 :
1870 483330 : free (bbs);
1871 :
1872 483330 : if (!store_motion)
1873 483330 : return;
1874 :
1875 : /* Propagate the information about accessed memory references up
1876 : the loop hierarchy. */
1877 2758374 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1878 : {
1879 : /* Finalize the overall touched references (including subloops). */
1880 1308717 : bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1881 1308717 : &memory_accesses.refs_stored_in_loop[loop->num]);
1882 :
1883 : /* Propagate the information about accessed memory references up
1884 : the loop hierarchy. */
1885 1308717 : outer = loop_outer (loop);
1886 1308717 : if (outer == current_loops->tree_root)
1887 1010727 : continue;
1888 :
1889 297990 : bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1890 297990 : &memory_accesses.all_refs_stored_in_loop[loop->num]);
1891 483219 : }
1892 : }
1893 :
1894 : /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1895 : tree_to_aff_combination_expand. */
1896 :
1897 : static bool
1898 542777 : mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1899 : hash_map<tree, name_expansion *> **ttae_cache,
1900 : bool tbaa_p)
1901 : {
1902 542777 : gcc_checking_assert (mem1->mem.ref != error_mark_node
1903 : && mem2->mem.ref != error_mark_node);
1904 :
1905 : /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1906 : object and their offset differ in such a way that the locations cannot
1907 : overlap, then they cannot alias. */
1908 : poly_widest_int size1, size2;
1909 1085554 : aff_tree off1, off2;
1910 :
1911 : /* Perform basic offset and type-based disambiguation. */
1912 542777 : if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1913 : return false;
1914 :
1915 : /* The expansion of addresses may be a bit expensive, thus we only do
1916 : the check at -O2 and higher optimization levels. */
1917 67313 : if (optimize < 2)
1918 : return true;
1919 :
1920 57535 : get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1921 57535 : get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1922 57535 : aff_combination_expand (&off1, ttae_cache);
1923 57535 : aff_combination_expand (&off2, ttae_cache);
1924 57535 : aff_combination_scale (&off1, -1);
1925 57535 : aff_combination_add (&off2, &off1);
1926 :
1927 57535 : if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1928 : return false;
1929 :
1930 : return true;
1931 542777 : }
1932 :
1933 : /* Compare function for bsearch searching for reference locations
1934 : in a loop. */
1935 :
1936 : static int
1937 260628 : find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1938 : void *bb_loop_postorder_)
1939 : {
1940 260628 : unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1941 260628 : class loop *loop = (class loop *)const_cast<void *>(loop_);
1942 260628 : mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1943 260628 : class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1944 260628 : if (loop->num == loc_loop->num
1945 260628 : || flow_loop_nested_p (loop, loc_loop))
1946 200768 : return 0;
1947 59860 : return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1948 59860 : ? -1 : 1);
1949 : }
1950 :
1951 : /* Iterates over all locations of REF in LOOP and its subloops calling
1952 : fn.operator() with the location as argument. When that operator
1953 : returns true the iteration is stopped and true is returned.
1954 : Otherwise false is returned. */
1955 :
1956 : template <typename FN>
1957 : static bool
1958 200768 : for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1959 : {
1960 : unsigned i;
1961 : mem_ref_loc *loc;
1962 :
1963 : /* Search for the cluster of locs in the accesses_in_loop vector
1964 : which is sorted after postorder index of the loop father. */
1965 200768 : loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1966 : bb_loop_postorder);
1967 200768 : if (!loc)
1968 0 : return false;
1969 :
1970 : /* We have found one location inside loop or its sub-loops. Iterate
1971 : both forward and backward to cover the whole cluster. */
1972 200768 : i = loc - ref->accesses_in_loop.address ();
1973 275710 : while (i > 0)
1974 : {
1975 133593 : --i;
1976 133593 : mem_ref_loc *l = &ref->accesses_in_loop[i];
1977 133593 : if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1978 : break;
1979 110199 : if (fn (l))
1980 : return true;
1981 : }
1982 447886 : for (i = loc - ref->accesses_in_loop.address ();
1983 282375 : i < ref->accesses_in_loop.length (); ++i)
1984 : {
1985 209983 : mem_ref_loc *l = &ref->accesses_in_loop[i];
1986 209983 : if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1987 : break;
1988 191467 : if (fn (l))
1989 : return true;
1990 : }
1991 :
1992 : return false;
1993 : }
1994 :
1995 : /* Rewrites location LOC by TMP_VAR. */
1996 :
1997 : class rewrite_mem_ref_loc
1998 : {
1999 : public:
2000 35061 : rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
2001 : bool operator () (mem_ref_loc *loc);
2002 : tree tmp_var;
2003 : };
2004 :
2005 : bool
2006 55972 : rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
2007 : {
2008 55972 : *loc->ref = tmp_var;
2009 55972 : update_stmt (loc->stmt);
2010 55972 : return false;
2011 : }
2012 :
2013 : /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
2014 :
2015 : static void
2016 35061 : rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
2017 : {
2018 0 : for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
2019 0 : }
2020 :
2021 : /* Stores the first reference location in LOCP. */
2022 :
2023 : class first_mem_ref_loc_1
2024 : {
2025 : public:
2026 35061 : first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
2027 : bool operator () (mem_ref_loc *loc);
2028 : mem_ref_loc **locp;
2029 : };
2030 :
2031 : bool
2032 35061 : first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
2033 : {
2034 35061 : *locp = loc;
2035 35061 : return true;
2036 : }
2037 :
2038 : /* Returns the first reference location to REF in LOOP. */
2039 :
2040 : static mem_ref_loc *
2041 35061 : first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
2042 : {
2043 35061 : mem_ref_loc *locp = NULL;
2044 0 : for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
2045 35061 : return locp;
2046 : }
2047 :
2048 : /* Helper function for execute_sm. Emit code to store TMP_VAR into
2049 : MEM along edge EX.
2050 :
2051 : The store is only done if MEM has changed. We do this so no
2052 : changes to MEM occur on code paths that did not originally store
2053 : into it.
2054 :
2055 : The common case for execute_sm will transform:
2056 :
2057 : for (...) {
2058 : if (foo)
2059 : stuff;
2060 : else
2061 : MEM = TMP_VAR;
2062 : }
2063 :
2064 : into:
2065 :
2066 : lsm = MEM;
2067 : for (...) {
2068 : if (foo)
2069 : stuff;
2070 : else
2071 : lsm = TMP_VAR;
2072 : }
2073 : MEM = lsm;
2074 :
2075 : This function will generate:
2076 :
2077 : lsm = MEM;
2078 :
2079 : lsm_flag = false;
2080 : ...
2081 : for (...) {
2082 : if (foo)
2083 : stuff;
2084 : else {
2085 : lsm = TMP_VAR;
2086 : lsm_flag = true;
2087 : }
2088 : }
2089 : if (lsm_flag) <--
2090 : MEM = lsm; <-- (X)
2091 :
2092 : In case MEM and TMP_VAR are NULL the function will return the then
2093 : block so the caller can insert (X) and other related stmts.
2094 : */
2095 :
2096 : static basic_block
2097 14626 : execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
2098 : edge preheader, hash_set <basic_block> *flag_bbs,
2099 : edge &append_cond_position, edge &last_cond_fallthru)
2100 : {
2101 14626 : basic_block new_bb, then_bb, old_dest;
2102 14626 : bool loop_has_only_one_exit;
2103 14626 : edge then_old_edge;
2104 14626 : gimple_stmt_iterator gsi;
2105 14626 : gimple *stmt;
2106 14626 : bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
2107 :
2108 14626 : profile_count count_sum = profile_count::zero ();
2109 14626 : int nbbs = 0, ncount = 0;
2110 14626 : profile_probability flag_probability = profile_probability::uninitialized ();
2111 :
2112 : /* Flag is set in FLAG_BBS. Determine probability that flag will be true
2113 : at loop exit.
2114 :
2115 : This code may look fancy, but it cannot update profile very realistically
2116 : because we do not know the probability that flag will be true at given
2117 : loop exit.
2118 :
2119 : We look for two interesting extremes
2120 : - when exit is dominated by block setting the flag, we know it will
2121 : always be true. This is a common case.
2122 : - when all blocks setting the flag have very low frequency we know
2123 : it will likely be false.
2124 : In all other cases we default to 2/3 for flag being true. */
2125 :
2126 14626 : for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
2127 50722 : it != flag_bbs->end (); ++it)
2128 : {
2129 18048 : if ((*it)->count.initialized_p ())
2130 18035 : count_sum += (*it)->count, ncount ++;
2131 18048 : if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
2132 2445 : flag_probability = profile_probability::always ();
2133 18048 : nbbs++;
2134 : }
2135 :
2136 14626 : profile_probability cap
2137 14626 : = profile_probability::guessed_always ().apply_scale (2, 3);
2138 :
2139 14626 : if (flag_probability.initialized_p ())
2140 : ;
2141 12282 : else if (ncount == nbbs
2142 24766 : && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
2143 : {
2144 177 : flag_probability = count_sum.probability_in (preheader->count ());
2145 177 : if (flag_probability > cap)
2146 63 : flag_probability = cap;
2147 : }
2148 :
2149 14626 : if (!flag_probability.initialized_p ())
2150 12105 : flag_probability = cap;
2151 :
2152 : /* ?? Insert store after previous store if applicable. See note
2153 : below. */
2154 14626 : if (append_cond_position)
2155 5600 : ex = append_cond_position;
2156 :
2157 14626 : loop_has_only_one_exit = single_pred_p (ex->dest);
2158 :
2159 14626 : if (loop_has_only_one_exit)
2160 6027 : ex = split_block_after_labels (ex->dest);
2161 : else
2162 : {
2163 8599 : for (gphi_iterator gpi = gsi_start_phis (ex->dest);
2164 12694 : !gsi_end_p (gpi); gsi_next (&gpi))
2165 : {
2166 4938 : gphi *phi = gpi.phi ();
2167 9876 : if (virtual_operand_p (gimple_phi_result (phi)))
2168 4095 : continue;
2169 :
2170 : /* When the destination has a non-virtual PHI node with multiple
2171 : predecessors make sure we preserve the PHI structure by
2172 : forcing a forwarder block so that hoisting of that PHI will
2173 : still work. */
2174 843 : split_edge (ex);
2175 843 : break;
2176 : }
2177 : }
2178 :
2179 14626 : old_dest = ex->dest;
2180 14626 : new_bb = split_edge (ex);
2181 14626 : if (append_cond_position)
2182 5600 : new_bb->count += last_cond_fallthru->count ();
2183 14626 : then_bb = create_empty_bb (new_bb);
2184 14626 : then_bb->count = new_bb->count.apply_probability (flag_probability);
2185 14626 : if (irr)
2186 49 : then_bb->flags = BB_IRREDUCIBLE_LOOP;
2187 14626 : add_bb_to_loop (then_bb, new_bb->loop_father);
2188 :
2189 14626 : gsi = gsi_start_bb (new_bb);
2190 14626 : stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2191 : NULL_TREE, NULL_TREE);
2192 14626 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2193 :
2194 : /* Insert actual store. */
2195 14626 : if (mem)
2196 : {
2197 12256 : gsi = gsi_start_bb (then_bb);
2198 12256 : stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2199 12256 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2200 : }
2201 :
2202 14626 : edge e1 = single_succ_edge (new_bb);
2203 29203 : edge e2 = make_edge (new_bb, then_bb,
2204 : EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2205 14626 : e2->probability = flag_probability;
2206 :
2207 14626 : e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2208 14626 : e1->flags &= ~EDGE_FALLTHRU;
2209 :
2210 14626 : e1->probability = flag_probability.invert ();
2211 :
2212 29203 : then_old_edge = make_single_succ_edge (then_bb, old_dest,
2213 : EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2214 :
2215 14626 : set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2216 :
2217 14626 : if (append_cond_position)
2218 : {
2219 5600 : basic_block prevbb = last_cond_fallthru->src;
2220 5600 : redirect_edge_succ (last_cond_fallthru, new_bb);
2221 5600 : set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2222 5600 : set_immediate_dominator (CDI_DOMINATORS, old_dest,
2223 : recompute_dominator (CDI_DOMINATORS, old_dest));
2224 : }
2225 :
2226 : /* ?? Because stores may alias, they must happen in the exact
2227 : sequence they originally happened. Save the position right after
2228 : the (_lsm) store we just created so we can continue appending after
2229 : it and maintain the original order. */
2230 14626 : append_cond_position = then_old_edge;
2231 14626 : last_cond_fallthru = find_edge (new_bb, old_dest);
2232 :
2233 14626 : if (!loop_has_only_one_exit)
2234 8599 : for (gphi_iterator gpi = gsi_start_phis (old_dest);
2235 12524 : !gsi_end_p (gpi); gsi_next (&gpi))
2236 : {
2237 3925 : gphi *phi = gpi.phi ();
2238 3925 : unsigned i;
2239 :
2240 22140 : for (i = 0; i < gimple_phi_num_args (phi); i++)
2241 18215 : if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2242 : {
2243 3925 : tree arg = gimple_phi_arg_def (phi, i);
2244 3925 : add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2245 3925 : update_stmt (phi);
2246 : }
2247 : }
2248 :
2249 14626 : return then_bb;
2250 : }
2251 :
2252 : /* When REF is set on the location, set flag indicating the store. */
2253 :
2254 : class sm_set_flag_if_changed
2255 : {
2256 : public:
2257 7777 : sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2258 7777 : : flag (flag_), bbs (bbs_) {}
2259 : bool operator () (mem_ref_loc *loc);
2260 : tree flag;
2261 : hash_set <basic_block> *bbs;
2262 : };
2263 :
2264 : bool
2265 14815 : sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2266 : {
2267 : /* Only set the flag for writes. */
2268 14815 : if (is_gimple_assign (loc->stmt)
2269 14815 : && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2270 : {
2271 8906 : gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2272 8906 : gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2273 8906 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2274 8906 : bbs->add (gimple_bb (stmt));
2275 : }
2276 14815 : return false;
2277 : }
2278 :
2279 : /* Helper function for execute_sm. On every location where REF is
2280 : set, set an appropriate flag indicating the store. */
2281 :
2282 : static tree
2283 7777 : execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2284 : hash_set <basic_block> *bbs)
2285 : {
2286 7777 : tree flag;
2287 7777 : char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2288 7777 : flag = create_tmp_reg (boolean_type_node, str);
2289 7777 : for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2290 7777 : return flag;
2291 : }
2292 :
2293 35061 : struct sm_aux
2294 : {
2295 : tree tmp_var;
2296 : tree store_flag;
2297 : hash_set <basic_block> flag_bbs;
2298 : };
2299 :
2300 : /* Executes store motion of memory reference REF from LOOP.
2301 : Exits from the LOOP are stored in EXITS. The initialization of the
2302 : temporary variable is put to the preheader of the loop, and assignments
2303 : to the reference from the temporary variable are emitted to exits. */
2304 :
2305 : static sm_aux *
2306 35061 : execute_sm (class loop *loop, im_mem_ref *ref,
2307 : hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
2308 : bool use_other_flag_var)
2309 : {
2310 35061 : gassign *load;
2311 35061 : struct fmt_data fmt_data;
2312 35061 : struct lim_aux_data *lim_data;
2313 35061 : bool multi_threaded_model_p = false;
2314 35061 : gimple_stmt_iterator gsi;
2315 35061 : sm_aux *aux = new sm_aux;
2316 :
2317 35061 : if (dump_file && (dump_flags & TDF_DETAILS))
2318 : {
2319 82 : fprintf (dump_file, "Executing store motion of ");
2320 82 : print_generic_expr (dump_file, ref->mem.ref);
2321 82 : fprintf (dump_file, " from loop %d\n", loop->num);
2322 : }
2323 :
2324 35061 : aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2325 35061 : get_lsm_tmp_name (ref->mem.ref, ~0));
2326 :
2327 35061 : fmt_data.loop = loop;
2328 35061 : fmt_data.orig_loop = loop;
2329 35061 : for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2330 :
2331 70122 : bool always_stored = ref_always_stored_p (loop, ref);
2332 35061 : if (maybe_mt
2333 11845 : && (bb_in_transaction (loop_preheader_edge (loop)->src)
2334 11844 : || (ref_can_have_store_data_races (ref->mem.ref) && ! always_stored)
2335 : /* Do not speculate a load/store when that's not a noop, either
2336 : because the mode cannot be transferred or because there's
2337 : UB involved for out-of-bound values. */
2338 3827 : || !mode_can_transfer_bits (TYPE_MODE (TREE_TYPE (ref->mem.ref)))
2339 3827 : || TREE_CODE (TREE_TYPE (ref->mem.ref)) == BOOLEAN_TYPE
2340 3813 : || (TREE_CODE (ref->mem.ref) == COMPONENT_REF
2341 915 : && DECL_BIT_FIELD (TREE_OPERAND (ref->mem.ref, 1)))))
2342 : multi_threaded_model_p = true;
2343 :
2344 35061 : if (multi_threaded_model_p && !use_other_flag_var)
2345 7777 : aux->store_flag
2346 7777 : = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2347 : else
2348 27284 : aux->store_flag = NULL_TREE;
2349 :
2350 : /* Remember variable setup. */
2351 35061 : aux_map.put (ref, aux);
2352 :
2353 35061 : rewrite_mem_refs (loop, ref, aux->tmp_var);
2354 :
2355 : /* Emit the load code on a random exit edge or into the latch if
2356 : the loop does not exit, so that we are sure it will be processed
2357 : by move_computations after all dependencies. */
2358 35061 : gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2359 :
2360 : /* Avoid doing a load if there was no load of the ref in the loop.
2361 : Esp. when the ref is not always stored we cannot optimize it
2362 : away later. But when it is not always stored we must use a conditional
2363 : store then. */
2364 35061 : if ((!always_stored && !multi_threaded_model_p)
2365 35061 : || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2366 18333 : load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2367 : else
2368 : {
2369 : /* If not emitting a load mark the uninitialized state on the
2370 : loop entry as not to be warned for. */
2371 16728 : tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2372 16728 : suppress_warning (uninit, OPT_Wuninitialized);
2373 16728 : uninit = get_or_create_ssa_default_def (cfun, uninit);
2374 16728 : load = gimple_build_assign (aux->tmp_var, uninit);
2375 : }
2376 35061 : lim_data = init_lim_data (load);
2377 35061 : lim_data->max_loop = loop;
2378 35061 : lim_data->tgt_loop = loop;
2379 35061 : gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2380 :
2381 35061 : if (aux->store_flag)
2382 : {
2383 7777 : load = gimple_build_assign (aux->store_flag, boolean_false_node);
2384 7777 : lim_data = init_lim_data (load);
2385 7777 : lim_data->max_loop = loop;
2386 7777 : lim_data->tgt_loop = loop;
2387 7777 : gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2388 : }
2389 :
2390 35061 : return aux;
2391 : }
2392 :
2393 : /* sm_ord is used for ordinary stores we can retain order with respect
2394 : to other stores
2395 : sm_unord is used for conditional executed stores which need to be
2396 : able to execute in arbitrary order with respect to other stores
2397 : sm_other is used for stores we do not try to apply store motion to. */
2398 : enum sm_kind { sm_ord, sm_unord, sm_other };
2399 : struct seq_entry
2400 : {
2401 : seq_entry () = default;
2402 52211 : seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2403 52211 : : first (f), second (k), from (fr) {}
2404 : unsigned first;
2405 : sm_kind second;
2406 : tree from;
2407 : };
2408 :
2409 : static void
2410 33300 : execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2411 : hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2412 : edge &append_cond_position, edge &last_cond_fallthru,
2413 : bitmap clobbers_to_prune)
2414 : {
2415 : /* Sink the stores to exit from the loop. */
2416 117052 : for (unsigned i = seq.length (); i > 0; --i)
2417 : {
2418 50452 : im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2419 50452 : if (seq[i-1].second == sm_other)
2420 : {
2421 4937 : gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2422 4937 : gassign *store;
2423 4937 : if (ref->mem.ref == error_mark_node)
2424 : {
2425 179 : tree lhs = gimple_assign_lhs (ref->accesses_in_loop[0].stmt);
2426 179 : if (dump_file && (dump_flags & TDF_DETAILS))
2427 : {
2428 3 : fprintf (dump_file, "Re-issuing dependent ");
2429 3 : print_generic_expr (dump_file, unshare_expr (seq[i-1].from));
2430 3 : fprintf (dump_file, " of ");
2431 3 : print_generic_expr (dump_file, lhs);
2432 3 : fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2433 3 : loop->num, ex->src->index, ex->dest->index);
2434 : }
2435 179 : store = gimple_build_assign (unshare_expr (lhs),
2436 179 : unshare_expr (seq[i-1].from));
2437 179 : bitmap_set_bit (clobbers_to_prune, seq[i-1].first);
2438 : }
2439 : else
2440 : {
2441 4758 : if (dump_file && (dump_flags & TDF_DETAILS))
2442 : {
2443 0 : fprintf (dump_file, "Re-issuing dependent store of ");
2444 0 : print_generic_expr (dump_file, ref->mem.ref);
2445 0 : fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2446 0 : loop->num, ex->src->index, ex->dest->index);
2447 : }
2448 4758 : store = gimple_build_assign (unshare_expr (ref->mem.ref),
2449 4758 : seq[i-1].from);
2450 : }
2451 4937 : gsi_insert_on_edge (ex, store);
2452 : }
2453 : else
2454 : {
2455 45515 : sm_aux *aux = *aux_map.get (ref);
2456 45515 : if (!aux->store_flag || kind == sm_ord)
2457 : {
2458 33259 : gassign *store;
2459 33259 : store = gimple_build_assign (unshare_expr (ref->mem.ref),
2460 : aux->tmp_var);
2461 33259 : gsi_insert_on_edge (ex, store);
2462 33259 : }
2463 : else
2464 12256 : execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2465 : aux->store_flag,
2466 : loop_preheader_edge (loop), &aux->flag_bbs,
2467 : append_cond_position, last_cond_fallthru);
2468 : }
2469 : }
2470 33300 : }
2471 :
2472 : /* Push the SM candidate at index PTR in the sequence SEQ down until
2473 : we hit the next SM candidate. Return true if that went OK and
2474 : false if we could not disambiguate against another unrelated ref.
2475 : Update *AT to the index where the candidate now resides. */
2476 :
2477 : static bool
2478 37622 : sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2479 : {
2480 37622 : *at = ptr;
2481 38398 : for (; ptr > 0; --ptr)
2482 : {
2483 17707 : seq_entry &new_cand = seq[ptr];
2484 17707 : seq_entry &against = seq[ptr-1];
2485 17707 : if (against.second == sm_ord
2486 5811 : || (against.second == sm_other && against.from != NULL_TREE))
2487 : /* Found the tail of the sequence. */
2488 : break;
2489 : /* We may not ignore self-dependences here. */
2490 1077 : if (new_cand.first == against.first
2491 : /* ??? We could actually handle clobbers here, but not easily
2492 : with LIMs dependence analysis. */
2493 855 : || (memory_accesses.refs_list[new_cand.first]->mem.ref
2494 855 : == error_mark_node)
2495 850 : || (memory_accesses.refs_list[against.first]->mem.ref
2496 : == error_mark_node)
2497 1903 : || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2498 826 : memory_accesses.refs_list[against.first],
2499 : false))
2500 : /* ??? Prune new_cand from the list of refs to apply SM to. */
2501 301 : return false;
2502 776 : std::swap (new_cand, against);
2503 776 : *at = ptr - 1;
2504 : }
2505 : return true;
2506 : }
2507 :
2508 : /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2509 : walking backwards from VDEF (or the end of BB if VDEF is NULL). */
2510 :
2511 : static int
2512 35682 : sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2513 : vec<seq_entry> &seq, bitmap refs_not_in_seq,
2514 : bitmap refs_not_supported, bool forked,
2515 : bitmap fully_visited)
2516 : {
2517 35682 : if (!vdef)
2518 26542 : for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2519 114280 : gsi_prev (&gsi))
2520 : {
2521 140371 : vdef = gimple_vdef (gsi_stmt (gsi));
2522 52633 : if (vdef)
2523 : break;
2524 : }
2525 26542 : if (!vdef)
2526 : {
2527 8715 : gphi *vphi = get_virtual_phi (bb);
2528 8715 : if (vphi)
2529 4348 : vdef = gimple_phi_result (vphi);
2530 : }
2531 35682 : if (!vdef)
2532 : {
2533 4367 : if (single_pred_p (bb))
2534 : /* This handles the perfect nest case. */
2535 3575 : return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2536 : seq, refs_not_in_seq, refs_not_supported,
2537 3575 : forked, fully_visited);
2538 : return 0;
2539 : }
2540 55362 : do
2541 : {
2542 110724 : gimple *def = SSA_NAME_DEF_STMT (vdef);
2543 55362 : if (gimple_bb (def) != bb)
2544 : {
2545 : /* If we forked by processing a PHI do not allow our walk to
2546 : merge again until we handle that robustly. */
2547 3448 : if (forked)
2548 : {
2549 : /* Mark refs_not_in_seq as unsupported. */
2550 1736 : bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2551 1736 : return 1;
2552 : }
2553 : /* Otherwise it doesn't really matter if we end up in different
2554 : BBs. */
2555 : bb = gimple_bb (def);
2556 : }
2557 53626 : if (gphi *phi = dyn_cast <gphi *> (def))
2558 : {
2559 : /* Handle CFG merges. Until we handle forks (gimple_bb (def) != bb)
2560 : this is still linear.
2561 : Eventually we want to cache intermediate results per BB
2562 : (but we can't easily cache for different exits?). */
2563 : /* Stop at PHIs with possible backedges. */
2564 10552 : if (bb == bb->loop_father->header
2565 5386 : || bb->flags & BB_IRREDUCIBLE_LOOP)
2566 : {
2567 : /* Mark refs_not_in_seq as unsupported. */
2568 5171 : bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2569 5171 : return 1;
2570 : }
2571 5381 : if (gimple_phi_num_args (phi) == 1)
2572 1527 : return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2573 : gimple_phi_arg_def (phi, 0), seq,
2574 : refs_not_in_seq, refs_not_supported,
2575 1527 : false, fully_visited);
2576 7708 : if (bitmap_bit_p (fully_visited,
2577 3854 : SSA_NAME_VERSION (gimple_phi_result (phi))))
2578 : return 1;
2579 3750 : auto_vec<seq_entry> first_edge_seq;
2580 3750 : auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2581 3750 : int eret;
2582 3750 : bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2583 3750 : eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2584 : gimple_phi_arg_def (phi, 0),
2585 : first_edge_seq,
2586 : tem_refs_not_in_seq, refs_not_supported,
2587 : true, fully_visited);
2588 3750 : if (eret != 1)
2589 : return -1;
2590 : /* Simplify our lives by pruning the sequence of !sm_ord. */
2591 3532 : while (!first_edge_seq.is_empty ()
2592 8457 : && first_edge_seq.last ().second != sm_ord)
2593 1175 : first_edge_seq.pop ();
2594 7613 : for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2595 : {
2596 5849 : tree vuse = gimple_phi_arg_def (phi, i);
2597 5849 : edge e = gimple_phi_arg_edge (phi, i);
2598 5849 : auto_vec<seq_entry> edge_seq;
2599 5849 : bitmap_and_compl (tem_refs_not_in_seq,
2600 : refs_not_in_seq, refs_not_supported);
2601 : /* If we've marked all refs we search for as unsupported
2602 : we can stop processing and use the sequence as before
2603 : the PHI. */
2604 5849 : if (bitmap_empty_p (tem_refs_not_in_seq))
2605 : return 1;
2606 3863 : eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2607 : tem_refs_not_in_seq, refs_not_supported,
2608 : true, fully_visited);
2609 3863 : if (eret != 1)
2610 : return -1;
2611 : /* Simplify our lives by pruning the sequence of !sm_ord. */
2612 3375 : while (!edge_seq.is_empty ()
2613 8473 : && edge_seq.last ().second != sm_ord)
2614 1235 : edge_seq.pop ();
2615 7726 : unsigned min_len = MIN(first_edge_seq.length (),
2616 : edge_seq.length ());
2617 : /* Incrementally merge seqs into first_edge_seq. */
2618 3863 : int first_uneq = -1;
2619 3863 : auto_vec<seq_entry, 2> extra_refs;
2620 6054 : for (unsigned int i = 0; i < min_len; ++i)
2621 : {
2622 : /* ??? We can more intelligently merge when we face different
2623 : order by additional sinking operations in one sequence.
2624 : For now we simply mark them as to be processed by the
2625 : not order-preserving SM code. */
2626 2191 : if (first_edge_seq[i].first != edge_seq[i].first)
2627 : {
2628 35 : if (first_edge_seq[i].second == sm_ord)
2629 22 : bitmap_set_bit (refs_not_supported,
2630 22 : first_edge_seq[i].first);
2631 35 : if (edge_seq[i].second == sm_ord)
2632 24 : bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2633 35 : first_edge_seq[i].second = sm_other;
2634 35 : first_edge_seq[i].from = NULL_TREE;
2635 : /* Record the dropped refs for later processing. */
2636 35 : if (first_uneq == -1)
2637 33 : first_uneq = i;
2638 70 : extra_refs.safe_push (seq_entry (edge_seq[i].first,
2639 35 : sm_other, NULL_TREE));
2640 : }
2641 : /* sm_other prevails. */
2642 2156 : else if (first_edge_seq[i].second != edge_seq[i].second)
2643 : {
2644 : /* Make sure the ref is marked as not supported. */
2645 0 : bitmap_set_bit (refs_not_supported,
2646 0 : first_edge_seq[i].first);
2647 0 : first_edge_seq[i].second = sm_other;
2648 0 : first_edge_seq[i].from = NULL_TREE;
2649 : }
2650 2156 : else if (first_edge_seq[i].second == sm_other
2651 7 : && first_edge_seq[i].from != NULL_TREE
2652 2163 : && (edge_seq[i].from == NULL_TREE
2653 7 : || !operand_equal_p (first_edge_seq[i].from,
2654 7 : edge_seq[i].from, 0)))
2655 7 : first_edge_seq[i].from = NULL_TREE;
2656 : }
2657 : /* Any excess elements become sm_other since they are now
2658 : coonditionally executed. */
2659 10629 : if (first_edge_seq.length () > edge_seq.length ())
2660 : {
2661 6728 : for (unsigned i = edge_seq.length ();
2662 8762 : i < first_edge_seq.length (); ++i)
2663 : {
2664 6728 : if (first_edge_seq[i].second == sm_ord)
2665 3587 : bitmap_set_bit (refs_not_supported,
2666 3587 : first_edge_seq[i].first);
2667 6728 : first_edge_seq[i].second = sm_other;
2668 : }
2669 : }
2670 1829 : else if (edge_seq.length () > first_edge_seq.length ())
2671 : {
2672 8 : if (first_uneq == -1)
2673 0 : first_uneq = first_edge_seq.length ();
2674 8 : for (unsigned i = first_edge_seq.length ();
2675 26 : i < edge_seq.length (); ++i)
2676 : {
2677 18 : if (edge_seq[i].second == sm_ord)
2678 8 : bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2679 36 : extra_refs.safe_push (seq_entry (edge_seq[i].first,
2680 18 : sm_other, NULL_TREE));
2681 : }
2682 : }
2683 : /* Put unmerged refs at first_uneq to force dependence checking
2684 : on them. */
2685 3863 : if (first_uneq != -1)
2686 : {
2687 : /* Missing ordered_splice_at. */
2688 66 : if ((unsigned)first_uneq == first_edge_seq.length ())
2689 0 : first_edge_seq.safe_splice (extra_refs);
2690 : else
2691 : {
2692 33 : unsigned fes_length = first_edge_seq.length ();
2693 33 : first_edge_seq.safe_grow (fes_length
2694 33 : + extra_refs.length ());
2695 33 : memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
2696 33 : &first_edge_seq[first_uneq],
2697 33 : (fes_length - first_uneq) * sizeof (seq_entry));
2698 66 : memcpy (&first_edge_seq[first_uneq],
2699 33 : extra_refs.address (),
2700 33 : extra_refs.length () * sizeof (seq_entry));
2701 : }
2702 : }
2703 5849 : }
2704 : /* Use the sequence from the first edge and push SMs down. */
2705 4783 : for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2706 : {
2707 3019 : unsigned id = first_edge_seq[i].first;
2708 3019 : seq.safe_push (first_edge_seq[i]);
2709 3019 : unsigned new_idx;
2710 3019 : if ((first_edge_seq[i].second == sm_ord
2711 2376 : || (first_edge_seq[i].second == sm_other
2712 2376 : && first_edge_seq[i].from != NULL_TREE))
2713 4025 : && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2714 : {
2715 125 : if (first_edge_seq[i].second == sm_ord)
2716 0 : bitmap_set_bit (refs_not_supported, id);
2717 : /* Mark it sm_other. */
2718 125 : seq[new_idx].second = sm_other;
2719 125 : seq[new_idx].from = NULL_TREE;
2720 : }
2721 : }
2722 1764 : bitmap_set_bit (fully_visited,
2723 1764 : SSA_NAME_VERSION (gimple_phi_result (phi)));
2724 1764 : return 1;
2725 3750 : }
2726 43074 : lim_aux_data *data = get_lim_data (def);
2727 43074 : im_mem_ref *ref = memory_accesses.refs_list[data->ref];
2728 43074 : if (data->ref == UNANALYZABLE_MEM_ID)
2729 : return -1;
2730 : /* Stop at memory references which we can't move. */
2731 43074 : else if ((ref->mem.ref == error_mark_node
2732 : /* We can move end-of-storage/object down. */
2733 423 : && !gimple_clobber_p (ref->accesses_in_loop[0].stmt,
2734 : CLOBBER_STORAGE_END)
2735 291 : && !gimple_clobber_p (ref->accesses_in_loop[0].stmt,
2736 : CLOBBER_OBJECT_END))
2737 43390 : || TREE_THIS_VOLATILE (ref->mem.ref))
2738 : {
2739 : /* Mark refs_not_in_seq as unsupported. */
2740 139 : bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2741 139 : return 1;
2742 : }
2743 : /* One of the stores we want to apply SM to and we've not yet seen. */
2744 42935 : else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2745 : {
2746 33588 : seq.safe_push (seq_entry (data->ref, sm_ord));
2747 :
2748 : /* 1) push it down the queue until a SMed
2749 : and not ignored ref is reached, skipping all not SMed refs
2750 : and ignored refs via non-TBAA disambiguation. */
2751 33588 : unsigned new_idx;
2752 67176 : if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2753 : /* If that fails but we did not fork yet continue, we'll see
2754 : to re-materialize all of the stores in the sequence then.
2755 : Further stores will only be pushed up to this one. */
2756 33588 : && forked)
2757 : {
2758 0 : bitmap_set_bit (refs_not_supported, data->ref);
2759 : /* Mark it sm_other. */
2760 0 : seq[new_idx].second = sm_other;
2761 : }
2762 :
2763 : /* 2) check whether we've seen all refs we want to SM and if so
2764 : declare success for the active exit */
2765 33588 : if (bitmap_empty_p (refs_not_in_seq))
2766 18888 : return 1;
2767 : }
2768 : else
2769 : /* Another store not part of the final sequence. Simply push it. */
2770 9347 : seq.safe_push (seq_entry (data->ref, sm_other,
2771 9347 : gimple_assign_rhs1 (def)));
2772 :
2773 79409 : vdef = gimple_vuse (def);
2774 : }
2775 : while (1);
2776 : }
2777 :
2778 : /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2779 : edges of the LOOP. */
2780 :
2781 : static void
2782 21430 : hoist_memory_references (class loop *loop, bitmap mem_refs,
2783 : const vec<edge> &exits)
2784 : {
2785 21430 : im_mem_ref *ref;
2786 21430 : unsigned i;
2787 21430 : bitmap_iterator bi;
2788 :
2789 : /* There's a special case we can use ordered re-materialization for
2790 : conditionally executed stores which is when all stores in the loop
2791 : happen in the same basic-block. In that case we know we'll reach
2792 : all stores and thus can simply process that BB and emit a single
2793 : conditional block of ordered materializations. See PR102436. */
2794 21430 : basic_block single_store_bb = NULL;
2795 31557 : EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
2796 : 0, i, bi)
2797 : {
2798 29358 : bool fail = false;
2799 29358 : ref = memory_accesses.refs_list[i];
2800 134528 : for (auto loc : ref->accesses_in_loop)
2801 144353 : if (!gimple_vdef (loc.stmt))
2802 : ;
2803 32214 : else if (!single_store_bb)
2804 : {
2805 21430 : single_store_bb = gimple_bb (loc.stmt);
2806 21430 : bool conditional = false;
2807 76994 : for (edge e : exits)
2808 23025 : if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
2809 : {
2810 : /* Conditional as seen from e. */
2811 : conditional = true;
2812 : break;
2813 : }
2814 21430 : if (!conditional)
2815 : {
2816 : fail = true;
2817 : break;
2818 : }
2819 : }
2820 10784 : else if (single_store_bb != gimple_bb (loc.stmt))
2821 : {
2822 : fail = true;
2823 : break;
2824 : }
2825 29358 : if (fail)
2826 : {
2827 : single_store_bb = NULL;
2828 : break;
2829 : }
2830 : }
2831 21430 : if (single_store_bb)
2832 : {
2833 : /* Analyze the single block with stores. */
2834 2199 : auto_bitmap fully_visited;
2835 2199 : auto_bitmap refs_not_supported;
2836 2199 : auto_bitmap refs_not_in_seq;
2837 2199 : auto_vec<seq_entry> seq;
2838 2199 : bitmap_copy (refs_not_in_seq, mem_refs);
2839 2199 : int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
2840 : seq, refs_not_in_seq, refs_not_supported,
2841 : false, fully_visited);
2842 2199 : if (res != 1)
2843 : {
2844 : /* Unhandled refs can still fail this. */
2845 0 : bitmap_clear (mem_refs);
2846 0 : return;
2847 : }
2848 :
2849 : /* We cannot handle sm_other since we neither remember the
2850 : stored location nor the value at the point we execute them. */
2851 5322 : for (unsigned i = 0; i < seq.length (); ++i)
2852 : {
2853 3123 : unsigned new_i;
2854 3123 : if (seq[i].second == sm_other
2855 3123 : && seq[i].from != NULL_TREE)
2856 438 : seq[i].from = NULL_TREE;
2857 2685 : else if ((seq[i].second == sm_ord
2858 0 : || (seq[i].second == sm_other
2859 0 : && seq[i].from != NULL_TREE))
2860 2685 : && !sm_seq_push_down (seq, i, &new_i))
2861 : {
2862 63 : bitmap_set_bit (refs_not_supported, seq[new_i].first);
2863 63 : seq[new_i].second = sm_other;
2864 63 : seq[new_i].from = NULL_TREE;
2865 : }
2866 : }
2867 2199 : bitmap_and_compl_into (mem_refs, refs_not_supported);
2868 2199 : if (bitmap_empty_p (mem_refs))
2869 : return;
2870 :
2871 : /* Prune seq. */
2872 2544 : while (seq.last ().second == sm_other
2873 2544 : && seq.last ().from == NULL_TREE)
2874 396 : seq.pop ();
2875 :
2876 2148 : hash_map<im_mem_ref *, sm_aux *> aux_map;
2877 :
2878 : /* Execute SM but delay the store materialization for ordered
2879 : sequences on exit. Remember a created flag var and make
2880 : sure to re-use it. */
2881 2148 : sm_aux *flag_var_aux = nullptr;
2882 4770 : EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2883 : {
2884 2622 : ref = memory_accesses.refs_list[i];
2885 2622 : sm_aux *aux = execute_sm (loop, ref, aux_map, true,
2886 : flag_var_aux != nullptr);
2887 2622 : if (aux->store_flag)
2888 1501 : flag_var_aux = aux;
2889 : }
2890 :
2891 : /* Materialize ordered store sequences on exits. */
2892 2148 : edge e;
2893 2148 : auto_bitmap clobbers_to_prune;
2894 5881 : FOR_EACH_VEC_ELT (exits, i, e)
2895 : {
2896 3733 : edge append_cond_position = NULL;
2897 3733 : edge last_cond_fallthru = NULL;
2898 3733 : edge insert_e = e;
2899 : /* Construct the single flag variable control flow and insert
2900 : the ordered seq of stores in the then block. With
2901 : -fstore-data-races we can do the stores unconditionally. */
2902 3733 : if (flag_var_aux)
2903 2370 : insert_e
2904 : = single_pred_edge
2905 2370 : (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
2906 : flag_var_aux->store_flag,
2907 : loop_preheader_edge (loop),
2908 : &flag_var_aux->flag_bbs,
2909 : append_cond_position,
2910 : last_cond_fallthru));
2911 3733 : execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
2912 : append_cond_position, last_cond_fallthru,
2913 : clobbers_to_prune);
2914 3733 : gsi_commit_one_edge_insert (insert_e, NULL);
2915 : }
2916 :
2917 : /* Remove clobbers inside the loop we re-materialized on exits. */
2918 2148 : EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
2919 : {
2920 0 : gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
2921 0 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2922 0 : unlink_stmt_vdef (stmt);
2923 0 : release_defs (stmt);
2924 0 : gimple_set_vdef (stmt, NULL_TREE);
2925 0 : gsi_remove (&gsi, true);
2926 : }
2927 :
2928 4770 : for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2929 7392 : iter != aux_map.end (); ++iter)
2930 5244 : delete (*iter).second;
2931 :
2932 2148 : return;
2933 4347 : }
2934 :
2935 : /* To address PR57359 before actually applying store-motion check
2936 : the candidates found for validity with regards to reordering
2937 : relative to other stores which we until here disambiguated using
2938 : TBAA which isn't valid.
2939 : What matters is the order of the last stores to the mem_refs
2940 : with respect to the other stores of the loop at the point of the
2941 : loop exits. */
2942 :
2943 : /* For each exit compute the store order, pruning from mem_refs
2944 : on the fly. */
2945 : /* The complexity of this is at least
2946 : O(number of exits * number of SM refs) but more approaching
2947 : O(number of exits * number of SM refs * number of stores). */
2948 : /* ??? Somehow do this in a single sweep over the loop body. */
2949 19231 : auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2950 19231 : auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2951 19231 : edge e;
2952 39207 : FOR_EACH_VEC_ELT (exits, i, e)
2953 : {
2954 22283 : vec<seq_entry> seq;
2955 22283 : seq.create (4);
2956 22283 : auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2957 22283 : bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2958 22283 : if (bitmap_empty_p (refs_not_in_seq))
2959 : {
2960 1515 : seq.release ();
2961 1515 : break;
2962 : }
2963 20768 : auto_bitmap fully_visited;
2964 20768 : int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
2965 : seq, refs_not_in_seq,
2966 : refs_not_supported, false,
2967 : fully_visited);
2968 20768 : if (res != 1)
2969 : {
2970 792 : bitmap_copy (refs_not_supported, mem_refs);
2971 792 : seq.release ();
2972 792 : break;
2973 : }
2974 19976 : sms.safe_push (std::make_pair (e, seq));
2975 23075 : }
2976 :
2977 : /* Prune pruned mem_refs from earlier processed exits. */
2978 19231 : bool changed = !bitmap_empty_p (refs_not_supported);
2979 19231 : while (changed)
2980 : {
2981 6143 : changed = false;
2982 6143 : std::pair<edge, vec<seq_entry> > *seq;
2983 40280 : FOR_EACH_VEC_ELT (sms, i, seq)
2984 : {
2985 : bool need_to_push = false;
2986 13750 : for (unsigned i = 0; i < seq->second.length (); ++i)
2987 : {
2988 6297 : sm_kind kind = seq->second[i].second;
2989 6297 : if (kind == sm_other && seq->second[i].from == NULL_TREE)
2990 : break;
2991 5198 : unsigned id = seq->second[i].first;
2992 5198 : unsigned new_idx;
2993 5198 : if (kind == sm_ord
2994 5198 : && bitmap_bit_p (refs_not_supported, id))
2995 : {
2996 2545 : seq->second[i].second = sm_other;
2997 2545 : gcc_assert (seq->second[i].from == NULL_TREE);
2998 : need_to_push = true;
2999 : }
3000 2653 : else if (need_to_push
3001 2653 : && !sm_seq_push_down (seq->second, i, &new_idx))
3002 : {
3003 : /* We need to push down both sm_ord and sm_other
3004 : but for the latter we need to disqualify all
3005 : following refs. */
3006 113 : if (kind == sm_ord)
3007 : {
3008 7 : if (bitmap_set_bit (refs_not_supported, id))
3009 7 : changed = true;
3010 7 : seq->second[new_idx].second = sm_other;
3011 : }
3012 : else
3013 : {
3014 411 : for (unsigned j = seq->second.length () - 1;
3015 305 : j > new_idx; --j)
3016 199 : if (seq->second[j].second == sm_ord
3017 321 : && bitmap_set_bit (refs_not_supported,
3018 122 : seq->second[j].first))
3019 : changed = true;
3020 106 : seq->second.truncate (new_idx);
3021 106 : break;
3022 : }
3023 : }
3024 : }
3025 : }
3026 : }
3027 19231 : std::pair<edge, vec<seq_entry> > *seq;
3028 59183 : FOR_EACH_VEC_ELT (sms, i, seq)
3029 : {
3030 : /* Prune sm_other from the end. */
3031 63829 : while (!seq->second.is_empty ()
3032 43853 : && seq->second.last ().second == sm_other)
3033 5171 : seq->second.pop ();
3034 : /* Prune duplicates from the start. */
3035 19976 : auto_bitmap seen (&lim_bitmap_obstack);
3036 19976 : unsigned j, k;
3037 47461 : for (j = k = 0; j < seq->second.length (); ++j)
3038 27485 : if (bitmap_set_bit (seen, seq->second[j].first))
3039 : {
3040 27376 : if (k != j)
3041 77 : seq->second[k] = seq->second[j];
3042 27376 : ++k;
3043 : }
3044 19976 : seq->second.truncate (k);
3045 : /* And verify. */
3046 19976 : seq_entry *e;
3047 87304 : FOR_EACH_VEC_ELT (seq->second, j, e)
3048 27376 : gcc_assert (e->second == sm_ord
3049 : || (e->second == sm_other && e->from != NULL_TREE));
3050 19976 : }
3051 :
3052 : /* Verify dependence for refs we cannot handle with the order preserving
3053 : code (refs_not_supported) or prune them from mem_refs. */
3054 19231 : auto_vec<seq_entry> unord_refs;
3055 34989 : EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
3056 : {
3057 15758 : ref = memory_accesses.refs_list[i];
3058 15758 : if (!ref_indep_loop_p (loop, ref, sm_waw))
3059 6535 : bitmap_clear_bit (mem_refs, i);
3060 : /* We've now verified store order for ref with respect to all other
3061 : stores in the loop does not matter. */
3062 : else
3063 9223 : unord_refs.safe_push (seq_entry (i, sm_unord));
3064 : }
3065 :
3066 19231 : hash_map<im_mem_ref *, sm_aux *> aux_map;
3067 :
3068 : /* Execute SM but delay the store materialization for ordered
3069 : sequences on exit. */
3070 51670 : EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
3071 : {
3072 32439 : ref = memory_accesses.refs_list[i];
3073 32439 : execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
3074 : false);
3075 : }
3076 :
3077 : /* Materialize ordered store sequences on exits. */
3078 19231 : auto_bitmap clobbers_to_prune;
3079 43068 : FOR_EACH_VEC_ELT (exits, i, e)
3080 : {
3081 23837 : edge append_cond_position = NULL;
3082 23837 : edge last_cond_fallthru = NULL;
3083 23837 : if (i < sms.length ())
3084 : {
3085 19976 : gcc_assert (sms[i].first == e);
3086 19976 : execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
3087 : append_cond_position, last_cond_fallthru,
3088 : clobbers_to_prune);
3089 19976 : sms[i].second.release ();
3090 : }
3091 23837 : if (!unord_refs.is_empty ())
3092 9591 : execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
3093 : append_cond_position, last_cond_fallthru,
3094 : clobbers_to_prune);
3095 : /* Commit edge inserts here to preserve the order of stores
3096 : when an exit exits multiple loops. */
3097 23837 : gsi_commit_one_edge_insert (e, NULL);
3098 : }
3099 :
3100 : /* Remove clobbers inside the loop we re-materialized on exits. */
3101 19410 : EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
3102 : {
3103 179 : gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
3104 179 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3105 179 : unlink_stmt_vdef (stmt);
3106 179 : release_defs (stmt);
3107 179 : gimple_set_vdef (stmt, NULL_TREE);
3108 179 : gsi_remove (&gsi, true);
3109 : }
3110 :
3111 51670 : for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
3112 103340 : iter != aux_map.end (); ++iter)
3113 64878 : delete (*iter).second;
3114 19231 : }
3115 :
3116 : class ref_always_stored
3117 : {
3118 : public:
3119 80195 : ref_always_stored (class loop *loop_)
3120 80195 : : loop (loop_) {}
3121 : bool operator () (mem_ref_loc *loc);
3122 : class loop *loop;
3123 : };
3124 :
3125 : bool
3126 152357 : ref_always_stored::operator () (mem_ref_loc *loc)
3127 : {
3128 : /* Make sure the statement is a store. */
3129 152357 : tree lhs = gimple_get_lhs (loc->stmt);
3130 152357 : if (!lhs
3131 152357 : || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
3132 : return false;
3133 :
3134 96315 : struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
3135 96315 : if (!lim_data)
3136 : return false;
3137 :
3138 96315 : class loop *must_exec = lim_data->always_executed_in;
3139 96315 : if (!must_exec)
3140 : return false;
3141 :
3142 36448 : if (must_exec == loop
3143 36448 : || flow_loop_nested_p (must_exec, loop))
3144 33136 : return true;
3145 :
3146 : return false;
3147 : }
3148 :
3149 : /* Returns true if REF is always stored in LOOP. */
3150 :
3151 : static bool
3152 80195 : ref_always_stored_p (class loop *loop, im_mem_ref *ref)
3153 : {
3154 80195 : return for_all_locs_in_loop (loop, ref, ref_always_stored (loop));
3155 : }
3156 :
3157 : /* Returns true if LOAD_REF and STORE_REF form a "self write" pattern
3158 : where the stored value comes from the loaded value via SSA.
3159 : Example: a[i] = a[0] is safe to hoist a[0] even when i==0. */
3160 :
3161 : static bool
3162 319138 : is_self_write (im_mem_ref *load_ref, im_mem_ref *store_ref)
3163 : {
3164 : /* Only handle the simple case with a single access per ref.
3165 : Bail out on multiple accesses to be conservative. */
3166 319138 : if (load_ref->accesses_in_loop.length () != 1
3167 409884 : || store_ref->accesses_in_loop.length () != 1)
3168 : return false;
3169 :
3170 90763 : gimple *load_stmt = load_ref->accesses_in_loop[0].stmt;
3171 90763 : gimple *store_stmt = store_ref->accesses_in_loop[0].stmt;
3172 :
3173 90763 : if (!is_gimple_assign (load_stmt) || !is_gimple_assign (store_stmt))
3174 : return false;
3175 :
3176 90763 : tree loaded_val = gimple_assign_lhs (load_stmt);
3177 90763 : tree stored_val = gimple_assign_rhs1 (store_stmt);
3178 :
3179 90763 : if (TREE_CODE (loaded_val) != SSA_NAME || TREE_CODE (stored_val) != SSA_NAME)
3180 : return false;
3181 :
3182 : /* Self write: stored value is the loaded value. */
3183 64864 : if (stored_val != loaded_val)
3184 : return false;
3185 :
3186 :
3187 : /* TODO: Try to factor this out with mem_ref_hasher::equal
3188 : into im_compare_access_position_and_size
3189 : or a similar helper to centralize this delicate handling
3190 : complete for MEM_REF offsets and base pointer equality.
3191 :
3192 : TODO: Also ensure max_size_known_p agrees or resort to
3193 : alignment considerations to rule out partial overlaps.
3194 :
3195 : See:
3196 : https://gcc.gnu.org/pipermail/gcc-patches/2025-December/704155.html
3197 : For more context on TODOs above. */
3198 :
3199 : /* They may alias. Verify exact same location. */
3200 1673 : return (operand_equal_p (load_ref->mem.base, store_ref->mem.base, 0)
3201 46 : && known_eq (load_ref->mem.size, store_ref->mem.size)
3202 1719 : && known_eq (load_ref->mem.offset, store_ref->mem.offset));
3203 :
3204 : }
3205 :
3206 : /* Returns true if REF1 and REF2 are independent. */
3207 :
3208 : static bool
3209 605577 : refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
3210 : {
3211 605577 : if (ref1 == ref2)
3212 : return true;
3213 :
3214 542777 : if (dump_file && (dump_flags & TDF_DETAILS))
3215 3041 : fprintf (dump_file, "Querying dependency of refs %u and %u: ",
3216 3041 : ref1->id, ref2->id);
3217 :
3218 542777 : if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
3219 : {
3220 64910 : if (dump_file && (dump_flags & TDF_DETAILS))
3221 99 : fprintf (dump_file, "dependent.\n");
3222 64910 : return false;
3223 : }
3224 : else
3225 : {
3226 477867 : if (dump_file && (dump_flags & TDF_DETAILS))
3227 2942 : fprintf (dump_file, "independent.\n");
3228 477867 : return true;
3229 : }
3230 : }
3231 :
3232 : /* Returns true if REF is independent on all other accessess in LOOP.
3233 : KIND specifies the kind of dependence to consider.
3234 : lim_raw assumes REF is not stored in LOOP and disambiguates RAW
3235 : dependences so if true REF can be hoisted out of LOOP
3236 : sm_war disambiguates a store REF against all other loads to see
3237 : whether the store can be sunk across loads out of LOOP
3238 : sm_waw disambiguates a store REF against all other stores to see
3239 : whether the store can be sunk across stores out of LOOP. */
3240 :
3241 : static bool
3242 2061538 : ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
3243 : {
3244 2061538 : bool indep_p = true;
3245 2061538 : bitmap refs_to_check;
3246 :
3247 2061538 : if (kind == sm_war)
3248 974407 : refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
3249 : else
3250 1087131 : refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
3251 :
3252 2061538 : if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
3253 2061538 : || ref->mem.ref == error_mark_node)
3254 : indep_p = false;
3255 : else
3256 : {
3257 : /* tri-state, { unknown, independent, dependent } */
3258 662834 : dep_state state = query_loop_dependence (loop, ref, kind);
3259 662834 : if (state != dep_unknown)
3260 0 : return state == dep_independent ? true : false;
3261 :
3262 662834 : class loop *inner = loop->inner;
3263 812507 : while (inner)
3264 : {
3265 450872 : if (!ref_indep_loop_p (inner, ref, kind))
3266 : {
3267 : indep_p = false;
3268 : break;
3269 : }
3270 149673 : inner = inner->next;
3271 : }
3272 :
3273 662834 : if (indep_p)
3274 : {
3275 361635 : unsigned i;
3276 361635 : bitmap_iterator bi;
3277 923464 : EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
3278 : {
3279 631021 : im_mem_ref *aref = memory_accesses.refs_list[i];
3280 631021 : if (aref->mem.ref == error_mark_node)
3281 : {
3282 26253 : gimple *stmt = aref->accesses_in_loop[0].stmt;
3283 26253 : if ((kind == sm_war
3284 9353 : && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
3285 : kind != sm_waw))
3286 34938 : || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
3287 : kind != sm_waw))
3288 : {
3289 : indep_p = false;
3290 : break;
3291 : }
3292 : }
3293 : /* For hoisting loads (lim_raw), allow "self write": the store
3294 : writes back the loaded value. Example: a[i] = a[0]
3295 : is safe even when i==0 causes aliasing. */
3296 604768 : else if (kind == lim_raw
3297 319138 : && ref->loaded && aref->stored
3298 923906 : && is_self_write (ref, aref))
3299 : {
3300 17 : if (dump_file && (dump_flags & TDF_DETAILS))
3301 2 : fprintf (dump_file,
3302 : "Dependency of refs %u and %u: "
3303 : "independent (self write).\n",
3304 2 : ref->id, aref->id);
3305 : }
3306 :
3307 604751 : else if (!refs_independent_p (ref, aref, kind != sm_waw))
3308 : {
3309 : indep_p = false;
3310 : break;
3311 : }
3312 : }
3313 : }
3314 : }
3315 :
3316 2061538 : if (dump_file && (dump_flags & TDF_DETAILS))
3317 763 : fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
3318 : kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
3319 371 : ref->id, loop->num, indep_p ? "independent" : "dependent");
3320 :
3321 : /* Record the computed result in the cache. */
3322 2061538 : record_loop_dependence (loop, ref, kind,
3323 : indep_p ? dep_independent : dep_dependent);
3324 :
3325 2061538 : return indep_p;
3326 : }
3327 :
3328 : class ref_in_loop_hot_body
3329 : {
3330 : public:
3331 42674 : ref_in_loop_hot_body (class loop *loop_) : l (loop_) {}
3332 : bool operator () (mem_ref_loc *loc);
3333 : class loop *l;
3334 : };
3335 :
3336 : /* Check the coldest loop between loop L and innermost loop. If there is one
3337 : cold loop between L and INNER_LOOP, store motion can be performed, otherwise
3338 : no cold loop means no store motion. get_coldest_out_loop also handles cases
3339 : when l is inner_loop. */
3340 : bool
3341 43461 : ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
3342 : {
3343 43461 : basic_block curr_bb = gimple_bb (loc->stmt);
3344 43461 : class loop *inner_loop = curr_bb->loop_father;
3345 43461 : return get_coldest_out_loop (l, inner_loop, curr_bb);
3346 : }
3347 :
3348 :
3349 : /* Returns true if we can perform store motion of REF from LOOP. */
3350 :
3351 : static bool
3352 2878290 : can_sm_ref_p (class loop *loop, im_mem_ref *ref)
3353 : {
3354 2878290 : tree base;
3355 :
3356 : /* Can't hoist unanalyzable refs. */
3357 2878290 : if (!MEM_ANALYZABLE (ref))
3358 : return false;
3359 :
3360 : /* Can't hoist/sink aggregate copies. */
3361 2523768 : if (ref->mem.ref == error_mark_node)
3362 : return false;
3363 :
3364 : /* It should be movable. */
3365 2098783 : if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
3366 2098497 : || TREE_THIS_VOLATILE (ref->mem.ref)
3367 4180413 : || !for_each_index (&ref->mem.ref, may_move_till, loop))
3368 1147119 : return false;
3369 :
3370 : /* If it can throw fail, we do not properly update EH info. */
3371 951664 : if (tree_could_throw_p (ref->mem.ref))
3372 : return false;
3373 :
3374 : /* If it can trap, it must be always executed in LOOP.
3375 : Readonly memory locations may trap when storing to them, but
3376 : tree_could_trap_p is a predicate for rvalues, so check that
3377 : explicitly. */
3378 928363 : base = get_base_address (ref->mem.ref);
3379 928363 : if ((tree_could_trap_p (ref->mem.ref)
3380 883590 : || (DECL_P (base) && TREE_READONLY (base))
3381 883237 : || TREE_CODE (base) == STRING_CST)
3382 : /* ??? We can at least use false here, allowing loads? We
3383 : are forcing conditional stores if the ref is not always
3384 : stored to later anyway. So this would only guard
3385 : the load we need to emit. Thus when the ref is not
3386 : loaded we can elide this completely? */
3387 973497 : && !ref_always_stored_p (loop, ref))
3388 : return false;
3389 :
3390 : /* Verify all loads of ref can be hoisted. */
3391 893234 : if (ref->loaded
3392 102027 : && bitmap_bit_p (ref->loaded, loop->num)
3393 990083 : && !ref_indep_loop_p (loop, ref, lim_raw))
3394 : return false;
3395 :
3396 : /* Verify the candidate can be disambiguated against all loads,
3397 : that is, we can elide all in-loop stores. Disambiguation
3398 : against stores is done later when we cannot guarantee preserving
3399 : the order of stores. */
3400 820990 : if (!ref_indep_loop_p (loop, ref, sm_war))
3401 : return false;
3402 :
3403 : /* Verify whether the candidate is hot for LOOP. Only do store motion if the
3404 : candidate's profile count is hot. Statement in cold BB shouldn't be moved
3405 : out of it's loop_father. */
3406 42674 : if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
3407 : return false;
3408 :
3409 : return true;
3410 : }
3411 :
3412 : /* Marks the references in LOOP for that store motion should be performed
3413 : in REFS_TO_SM. SM_EXECUTED is the set of references for that store
3414 : motion was performed in one of the outer loops. */
3415 :
3416 : static void
3417 1267467 : find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
3418 : {
3419 1267467 : bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
3420 1267467 : unsigned i;
3421 1267467 : bitmap_iterator bi;
3422 1267467 : im_mem_ref *ref;
3423 :
3424 4145757 : EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
3425 : {
3426 2878290 : ref = memory_accesses.refs_list[i];
3427 2878290 : if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
3428 41663 : bitmap_set_bit (refs_to_sm, i);
3429 : }
3430 1267467 : }
3431 :
3432 : /* Checks whether LOOP (with exits stored in EXITS array) is suitable
3433 : for a store motion optimization (i.e. whether we can insert statement
3434 : on its exits). */
3435 :
3436 : static bool
3437 1308717 : loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
3438 : const vec<edge> &exits)
3439 : {
3440 1308717 : unsigned i;
3441 1308717 : edge ex;
3442 :
3443 3375739 : FOR_EACH_VEC_ELT (exits, i, ex)
3444 2108272 : if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
3445 : return false;
3446 :
3447 : return true;
3448 : }
3449 :
3450 : /* Try to perform store motion for all memory references modified inside
3451 : LOOP. SM_EXECUTED is the bitmap of the memory references for that
3452 : store motion was executed in one of the outer loops. */
3453 :
3454 : static void
3455 1308717 : store_motion_loop (class loop *loop, bitmap sm_executed)
3456 : {
3457 1308717 : auto_vec<edge> exits = get_loop_exit_edges (loop);
3458 1308717 : class loop *subloop;
3459 1308717 : bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
3460 :
3461 1308717 : if (loop_suitable_for_sm (loop, exits))
3462 : {
3463 1267467 : find_refs_for_sm (loop, sm_executed, sm_in_loop);
3464 1267467 : if (!bitmap_empty_p (sm_in_loop))
3465 21430 : hoist_memory_references (loop, sm_in_loop, exits);
3466 : }
3467 :
3468 1308717 : bitmap_ior_into (sm_executed, sm_in_loop);
3469 1606707 : for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3470 297990 : store_motion_loop (subloop, sm_executed);
3471 1308717 : bitmap_and_compl_into (sm_executed, sm_in_loop);
3472 1308717 : BITMAP_FREE (sm_in_loop);
3473 1308717 : }
3474 :
3475 : /* Try to perform store motion for all memory references modified inside
3476 : loops. */
3477 :
3478 : static void
3479 483219 : do_store_motion (void)
3480 : {
3481 483219 : class loop *loop;
3482 483219 : bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3483 :
3484 1493946 : for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3485 1010727 : store_motion_loop (loop, sm_executed);
3486 :
3487 483219 : BITMAP_FREE (sm_executed);
3488 483219 : }
3489 :
3490 : /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
3491 : for each such basic block bb records the outermost loop for that execution
3492 : of its header implies execution of bb. CONTAINS_CALL is the bitmap of
3493 : blocks that contain a nonpure call. */
3494 :
3495 : static void
3496 1309242 : fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
3497 : {
3498 1309242 : basic_block bb = NULL, last = NULL;
3499 1309242 : edge e;
3500 1309242 : class loop *inn_loop = loop;
3501 :
3502 1607489 : for (class loop *inner = loop->inner; inner; inner = inner->next)
3503 298247 : fill_always_executed_in_1 (inner, contains_call);
3504 :
3505 1309242 : auto_vec<basic_block, 64> worklist;
3506 1309242 : worklist.reserve_exact (loop->num_nodes);
3507 1309242 : worklist.quick_push (loop->header);
3508 2927193 : do
3509 : {
3510 2927193 : edge_iterator ei;
3511 2927193 : bb = worklist.pop ();
3512 :
3513 2927193 : if (!flow_bb_inside_loop_p (inn_loop, bb))
3514 : {
3515 : /* When we are leaving a possibly infinite inner loop
3516 : we have to stop processing. */
3517 172532 : if (!finite_loop_p (inn_loop))
3518 : break;
3519 : /* If the loop was finite we can continue with processing
3520 : the loop we exited to. */
3521 164219 : inn_loop = bb->loop_father;
3522 : }
3523 :
3524 2918880 : if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
3525 1703932 : last = bb;
3526 :
3527 2918880 : if (bitmap_bit_p (contains_call, bb->index))
3528 : break;
3529 :
3530 : /* If LOOP exits from this BB stop processing. */
3531 5640808 : FOR_EACH_EDGE (e, ei, bb->succs)
3532 4013017 : if (!flow_bb_inside_loop_p (loop, e->dest))
3533 : break;
3534 2586834 : if (e)
3535 : break;
3536 :
3537 : /* A loop might be infinite (TODO use simple loop analysis
3538 : to disprove this if possible). */
3539 1627791 : if (bb->flags & BB_IRREDUCIBLE_LOOP)
3540 : break;
3541 :
3542 1627676 : if (bb->loop_father->header == bb)
3543 : /* Record that we enter into a subloop since it might not
3544 : be finite. */
3545 : /* ??? Entering into a not always executed subloop makes
3546 : fill_always_executed_in quadratic in loop depth since
3547 : we walk those loops N times. This is not a problem
3548 : in practice though, see PR102253 for a worst-case testcase. */
3549 606848 : inn_loop = bb->loop_father;
3550 :
3551 : /* Walk the body of LOOP sorted by dominance relation. Additionally,
3552 : if a basic block S dominates the latch, then only blocks dominated
3553 : by S are after it.
3554 : This is get_loop_body_in_dom_order using a worklist algorithm and
3555 : stopping once we are no longer interested in visiting further
3556 : blocks. */
3557 1627676 : unsigned old_len = worklist.length ();
3558 1627676 : unsigned postpone = 0;
3559 1627676 : for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
3560 3507746 : son;
3561 1880070 : son = next_dom_son (CDI_DOMINATORS, son))
3562 : {
3563 1880070 : if (!flow_bb_inside_loop_p (loop, son))
3564 20133 : continue;
3565 1859937 : if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
3566 532578 : postpone = worklist.length ();
3567 1859937 : worklist.quick_push (son);
3568 : }
3569 1627676 : if (postpone)
3570 : /* Postponing the block that dominates the latch means
3571 : processing it last and thus putting it earliest in the
3572 : worklist. */
3573 156872 : std::swap (worklist[old_len], worklist[postpone]);
3574 : }
3575 3255352 : while (!worklist.is_empty ());
3576 :
3577 2098622 : while (1)
3578 : {
3579 1703932 : if (last->loop_father == loop
3580 1703932 : || (ALWAYS_EXECUTED_IN (last)
3581 156140 : && loop_outer (ALWAYS_EXECUTED_IN (last)) == loop))
3582 : {
3583 1703255 : if (dump_enabled_p ())
3584 1901 : dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
3585 : last->index, loop->num);
3586 1703255 : SET_ALWAYS_EXECUTED_IN (last, loop);
3587 : }
3588 1703932 : if (last == loop->header)
3589 : break;
3590 394690 : last = get_immediate_dominator (CDI_DOMINATORS, last);
3591 : }
3592 1309242 : }
3593 :
3594 : /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3595 : for each such basic block bb records the outermost loop for that execution
3596 : of its header implies execution of bb. */
3597 :
3598 : static void
3599 483330 : fill_always_executed_in (void)
3600 : {
3601 483330 : basic_block bb;
3602 483330 : class loop *loop;
3603 :
3604 483330 : auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3605 483330 : bitmap_clear (contains_call);
3606 14601127 : FOR_EACH_BB_FN (bb, cfun)
3607 : {
3608 14117797 : if (loop_depth (bb->loop_father) == 0)
3609 : {
3610 : /* Outside of loops we can skip scanning all stmts. */
3611 8124109 : bitmap_set_bit (contains_call, bb->index);
3612 8124109 : continue;
3613 : }
3614 5993688 : gimple_stmt_iterator gsi;
3615 49699856 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3616 : {
3617 39720268 : if (nonpure_call_p (gsi_stmt (gsi)))
3618 : break;
3619 : }
3620 :
3621 5993688 : if (!gsi_end_p (gsi))
3622 944075 : bitmap_set_bit (contains_call, bb->index);
3623 : }
3624 :
3625 1494325 : for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3626 1010995 : fill_always_executed_in_1 (loop, contains_call);
3627 483330 : }
3628 :
3629 : /* Find the coldest loop preheader for LOOP, also find the nearest hotter loop
3630 : to LOOP. Then recursively iterate each inner loop. */
3631 :
3632 : void
3633 1309242 : fill_coldest_and_hotter_out_loop (class loop *coldest_loop,
3634 : class loop *hotter_loop, class loop *loop)
3635 : {
3636 1309242 : if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3637 : coldest_loop))
3638 27193 : coldest_loop = loop;
3639 :
3640 1309242 : coldest_outermost_loop[loop->num] = coldest_loop;
3641 :
3642 1309242 : hotter_than_inner_loop[loop->num] = NULL;
3643 1309242 : class loop *outer_loop = loop_outer (loop);
3644 1309242 : if (hotter_loop
3645 1309242 : && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3646 : hotter_loop))
3647 2153 : hotter_than_inner_loop[loop->num] = hotter_loop;
3648 :
3649 1309242 : if (outer_loop && outer_loop != current_loops->tree_root
3650 1607489 : && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3651 : outer_loop))
3652 33422 : hotter_than_inner_loop[loop->num] = outer_loop;
3653 :
3654 1309242 : if (dump_enabled_p ())
3655 : {
3656 1469 : dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ",
3657 : loop->num, coldest_loop->num);
3658 1469 : if (hotter_than_inner_loop[loop->num])
3659 245 : dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n",
3660 245 : hotter_than_inner_loop[loop->num]->num);
3661 : else
3662 1224 : dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n");
3663 : }
3664 :
3665 1309242 : class loop *inner_loop;
3666 1607489 : for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next)
3667 596494 : fill_coldest_and_hotter_out_loop (coldest_loop,
3668 298247 : hotter_than_inner_loop[loop->num],
3669 : inner_loop);
3670 1309242 : }
3671 :
3672 : /* Compute the global information needed by the loop invariant motion pass. */
3673 :
3674 : static void
3675 483330 : tree_ssa_lim_initialize (bool store_motion)
3676 : {
3677 483330 : unsigned i;
3678 :
3679 483330 : bitmap_obstack_initialize (&lim_bitmap_obstack);
3680 483330 : gcc_obstack_init (&mem_ref_obstack);
3681 483330 : lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
3682 :
3683 483330 : if (flag_tm)
3684 104 : compute_transaction_bits ();
3685 :
3686 483330 : memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
3687 483330 : memory_accesses.refs_list.create (100);
3688 : /* Allocate a special, unanalyzable mem-ref with ID zero. */
3689 483330 : memory_accesses.refs_list.quick_push
3690 483330 : (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3691 :
3692 966660 : memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
3693 966660 : memory_accesses.refs_loaded_in_loop.quick_grow_cleared (number_of_loops (cfun));
3694 966660 : memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
3695 966660 : memory_accesses.refs_stored_in_loop.quick_grow_cleared (number_of_loops (cfun));
3696 483330 : if (store_motion)
3697 : {
3698 966438 : memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
3699 483219 : memory_accesses.all_refs_stored_in_loop.quick_grow_cleared
3700 966438 : (number_of_loops (cfun));
3701 : }
3702 :
3703 5999494 : for (i = 0; i < number_of_loops (cfun); i++)
3704 : {
3705 2516417 : bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
3706 : &lim_bitmap_obstack);
3707 2516417 : bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
3708 : &lim_bitmap_obstack);
3709 2516417 : if (store_motion)
3710 2515316 : bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
3711 : &lim_bitmap_obstack);
3712 : }
3713 :
3714 483330 : memory_accesses.ttae_cache = NULL;
3715 :
3716 : /* Initialize bb_loop_postorder with a mapping from loop->num to
3717 : its postorder index. */
3718 483330 : i = 0;
3719 966660 : bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
3720 2759232 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
3721 1309242 : bb_loop_postorder[loop->num] = i++;
3722 483330 : }
3723 :
3724 : /* Cleans up after the invariant motion pass. */
3725 :
3726 : static void
3727 483330 : tree_ssa_lim_finalize (void)
3728 : {
3729 483330 : basic_block bb;
3730 483330 : unsigned i;
3731 483330 : im_mem_ref *ref;
3732 :
3733 14646851 : FOR_EACH_BB_FN (bb, cfun)
3734 14163521 : SET_ALWAYS_EXECUTED_IN (bb, NULL);
3735 :
3736 483330 : bitmap_obstack_release (&lim_bitmap_obstack);
3737 966660 : delete lim_aux_data_map;
3738 :
3739 483330 : delete memory_accesses.refs;
3740 483330 : memory_accesses.refs = NULL;
3741 :
3742 5680331 : FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
3743 5197001 : memref_free (ref);
3744 483330 : memory_accesses.refs_list.release ();
3745 483330 : obstack_free (&mem_ref_obstack, NULL);
3746 :
3747 483330 : memory_accesses.refs_loaded_in_loop.release ();
3748 483330 : memory_accesses.refs_stored_in_loop.release ();
3749 483330 : memory_accesses.all_refs_stored_in_loop.release ();
3750 :
3751 483330 : if (memory_accesses.ttae_cache)
3752 4675 : free_affine_expand_cache (&memory_accesses.ttae_cache);
3753 :
3754 483330 : free (bb_loop_postorder);
3755 :
3756 483330 : coldest_outermost_loop.release ();
3757 483330 : hotter_than_inner_loop.release ();
3758 483330 : }
3759 :
3760 : /* Moves invariants from loops. Only "expensive" invariants are moved out --
3761 : i.e. those that are likely to be win regardless of the register pressure.
3762 : Only perform store motion if STORE_MOTION is true. */
3763 :
3764 : unsigned int
3765 483330 : loop_invariant_motion_in_fun (function *fun, bool store_motion)
3766 : {
3767 483330 : unsigned int todo = 0;
3768 :
3769 483330 : tree_ssa_lim_initialize (store_motion);
3770 :
3771 483330 : mark_ssa_maybe_undefs ();
3772 :
3773 : /* Gathers information about memory accesses in the loops. */
3774 483330 : analyze_memory_references (store_motion);
3775 :
3776 : /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
3777 483330 : fill_always_executed_in ();
3778 :
3779 : /* Pre-compute coldest outermost loop and nearest hotter loop of each loop.
3780 : */
3781 483330 : class loop *loop;
3782 966660 : coldest_outermost_loop.create (number_of_loops (cfun));
3783 966660 : coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
3784 966660 : hotter_than_inner_loop.create (number_of_loops (cfun));
3785 966660 : hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
3786 1494325 : for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3787 1010995 : fill_coldest_and_hotter_out_loop (loop, NULL, loop);
3788 :
3789 483330 : int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3790 483330 : int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3791 :
3792 : /* For each statement determine the outermost loop in that it is
3793 : invariant and cost for computing the invariant. */
3794 14601127 : for (int i = 0; i < n; ++i)
3795 14117797 : compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3796 :
3797 : /* Execute store motion. Force the necessary invariants to be moved
3798 : out of the loops as well. */
3799 483330 : if (store_motion)
3800 483219 : do_store_motion ();
3801 :
3802 483330 : free (rpo);
3803 483330 : rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3804 483330 : n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3805 :
3806 : /* Move the expressions that are expensive enough. */
3807 14646851 : for (int i = 0; i < n; ++i)
3808 14163521 : todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3809 :
3810 483330 : free (rpo);
3811 :
3812 483330 : gsi_commit_edge_inserts ();
3813 483330 : if (need_ssa_update_p (fun))
3814 14969 : rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3815 :
3816 483330 : tree_ssa_lim_finalize ();
3817 :
3818 483330 : return todo;
3819 : }
3820 :
3821 : /* Loop invariant motion pass. */
3822 :
3823 : namespace {
3824 :
3825 : const pass_data pass_data_lim =
3826 : {
3827 : GIMPLE_PASS, /* type */
3828 : "lim", /* name */
3829 : OPTGROUP_LOOP, /* optinfo_flags */
3830 : TV_LIM, /* tv_id */
3831 : PROP_cfg, /* properties_required */
3832 : 0, /* properties_provided */
3833 : 0, /* properties_destroyed */
3834 : 0, /* todo_flags_start */
3835 : 0, /* todo_flags_finish */
3836 : };
3837 :
3838 : class pass_lim : public gimple_opt_pass
3839 : {
3840 : public:
3841 1195312 : pass_lim (gcc::context *ctxt)
3842 2390624 : : gimple_opt_pass (pass_data_lim, ctxt)
3843 : {}
3844 :
3845 : /* opt_pass methods: */
3846 896484 : opt_pass * clone () final override { return new pass_lim (m_ctxt); }
3847 1282393 : bool gate (function *) final override { return flag_tree_loop_im != 0; }
3848 : unsigned int execute (function *) final override;
3849 :
3850 : }; // class pass_lim
3851 :
3852 : unsigned int
3853 1282055 : pass_lim::execute (function *fun)
3854 : {
3855 1282055 : in_loop_pipeline = scev_initialized_p ();
3856 1282055 : if (!in_loop_pipeline)
3857 1040654 : loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3858 :
3859 2564110 : if (number_of_loops (fun) <= 1)
3860 : return 0;
3861 483235 : unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
3862 :
3863 483235 : if (!in_loop_pipeline)
3864 241834 : loop_optimizer_finalize ();
3865 : else
3866 241401 : scev_reset ();
3867 : return todo;
3868 : }
3869 :
3870 : } // anon namespace
3871 :
3872 : gimple_opt_pass *
3873 298828 : make_pass_lim (gcc::context *ctxt)
3874 : {
3875 298828 : return new pass_lim (ctxt);
3876 : }
3877 :
|