Line data Source code
1 : /* Loop invariant motion.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "tree.h"
25 : #include "gimple.h"
26 : #include "cfghooks.h"
27 : #include "tree-pass.h"
28 : #include "ssa.h"
29 : #include "gimple-pretty-print.h"
30 : #include "fold-const.h"
31 : #include "cfganal.h"
32 : #include "tree-eh.h"
33 : #include "gimple-iterator.h"
34 : #include "tree-cfg.h"
35 : #include "tree-ssa-loop-manip.h"
36 : #include "tree-ssa-loop.h"
37 : #include "tree-into-ssa.h"
38 : #include "cfgloop.h"
39 : #include "tree-affine.h"
40 : #include "tree-ssa-propagate.h"
41 : #include "trans-mem.h"
42 : #include "gimple-fold.h"
43 : #include "tree-scalar-evolution.h"
44 : #include "tree-ssa-loop-niter.h"
45 : #include "alias.h"
46 : #include "builtins.h"
47 : #include "tree-dfa.h"
48 : #include "tree-ssa.h"
49 : #include "dbgcnt.h"
50 : #include "insn-codes.h"
51 : #include "optabs-tree.h"
52 :
53 : /* TODO: Support for predicated code motion. I.e.
54 :
55 : while (1)
56 : {
57 : if (cond)
58 : {
59 : a = inv;
60 : something;
61 : }
62 : }
63 :
64 : Where COND and INV are invariants, but evaluating INV may trap or be
65 : invalid from some other reason if !COND. This may be transformed to
66 :
67 : if (cond)
68 : a = inv;
69 : while (1)
70 : {
71 : if (cond)
72 : something;
73 : } */
74 :
75 : /* The auxiliary data kept for each statement. */
76 :
77 : struct lim_aux_data
78 : {
79 : class loop *max_loop; /* The outermost loop in that the statement
80 : is invariant. */
81 :
82 : class loop *tgt_loop; /* The loop out of that we want to move the
83 : invariant. */
84 :
85 : class loop *always_executed_in;
86 : /* The outermost loop for that we are sure
87 : the statement is executed if the loop
88 : is entered. */
89 :
90 : unsigned cost; /* Cost of the computation performed by the
91 : statement. */
92 :
93 : unsigned ref; /* The simple_mem_ref in this stmt or 0. */
94 :
95 : vec<gimple *> depends; /* Vector of statements that must be also
96 : hoisted out of the loop when this statement
97 : is hoisted; i.e. those that define the
98 : operands of the statement and are inside of
99 : the MAX_LOOP loop. */
100 : };
101 :
102 : /* Maps statements to their lim_aux_data. */
103 :
104 : static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
105 :
106 : /* Description of a memory reference location. */
107 :
108 : struct mem_ref_loc
109 : {
110 : tree *ref; /* The reference itself. */
111 : gimple *stmt; /* The statement in that it occurs. */
112 : };
113 :
114 :
115 : /* Description of a memory reference. */
116 :
117 : class im_mem_ref
118 : {
119 : public:
120 : unsigned id : 30; /* ID assigned to the memory reference
121 : (its index in memory_accesses.refs_list) */
122 : unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */
123 : unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */
124 : hashval_t hash; /* Its hash value. */
125 :
126 : /* The memory access itself and associated caching of alias-oracle
127 : query meta-data. We are using mem.ref == error_mark_node for the
128 : case the reference is represented by its single access stmt
129 : in accesses_in_loop[0]. */
130 : ao_ref mem;
131 :
132 : bitmap stored; /* The set of loops in that this memory location
133 : is stored to. */
134 : bitmap loaded; /* The set of loops in that this memory location
135 : is loaded from. */
136 : vec<mem_ref_loc> accesses_in_loop;
137 : /* The locations of the accesses. */
138 :
139 : /* The following set is computed on demand. */
140 : bitmap_head dep_loop; /* The set of loops in that the memory
141 : reference is {in,}dependent in
142 : different modes. */
143 : };
144 :
145 : static bool in_loop_pipeline;
146 :
147 : /* We use six bits per loop in the ref->dep_loop bitmap to record
148 : the dep_kind x dep_state combinations. */
149 :
150 : enum dep_kind { lim_raw, sm_war, sm_waw };
151 : enum dep_state { dep_unknown, dep_independent, dep_dependent };
152 :
153 : /* coldest outermost loop for given loop. */
154 : vec<class loop *> coldest_outermost_loop;
155 : /* hotter outer loop nearest to given loop. */
156 : vec<class loop *> hotter_than_inner_loop;
157 :
158 : /* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */
159 :
160 : static void
161 2064512 : record_loop_dependence (class loop *loop, im_mem_ref *ref,
162 : dep_kind kind, dep_state state)
163 : {
164 2064512 : gcc_assert (state != dep_unknown);
165 2064512 : unsigned bit = 6 * loop->num + kind * 2 + state == dep_dependent ? 1 : 0;
166 2064512 : bitmap_set_bit (&ref->dep_loop, bit);
167 2064512 : }
168 :
169 : /* Query the loop dependence cache of REF for LOOP, KIND. */
170 :
171 : static dep_state
172 662016 : query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
173 : {
174 662016 : unsigned first_bit = 6 * loop->num + kind * 2;
175 662016 : if (bitmap_bit_p (&ref->dep_loop, first_bit))
176 : return dep_independent;
177 662016 : else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
178 0 : return dep_dependent;
179 : return dep_unknown;
180 : }
181 :
182 : /* Mem_ref hashtable helpers. */
183 :
184 : struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
185 : {
186 : typedef ao_ref *compare_type;
187 : static inline hashval_t hash (const im_mem_ref *);
188 : static inline bool equal (const im_mem_ref *, const ao_ref *);
189 : };
190 :
191 : /* A hash function for class im_mem_ref object OBJ. */
192 :
193 : inline hashval_t
194 10731821 : mem_ref_hasher::hash (const im_mem_ref *mem)
195 : {
196 10731821 : return mem->hash;
197 : }
198 :
199 : /* An equality function for class im_mem_ref object MEM1 with
200 : memory reference OBJ2. */
201 :
202 : inline bool
203 12488348 : mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
204 : {
205 12488348 : if (obj2->max_size_known_p ())
206 11013113 : return (mem1->ref_decomposed
207 10547045 : && ((TREE_CODE (mem1->mem.base) == MEM_REF
208 4606593 : && TREE_CODE (obj2->base) == MEM_REF
209 3059018 : && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
210 3059018 : TREE_OPERAND (obj2->base, 0), 0)
211 12863291 : && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
212 : mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
213 10120027 : || (operand_equal_p (mem1->mem.base, obj2->base, 0)
214 801686 : && known_eq (mem1->mem.offset, obj2->offset)))
215 885800 : && known_eq (mem1->mem.size, obj2->size)
216 883038 : && known_eq (mem1->mem.max_size, obj2->max_size)
217 883038 : && mem1->mem.volatile_p == obj2->volatile_p
218 883022 : && (mem1->mem.ref_alias_set == obj2->ref_alias_set
219 : /* We are not canonicalizing alias-sets but for the
220 : special-case we didn't canonicalize yet and the
221 : incoming ref is a alias-set zero MEM we pick
222 : the correct one already. */
223 14595 : || (!mem1->ref_canonical
224 13718 : && (TREE_CODE (obj2->ref) == MEM_REF
225 13718 : || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
226 6779 : && obj2->ref_alias_set == 0)
227 : /* Likewise if there's a canonical ref with alias-set zero. */
228 11940 : || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
229 11884223 : && types_compatible_p (TREE_TYPE (mem1->mem.ref),
230 871110 : TREE_TYPE (obj2->ref)));
231 : else
232 1475235 : return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
233 : }
234 :
235 :
236 : /* Description of memory accesses in loops. */
237 :
238 : static struct
239 : {
240 : /* The hash table of memory references accessed in loops. */
241 : hash_table<mem_ref_hasher> *refs;
242 :
243 : /* The list of memory references. */
244 : vec<im_mem_ref *> refs_list;
245 :
246 : /* The set of memory references accessed in each loop. */
247 : vec<bitmap_head> refs_loaded_in_loop;
248 :
249 : /* The set of memory references stored in each loop. */
250 : vec<bitmap_head> refs_stored_in_loop;
251 :
252 : /* The set of memory references stored in each loop, including subloops . */
253 : vec<bitmap_head> all_refs_stored_in_loop;
254 :
255 : /* Cache for expanding memory addresses. */
256 : hash_map<tree, name_expansion *> *ttae_cache;
257 : } memory_accesses;
258 :
259 : /* Obstack for the bitmaps in the above data structures. */
260 : static bitmap_obstack lim_bitmap_obstack;
261 : static obstack mem_ref_obstack;
262 :
263 : static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
264 : static bool ref_always_stored_p (class loop *, im_mem_ref *);
265 : static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
266 :
267 : /* Minimum cost of an expensive expression. */
268 : #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
269 :
270 : /* The outermost loop for which execution of the header guarantees that the
271 : block will be executed. */
272 : #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
273 : #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
274 :
275 : /* ID of the shared unanalyzable mem. */
276 : #define UNANALYZABLE_MEM_ID 0
277 :
278 : /* Whether the reference was analyzable. */
279 : #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
280 :
281 : static struct lim_aux_data *
282 17537367 : init_lim_data (gimple *stmt)
283 : {
284 17537367 : lim_aux_data *p = XCNEW (struct lim_aux_data);
285 17537367 : lim_aux_data_map->put (stmt, p);
286 :
287 17537367 : return p;
288 : }
289 :
290 : static struct lim_aux_data *
291 93834447 : get_lim_data (gimple *stmt)
292 : {
293 93834447 : lim_aux_data **p = lim_aux_data_map->get (stmt);
294 93834447 : if (!p)
295 : return NULL;
296 :
297 47670420 : return *p;
298 : }
299 :
300 : /* Releases the memory occupied by DATA. */
301 :
302 : static void
303 17537188 : free_lim_aux_data (struct lim_aux_data *data)
304 : {
305 0 : data->depends.release ();
306 17537188 : free (data);
307 0 : }
308 :
309 : static void
310 17537188 : clear_lim_data (gimple *stmt)
311 : {
312 17537188 : lim_aux_data **p = lim_aux_data_map->get (stmt);
313 17537188 : if (!p)
314 : return;
315 :
316 17537188 : free_lim_aux_data (*p);
317 17537188 : *p = NULL;
318 : }
319 :
320 :
321 : /* The possibilities of statement movement. */
322 : enum move_pos
323 : {
324 : MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
325 : MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
326 : become executed -- memory accesses, ... */
327 : MOVE_POSSIBLE /* Unlimited movement. */
328 : };
329 :
330 :
331 : /* If it is possible to hoist the statement STMT unconditionally,
332 : returns MOVE_POSSIBLE.
333 : If it is possible to hoist the statement STMT, but we must avoid making
334 : it executed if it would not be executed in the original program (e.g.
335 : because it may trap), return MOVE_PRESERVE_EXECUTION.
336 : Otherwise return MOVE_IMPOSSIBLE. */
337 :
338 : static enum move_pos
339 46560663 : movement_possibility_1 (gimple *stmt)
340 : {
341 46560663 : tree lhs;
342 46560663 : enum move_pos ret = MOVE_POSSIBLE;
343 :
344 46560663 : if (flag_unswitch_loops
345 46560663 : && gimple_code (stmt) == GIMPLE_COND)
346 : {
347 : /* If we perform unswitching, force the operands of the invariant
348 : condition to be moved out of the loop. */
349 : return MOVE_POSSIBLE;
350 : }
351 :
352 46211861 : if (gimple_code (stmt) == GIMPLE_PHI
353 2600781 : && gimple_phi_num_args (stmt) <= 2
354 4863636 : && !virtual_operand_p (gimple_phi_result (stmt))
355 47663408 : && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
356 : return MOVE_POSSIBLE;
357 :
358 44760571 : if (gimple_get_lhs (stmt) == NULL_TREE)
359 : return MOVE_IMPOSSIBLE;
360 :
361 31197787 : if (gimple_vdef (stmt))
362 : return MOVE_IMPOSSIBLE;
363 :
364 13190669 : if (stmt_ends_bb_p (stmt)
365 12013427 : || gimple_has_volatile_ops (stmt)
366 13136366 : || gimple_has_side_effects (stmt)
367 26321155 : || stmt_could_throw_p (cfun, stmt))
368 458133 : return MOVE_IMPOSSIBLE;
369 :
370 12732536 : if (is_gimple_call (stmt))
371 : {
372 : /* While pure or const call is guaranteed to have no side effects, we
373 : cannot move it arbitrarily. Consider code like
374 :
375 : char *s = something ();
376 :
377 : while (1)
378 : {
379 : if (s)
380 : t = strlen (s);
381 : else
382 : t = 0;
383 : }
384 :
385 : Here the strlen call cannot be moved out of the loop, even though
386 : s is invariant. In addition to possibly creating a call with
387 : invalid arguments, moving out a function call that is not executed
388 : may cause performance regressions in case the call is costly and
389 : not executed at all. */
390 129317 : ret = MOVE_PRESERVE_EXECUTION;
391 129317 : lhs = gimple_call_lhs (stmt);
392 : }
393 12603219 : else if (is_gimple_assign (stmt))
394 11453728 : lhs = gimple_assign_lhs (stmt);
395 : else
396 : return MOVE_IMPOSSIBLE;
397 :
398 11583045 : if (TREE_CODE (lhs) == SSA_NAME
399 11583045 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
400 : return MOVE_IMPOSSIBLE;
401 :
402 11582813 : if (TREE_CODE (lhs) != SSA_NAME
403 11582813 : || gimple_could_trap_p (stmt))
404 2390166 : return MOVE_PRESERVE_EXECUTION;
405 :
406 9192647 : if (is_gimple_assign (stmt))
407 : {
408 9070665 : auto code = gimple_assign_rhs_code (stmt);
409 9070665 : tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
410 : /* For shifts and rotates and possibly out-of-bound shift operands
411 : we currently cannot rewrite them into something unconditionally
412 : well-defined. */
413 9070665 : if ((code == LSHIFT_EXPR
414 : || code == RSHIFT_EXPR
415 : || code == LROTATE_EXPR
416 9070665 : || code == RROTATE_EXPR)
417 9070665 : && (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
418 : /* We cannot use ranges at 'stmt' here. */
419 119694 : || wi::ltu_p (wi::to_wide (gimple_assign_rhs2 (stmt)),
420 9029301 : element_precision (type))))
421 161058 : ret = MOVE_PRESERVE_EXECUTION;
422 : }
423 :
424 : /* Non local loads in a transaction cannot be hoisted out. Well,
425 : unless the load happens on every path out of the loop, but we
426 : don't take this into account yet. */
427 9192647 : if (flag_tm
428 434 : && gimple_in_transaction (stmt)
429 9192750 : && gimple_assign_single_p (stmt))
430 : {
431 21 : tree rhs = gimple_assign_rhs1 (stmt);
432 21 : if (DECL_P (rhs) && is_global_var (rhs))
433 : {
434 18 : if (dump_file)
435 : {
436 2 : fprintf (dump_file, "Cannot hoist conditional load of ");
437 2 : print_generic_expr (dump_file, rhs, TDF_SLIM);
438 2 : fprintf (dump_file, " because it is in a transaction.\n");
439 : }
440 18 : return MOVE_IMPOSSIBLE;
441 : }
442 : }
443 :
444 : return ret;
445 : }
446 :
447 : static enum move_pos
448 46560663 : movement_possibility (gimple *stmt)
449 : {
450 46560663 : enum move_pos pos = movement_possibility_1 (stmt);
451 46560663 : if (pos == MOVE_POSSIBLE)
452 : {
453 10709681 : use_operand_p use_p;
454 10709681 : ssa_op_iter ssa_iter;
455 34771077 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE)
456 13370143 : if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME
457 13370143 : && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p)))
458 18428 : return MOVE_PRESERVE_EXECUTION;
459 : }
460 : return pos;
461 : }
462 :
463 :
464 : /* Compare the profile count inequality of bb and loop's preheader, it is
465 : three-state as stated in profile-count.h, FALSE is returned if inequality
466 : cannot be decided. */
467 : bool
468 13864907 : bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
469 : {
470 13864907 : gcc_assert (bb && loop);
471 13864907 : return bb->count < loop_preheader_edge (loop)->src->count;
472 : }
473 :
474 : /* Check coldest loop between OUTERMOST_LOOP and LOOP by comparing profile
475 : count.
476 : It does three steps check:
477 : 1) Check whether CURR_BB is cold in it's own loop_father, if it is cold, just
478 : return NULL which means it should not be moved out at all;
479 : 2) CURR_BB is NOT cold, check if pre-computed COLDEST_LOOP is outside of
480 : OUTERMOST_LOOP, if it is inside of OUTERMOST_LOOP, return the COLDEST_LOOP;
481 : 3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
482 : hotter loop between OUTERMOST_LOOP and loop in pre-computed
483 : HOTTER_THAN_INNER_LOOP, return it's nested inner loop, otherwise return
484 : OUTERMOST_LOOP.
485 : At last, the coldest_loop is inside of OUTERMOST_LOOP, just return it as
486 : the hoist target. */
487 :
488 : static class loop *
489 12249299 : get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
490 : basic_block curr_bb)
491 : {
492 12249299 : gcc_assert (outermost_loop == loop
493 : || flow_loop_nested_p (outermost_loop, loop));
494 :
495 : /* If bb_colder_than_loop_preheader returns false due to three-state
496 : comparision, OUTERMOST_LOOP is returned finally to preserve the behavior.
497 : Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP. */
498 12249299 : if (curr_bb && bb_colder_than_loop_preheader (curr_bb, loop))
499 : return NULL;
500 :
501 11300364 : class loop *coldest_loop = coldest_outermost_loop[loop->num];
502 22600728 : if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
503 : {
504 246192 : class loop *hotter_loop = hotter_than_inner_loop[loop->num];
505 246192 : if (!hotter_loop
506 252864 : || loop_depth (hotter_loop) < loop_depth (outermost_loop))
507 : return outermost_loop;
508 :
509 : /* hotter_loop is between OUTERMOST_LOOP and LOOP like:
510 : [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
511 : hotter_loop, second_coldest_loop, ..., loop]
512 : return second_coldest_loop to be the hoist target. */
513 23 : class loop *aloop;
514 23 : for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
515 23 : if (aloop == loop || flow_loop_nested_p (aloop, loop))
516 23 : return aloop;
517 : }
518 : return coldest_loop;
519 : }
520 :
521 : /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
522 : loop to that we could move the expression using DEF if it did not have
523 : other operands, i.e. the outermost loop enclosing LOOP in that the value
524 : of DEF is invariant. */
525 :
526 : static class loop *
527 19657295 : outermost_invariant_loop (tree def, class loop *loop)
528 : {
529 19657295 : gimple *def_stmt;
530 19657295 : basic_block def_bb;
531 19657295 : class loop *max_loop;
532 19657295 : struct lim_aux_data *lim_data;
533 :
534 19657295 : if (!def)
535 945273 : return superloop_at_depth (loop, 1);
536 :
537 18712022 : if (TREE_CODE (def) != SSA_NAME)
538 : {
539 3921940 : gcc_assert (is_gimple_min_invariant (def));
540 3921940 : return superloop_at_depth (loop, 1);
541 : }
542 :
543 14790082 : def_stmt = SSA_NAME_DEF_STMT (def);
544 14790082 : def_bb = gimple_bb (def_stmt);
545 14790082 : if (!def_bb)
546 72777 : return superloop_at_depth (loop, 1);
547 :
548 14717305 : max_loop = find_common_loop (loop, def_bb->loop_father);
549 :
550 14717305 : lim_data = get_lim_data (def_stmt);
551 14717305 : if (lim_data != NULL && lim_data->max_loop != NULL)
552 690781 : max_loop = find_common_loop (max_loop,
553 : loop_outer (lim_data->max_loop));
554 14717305 : if (max_loop == loop)
555 : return NULL;
556 1843036 : max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
557 :
558 1843036 : return max_loop;
559 : }
560 :
561 : /* DATA is a structure containing information associated with a statement
562 : inside LOOP. DEF is one of the operands of this statement.
563 :
564 : Find the outermost loop enclosing LOOP in that value of DEF is invariant
565 : and record this in DATA->max_loop field. If DEF itself is defined inside
566 : this loop as well (i.e. we need to hoist it out of the loop if we want
567 : to hoist the statement represented by DATA), record the statement in that
568 : DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
569 : add the cost of the computation of DEF to the DATA->cost.
570 :
571 : If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
572 :
573 : static bool
574 11497716 : add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
575 : bool add_cost)
576 : {
577 11497716 : gimple *def_stmt = SSA_NAME_DEF_STMT (def);
578 11497716 : basic_block def_bb = gimple_bb (def_stmt);
579 11497716 : class loop *max_loop;
580 11497716 : struct lim_aux_data *def_data;
581 :
582 11497716 : if (!def_bb)
583 : return true;
584 :
585 11176543 : max_loop = outermost_invariant_loop (def, loop);
586 11176543 : if (!max_loop)
587 : return false;
588 :
589 1386958 : if (flow_loop_nested_p (data->max_loop, max_loop))
590 422800 : data->max_loop = max_loop;
591 :
592 1386958 : def_data = get_lim_data (def_stmt);
593 1386958 : if (!def_data)
594 : return true;
595 :
596 679043 : if (add_cost
597 : /* Only add the cost if the statement defining DEF is inside LOOP,
598 : i.e. if it is likely that by moving the invariants dependent
599 : on it, we will be able to avoid creating a new register for
600 : it (since it will be only used in these dependent invariants). */
601 658428 : && def_bb->loop_father == loop)
602 461306 : data->cost += def_data->cost;
603 :
604 679043 : data->depends.safe_push (def_stmt);
605 :
606 679043 : return true;
607 : }
608 :
609 : /* Returns an estimate for a cost of statement STMT. The values here
610 : are just ad-hoc constants, similar to costs for inlining. */
611 :
612 : static unsigned
613 770402 : stmt_cost (gimple *stmt)
614 : {
615 : /* Always try to create possibilities for unswitching. */
616 770402 : if (gimple_code (stmt) == GIMPLE_COND
617 770402 : || gimple_code (stmt) == GIMPLE_PHI)
618 12726 : return LIM_EXPENSIVE;
619 :
620 : /* We should be hoisting calls if possible. */
621 757676 : if (is_gimple_call (stmt))
622 : {
623 834 : tree fndecl;
624 :
625 : /* Unless the call is a builtin_constant_p; this always folds to a
626 : constant, so moving it is useless. */
627 834 : fndecl = gimple_call_fndecl (stmt);
628 834 : if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
629 : return 0;
630 :
631 834 : return LIM_EXPENSIVE;
632 : }
633 :
634 : /* Hoisting memory references out should almost surely be a win. */
635 756842 : if (gimple_references_memory_p (stmt))
636 66180 : return LIM_EXPENSIVE;
637 :
638 690662 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
639 : return 1;
640 :
641 690662 : enum tree_code code = gimple_assign_rhs_code (stmt);
642 690662 : switch (code)
643 : {
644 118593 : case MULT_EXPR:
645 118593 : case WIDEN_MULT_EXPR:
646 118593 : case WIDEN_MULT_PLUS_EXPR:
647 118593 : case WIDEN_MULT_MINUS_EXPR:
648 118593 : case DOT_PROD_EXPR:
649 118593 : case TRUNC_DIV_EXPR:
650 118593 : case CEIL_DIV_EXPR:
651 118593 : case FLOOR_DIV_EXPR:
652 118593 : case ROUND_DIV_EXPR:
653 118593 : case EXACT_DIV_EXPR:
654 118593 : case CEIL_MOD_EXPR:
655 118593 : case FLOOR_MOD_EXPR:
656 118593 : case ROUND_MOD_EXPR:
657 118593 : case TRUNC_MOD_EXPR:
658 118593 : case RDIV_EXPR:
659 : /* Division and multiplication are usually expensive. */
660 118593 : return LIM_EXPENSIVE;
661 :
662 4328 : case LSHIFT_EXPR:
663 4328 : case RSHIFT_EXPR:
664 4328 : case WIDEN_LSHIFT_EXPR:
665 4328 : case LROTATE_EXPR:
666 4328 : case RROTATE_EXPR:
667 : /* Shifts and rotates are usually expensive. */
668 4328 : return LIM_EXPENSIVE;
669 :
670 461 : case COND_EXPR:
671 461 : case VEC_COND_EXPR:
672 : /* Conditionals are expensive. */
673 461 : return LIM_EXPENSIVE;
674 :
675 1329 : case CONSTRUCTOR:
676 : /* Make vector construction cost proportional to the number
677 : of elements. */
678 1329 : return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
679 :
680 : case SSA_NAME:
681 : case PAREN_EXPR:
682 : /* Whether or not something is wrapped inside a PAREN_EXPR
683 : should not change move cost. Nor should an intermediate
684 : unpropagated SSA name copy. */
685 : return 0;
686 :
687 528349 : default:
688 : /* Comparisons are usually expensive. */
689 528349 : if (TREE_CODE_CLASS (code) == tcc_comparison)
690 7499 : return LIM_EXPENSIVE;
691 : return 1;
692 : }
693 : }
694 :
695 : /* Finds the outermost loop between OUTER and LOOP in that the memory reference
696 : REF is independent. If REF is not independent in LOOP, NULL is returned
697 : instead. */
698 :
699 : static class loop *
700 750914 : outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
701 : {
702 750914 : class loop *aloop;
703 :
704 750914 : if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
705 : return NULL;
706 :
707 84447 : for (aloop = outer;
708 689481 : aloop != loop;
709 84447 : aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
710 10034 : if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
711 100222 : && ref_indep_loop_p (aloop, ref, lim_raw))
712 : return aloop;
713 :
714 590947 : if (ref_indep_loop_p (loop, ref, lim_raw))
715 : return loop;
716 : else
717 : return NULL;
718 : }
719 :
720 : /* If there is a simple load or store to a memory reference in STMT, returns
721 : the location of the memory reference, and sets IS_STORE according to whether
722 : it is a store or load. Otherwise, returns NULL. */
723 :
724 : static tree *
725 6936931 : simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
726 : {
727 6936931 : tree *lhs, *rhs;
728 :
729 : /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
730 6936931 : if (!gimple_assign_single_p (stmt))
731 : return NULL;
732 :
733 5685216 : lhs = gimple_assign_lhs_ptr (stmt);
734 5685216 : rhs = gimple_assign_rhs1_ptr (stmt);
735 :
736 8876258 : if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
737 : {
738 3191042 : *is_store = false;
739 3191042 : return rhs;
740 : }
741 2494174 : else if (gimple_vdef (stmt)
742 2494174 : && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
743 : {
744 2048248 : *is_store = true;
745 2048248 : return lhs;
746 : }
747 : else
748 445926 : return NULL;
749 : }
750 :
751 : /* From a controlling predicate in DOM determine the arguments from
752 : the PHI node PHI that are chosen if the predicate evaluates to
753 : true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
754 : they are non-NULL. Returns true if the arguments can be determined,
755 : else return false. */
756 :
757 : static bool
758 11430 : extract_true_false_args_from_phi (basic_block dom, gphi *phi,
759 : tree *true_arg_p, tree *false_arg_p)
760 : {
761 11430 : edge te, fe;
762 11430 : if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
763 : &te, &fe))
764 : return false;
765 :
766 10541 : if (true_arg_p)
767 917 : *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
768 10541 : if (false_arg_p)
769 917 : *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
770 :
771 : return true;
772 : }
773 :
774 : /* Determine the outermost loop to that it is possible to hoist a statement
775 : STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
776 : the outermost loop in that the value computed by STMT is invariant.
777 : If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
778 : we preserve the fact whether STMT is executed. It also fills other related
779 : information to LIM_DATA (STMT).
780 :
781 : The function returns false if STMT cannot be hoisted outside of the loop it
782 : is defined in, and true otherwise. */
783 :
784 : static bool
785 12206045 : determine_max_movement (gimple *stmt, bool must_preserve_exec)
786 : {
787 12206045 : basic_block bb = gimple_bb (stmt);
788 12206045 : class loop *loop = bb->loop_father;
789 12206045 : class loop *level;
790 12206045 : struct lim_aux_data *lim_data = get_lim_data (stmt);
791 12206045 : tree val;
792 12206045 : ssa_op_iter iter;
793 :
794 12206045 : if (must_preserve_exec)
795 1509168 : level = ALWAYS_EXECUTED_IN (bb);
796 : else
797 10696877 : level = superloop_at_depth (loop, 1);
798 12206045 : lim_data->max_loop = get_coldest_out_loop (level, loop, bb);
799 12206045 : if (!lim_data->max_loop)
800 : return false;
801 :
802 11258849 : if (gphi *phi = dyn_cast <gphi *> (stmt))
803 : {
804 1400672 : use_operand_p use_p;
805 1400672 : unsigned min_cost = UINT_MAX;
806 1400672 : unsigned total_cost = 0;
807 1400672 : struct lim_aux_data *def_data;
808 :
809 : /* We will end up promoting dependencies to be unconditionally
810 : evaluated. For this reason the PHI cost (and thus the
811 : cost we remove from the loop by doing the invariant motion)
812 : is that of the cheapest PHI argument dependency chain. */
813 1607408 : FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
814 : {
815 1583779 : val = USE_FROM_PTR (use_p);
816 :
817 1583779 : if (TREE_CODE (val) != SSA_NAME)
818 : {
819 : /* Assign const 1 to constants. */
820 129051 : min_cost = MIN (min_cost, 1);
821 129051 : total_cost += 1;
822 129051 : continue;
823 : }
824 1454728 : if (!add_dependency (val, lim_data, loop, false))
825 : return false;
826 :
827 77685 : gimple *def_stmt = SSA_NAME_DEF_STMT (val);
828 77685 : if (gimple_bb (def_stmt)
829 77685 : && gimple_bb (def_stmt)->loop_father == loop)
830 : {
831 1412 : def_data = get_lim_data (def_stmt);
832 1412 : if (def_data)
833 : {
834 1412 : min_cost = MIN (min_cost, def_data->cost);
835 1412 : total_cost += def_data->cost;
836 : }
837 : }
838 : }
839 :
840 23629 : min_cost = MIN (min_cost, total_cost);
841 23629 : lim_data->cost += min_cost;
842 :
843 23629 : if (gimple_phi_num_args (phi) > 1)
844 : {
845 23455 : basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
846 23455 : gimple *cond;
847 46910 : if (gsi_end_p (gsi_last_bb (dom)))
848 : return false;
849 15458 : cond = gsi_stmt (gsi_last_bb (dom));
850 15458 : if (gimple_code (cond) != GIMPLE_COND)
851 : return false;
852 : /* Verify that this is an extended form of a diamond and
853 : the PHI arguments are completely controlled by the
854 : predicate in DOM. */
855 10513 : if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
856 : return false;
857 :
858 : /* Check if one of the depedent statement is a vector compare whether
859 : the target supports it, otherwise it's invalid to hoist it out of
860 : the gcond it belonged to. */
861 9624 : if (VECTOR_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond))))
862 : {
863 5 : tree type = TREE_TYPE (gimple_cond_lhs (cond));
864 5 : auto code = gimple_cond_code (cond);
865 5 : if (!target_supports_op_p (type, code, optab_vector))
866 : return false;
867 : }
868 :
869 : /* Fold in dependencies and cost of the condition. */
870 11327 : FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
871 : {
872 10294 : if (!add_dependency (val, lim_data, loop, false))
873 : return false;
874 1708 : def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
875 1708 : if (def_data)
876 922 : lim_data->cost += def_data->cost;
877 : }
878 :
879 : /* We want to avoid unconditionally executing very expensive
880 : operations. As costs for our dependencies cannot be
881 : negative just claim we are not invariand for this case.
882 : We also are not sure whether the control-flow inside the
883 : loop will vanish. */
884 1033 : if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
885 137 : && !(min_cost != 0
886 137 : && total_cost / min_cost <= 2))
887 : return false;
888 :
889 : /* Assume that the control-flow in the loop will vanish.
890 : ??? We should verify this and not artificially increase
891 : the cost if that is not the case. */
892 917 : lim_data->cost += stmt_cost (stmt);
893 : }
894 :
895 1091 : return true;
896 : }
897 :
898 : /* A stmt that receives abnormal edges cannot be hoisted. */
899 9858177 : if (is_a <gcall *> (stmt)
900 9858177 : && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE))
901 : return false;
902 :
903 11486888 : FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
904 10031670 : if (!add_dependency (val, lim_data, loop, true))
905 : return false;
906 :
907 2898627 : if (gimple_vuse (stmt))
908 : {
909 751938 : im_mem_ref *ref
910 751938 : = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
911 751938 : if (ref
912 751938 : && MEM_ANALYZABLE (ref))
913 : {
914 750914 : lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
915 : loop, ref);
916 750914 : if (!lim_data->max_loop)
917 : return false;
918 : }
919 1024 : else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
920 : return false;
921 : }
922 :
923 769485 : lim_data->cost += stmt_cost (stmt);
924 :
925 769485 : return true;
926 : }
927 :
928 : /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
929 : and that one of the operands of this statement is computed by STMT.
930 : Ensure that STMT (together with all the statements that define its
931 : operands) is hoisted at least out of the loop LEVEL. */
932 :
933 : static void
934 692131 : set_level (gimple *stmt, class loop *orig_loop, class loop *level)
935 : {
936 692131 : class loop *stmt_loop = gimple_bb (stmt)->loop_father;
937 692131 : struct lim_aux_data *lim_data;
938 692131 : gimple *dep_stmt;
939 692131 : unsigned i;
940 :
941 692131 : stmt_loop = find_common_loop (orig_loop, stmt_loop);
942 692131 : lim_data = get_lim_data (stmt);
943 692131 : if (lim_data != NULL && lim_data->tgt_loop != NULL)
944 171545 : stmt_loop = find_common_loop (stmt_loop,
945 : loop_outer (lim_data->tgt_loop));
946 692131 : if (flow_loop_nested_p (stmt_loop, level))
947 692131 : return;
948 :
949 459148 : gcc_assert (level == lim_data->max_loop
950 : || flow_loop_nested_p (lim_data->max_loop, level));
951 :
952 459148 : lim_data->tgt_loop = level;
953 792479 : FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
954 333331 : set_level (dep_stmt, orig_loop, level);
955 : }
956 :
957 : /* Determines an outermost loop from that we want to hoist the statement STMT.
958 : For now we chose the outermost possible loop. TODO -- use profiling
959 : information to set it more sanely. */
960 :
961 : static void
962 296205 : set_profitable_level (gimple *stmt)
963 : {
964 296205 : set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
965 296205 : }
966 :
967 : /* Returns true if STMT is a call that has side effects. */
968 :
969 : static bool
970 71374583 : nonpure_call_p (gimple *stmt)
971 : {
972 0 : if (gimple_code (stmt) != GIMPLE_CALL)
973 : return false;
974 :
975 2262512 : return gimple_has_side_effects (stmt);
976 : }
977 :
978 : /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
979 :
980 : static gimple *
981 35 : rewrite_reciprocal (gimple_stmt_iterator *bsi)
982 : {
983 35 : gassign *stmt, *stmt1, *stmt2;
984 35 : tree name, lhs, type;
985 35 : tree real_one;
986 35 : gimple_stmt_iterator gsi;
987 :
988 35 : stmt = as_a <gassign *> (gsi_stmt (*bsi));
989 35 : lhs = gimple_assign_lhs (stmt);
990 35 : type = TREE_TYPE (lhs);
991 :
992 35 : real_one = build_one_cst (type);
993 :
994 35 : name = make_temp_ssa_name (type, NULL, "reciptmp");
995 70 : stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
996 : gimple_assign_rhs2 (stmt));
997 35 : stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
998 : gimple_assign_rhs1 (stmt));
999 :
1000 : /* Replace division stmt with reciprocal and multiply stmts.
1001 : The multiply stmt is not invariant, so update iterator
1002 : and avoid rescanning. */
1003 35 : gsi = *bsi;
1004 35 : gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1005 35 : gsi_replace (&gsi, stmt2, true);
1006 :
1007 : /* Continue processing with invariant reciprocal statement. */
1008 35 : return stmt1;
1009 : }
1010 :
1011 : /* Check if the pattern at *BSI is a bittest of the form
1012 : (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
1013 :
1014 : static gimple *
1015 9013 : rewrite_bittest (gimple_stmt_iterator *bsi)
1016 : {
1017 9013 : gassign *stmt;
1018 9013 : gimple *stmt1;
1019 9013 : gassign *stmt2;
1020 9013 : gimple *use_stmt;
1021 9013 : gcond *cond_stmt;
1022 9013 : tree lhs, name, t, a, b;
1023 9013 : use_operand_p use;
1024 :
1025 9013 : stmt = as_a <gassign *> (gsi_stmt (*bsi));
1026 9013 : lhs = gimple_assign_lhs (stmt);
1027 :
1028 : /* Verify that the single use of lhs is a comparison against zero. */
1029 9013 : if (TREE_CODE (lhs) != SSA_NAME
1030 9013 : || !single_imm_use (lhs, &use, &use_stmt))
1031 455 : return stmt;
1032 13999 : cond_stmt = dyn_cast <gcond *> (use_stmt);
1033 5015 : if (!cond_stmt)
1034 : return stmt;
1035 5015 : if (gimple_cond_lhs (cond_stmt) != lhs
1036 4839 : || (gimple_cond_code (cond_stmt) != NE_EXPR
1037 2205 : && gimple_cond_code (cond_stmt) != EQ_EXPR)
1038 9854 : || !integer_zerop (gimple_cond_rhs (cond_stmt)))
1039 206 : return stmt;
1040 :
1041 : /* Get at the operands of the shift. The rhs is TMP1 & 1. */
1042 4809 : stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
1043 4809 : if (gimple_code (stmt1) != GIMPLE_ASSIGN)
1044 : return stmt;
1045 :
1046 : /* There is a conversion in between possibly inserted by fold. */
1047 4738 : if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
1048 : {
1049 256 : t = gimple_assign_rhs1 (stmt1);
1050 256 : if (TREE_CODE (t) != SSA_NAME
1051 256 : || !has_single_use (t))
1052 : return stmt;
1053 180 : stmt1 = SSA_NAME_DEF_STMT (t);
1054 180 : if (gimple_code (stmt1) != GIMPLE_ASSIGN)
1055 : return stmt;
1056 : }
1057 :
1058 : /* Verify that B is loop invariant but A is not. Verify that with
1059 : all the stmt walking we are still in the same loop. */
1060 4620 : if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
1061 10954 : || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
1062 : return stmt;
1063 :
1064 3167 : a = gimple_assign_rhs1 (stmt1);
1065 3167 : b = gimple_assign_rhs2 (stmt1);
1066 :
1067 3167 : if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
1068 3196 : && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
1069 : {
1070 29 : gimple_stmt_iterator rsi;
1071 :
1072 : /* 1 << B */
1073 29 : t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
1074 : build_int_cst (TREE_TYPE (a), 1), b);
1075 29 : name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1076 29 : stmt1 = gimple_build_assign (name, t);
1077 :
1078 : /* A & (1 << B) */
1079 29 : t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
1080 29 : name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1081 29 : stmt2 = gimple_build_assign (name, t);
1082 :
1083 : /* Replace the SSA_NAME we compare against zero. Adjust
1084 : the type of zero accordingly. */
1085 29 : SET_USE (use, name);
1086 29 : gimple_cond_set_rhs (cond_stmt,
1087 29 : build_int_cst_type (TREE_TYPE (name),
1088 : 0));
1089 :
1090 : /* Don't use gsi_replace here, none of the new assignments sets
1091 : the variable originally set in stmt. Move bsi to stmt1, and
1092 : then remove the original stmt, so that we get a chance to
1093 : retain debug info for it. */
1094 29 : rsi = *bsi;
1095 29 : gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1096 29 : gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1097 29 : gimple *to_release = gsi_stmt (rsi);
1098 29 : gsi_remove (&rsi, true);
1099 29 : release_defs (to_release);
1100 :
1101 29 : return stmt1;
1102 : }
1103 :
1104 : return stmt;
1105 : }
1106 :
1107 : /* Determine the outermost loops in that statements in basic block BB are
1108 : invariant, and record them to the LIM_DATA associated with the
1109 : statements. */
1110 :
1111 : static void
1112 14264413 : compute_invariantness (basic_block bb)
1113 : {
1114 14264413 : enum move_pos pos;
1115 14264413 : gimple_stmt_iterator bsi;
1116 14264413 : gimple *stmt;
1117 14264413 : bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1118 14264413 : class loop *outermost = ALWAYS_EXECUTED_IN (bb);
1119 14264413 : struct lim_aux_data *lim_data;
1120 :
1121 14264413 : if (!loop_outer (bb->loop_father))
1122 8236701 : return;
1123 :
1124 6027712 : if (dump_file && (dump_flags & TDF_DETAILS))
1125 435 : fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1126 435 : bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1127 :
1128 : /* Look at PHI nodes, but only if there is at most two.
1129 : ??? We could relax this further by post-processing the inserted
1130 : code and transforming adjacent cond-exprs with the same predicate
1131 : to control flow again. */
1132 6027712 : bsi = gsi_start_phis (bb);
1133 6027712 : if (!gsi_end_p (bsi)
1134 6027712 : && ((gsi_next (&bsi), gsi_end_p (bsi))
1135 1361729 : || (gsi_next (&bsi), gsi_end_p (bsi))))
1136 4318011 : for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1137 : {
1138 2600781 : stmt = gsi_stmt (bsi);
1139 :
1140 2600781 : pos = movement_possibility (stmt);
1141 2600781 : if (pos == MOVE_IMPOSSIBLE)
1142 1149491 : continue;
1143 :
1144 1451290 : lim_data = get_lim_data (stmt);
1145 1451290 : if (! lim_data)
1146 1451290 : lim_data = init_lim_data (stmt);
1147 1451290 : lim_data->always_executed_in = outermost;
1148 :
1149 1451290 : if (!determine_max_movement (stmt, false))
1150 : {
1151 1450199 : lim_data->max_loop = NULL;
1152 1450199 : continue;
1153 : }
1154 :
1155 1091 : if (dump_file && (dump_flags & TDF_DETAILS))
1156 : {
1157 2 : print_gimple_stmt (dump_file, stmt, 2);
1158 2 : fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1159 2 : loop_depth (lim_data->max_loop),
1160 : lim_data->cost);
1161 : }
1162 :
1163 1091 : if (lim_data->cost >= LIM_EXPENSIVE)
1164 920 : set_profitable_level (stmt);
1165 : }
1166 :
1167 56015306 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1168 : {
1169 43959882 : stmt = gsi_stmt (bsi);
1170 :
1171 43959882 : pos = movement_possibility (stmt);
1172 43959882 : if (pos == MOVE_IMPOSSIBLE)
1173 : {
1174 33209122 : if (nonpure_call_p (stmt))
1175 : {
1176 : maybe_never = true;
1177 : outermost = NULL;
1178 : }
1179 : /* Make sure to note always_executed_in for stores to make
1180 : store-motion work. */
1181 30851536 : else if (stmt_makes_single_store (stmt))
1182 : {
1183 2482720 : struct lim_aux_data *lim_data = get_lim_data (stmt);
1184 2482720 : if (! lim_data)
1185 0 : lim_data = init_lim_data (stmt);
1186 2482720 : lim_data->always_executed_in = outermost;
1187 : }
1188 32028285 : continue;
1189 32028285 : }
1190 :
1191 11931597 : if (is_gimple_assign (stmt)
1192 11931597 : && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1193 : == GIMPLE_BINARY_RHS))
1194 : {
1195 5749084 : tree op0 = gimple_assign_rhs1 (stmt);
1196 5749084 : tree op1 = gimple_assign_rhs2 (stmt);
1197 11498168 : class loop *ol1 = outermost_invariant_loop (op1,
1198 : loop_containing_stmt (stmt));
1199 :
1200 : /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1201 : to be hoisted out of loop, saving expensive divide. */
1202 5749084 : if (pos == MOVE_POSSIBLE
1203 5178373 : && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1204 940 : && flag_unsafe_math_optimizations
1205 380 : && !flag_trapping_math
1206 380 : && ol1 != NULL
1207 5749122 : && outermost_invariant_loop (op0, ol1) == NULL)
1208 35 : stmt = rewrite_reciprocal (&bsi);
1209 :
1210 : /* If the shift count is invariant, convert (A >> B) & 1 to
1211 : A & (1 << B) allowing the bit mask to be hoisted out of the loop
1212 : saving an expensive shift. */
1213 5749084 : if (pos == MOVE_POSSIBLE
1214 5178373 : && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1215 186330 : && integer_onep (op1)
1216 18450 : && TREE_CODE (op0) == SSA_NAME
1217 5767534 : && has_single_use (op0))
1218 9013 : stmt = rewrite_bittest (&bsi);
1219 : }
1220 :
1221 11931597 : lim_data = get_lim_data (stmt);
1222 11931597 : if (! lim_data)
1223 9106526 : lim_data = init_lim_data (stmt);
1224 11931597 : lim_data->always_executed_in = outermost;
1225 :
1226 11931597 : if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1227 1176842 : continue;
1228 :
1229 10754755 : if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1230 : {
1231 9985270 : lim_data->max_loop = NULL;
1232 9985270 : continue;
1233 : }
1234 :
1235 769485 : if (dump_file && (dump_flags & TDF_DETAILS))
1236 : {
1237 283 : print_gimple_stmt (dump_file, stmt, 2);
1238 283 : fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1239 283 : loop_depth (lim_data->max_loop),
1240 : lim_data->cost);
1241 : }
1242 :
1243 769485 : if (lim_data->cost >= LIM_EXPENSIVE)
1244 295285 : set_profitable_level (stmt);
1245 : /* When we run before PRE and PRE is active hoist all expressions
1246 : to the always executed loop since PRE would do so anyway
1247 : and we can preserve range info while PRE cannot. */
1248 474200 : else if (flag_tree_pre && !in_loop_pipeline
1249 101409 : && outermost)
1250 : {
1251 59681 : class loop *mloop = lim_data->max_loop;
1252 179043 : if (loop_depth (outermost) > loop_depth (mloop))
1253 : {
1254 8481 : mloop = outermost;
1255 8481 : if (dump_file && (dump_flags & TDF_DETAILS))
1256 1 : fprintf (dump_file, " constraining to loop depth %d\n\n\n",
1257 : loop_depth (mloop));
1258 : }
1259 59681 : set_level (stmt, bb->loop_father, mloop);
1260 : }
1261 : }
1262 : }
1263 :
1264 : /* Hoist the statements in basic block BB out of the loops prescribed by
1265 : data stored in LIM_DATA structures associated with each statement. Callback
1266 : for walk_dominator_tree. */
1267 :
1268 : unsigned int
1269 14309669 : move_computations_worker (basic_block bb)
1270 : {
1271 14309669 : class loop *level;
1272 14309669 : unsigned cost = 0;
1273 14309669 : struct lim_aux_data *lim_data;
1274 14309669 : unsigned int todo = 0;
1275 :
1276 14309669 : if (!loop_outer (bb->loop_father))
1277 : return todo;
1278 :
1279 10537119 : for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1280 : {
1281 4503197 : gassign *new_stmt;
1282 4503197 : gphi *stmt = bsi.phi ();
1283 :
1284 4503197 : lim_data = get_lim_data (stmt);
1285 4503197 : if (lim_data == NULL)
1286 : {
1287 3051907 : gsi_next (&bsi);
1288 3051907 : continue;
1289 : }
1290 :
1291 1451290 : cost = lim_data->cost;
1292 1451290 : level = lim_data->tgt_loop;
1293 1451290 : clear_lim_data (stmt);
1294 :
1295 1451290 : if (!level)
1296 : {
1297 1450368 : gsi_next (&bsi);
1298 1450368 : continue;
1299 : }
1300 :
1301 922 : if (dump_file && (dump_flags & TDF_DETAILS))
1302 : {
1303 2 : fprintf (dump_file, "Moving PHI node\n");
1304 2 : print_gimple_stmt (dump_file, stmt, 0);
1305 2 : fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1306 : cost, level->num);
1307 : }
1308 :
1309 922 : if (gimple_phi_num_args (stmt) == 1)
1310 : {
1311 5 : tree arg = PHI_ARG_DEF (stmt, 0);
1312 5 : new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1313 5 : TREE_CODE (arg), arg);
1314 : }
1315 : else
1316 : {
1317 917 : basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1318 917 : gimple *cond = gsi_stmt (gsi_last_bb (dom));
1319 917 : tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1320 : /* Get the PHI arguments corresponding to the true and false
1321 : edges of COND. */
1322 917 : extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1323 917 : gcc_assert (arg0 && arg1);
1324 : /* For `bool_val != 0`, reuse bool_val. */
1325 917 : if (gimple_cond_code (cond) == NE_EXPR
1326 536 : && integer_zerop (gimple_cond_rhs (cond))
1327 1379 : && types_compatible_p (TREE_TYPE (gimple_cond_lhs (cond)),
1328 : boolean_type_node))
1329 : {
1330 53 : t = gimple_cond_lhs (cond);
1331 : }
1332 : else
1333 : {
1334 864 : t = make_ssa_name (boolean_type_node);
1335 864 : new_stmt = gimple_build_assign (t, gimple_cond_code (cond),
1336 : gimple_cond_lhs (cond),
1337 : gimple_cond_rhs (cond));
1338 864 : gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1339 : }
1340 917 : new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1341 : COND_EXPR, t, arg0, arg1);
1342 917 : todo |= TODO_cleanup_cfg;
1343 : }
1344 922 : if (!ALWAYS_EXECUTED_IN (bb)
1345 922 : || (ALWAYS_EXECUTED_IN (bb) != level
1346 92 : && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
1347 427 : reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
1348 922 : gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1349 922 : remove_phi_node (&bsi, false);
1350 : }
1351 :
1352 56093041 : for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1353 : {
1354 44025197 : edge e;
1355 :
1356 44025197 : gimple *stmt = gsi_stmt (bsi);
1357 :
1358 44025197 : lim_data = get_lim_data (stmt);
1359 44025197 : if (lim_data == NULL)
1360 : {
1361 27939299 : gsi_next (&bsi);
1362 27939299 : continue;
1363 : }
1364 :
1365 16085898 : cost = lim_data->cost;
1366 16085898 : level = lim_data->tgt_loop;
1367 16085898 : clear_lim_data (stmt);
1368 :
1369 16085898 : if (!level)
1370 : {
1371 15588160 : gsi_next (&bsi);
1372 15588160 : continue;
1373 : }
1374 :
1375 : /* We do not really want to move conditionals out of the loop; we just
1376 : placed it here to force its operands to be moved if necessary. */
1377 497738 : if (gimple_code (stmt) == GIMPLE_COND)
1378 : {
1379 11809 : gsi_next (&bsi);
1380 11809 : continue;
1381 : }
1382 :
1383 485929 : if (dump_file && (dump_flags & TDF_DETAILS))
1384 : {
1385 353 : fprintf (dump_file, "Moving statement ");
1386 353 : print_gimple_stmt (dump_file, stmt, 0);
1387 353 : fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1388 : cost, level->num);
1389 : }
1390 :
1391 485929 : e = loop_preheader_edge (level);
1392 971858 : gcc_assert (!gimple_vdef (stmt));
1393 971858 : if (gimple_vuse (stmt))
1394 : {
1395 : /* The new VUSE is the one from the virtual PHI in the loop
1396 : header or the one already present. */
1397 84196 : gphi_iterator gsi2;
1398 84196 : for (gsi2 = gsi_start_phis (e->dest);
1399 196490 : !gsi_end_p (gsi2); gsi_next (&gsi2))
1400 : {
1401 178880 : gphi *phi = gsi2.phi ();
1402 357760 : if (virtual_operand_p (gimple_phi_result (phi)))
1403 : {
1404 66586 : SET_USE (gimple_vuse_op (stmt),
1405 : PHI_ARG_DEF_FROM_EDGE (phi, e));
1406 66586 : break;
1407 : }
1408 : }
1409 : }
1410 485929 : gsi_remove (&bsi, false);
1411 485929 : if (gimple_has_lhs (stmt)
1412 485929 : && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1413 444143 : && (!ALWAYS_EXECUTED_IN (bb)
1414 377558 : || !(ALWAYS_EXECUTED_IN (bb) == level
1415 97878 : || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1416 204277 : reset_flow_sensitive_info (gimple_get_lhs (stmt));
1417 : /* In case this is a stmt that is not unconditionally executed
1418 : when the target loop header is executed and the stmt may
1419 : invoke undefined integer or pointer overflow rewrite it to
1420 : unsigned arithmetic. */
1421 485929 : if (gimple_needing_rewrite_undefined (stmt)
1422 485929 : && (!ALWAYS_EXECUTED_IN (bb)
1423 81347 : || !(ALWAYS_EXECUTED_IN (bb) == level
1424 18950 : || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1425 20292 : gsi_insert_seq_on_edge (e, rewrite_to_defined_unconditional (stmt));
1426 : else
1427 465637 : gsi_insert_on_edge (e, stmt);
1428 : }
1429 :
1430 6033922 : return todo;
1431 : }
1432 :
1433 : /* Checks whether the statement defining variable *INDEX can be hoisted
1434 : out of the loop passed in DATA. Callback for for_each_index. */
1435 :
1436 : static bool
1437 1780276 : may_move_till (tree ref, tree *index, void *data)
1438 : {
1439 1780276 : class loop *loop = (class loop *) data, *max_loop;
1440 :
1441 : /* If REF is an array reference, check also that the step and the lower
1442 : bound is invariant in LOOP. */
1443 1780276 : if (TREE_CODE (ref) == ARRAY_REF)
1444 : {
1445 474079 : tree step = TREE_OPERAND (ref, 3);
1446 474079 : tree lbound = TREE_OPERAND (ref, 2);
1447 :
1448 474079 : max_loop = outermost_invariant_loop (step, loop);
1449 474079 : if (!max_loop)
1450 : return false;
1451 :
1452 474079 : max_loop = outermost_invariant_loop (lbound, loop);
1453 474079 : if (!max_loop)
1454 : return false;
1455 : }
1456 :
1457 1780276 : max_loop = outermost_invariant_loop (*index, loop);
1458 1780276 : if (!max_loop)
1459 : return false;
1460 :
1461 : return true;
1462 : }
1463 :
1464 : /* If OP is SSA NAME, force the statement that defines it to be
1465 : moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
1466 :
1467 : static void
1468 37965 : force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1469 : {
1470 37965 : gimple *stmt;
1471 :
1472 37965 : if (!op
1473 37965 : || is_gimple_min_invariant (op))
1474 33445 : return;
1475 :
1476 4520 : gcc_assert (TREE_CODE (op) == SSA_NAME);
1477 :
1478 4520 : stmt = SSA_NAME_DEF_STMT (op);
1479 4520 : if (gimple_nop_p (stmt))
1480 : return;
1481 :
1482 2914 : set_level (stmt, orig_loop, loop);
1483 : }
1484 :
1485 : /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1486 : the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1487 : for_each_index. */
1488 :
1489 : struct fmt_data
1490 : {
1491 : class loop *loop;
1492 : class loop *orig_loop;
1493 : };
1494 :
1495 : static bool
1496 20121 : force_move_till (tree ref, tree *index, void *data)
1497 : {
1498 20121 : struct fmt_data *fmt_data = (struct fmt_data *) data;
1499 :
1500 20121 : if (TREE_CODE (ref) == ARRAY_REF)
1501 : {
1502 8922 : tree step = TREE_OPERAND (ref, 3);
1503 8922 : tree lbound = TREE_OPERAND (ref, 2);
1504 :
1505 8922 : force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1506 8922 : force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1507 : }
1508 :
1509 20121 : force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1510 :
1511 20121 : return true;
1512 : }
1513 :
1514 : /* A function to free the mem_ref object OBJ. */
1515 :
1516 : static void
1517 5231176 : memref_free (class im_mem_ref *mem)
1518 : {
1519 0 : mem->accesses_in_loop.release ();
1520 0 : }
1521 :
1522 : /* Allocates and returns a memory reference description for MEM whose hash
1523 : value is HASH and id is ID. */
1524 :
1525 : static im_mem_ref *
1526 5231176 : mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1527 : {
1528 5231176 : im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1529 5231176 : if (mem)
1530 4300338 : ref->mem = *mem;
1531 : else
1532 930838 : ao_ref_init (&ref->mem, error_mark_node);
1533 5231176 : ref->id = id;
1534 5231176 : ref->ref_canonical = false;
1535 5231176 : ref->ref_decomposed = false;
1536 5231176 : ref->hash = hash;
1537 5231176 : ref->stored = NULL;
1538 5231176 : ref->loaded = NULL;
1539 5231176 : bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1540 5231176 : ref->accesses_in_loop.create (1);
1541 :
1542 5231176 : return ref;
1543 : }
1544 :
1545 : /* Records memory reference location *LOC in LOOP to the memory reference
1546 : description REF. The reference occurs in statement STMT. */
1547 :
1548 : static void
1549 5685216 : record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1550 : {
1551 5685216 : mem_ref_loc aref;
1552 5685216 : aref.stmt = stmt;
1553 5685216 : aref.ref = loc;
1554 0 : ref->accesses_in_loop.safe_push (aref);
1555 0 : }
1556 :
1557 : /* Set the LOOP bit in REF stored bitmap and allocate that if
1558 : necessary. Return whether a bit was changed. */
1559 :
1560 : static bool
1561 4399828 : set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1562 : {
1563 4399828 : if (!ref->stored)
1564 2497085 : ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1565 4399828 : return bitmap_set_bit (ref->stored, loop->num);
1566 : }
1567 :
1568 : /* Marks reference REF as stored in LOOP. */
1569 :
1570 : static void
1571 3664896 : mark_ref_stored (im_mem_ref *ref, class loop *loop)
1572 : {
1573 3664896 : while (loop != current_loops->tree_root
1574 7063252 : && set_ref_stored_in_loop (ref, loop))
1575 3398356 : loop = loop_outer (loop);
1576 3664896 : }
1577 :
1578 : /* Set the LOOP bit in REF loaded bitmap and allocate that if
1579 : necessary. Return whether a bit was changed. */
1580 :
1581 : static bool
1582 6143712 : set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1583 : {
1584 6143712 : if (!ref->loaded)
1585 3366980 : ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1586 6143712 : return bitmap_set_bit (ref->loaded, loop->num);
1587 : }
1588 :
1589 : /* Marks reference REF as loaded in LOOP. */
1590 :
1591 : static void
1592 4888683 : mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1593 : {
1594 4888683 : while (loop != current_loops->tree_root
1595 9733296 : && set_ref_loaded_in_loop (ref, loop))
1596 4844613 : loop = loop_outer (loop);
1597 4888683 : }
1598 :
1599 : /* Gathers memory references in statement STMT in LOOP, storing the
1600 : information about them in the memory_accesses structure. Marks
1601 : the vops accessed through unrecognized statements there as
1602 : well. */
1603 :
1604 : static void
1605 43959818 : gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1606 : {
1607 43959818 : tree *mem = NULL;
1608 43959818 : hashval_t hash;
1609 43959818 : im_mem_ref **slot;
1610 43959818 : im_mem_ref *ref;
1611 43959818 : bool is_stored;
1612 43959818 : unsigned id;
1613 :
1614 59677908 : if (!gimple_vuse (stmt))
1615 : return;
1616 :
1617 6936931 : mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1618 6936931 : if (!mem && is_gimple_assign (stmt))
1619 : {
1620 : /* For aggregate copies record distinct references but use them
1621 : only for disambiguation purposes. */
1622 445926 : id = memory_accesses.refs_list.length ();
1623 445926 : ref = mem_ref_alloc (NULL, 0, id);
1624 445926 : memory_accesses.refs_list.safe_push (ref);
1625 445926 : if (dump_file && (dump_flags & TDF_DETAILS))
1626 : {
1627 24 : fprintf (dump_file, "Unhandled memory reference %u: ", id);
1628 24 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1629 : }
1630 445926 : record_mem_ref_loc (ref, stmt, mem);
1631 891852 : is_stored = gimple_vdef (stmt);
1632 : }
1633 6491005 : else if (!mem)
1634 : {
1635 : /* We use the shared mem_ref for all unanalyzable refs. */
1636 1251715 : id = UNANALYZABLE_MEM_ID;
1637 1251715 : ref = memory_accesses.refs_list[id];
1638 1251715 : if (dump_file && (dump_flags & TDF_DETAILS))
1639 : {
1640 9 : fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1641 9 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1642 : }
1643 2503430 : is_stored = gimple_vdef (stmt);
1644 : }
1645 : else
1646 : {
1647 : /* We are looking for equal refs that might differ in structure
1648 : such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1649 : make sure we can canonicalize the ref in the hashtable if
1650 : non-operand_equal_p refs are found. For the lookup we mark
1651 : the case we want strict equality with aor.max_size == -1. */
1652 5239290 : ao_ref aor;
1653 5239290 : ao_ref_init (&aor, *mem);
1654 5239290 : ao_ref_base (&aor);
1655 5239290 : ao_ref_alias_set (&aor);
1656 5239290 : HOST_WIDE_INT offset, size, max_size;
1657 5239290 : poly_int64 saved_maxsize = aor.max_size, mem_off;
1658 5239290 : tree mem_base;
1659 5239290 : bool ref_decomposed;
1660 5239290 : if (aor.max_size_known_p ()
1661 5061523 : && aor.offset.is_constant (&offset)
1662 5061523 : && aor.size.is_constant (&size)
1663 5061523 : && aor.max_size.is_constant (&max_size)
1664 5061523 : && size == max_size
1665 4458731 : && (size % BITS_PER_UNIT) == 0
1666 : /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1667 : size. Make sure this is consistent with the extraction. */
1668 4448004 : && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1669 4448004 : && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1670 : aor.size)
1671 9687202 : && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1672 : {
1673 4446860 : ref_decomposed = true;
1674 4446860 : tree base = ao_ref_base (&aor);
1675 4446860 : poly_int64 moffset;
1676 4446860 : HOST_WIDE_INT mcoffset;
1677 4446860 : if (TREE_CODE (base) == MEM_REF
1678 2005324 : && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
1679 4446860 : && moffset.is_constant (&mcoffset))
1680 : {
1681 2005324 : hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
1682 2005324 : hash = iterative_hash_host_wide_int (mcoffset, hash);
1683 : }
1684 : else
1685 : {
1686 2441536 : hash = iterative_hash_expr (base, 0);
1687 2441536 : hash = iterative_hash_host_wide_int (offset, hash);
1688 : }
1689 4446860 : hash = iterative_hash_host_wide_int (size, hash);
1690 : }
1691 : else
1692 : {
1693 792430 : ref_decomposed = false;
1694 792430 : hash = iterative_hash_expr (aor.ref, 0);
1695 792430 : aor.max_size = -1;
1696 : }
1697 5239290 : slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1698 5239290 : aor.max_size = saved_maxsize;
1699 5239290 : if (*slot)
1700 : {
1701 938952 : if (!(*slot)->ref_canonical
1702 938952 : && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1703 : {
1704 : /* If we didn't yet canonicalize the hashtable ref (which
1705 : we'll end up using for code insertion) and hit a second
1706 : equal ref that is not structurally equivalent create
1707 : a canonical ref which is a bare MEM_REF. */
1708 50523 : if (TREE_CODE (*mem) == MEM_REF
1709 50523 : || TREE_CODE (*mem) == TARGET_MEM_REF)
1710 : {
1711 14987 : (*slot)->mem.ref = *mem;
1712 14987 : (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1713 : }
1714 : else
1715 : {
1716 35536 : tree ref_alias_type = reference_alias_ptr_type (*mem);
1717 35536 : unsigned int ref_align = get_object_alignment (*mem);
1718 35536 : tree ref_type = TREE_TYPE (*mem);
1719 35536 : tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1720 : unshare_expr (mem_base));
1721 35536 : if (TYPE_ALIGN (ref_type) != ref_align)
1722 9194 : ref_type = build_aligned_type (ref_type, ref_align);
1723 35536 : tree new_ref
1724 35536 : = fold_build2 (MEM_REF, ref_type, tmp,
1725 : build_int_cst (ref_alias_type, mem_off));
1726 35536 : if ((*slot)->mem.volatile_p)
1727 22 : TREE_THIS_VOLATILE (new_ref) = 1;
1728 35536 : (*slot)->mem.ref = new_ref;
1729 : /* Make sure the recorded base and offset are consistent
1730 : with the newly built ref. */
1731 35536 : if (TREE_CODE (TREE_OPERAND (new_ref, 0)) == ADDR_EXPR)
1732 : ;
1733 : else
1734 : {
1735 19736 : (*slot)->mem.base = new_ref;
1736 19736 : (*slot)->mem.offset = 0;
1737 : }
1738 35536 : gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1739 : && is_gimple_mem_ref_addr
1740 : (TREE_OPERAND ((*slot)->mem.ref,
1741 : 0)));
1742 35536 : (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1743 : }
1744 50523 : (*slot)->ref_canonical = true;
1745 : }
1746 938952 : ref = *slot;
1747 938952 : id = ref->id;
1748 : }
1749 : else
1750 : {
1751 4300338 : id = memory_accesses.refs_list.length ();
1752 4300338 : ref = mem_ref_alloc (&aor, hash, id);
1753 4300338 : ref->ref_decomposed = ref_decomposed;
1754 4300338 : memory_accesses.refs_list.safe_push (ref);
1755 4300338 : *slot = ref;
1756 :
1757 4300338 : if (dump_file && (dump_flags & TDF_DETAILS))
1758 : {
1759 499 : fprintf (dump_file, "Memory reference %u: ", id);
1760 499 : print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1761 499 : fprintf (dump_file, "\n");
1762 : }
1763 : }
1764 :
1765 5239290 : record_mem_ref_loc (ref, stmt, mem);
1766 : }
1767 6936931 : if (is_stored)
1768 : {
1769 3664896 : bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1770 3664896 : mark_ref_stored (ref, loop);
1771 : }
1772 : /* A not simple memory op is also a read when it is a write. */
1773 6936931 : if (!is_stored || id == UNANALYZABLE_MEM_ID
1774 2494174 : || ref->mem.ref == error_mark_node)
1775 : {
1776 4888683 : bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1777 4888683 : mark_ref_loaded (ref, loop);
1778 : }
1779 6936931 : init_lim_data (stmt)->ref = ref->id;
1780 6936931 : return;
1781 : }
1782 :
1783 : static unsigned *bb_loop_postorder;
1784 :
1785 : /* qsort sort function to sort blocks after their loop fathers postorder. */
1786 :
1787 : static int
1788 157158310 : sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1789 : void *bb_loop_postorder_)
1790 : {
1791 157158310 : unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1792 157158310 : basic_block bb1 = *(const basic_block *)bb1_;
1793 157158310 : basic_block bb2 = *(const basic_block *)bb2_;
1794 157158310 : class loop *loop1 = bb1->loop_father;
1795 157158310 : class loop *loop2 = bb2->loop_father;
1796 157158310 : if (loop1->num == loop2->num)
1797 73837195 : return bb1->index - bb2->index;
1798 83321115 : return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1799 : }
1800 :
1801 : /* qsort sort function to sort ref locs after their loop fathers postorder. */
1802 :
1803 : static int
1804 938937 : sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1805 : void *bb_loop_postorder_)
1806 : {
1807 938937 : unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1808 938937 : const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1809 938937 : const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1810 938937 : class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1811 938937 : class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1812 938937 : if (loop1->num == loop2->num)
1813 : return 0;
1814 140854 : return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1815 : }
1816 :
1817 : /* Gathers memory references in loops. */
1818 :
1819 : static void
1820 484912 : analyze_memory_references (bool store_motion)
1821 : {
1822 484912 : gimple_stmt_iterator bsi;
1823 484912 : basic_block bb, *bbs;
1824 484912 : class loop *outer;
1825 484912 : unsigned i, n;
1826 :
1827 : /* Collect all basic-blocks in loops and sort them after their
1828 : loops postorder. */
1829 484912 : i = 0;
1830 484912 : bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1831 14749325 : FOR_EACH_BB_FN (bb, cfun)
1832 14264413 : if (bb->loop_father != current_loops->tree_root)
1833 6027712 : bbs[i++] = bb;
1834 484912 : n = i;
1835 484912 : gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1836 : bb_loop_postorder);
1837 :
1838 : /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1839 : That results in better locality for all the bitmaps. It also
1840 : automatically sorts the location list of gathered memory references
1841 : after their loop postorder number allowing to binary-search it. */
1842 6512624 : for (i = 0; i < n; ++i)
1843 : {
1844 6027712 : basic_block bb = bbs[i];
1845 56015242 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1846 43959818 : gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1847 : }
1848 :
1849 : /* Verify the list of gathered memory references is sorted after their
1850 : loop postorder number. */
1851 484912 : if (flag_checking)
1852 : {
1853 : im_mem_ref *ref;
1854 10947192 : FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1855 6170083 : for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1856 938937 : gcc_assert (sort_locs_in_loop_postorder_cmp
1857 : (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1858 : bb_loop_postorder) <= 0);
1859 : }
1860 :
1861 484912 : free (bbs);
1862 :
1863 484912 : if (!store_motion)
1864 484912 : return;
1865 :
1866 : /* Propagate the information about accessed memory references up
1867 : the loop hierarchy. */
1868 2766767 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1869 : {
1870 : /* Finalize the overall touched references (including subloops). */
1871 1312364 : bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1872 1312364 : &memory_accesses.refs_stored_in_loop[loop->num]);
1873 :
1874 : /* Propagate the information about accessed memory references up
1875 : the loop hierarchy. */
1876 1312364 : outer = loop_outer (loop);
1877 1312364 : if (outer == current_loops->tree_root)
1878 1014128 : continue;
1879 :
1880 298236 : bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1881 298236 : &memory_accesses.all_refs_stored_in_loop[loop->num]);
1882 484801 : }
1883 : }
1884 :
1885 : /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1886 : tree_to_aff_combination_expand. */
1887 :
1888 : static bool
1889 541364 : mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1890 : hash_map<tree, name_expansion *> **ttae_cache,
1891 : bool tbaa_p)
1892 : {
1893 541364 : gcc_checking_assert (mem1->mem.ref != error_mark_node
1894 : && mem2->mem.ref != error_mark_node);
1895 :
1896 : /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1897 : object and their offset differ in such a way that the locations cannot
1898 : overlap, then they cannot alias. */
1899 : poly_widest_int size1, size2;
1900 1082728 : aff_tree off1, off2;
1901 :
1902 : /* Perform basic offset and type-based disambiguation. */
1903 541364 : if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1904 : return false;
1905 :
1906 : /* The expansion of addresses may be a bit expensive, thus we only do
1907 : the check at -O2 and higher optimization levels. */
1908 66953 : if (optimize < 2)
1909 : return true;
1910 :
1911 57272 : get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1912 57272 : get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1913 57272 : aff_combination_expand (&off1, ttae_cache);
1914 57272 : aff_combination_expand (&off2, ttae_cache);
1915 57272 : aff_combination_scale (&off1, -1);
1916 57272 : aff_combination_add (&off2, &off1);
1917 :
1918 57272 : if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1919 : return false;
1920 :
1921 : return true;
1922 541364 : }
1923 :
1924 : /* Compare function for bsearch searching for reference locations
1925 : in a loop. */
1926 :
1927 : static int
1928 259441 : find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1929 : void *bb_loop_postorder_)
1930 : {
1931 259441 : unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1932 259441 : class loop *loop = (class loop *)const_cast<void *>(loop_);
1933 259441 : mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1934 259441 : class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1935 259441 : if (loop->num == loc_loop->num
1936 259441 : || flow_loop_nested_p (loop, loc_loop))
1937 199948 : return 0;
1938 59493 : return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1939 59493 : ? -1 : 1);
1940 : }
1941 :
1942 : /* Iterates over all locations of REF in LOOP and its subloops calling
1943 : fn.operator() with the location as argument. When that operator
1944 : returns true the iteration is stopped and true is returned.
1945 : Otherwise false is returned. */
1946 :
1947 : template <typename FN>
1948 : static bool
1949 199948 : for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1950 : {
1951 : unsigned i;
1952 : mem_ref_loc *loc;
1953 :
1954 : /* Search for the cluster of locs in the accesses_in_loop vector
1955 : which is sorted after postorder index of the loop father. */
1956 199948 : loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1957 : bb_loop_postorder);
1958 199948 : if (!loc)
1959 0 : return false;
1960 :
1961 : /* We have found one location inside loop or its sub-loops. Iterate
1962 : both forward and backward to cover the whole cluster. */
1963 199948 : i = loc - ref->accesses_in_loop.address ();
1964 274155 : while (i > 0)
1965 : {
1966 132394 : --i;
1967 132394 : mem_ref_loc *l = &ref->accesses_in_loop[i];
1968 132394 : if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1969 : break;
1970 109288 : if (fn (l))
1971 : return true;
1972 : }
1973 445705 : for (i = loc - ref->accesses_in_loop.address ();
1974 280838 : i < ref->accesses_in_loop.length (); ++i)
1975 : {
1976 208700 : mem_ref_loc *l = &ref->accesses_in_loop[i];
1977 208700 : if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1978 : break;
1979 190353 : if (fn (l))
1980 : return true;
1981 : }
1982 :
1983 : return false;
1984 : }
1985 :
1986 : /* Rewrites location LOC by TMP_VAR. */
1987 :
1988 : class rewrite_mem_ref_loc
1989 : {
1990 : public:
1991 34908 : rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1992 : bool operator () (mem_ref_loc *loc);
1993 : tree tmp_var;
1994 : };
1995 :
1996 : bool
1997 55665 : rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1998 : {
1999 55665 : *loc->ref = tmp_var;
2000 55665 : update_stmt (loc->stmt);
2001 55665 : return false;
2002 : }
2003 :
2004 : /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
2005 :
2006 : static void
2007 34908 : rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
2008 : {
2009 0 : for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
2010 0 : }
2011 :
2012 : /* Stores the first reference location in LOCP. */
2013 :
2014 : class first_mem_ref_loc_1
2015 : {
2016 : public:
2017 34908 : first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
2018 : bool operator () (mem_ref_loc *loc);
2019 : mem_ref_loc **locp;
2020 : };
2021 :
2022 : bool
2023 34908 : first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
2024 : {
2025 34908 : *locp = loc;
2026 34908 : return true;
2027 : }
2028 :
2029 : /* Returns the first reference location to REF in LOOP. */
2030 :
2031 : static mem_ref_loc *
2032 34908 : first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
2033 : {
2034 34908 : mem_ref_loc *locp = NULL;
2035 0 : for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
2036 34908 : return locp;
2037 : }
2038 :
2039 : /* Helper function for execute_sm. Emit code to store TMP_VAR into
2040 : MEM along edge EX.
2041 :
2042 : The store is only done if MEM has changed. We do this so no
2043 : changes to MEM occur on code paths that did not originally store
2044 : into it.
2045 :
2046 : The common case for execute_sm will transform:
2047 :
2048 : for (...) {
2049 : if (foo)
2050 : stuff;
2051 : else
2052 : MEM = TMP_VAR;
2053 : }
2054 :
2055 : into:
2056 :
2057 : lsm = MEM;
2058 : for (...) {
2059 : if (foo)
2060 : stuff;
2061 : else
2062 : lsm = TMP_VAR;
2063 : }
2064 : MEM = lsm;
2065 :
2066 : This function will generate:
2067 :
2068 : lsm = MEM;
2069 :
2070 : lsm_flag = false;
2071 : ...
2072 : for (...) {
2073 : if (foo)
2074 : stuff;
2075 : else {
2076 : lsm = TMP_VAR;
2077 : lsm_flag = true;
2078 : }
2079 : }
2080 : if (lsm_flag) <--
2081 : MEM = lsm; <-- (X)
2082 :
2083 : In case MEM and TMP_VAR are NULL the function will return the then
2084 : block so the caller can insert (X) and other related stmts.
2085 : */
2086 :
2087 : static basic_block
2088 14495 : execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
2089 : edge preheader, hash_set <basic_block> *flag_bbs,
2090 : edge &append_cond_position, edge &last_cond_fallthru)
2091 : {
2092 14495 : basic_block new_bb, then_bb, old_dest;
2093 14495 : bool loop_has_only_one_exit;
2094 14495 : edge then_old_edge;
2095 14495 : gimple_stmt_iterator gsi;
2096 14495 : gimple *stmt;
2097 14495 : bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
2098 :
2099 14495 : profile_count count_sum = profile_count::zero ();
2100 14495 : int nbbs = 0, ncount = 0;
2101 14495 : profile_probability flag_probability = profile_probability::uninitialized ();
2102 :
2103 : /* Flag is set in FLAG_BBS. Determine probability that flag will be true
2104 : at loop exit.
2105 :
2106 : This code may look fancy, but it cannot update profile very realistically
2107 : because we do not know the probability that flag will be true at given
2108 : loop exit.
2109 :
2110 : We look for two interesting extremes
2111 : - when exit is dominated by block setting the flag, we know it will
2112 : always be true. This is a common case.
2113 : - when all blocks setting the flag have very low frequency we know
2114 : it will likely be false.
2115 : In all other cases we default to 2/3 for flag being true. */
2116 :
2117 14495 : for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
2118 50265 : it != flag_bbs->end (); ++it)
2119 : {
2120 17885 : if ((*it)->count.initialized_p ())
2121 17872 : count_sum += (*it)->count, ncount ++;
2122 17885 : if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
2123 2438 : flag_probability = profile_probability::always ();
2124 17885 : nbbs++;
2125 : }
2126 :
2127 14495 : profile_probability cap
2128 14495 : = profile_probability::guessed_always ().apply_scale (2, 3);
2129 :
2130 14495 : if (flag_probability.initialized_p ())
2131 : ;
2132 12158 : else if (ncount == nbbs
2133 24525 : && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
2134 : {
2135 184 : flag_probability = count_sum.probability_in (preheader->count ());
2136 184 : if (flag_probability > cap)
2137 71 : flag_probability = cap;
2138 : }
2139 :
2140 14495 : if (!flag_probability.initialized_p ())
2141 11974 : flag_probability = cap;
2142 :
2143 : /* ?? Insert store after previous store if applicable. See note
2144 : below. */
2145 14495 : if (append_cond_position)
2146 5496 : ex = append_cond_position;
2147 :
2148 14495 : loop_has_only_one_exit = single_pred_p (ex->dest);
2149 :
2150 14495 : if (loop_has_only_one_exit)
2151 5974 : ex = split_block_after_labels (ex->dest);
2152 : else
2153 : {
2154 8521 : for (gphi_iterator gpi = gsi_start_phis (ex->dest);
2155 12647 : !gsi_end_p (gpi); gsi_next (&gpi))
2156 : {
2157 4951 : gphi *phi = gpi.phi ();
2158 9902 : if (virtual_operand_p (gimple_phi_result (phi)))
2159 4126 : continue;
2160 :
2161 : /* When the destination has a non-virtual PHI node with multiple
2162 : predecessors make sure we preserve the PHI structure by
2163 : forcing a forwarder block so that hoisting of that PHI will
2164 : still work. */
2165 825 : split_edge (ex);
2166 825 : break;
2167 : }
2168 : }
2169 :
2170 14495 : old_dest = ex->dest;
2171 14495 : new_bb = split_edge (ex);
2172 14495 : if (append_cond_position)
2173 5496 : new_bb->count += last_cond_fallthru->count ();
2174 14495 : then_bb = create_empty_bb (new_bb);
2175 14495 : then_bb->count = new_bb->count.apply_probability (flag_probability);
2176 14495 : if (irr)
2177 49 : then_bb->flags = BB_IRREDUCIBLE_LOOP;
2178 14495 : add_bb_to_loop (then_bb, new_bb->loop_father);
2179 :
2180 14495 : gsi = gsi_start_bb (new_bb);
2181 14495 : stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2182 : NULL_TREE, NULL_TREE);
2183 14495 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2184 :
2185 : /* Insert actual store. */
2186 14495 : if (mem)
2187 : {
2188 12087 : gsi = gsi_start_bb (then_bb);
2189 12087 : stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2190 12087 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2191 : }
2192 :
2193 14495 : edge e1 = single_succ_edge (new_bb);
2194 28941 : edge e2 = make_edge (new_bb, then_bb,
2195 : EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2196 14495 : e2->probability = flag_probability;
2197 :
2198 14495 : e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2199 14495 : e1->flags &= ~EDGE_FALLTHRU;
2200 :
2201 14495 : e1->probability = flag_probability.invert ();
2202 :
2203 28941 : then_old_edge = make_single_succ_edge (then_bb, old_dest,
2204 : EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2205 :
2206 14495 : set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2207 :
2208 14495 : if (append_cond_position)
2209 : {
2210 5496 : basic_block prevbb = last_cond_fallthru->src;
2211 5496 : redirect_edge_succ (last_cond_fallthru, new_bb);
2212 5496 : set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2213 5496 : set_immediate_dominator (CDI_DOMINATORS, old_dest,
2214 : recompute_dominator (CDI_DOMINATORS, old_dest));
2215 : }
2216 :
2217 : /* ?? Because stores may alias, they must happen in the exact
2218 : sequence they originally happened. Save the position right after
2219 : the (_lsm) store we just created so we can continue appending after
2220 : it and maintain the original order. */
2221 14495 : append_cond_position = then_old_edge;
2222 14495 : last_cond_fallthru = find_edge (new_bb, old_dest);
2223 :
2224 14495 : if (!loop_has_only_one_exit)
2225 8521 : for (gphi_iterator gpi = gsi_start_phis (old_dest);
2226 12485 : !gsi_end_p (gpi); gsi_next (&gpi))
2227 : {
2228 3964 : gphi *phi = gpi.phi ();
2229 3964 : unsigned i;
2230 :
2231 22673 : for (i = 0; i < gimple_phi_num_args (phi); i++)
2232 18709 : if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2233 : {
2234 3964 : tree arg = gimple_phi_arg_def (phi, i);
2235 3964 : add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2236 3964 : update_stmt (phi);
2237 : }
2238 : }
2239 :
2240 14495 : return then_bb;
2241 : }
2242 :
2243 : /* When REF is set on the location, set flag indicating the store. */
2244 :
2245 : class sm_set_flag_if_changed
2246 : {
2247 : public:
2248 7712 : sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2249 7712 : : flag (flag_), bbs (bbs_) {}
2250 : bool operator () (mem_ref_loc *loc);
2251 : tree flag;
2252 : hash_set <basic_block> *bbs;
2253 : };
2254 :
2255 : bool
2256 14680 : sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2257 : {
2258 : /* Only set the flag for writes. */
2259 14680 : if (is_gimple_assign (loc->stmt)
2260 14680 : && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2261 : {
2262 8829 : gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2263 8829 : gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2264 8829 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2265 8829 : bbs->add (gimple_bb (stmt));
2266 : }
2267 14680 : return false;
2268 : }
2269 :
2270 : /* Helper function for execute_sm. On every location where REF is
2271 : set, set an appropriate flag indicating the store. */
2272 :
2273 : static tree
2274 7712 : execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2275 : hash_set <basic_block> *bbs)
2276 : {
2277 7712 : tree flag;
2278 7712 : char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2279 7712 : flag = create_tmp_reg (boolean_type_node, str);
2280 7712 : for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2281 7712 : return flag;
2282 : }
2283 :
2284 34908 : struct sm_aux
2285 : {
2286 : tree tmp_var;
2287 : tree store_flag;
2288 : hash_set <basic_block> flag_bbs;
2289 : };
2290 :
2291 : /* Executes store motion of memory reference REF from LOOP.
2292 : Exits from the LOOP are stored in EXITS. The initialization of the
2293 : temporary variable is put to the preheader of the loop, and assignments
2294 : to the reference from the temporary variable are emitted to exits. */
2295 :
2296 : static sm_aux *
2297 34908 : execute_sm (class loop *loop, im_mem_ref *ref,
2298 : hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
2299 : bool use_other_flag_var)
2300 : {
2301 34908 : gassign *load;
2302 34908 : struct fmt_data fmt_data;
2303 34908 : struct lim_aux_data *lim_data;
2304 34908 : bool multi_threaded_model_p = false;
2305 34908 : gimple_stmt_iterator gsi;
2306 34908 : sm_aux *aux = new sm_aux;
2307 :
2308 34908 : if (dump_file && (dump_flags & TDF_DETAILS))
2309 : {
2310 82 : fprintf (dump_file, "Executing store motion of ");
2311 82 : print_generic_expr (dump_file, ref->mem.ref);
2312 82 : fprintf (dump_file, " from loop %d\n", loop->num);
2313 : }
2314 :
2315 34908 : aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2316 34908 : get_lsm_tmp_name (ref->mem.ref, ~0));
2317 :
2318 34908 : fmt_data.loop = loop;
2319 34908 : fmt_data.orig_loop = loop;
2320 34908 : for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2321 :
2322 69816 : bool always_stored = ref_always_stored_p (loop, ref);
2323 34908 : if (maybe_mt
2324 11734 : && (bb_in_transaction (loop_preheader_edge (loop)->src)
2325 11733 : || (ref_can_have_store_data_races (ref->mem.ref) && ! always_stored)
2326 : /* Do not speculate a load/store when that's not a noop, either
2327 : because the mode cannot be transferred or because there's
2328 : UB involved for out-of-bound values. */
2329 3753 : || !mode_can_transfer_bits (TYPE_MODE (TREE_TYPE (ref->mem.ref)))
2330 3753 : || TREE_CODE (TREE_TYPE (ref->mem.ref)) == BOOLEAN_TYPE
2331 3739 : || (TREE_CODE (ref->mem.ref) == COMPONENT_REF
2332 879 : && DECL_BIT_FIELD (TREE_OPERAND (ref->mem.ref, 1)))))
2333 : multi_threaded_model_p = true;
2334 :
2335 34908 : if (multi_threaded_model_p && !use_other_flag_var)
2336 7712 : aux->store_flag
2337 7712 : = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2338 : else
2339 27196 : aux->store_flag = NULL_TREE;
2340 :
2341 : /* Remember variable setup. */
2342 34908 : aux_map.put (ref, aux);
2343 :
2344 34908 : rewrite_mem_refs (loop, ref, aux->tmp_var);
2345 :
2346 : /* Emit the load code on a random exit edge or into the latch if
2347 : the loop does not exit, so that we are sure it will be processed
2348 : by move_computations after all dependencies. */
2349 34908 : gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2350 :
2351 : /* Avoid doing a load if there was no load of the ref in the loop.
2352 : Esp. when the ref is not always stored we cannot optimize it
2353 : away later. But when it is not always stored we must use a conditional
2354 : store then. */
2355 34908 : if ((!always_stored && !multi_threaded_model_p)
2356 34908 : || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2357 18210 : load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2358 : else
2359 : {
2360 : /* If not emitting a load mark the uninitialized state on the
2361 : loop entry as not to be warned for. */
2362 16698 : tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2363 16698 : suppress_warning (uninit, OPT_Wuninitialized);
2364 16698 : uninit = get_or_create_ssa_default_def (cfun, uninit);
2365 16698 : load = gimple_build_assign (aux->tmp_var, uninit);
2366 : }
2367 34908 : lim_data = init_lim_data (load);
2368 34908 : lim_data->max_loop = loop;
2369 34908 : lim_data->tgt_loop = loop;
2370 34908 : gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2371 :
2372 34908 : if (aux->store_flag)
2373 : {
2374 7712 : load = gimple_build_assign (aux->store_flag, boolean_false_node);
2375 7712 : lim_data = init_lim_data (load);
2376 7712 : lim_data->max_loop = loop;
2377 7712 : lim_data->tgt_loop = loop;
2378 7712 : gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2379 : }
2380 :
2381 34908 : return aux;
2382 : }
2383 :
2384 : /* sm_ord is used for ordinary stores we can retain order with respect
2385 : to other stores
2386 : sm_unord is used for conditional executed stores which need to be
2387 : able to execute in arbitrary order with respect to other stores
2388 : sm_other is used for stores we do not try to apply store motion to. */
2389 : enum sm_kind { sm_ord, sm_unord, sm_other };
2390 : struct seq_entry
2391 : {
2392 : seq_entry () = default;
2393 52008 : seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2394 52008 : : first (f), second (k), from (fr) {}
2395 : unsigned first;
2396 : sm_kind second;
2397 : tree from;
2398 : };
2399 :
2400 : static void
2401 33046 : execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2402 : hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2403 : edge &append_cond_position, edge &last_cond_fallthru,
2404 : bitmap clobbers_to_prune)
2405 : {
2406 : /* Sink the stores to exit from the loop. */
2407 116255 : for (unsigned i = seq.length (); i > 0; --i)
2408 : {
2409 50163 : im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2410 50163 : if (seq[i-1].second == sm_other)
2411 : {
2412 4923 : gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2413 4923 : gassign *store;
2414 4923 : if (ref->mem.ref == error_mark_node)
2415 : {
2416 179 : tree lhs = gimple_assign_lhs (ref->accesses_in_loop[0].stmt);
2417 179 : if (dump_file && (dump_flags & TDF_DETAILS))
2418 : {
2419 3 : fprintf (dump_file, "Re-issueing dependent ");
2420 3 : print_generic_expr (dump_file, unshare_expr (seq[i-1].from));
2421 3 : fprintf (dump_file, " of ");
2422 3 : print_generic_expr (dump_file, lhs);
2423 3 : fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2424 3 : loop->num, ex->src->index, ex->dest->index);
2425 : }
2426 179 : store = gimple_build_assign (unshare_expr (lhs),
2427 179 : unshare_expr (seq[i-1].from));
2428 179 : bitmap_set_bit (clobbers_to_prune, seq[i-1].first);
2429 : }
2430 : else
2431 : {
2432 4744 : if (dump_file && (dump_flags & TDF_DETAILS))
2433 : {
2434 0 : fprintf (dump_file, "Re-issueing dependent store of ");
2435 0 : print_generic_expr (dump_file, ref->mem.ref);
2436 0 : fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2437 0 : loop->num, ex->src->index, ex->dest->index);
2438 : }
2439 4744 : store = gimple_build_assign (unshare_expr (ref->mem.ref),
2440 4744 : seq[i-1].from);
2441 : }
2442 4923 : gsi_insert_on_edge (ex, store);
2443 : }
2444 : else
2445 : {
2446 45240 : sm_aux *aux = *aux_map.get (ref);
2447 45240 : if (!aux->store_flag || kind == sm_ord)
2448 : {
2449 33153 : gassign *store;
2450 33153 : store = gimple_build_assign (unshare_expr (ref->mem.ref),
2451 : aux->tmp_var);
2452 33153 : gsi_insert_on_edge (ex, store);
2453 33153 : }
2454 : else
2455 12087 : execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2456 : aux->store_flag,
2457 : loop_preheader_edge (loop), &aux->flag_bbs,
2458 : append_cond_position, last_cond_fallthru);
2459 : }
2460 : }
2461 33046 : }
2462 :
2463 : /* Push the SM candidate at index PTR in the sequence SEQ down until
2464 : we hit the next SM candidate. Return true if that went OK and
2465 : false if we could not disambiguate agains another unrelated ref.
2466 : Update *AT to the index where the candidate now resides. */
2467 :
2468 : static bool
2469 37564 : sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2470 : {
2471 37564 : *at = ptr;
2472 38340 : for (; ptr > 0; --ptr)
2473 : {
2474 17686 : seq_entry &new_cand = seq[ptr];
2475 17686 : seq_entry &against = seq[ptr-1];
2476 17686 : if (against.second == sm_ord
2477 5800 : || (against.second == sm_other && against.from != NULL_TREE))
2478 : /* Found the tail of the sequence. */
2479 : break;
2480 : /* We may not ignore self-dependences here. */
2481 1077 : if (new_cand.first == against.first
2482 : /* ??? We could actually handle clobbers here, but not easily
2483 : with LIMs dependence analysis. */
2484 855 : || (memory_accesses.refs_list[new_cand.first]->mem.ref
2485 855 : == error_mark_node)
2486 850 : || (memory_accesses.refs_list[against.first]->mem.ref
2487 : == error_mark_node)
2488 1903 : || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2489 826 : memory_accesses.refs_list[against.first],
2490 : false))
2491 : /* ??? Prune new_cand from the list of refs to apply SM to. */
2492 301 : return false;
2493 776 : std::swap (new_cand, against);
2494 776 : *at = ptr - 1;
2495 : }
2496 : return true;
2497 : }
2498 :
2499 : /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2500 : walking backwards from VDEF (or the end of BB if VDEF is NULL). */
2501 :
2502 : static int
2503 35480 : sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2504 : vec<seq_entry> &seq, bitmap refs_not_in_seq,
2505 : bitmap refs_not_supported, bool forked,
2506 : bitmap fully_visited)
2507 : {
2508 35480 : if (!vdef)
2509 26382 : for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2510 113441 : gsi_prev (&gsi))
2511 : {
2512 139569 : vdef = gimple_vdef (gsi_stmt (gsi));
2513 52510 : if (vdef)
2514 : break;
2515 : }
2516 26382 : if (!vdef)
2517 : {
2518 8589 : gphi *vphi = get_virtual_phi (bb);
2519 8589 : if (vphi)
2520 4281 : vdef = gimple_phi_result (vphi);
2521 : }
2522 35480 : if (!vdef)
2523 : {
2524 4308 : if (single_pred_p (bb))
2525 : /* This handles the perfect nest case. */
2526 3516 : return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2527 : seq, refs_not_in_seq, refs_not_supported,
2528 3516 : forked, fully_visited);
2529 : return 0;
2530 : }
2531 55159 : do
2532 : {
2533 110318 : gimple *def = SSA_NAME_DEF_STMT (vdef);
2534 55159 : if (gimple_bb (def) != bb)
2535 : {
2536 : /* If we forked by processing a PHI do not allow our walk to
2537 : merge again until we handle that robustly. */
2538 3407 : if (forked)
2539 : {
2540 : /* Mark refs_not_in_seq as unsupported. */
2541 1694 : bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2542 1694 : return 1;
2543 : }
2544 : /* Otherwise it doesn't really matter if we end up in different
2545 : BBs. */
2546 : bb = gimple_bb (def);
2547 : }
2548 53465 : if (gphi *phi = dyn_cast <gphi *> (def))
2549 : {
2550 : /* Handle CFG merges. Until we handle forks (gimple_bb (def) != bb)
2551 : this is still linear.
2552 : Eventually we want to cache intermediate results per BB
2553 : (but we can't easily cache for different exits?). */
2554 : /* Stop at PHIs with possible backedges. */
2555 10486 : if (bb == bb->loop_father->header
2556 5366 : || bb->flags & BB_IRREDUCIBLE_LOOP)
2557 : {
2558 : /* Mark refs_not_in_seq as unsupported. */
2559 5125 : bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2560 5125 : return 1;
2561 : }
2562 5361 : if (gimple_phi_num_args (phi) == 1)
2563 1528 : return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2564 : gimple_phi_arg_def (phi, 0), seq,
2565 : refs_not_in_seq, refs_not_supported,
2566 1528 : false, fully_visited);
2567 7666 : if (bitmap_bit_p (fully_visited,
2568 3833 : SSA_NAME_VERSION (gimple_phi_result (phi))))
2569 : return 1;
2570 3729 : auto_vec<seq_entry> first_edge_seq;
2571 3729 : auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2572 3729 : int eret;
2573 3729 : bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2574 3729 : eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2575 : gimple_phi_arg_def (phi, 0),
2576 : first_edge_seq,
2577 : tem_refs_not_in_seq, refs_not_supported,
2578 : true, fully_visited);
2579 3729 : if (eret != 1)
2580 : return -1;
2581 : /* Simplify our lives by pruning the sequence of !sm_ord. */
2582 3514 : while (!first_edge_seq.is_empty ()
2583 8422 : && first_edge_seq.last ().second != sm_ord)
2584 1179 : first_edge_seq.pop ();
2585 7570 : for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2586 : {
2587 5828 : tree vuse = gimple_phi_arg_def (phi, i);
2588 5828 : edge e = gimple_phi_arg_edge (phi, i);
2589 5828 : auto_vec<seq_entry> edge_seq;
2590 5828 : bitmap_and_compl (tem_refs_not_in_seq,
2591 : refs_not_in_seq, refs_not_supported);
2592 : /* If we've marked all refs we search for as unsupported
2593 : we can stop processing and use the sequence as before
2594 : the PHI. */
2595 5828 : if (bitmap_empty_p (tem_refs_not_in_seq))
2596 : return 1;
2597 3841 : eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2598 : tem_refs_not_in_seq, refs_not_supported,
2599 : true, fully_visited);
2600 3841 : if (eret != 1)
2601 : return -1;
2602 : /* Simplify our lives by pruning the sequence of !sm_ord. */
2603 3355 : while (!edge_seq.is_empty ()
2604 8411 : && edge_seq.last ().second != sm_ord)
2605 1215 : edge_seq.pop ();
2606 7682 : unsigned min_len = MIN(first_edge_seq.length (),
2607 : edge_seq.length ());
2608 : /* Incrementally merge seqs into first_edge_seq. */
2609 3841 : int first_uneq = -1;
2610 3841 : auto_vec<seq_entry, 2> extra_refs;
2611 6032 : for (unsigned int i = 0; i < min_len; ++i)
2612 : {
2613 : /* ??? We can more intelligently merge when we face different
2614 : order by additional sinking operations in one sequence.
2615 : For now we simply mark them as to be processed by the
2616 : not order-preserving SM code. */
2617 2191 : if (first_edge_seq[i].first != edge_seq[i].first)
2618 : {
2619 35 : if (first_edge_seq[i].second == sm_ord)
2620 22 : bitmap_set_bit (refs_not_supported,
2621 22 : first_edge_seq[i].first);
2622 35 : if (edge_seq[i].second == sm_ord)
2623 24 : bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2624 35 : first_edge_seq[i].second = sm_other;
2625 35 : first_edge_seq[i].from = NULL_TREE;
2626 : /* Record the dropped refs for later processing. */
2627 35 : if (first_uneq == -1)
2628 33 : first_uneq = i;
2629 70 : extra_refs.safe_push (seq_entry (edge_seq[i].first,
2630 35 : sm_other, NULL_TREE));
2631 : }
2632 : /* sm_other prevails. */
2633 2156 : else if (first_edge_seq[i].second != edge_seq[i].second)
2634 : {
2635 : /* Make sure the ref is marked as not supported. */
2636 0 : bitmap_set_bit (refs_not_supported,
2637 0 : first_edge_seq[i].first);
2638 0 : first_edge_seq[i].second = sm_other;
2639 0 : first_edge_seq[i].from = NULL_TREE;
2640 : }
2641 2156 : else if (first_edge_seq[i].second == sm_other
2642 7 : && first_edge_seq[i].from != NULL_TREE
2643 2163 : && (edge_seq[i].from == NULL_TREE
2644 7 : || !operand_equal_p (first_edge_seq[i].from,
2645 7 : edge_seq[i].from, 0)))
2646 7 : first_edge_seq[i].from = NULL_TREE;
2647 : }
2648 : /* Any excess elements become sm_other since they are now
2649 : coonditionally executed. */
2650 10563 : if (first_edge_seq.length () > edge_seq.length ())
2651 : {
2652 6708 : for (unsigned i = edge_seq.length ();
2653 8720 : i < first_edge_seq.length (); ++i)
2654 : {
2655 6708 : if (first_edge_seq[i].second == sm_ord)
2656 3566 : bitmap_set_bit (refs_not_supported,
2657 3566 : first_edge_seq[i].first);
2658 6708 : first_edge_seq[i].second = sm_other;
2659 : }
2660 : }
2661 1829 : else if (edge_seq.length () > first_edge_seq.length ())
2662 : {
2663 8 : if (first_uneq == -1)
2664 0 : first_uneq = first_edge_seq.length ();
2665 8 : for (unsigned i = first_edge_seq.length ();
2666 26 : i < edge_seq.length (); ++i)
2667 : {
2668 18 : if (edge_seq[i].second == sm_ord)
2669 8 : bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2670 36 : extra_refs.safe_push (seq_entry (edge_seq[i].first,
2671 18 : sm_other, NULL_TREE));
2672 : }
2673 : }
2674 : /* Put unmerged refs at first_uneq to force dependence checking
2675 : on them. */
2676 3841 : if (first_uneq != -1)
2677 : {
2678 : /* Missing ordered_splice_at. */
2679 66 : if ((unsigned)first_uneq == first_edge_seq.length ())
2680 0 : first_edge_seq.safe_splice (extra_refs);
2681 : else
2682 : {
2683 33 : unsigned fes_length = first_edge_seq.length ();
2684 33 : first_edge_seq.safe_grow (fes_length
2685 33 : + extra_refs.length ());
2686 33 : memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
2687 33 : &first_edge_seq[first_uneq],
2688 33 : (fes_length - first_uneq) * sizeof (seq_entry));
2689 66 : memcpy (&first_edge_seq[first_uneq],
2690 33 : extra_refs.address (),
2691 33 : extra_refs.length () * sizeof (seq_entry));
2692 : }
2693 : }
2694 5828 : }
2695 : /* Use the sequence from the first edge and push SMs down. */
2696 4741 : for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2697 : {
2698 2999 : unsigned id = first_edge_seq[i].first;
2699 2999 : seq.safe_push (first_edge_seq[i]);
2700 2999 : unsigned new_idx;
2701 2999 : if ((first_edge_seq[i].second == sm_ord
2702 2356 : || (first_edge_seq[i].second == sm_other
2703 2356 : && first_edge_seq[i].from != NULL_TREE))
2704 4006 : && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2705 : {
2706 125 : if (first_edge_seq[i].second == sm_ord)
2707 0 : bitmap_set_bit (refs_not_supported, id);
2708 : /* Mark it sm_other. */
2709 125 : seq[new_idx].second = sm_other;
2710 125 : seq[new_idx].from = NULL_TREE;
2711 : }
2712 : }
2713 1742 : bitmap_set_bit (fully_visited,
2714 1742 : SSA_NAME_VERSION (gimple_phi_result (phi)));
2715 1742 : return 1;
2716 3729 : }
2717 42979 : lim_aux_data *data = get_lim_data (def);
2718 42979 : im_mem_ref *ref = memory_accesses.refs_list[data->ref];
2719 42979 : if (data->ref == UNANALYZABLE_MEM_ID)
2720 : return -1;
2721 : /* Stop at memory references which we can't move. */
2722 42979 : else if ((ref->mem.ref == error_mark_node
2723 : /* We can move end-of-storage/object down. */
2724 423 : && !gimple_clobber_p (ref->accesses_in_loop[0].stmt,
2725 : CLOBBER_STORAGE_END)
2726 291 : && !gimple_clobber_p (ref->accesses_in_loop[0].stmt,
2727 : CLOBBER_OBJECT_END))
2728 43295 : || TREE_THIS_VOLATILE (ref->mem.ref))
2729 : {
2730 : /* Mark refs_not_in_seq as unsupported. */
2731 139 : bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2732 139 : return 1;
2733 : }
2734 : /* One of the stores we want to apply SM to and we've not yet seen. */
2735 42840 : else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2736 : {
2737 33532 : seq.safe_push (seq_entry (data->ref, sm_ord));
2738 :
2739 : /* 1) push it down the queue until a SMed
2740 : and not ignored ref is reached, skipping all not SMed refs
2741 : and ignored refs via non-TBAA disambiguation. */
2742 33532 : unsigned new_idx;
2743 67064 : if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2744 : /* If that fails but we did not fork yet continue, we'll see
2745 : to re-materialize all of the stores in the sequence then.
2746 : Further stores will only be pushed up to this one. */
2747 33532 : && forked)
2748 : {
2749 0 : bitmap_set_bit (refs_not_supported, data->ref);
2750 : /* Mark it sm_other. */
2751 0 : seq[new_idx].second = sm_other;
2752 : }
2753 :
2754 : /* 2) check whether we've seen all refs we want to SM and if so
2755 : declare success for the active exit */
2756 33532 : if (bitmap_empty_p (refs_not_in_seq))
2757 18853 : return 1;
2758 : }
2759 : else
2760 : /* Another store not part of the final sequence. Simply push it. */
2761 9308 : seq.safe_push (seq_entry (data->ref, sm_other,
2762 9308 : gimple_assign_rhs1 (def)));
2763 :
2764 79146 : vdef = gimple_vuse (def);
2765 : }
2766 : while (1);
2767 : }
2768 :
2769 : /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2770 : edges of the LOOP. */
2771 :
2772 : static void
2773 21333 : hoist_memory_references (class loop *loop, bitmap mem_refs,
2774 : const vec<edge> &exits)
2775 : {
2776 21333 : im_mem_ref *ref;
2777 21333 : unsigned i;
2778 21333 : bitmap_iterator bi;
2779 :
2780 : /* There's a special case we can use ordered re-materialization for
2781 : conditionally excuted stores which is when all stores in the loop
2782 : happen in the same basic-block. In that case we know we'll reach
2783 : all stores and thus can simply process that BB and emit a single
2784 : conditional block of ordered materializations. See PR102436. */
2785 21333 : basic_block single_store_bb = NULL;
2786 31447 : EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
2787 : 0, i, bi)
2788 : {
2789 29247 : bool fail = false;
2790 29247 : ref = memory_accesses.refs_list[i];
2791 133950 : for (auto loc : ref->accesses_in_loop)
2792 143594 : if (!gimple_vdef (loc.stmt))
2793 : ;
2794 32043 : else if (!single_store_bb)
2795 : {
2796 21333 : single_store_bb = gimple_bb (loc.stmt);
2797 21333 : bool conditional = false;
2798 76672 : for (edge e : exits)
2799 22918 : if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
2800 : {
2801 : /* Conditional as seen from e. */
2802 : conditional = true;
2803 : break;
2804 : }
2805 21333 : if (!conditional)
2806 : {
2807 : fail = true;
2808 : break;
2809 : }
2810 : }
2811 10710 : else if (single_store_bb != gimple_bb (loc.stmt))
2812 : {
2813 : fail = true;
2814 : break;
2815 : }
2816 29247 : if (fail)
2817 : {
2818 : single_store_bb = NULL;
2819 : break;
2820 : }
2821 : }
2822 21333 : if (single_store_bb)
2823 : {
2824 : /* Analyze the single block with stores. */
2825 2200 : auto_bitmap fully_visited;
2826 2200 : auto_bitmap refs_not_supported;
2827 2200 : auto_bitmap refs_not_in_seq;
2828 2200 : auto_vec<seq_entry> seq;
2829 2200 : bitmap_copy (refs_not_in_seq, mem_refs);
2830 2200 : int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
2831 : seq, refs_not_in_seq, refs_not_supported,
2832 : false, fully_visited);
2833 2200 : if (res != 1)
2834 : {
2835 : /* Unhandled refs can still fail this. */
2836 0 : bitmap_clear (mem_refs);
2837 0 : return;
2838 : }
2839 :
2840 : /* We cannot handle sm_other since we neither remember the
2841 : stored location nor the value at the point we execute them. */
2842 5320 : for (unsigned i = 0; i < seq.length (); ++i)
2843 : {
2844 3120 : unsigned new_i;
2845 3120 : if (seq[i].second == sm_other
2846 3120 : && seq[i].from != NULL_TREE)
2847 438 : seq[i].from = NULL_TREE;
2848 2682 : else if ((seq[i].second == sm_ord
2849 0 : || (seq[i].second == sm_other
2850 0 : && seq[i].from != NULL_TREE))
2851 2682 : && !sm_seq_push_down (seq, i, &new_i))
2852 : {
2853 63 : bitmap_set_bit (refs_not_supported, seq[new_i].first);
2854 63 : seq[new_i].second = sm_other;
2855 63 : seq[new_i].from = NULL_TREE;
2856 : }
2857 : }
2858 2200 : bitmap_and_compl_into (mem_refs, refs_not_supported);
2859 2200 : if (bitmap_empty_p (mem_refs))
2860 : return;
2861 :
2862 : /* Prune seq. */
2863 2545 : while (seq.last ().second == sm_other
2864 2545 : && seq.last ().from == NULL_TREE)
2865 396 : seq.pop ();
2866 :
2867 2149 : hash_map<im_mem_ref *, sm_aux *> aux_map;
2868 :
2869 : /* Execute SM but delay the store materialization for ordered
2870 : sequences on exit. Remember a created flag var and make
2871 : sure to re-use it. */
2872 2149 : sm_aux *flag_var_aux = nullptr;
2873 4768 : EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2874 : {
2875 2619 : ref = memory_accesses.refs_list[i];
2876 2619 : sm_aux *aux = execute_sm (loop, ref, aux_map, true,
2877 : flag_var_aux != nullptr);
2878 2619 : if (aux->store_flag)
2879 1504 : flag_var_aux = aux;
2880 : }
2881 :
2882 : /* Materialize ordered store sequences on exits. */
2883 2149 : edge e;
2884 2149 : auto_bitmap clobbers_to_prune;
2885 5882 : FOR_EACH_VEC_ELT (exits, i, e)
2886 : {
2887 3733 : edge append_cond_position = NULL;
2888 3733 : edge last_cond_fallthru = NULL;
2889 3733 : edge insert_e = e;
2890 : /* Construct the single flag variable control flow and insert
2891 : the ordered seq of stores in the then block. With
2892 : -fstore-data-races we can do the stores unconditionally. */
2893 3733 : if (flag_var_aux)
2894 2408 : insert_e
2895 : = single_pred_edge
2896 2408 : (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
2897 : flag_var_aux->store_flag,
2898 : loop_preheader_edge (loop),
2899 : &flag_var_aux->flag_bbs,
2900 : append_cond_position,
2901 : last_cond_fallthru));
2902 3733 : execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
2903 : append_cond_position, last_cond_fallthru,
2904 : clobbers_to_prune);
2905 3733 : gsi_commit_one_edge_insert (insert_e, NULL);
2906 : }
2907 :
2908 : /* Remove clobbers inside the loop we re-materialized on exits. */
2909 2149 : EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
2910 : {
2911 0 : gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
2912 0 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2913 0 : unlink_stmt_vdef (stmt);
2914 0 : release_defs (stmt);
2915 0 : gimple_set_vdef (stmt, NULL_TREE);
2916 0 : gsi_remove (&gsi, true);
2917 : }
2918 :
2919 4768 : for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2920 7387 : iter != aux_map.end (); ++iter)
2921 5238 : delete (*iter).second;
2922 :
2923 2149 : return;
2924 4349 : }
2925 :
2926 : /* To address PR57359 before actually applying store-motion check
2927 : the candidates found for validity with regards to reordering
2928 : relative to other stores which we until here disambiguated using
2929 : TBAA which isn't valid.
2930 : What matters is the order of the last stores to the mem_refs
2931 : with respect to the other stores of the loop at the point of the
2932 : loop exits. */
2933 :
2934 : /* For each exit compute the store order, pruning from mem_refs
2935 : on the fly. */
2936 : /* The complexity of this is at least
2937 : O(number of exits * number of SM refs) but more approaching
2938 : O(number of exits * number of SM refs * number of stores). */
2939 : /* ??? Somehow do this in a single sweep over the loop body. */
2940 19133 : auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2941 19133 : auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2942 19133 : edge e;
2943 39007 : FOR_EACH_VEC_ELT (exits, i, e)
2944 : {
2945 22124 : vec<seq_entry> seq;
2946 22124 : seq.create (4);
2947 22124 : auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2948 22124 : bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2949 22124 : if (bitmap_empty_p (refs_not_in_seq))
2950 : {
2951 1458 : seq.release ();
2952 1458 : break;
2953 : }
2954 20666 : auto_bitmap fully_visited;
2955 20666 : int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
2956 : seq, refs_not_in_seq,
2957 : refs_not_supported, false,
2958 : fully_visited);
2959 20666 : if (res != 1)
2960 : {
2961 792 : bitmap_copy (refs_not_supported, mem_refs);
2962 792 : seq.release ();
2963 792 : break;
2964 : }
2965 19874 : sms.safe_push (std::make_pair (e, seq));
2966 22916 : }
2967 :
2968 : /* Prune pruned mem_refs from earlier processed exits. */
2969 19133 : bool changed = !bitmap_empty_p (refs_not_supported);
2970 19133 : while (changed)
2971 : {
2972 6073 : changed = false;
2973 6073 : std::pair<edge, vec<seq_entry> > *seq;
2974 39962 : FOR_EACH_VEC_ELT (sms, i, seq)
2975 : {
2976 : bool need_to_push = false;
2977 13659 : for (unsigned i = 0; i < seq->second.length (); ++i)
2978 : {
2979 6281 : sm_kind kind = seq->second[i].second;
2980 6281 : if (kind == sm_other && seq->second[i].from == NULL_TREE)
2981 : break;
2982 5205 : unsigned id = seq->second[i].first;
2983 5205 : unsigned new_idx;
2984 5205 : if (kind == sm_ord
2985 5205 : && bitmap_bit_p (refs_not_supported, id))
2986 : {
2987 2540 : seq->second[i].second = sm_other;
2988 2540 : gcc_assert (seq->second[i].from == NULL_TREE);
2989 : need_to_push = true;
2990 : }
2991 2665 : else if (need_to_push
2992 2665 : && !sm_seq_push_down (seq->second, i, &new_idx))
2993 : {
2994 : /* We need to push down both sm_ord and sm_other
2995 : but for the latter we need to disqualify all
2996 : following refs. */
2997 113 : if (kind == sm_ord)
2998 : {
2999 7 : if (bitmap_set_bit (refs_not_supported, id))
3000 7 : changed = true;
3001 7 : seq->second[new_idx].second = sm_other;
3002 : }
3003 : else
3004 : {
3005 411 : for (unsigned j = seq->second.length () - 1;
3006 305 : j > new_idx; --j)
3007 199 : if (seq->second[j].second == sm_ord
3008 321 : && bitmap_set_bit (refs_not_supported,
3009 122 : seq->second[j].first))
3010 : changed = true;
3011 106 : seq->second.truncate (new_idx);
3012 106 : break;
3013 : }
3014 : }
3015 : }
3016 : }
3017 : }
3018 19133 : std::pair<edge, vec<seq_entry> > *seq;
3019 58881 : FOR_EACH_VEC_ELT (sms, i, seq)
3020 : {
3021 : /* Prune sm_other from the end. */
3022 63531 : while (!seq->second.is_empty ()
3023 43657 : && seq->second.last ().second == sm_other)
3024 5136 : seq->second.pop ();
3025 : /* Prune duplicates from the start. */
3026 19874 : auto_bitmap seen (&lim_bitmap_obstack);
3027 19874 : unsigned j, k;
3028 47318 : for (j = k = 0; j < seq->second.length (); ++j)
3029 27444 : if (bitmap_set_bit (seen, seq->second[j].first))
3030 : {
3031 27335 : if (k != j)
3032 77 : seq->second[k] = seq->second[j];
3033 27335 : ++k;
3034 : }
3035 19874 : seq->second.truncate (k);
3036 : /* And verify. */
3037 19874 : seq_entry *e;
3038 86957 : FOR_EACH_VEC_ELT (seq->second, j, e)
3039 27335 : gcc_assert (e->second == sm_ord
3040 : || (e->second == sm_other && e->from != NULL_TREE));
3041 19874 : }
3042 :
3043 : /* Verify dependence for refs we cannot handle with the order preserving
3044 : code (refs_not_supported) or prune them from mem_refs. */
3045 19133 : auto_vec<seq_entry> unord_refs;
3046 34788 : EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
3047 : {
3048 15655 : ref = memory_accesses.refs_list[i];
3049 15655 : if (!ref_indep_loop_p (loop, ref, sm_waw))
3050 6540 : bitmap_clear_bit (mem_refs, i);
3051 : /* We've now verified store order for ref with respect to all other
3052 : stores in the loop does not matter. */
3053 : else
3054 9115 : unord_refs.safe_push (seq_entry (i, sm_unord));
3055 : }
3056 :
3057 19133 : hash_map<im_mem_ref *, sm_aux *> aux_map;
3058 :
3059 : /* Execute SM but delay the store materialization for ordered
3060 : sequences on exit. */
3061 51422 : EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
3062 : {
3063 32289 : ref = memory_accesses.refs_list[i];
3064 32289 : execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
3065 : false);
3066 : }
3067 :
3068 : /* Materialize ordered store sequences on exits. */
3069 19133 : auto_bitmap clobbers_to_prune;
3070 42795 : FOR_EACH_VEC_ELT (exits, i, e)
3071 : {
3072 23662 : edge append_cond_position = NULL;
3073 23662 : edge last_cond_fallthru = NULL;
3074 23662 : if (i < sms.length ())
3075 : {
3076 19874 : gcc_assert (sms[i].first == e);
3077 19874 : execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
3078 : append_cond_position, last_cond_fallthru,
3079 : clobbers_to_prune);
3080 19874 : sms[i].second.release ();
3081 : }
3082 23662 : if (!unord_refs.is_empty ())
3083 9439 : execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
3084 : append_cond_position, last_cond_fallthru,
3085 : clobbers_to_prune);
3086 : /* Commit edge inserts here to preserve the order of stores
3087 : when an exit exits multiple loops. */
3088 23662 : gsi_commit_one_edge_insert (e, NULL);
3089 : }
3090 :
3091 : /* Remove clobbers inside the loop we re-materialized on exits. */
3092 19312 : EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
3093 : {
3094 179 : gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
3095 179 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3096 179 : unlink_stmt_vdef (stmt);
3097 179 : release_defs (stmt);
3098 179 : gimple_set_vdef (stmt, NULL_TREE);
3099 179 : gsi_remove (&gsi, true);
3100 : }
3101 :
3102 51422 : for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
3103 102844 : iter != aux_map.end (); ++iter)
3104 64578 : delete (*iter).second;
3105 19133 : }
3106 :
3107 : class ref_always_stored
3108 : {
3109 : public:
3110 79936 : ref_always_stored (class loop *loop_)
3111 79936 : : loop (loop_) {}
3112 : bool operator () (mem_ref_loc *loc);
3113 : class loop *loop;
3114 : };
3115 :
3116 : bool
3117 151134 : ref_always_stored::operator () (mem_ref_loc *loc)
3118 : {
3119 : /* Make sure the statement is a store. */
3120 151134 : tree lhs = gimple_get_lhs (loc->stmt);
3121 151134 : if (!lhs
3122 151134 : || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
3123 : return false;
3124 :
3125 95703 : struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
3126 95703 : if (!lim_data)
3127 : return false;
3128 :
3129 95703 : class loop *must_exec = lim_data->always_executed_in;
3130 95703 : if (!must_exec)
3131 : return false;
3132 :
3133 36346 : if (must_exec == loop
3134 36346 : || flow_loop_nested_p (must_exec, loop))
3135 33040 : return true;
3136 :
3137 : return false;
3138 : }
3139 :
3140 : /* Returns true if REF is always stored in LOOP. */
3141 :
3142 : static bool
3143 79936 : ref_always_stored_p (class loop *loop, im_mem_ref *ref)
3144 : {
3145 79936 : return for_all_locs_in_loop (loop, ref, ref_always_stored (loop));
3146 : }
3147 :
3148 : /* Returns true if LOAD_REF and STORE_REF form a "self write" pattern
3149 : where the stored value comes from the loaded value via SSA.
3150 : Example: a[i] = a[0] is safe to hoist a[0] even when i==0. */
3151 :
3152 : static bool
3153 318250 : is_self_write (im_mem_ref *load_ref, im_mem_ref *store_ref)
3154 : {
3155 : /* Only handle the simple case with a single access per ref.
3156 : Bail out on multiple accesses to be conservative. */
3157 318250 : if (load_ref->accesses_in_loop.length () != 1
3158 409192 : || store_ref->accesses_in_loop.length () != 1)
3159 : return false;
3160 :
3161 90959 : gimple *load_stmt = load_ref->accesses_in_loop[0].stmt;
3162 90959 : gimple *store_stmt = store_ref->accesses_in_loop[0].stmt;
3163 :
3164 90959 : if (!is_gimple_assign (load_stmt) || !is_gimple_assign (store_stmt))
3165 : return false;
3166 :
3167 90959 : tree loaded_val = gimple_assign_lhs (load_stmt);
3168 90959 : tree stored_val = gimple_assign_rhs1 (store_stmt);
3169 :
3170 90959 : if (TREE_CODE (loaded_val) != SSA_NAME || TREE_CODE (stored_val) != SSA_NAME)
3171 : return false;
3172 :
3173 : /* Self write: stored value is the loaded value. */
3174 65031 : if (stored_val != loaded_val)
3175 : return false;
3176 :
3177 :
3178 : /* TODO: Try to factor this out with mem_ref_hasher::equal
3179 : into im_compare_access_position_and_size
3180 : or a similar helper to centralize this delicate handling
3181 : complete for MEM_REF offsets and base pointer equality.
3182 :
3183 : TODO: Also ensure max_size_known_p agrees or resort to
3184 : alignment considerations to rule out partial overlaps.
3185 :
3186 : See:
3187 : https://gcc.gnu.org/pipermail/gcc-patches/2025-December/704155.html
3188 : For more context on TODOs above. */
3189 :
3190 : /* They may alias. Verify exact same location. */
3191 1672 : return (operand_equal_p (load_ref->mem.base, store_ref->mem.base, 0)
3192 46 : && known_eq (load_ref->mem.size, store_ref->mem.size)
3193 1718 : && known_eq (load_ref->mem.offset, store_ref->mem.offset));
3194 :
3195 : }
3196 :
3197 : /* Returns true if REF1 and REF2 are independent. */
3198 :
3199 : static bool
3200 603526 : refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
3201 : {
3202 603526 : if (ref1 == ref2)
3203 : return true;
3204 :
3205 541364 : if (dump_file && (dump_flags & TDF_DETAILS))
3206 3041 : fprintf (dump_file, "Querying dependency of refs %u and %u: ",
3207 3041 : ref1->id, ref2->id);
3208 :
3209 541364 : if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
3210 : {
3211 64550 : if (dump_file && (dump_flags & TDF_DETAILS))
3212 99 : fprintf (dump_file, "dependent.\n");
3213 64550 : return false;
3214 : }
3215 : else
3216 : {
3217 476814 : if (dump_file && (dump_flags & TDF_DETAILS))
3218 2942 : fprintf (dump_file, "independent.\n");
3219 476814 : return true;
3220 : }
3221 : }
3222 :
3223 : /* Returns true if REF is independent on all other accessess in LOOP.
3224 : KIND specifies the kind of dependence to consider.
3225 : lim_raw assumes REF is not stored in LOOP and disambiguates RAW
3226 : dependences so if true REF can be hoisted out of LOOP
3227 : sm_war disambiguates a store REF against all other loads to see
3228 : whether the store can be sunk across loads out of LOOP
3229 : sm_waw disambiguates a store REF against all other stores to see
3230 : whether the store can be sunk across stores out of LOOP. */
3231 :
3232 : static bool
3233 2064512 : ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
3234 : {
3235 2064512 : bool indep_p = true;
3236 2064512 : bitmap refs_to_check;
3237 :
3238 2064512 : if (kind == sm_war)
3239 973426 : refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
3240 : else
3241 1091086 : refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
3242 :
3243 2064512 : if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
3244 2064512 : || ref->mem.ref == error_mark_node)
3245 : indep_p = false;
3246 : else
3247 : {
3248 : /* tri-state, { unknown, independent, dependent } */
3249 662016 : dep_state state = query_loop_dependence (loop, ref, kind);
3250 662016 : if (state != dep_unknown)
3251 0 : return state == dep_independent ? true : false;
3252 :
3253 662016 : class loop *inner = loop->inner;
3254 811692 : while (inner)
3255 : {
3256 451027 : if (!ref_indep_loop_p (inner, ref, kind))
3257 : {
3258 : indep_p = false;
3259 : break;
3260 : }
3261 149676 : inner = inner->next;
3262 : }
3263 :
3264 662016 : if (indep_p)
3265 : {
3266 360665 : unsigned i;
3267 360665 : bitmap_iterator bi;
3268 920837 : EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
3269 : {
3270 629065 : im_mem_ref *aref = memory_accesses.refs_list[i];
3271 629065 : if (aref->mem.ref == error_mark_node)
3272 : {
3273 26348 : gimple *stmt = aref->accesses_in_loop[0].stmt;
3274 26348 : if ((kind == sm_war
3275 9366 : && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
3276 : kind != sm_waw))
3277 35046 : || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
3278 : kind != sm_waw))
3279 : {
3280 : indep_p = false;
3281 : break;
3282 : }
3283 : }
3284 : /* For hoisting loads (lim_raw), allow "self write": the store
3285 : writes back the loaded value. Example: a[i] = a[0]
3286 : is safe even when i==0 causes aliasing. */
3287 602717 : else if (kind == lim_raw
3288 318250 : && ref->loaded && aref->stored
3289 920967 : && is_self_write (ref, aref))
3290 : {
3291 17 : if (dump_file && (dump_flags & TDF_DETAILS))
3292 2 : fprintf (dump_file,
3293 : "Dependency of refs %u and %u: "
3294 : "independent (self write).\n",
3295 2 : ref->id, aref->id);
3296 : }
3297 :
3298 602700 : else if (!refs_independent_p (ref, aref, kind != sm_waw))
3299 : {
3300 : indep_p = false;
3301 : break;
3302 : }
3303 : }
3304 : }
3305 : }
3306 :
3307 2064512 : if (dump_file && (dump_flags & TDF_DETAILS))
3308 763 : fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
3309 : kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
3310 371 : ref->id, loop->num, indep_p ? "independent" : "dependent");
3311 :
3312 : /* Record the computed result in the cache. */
3313 2064512 : record_loop_dependence (loop, ref, kind,
3314 : indep_p ? dep_independent : dep_dependent);
3315 :
3316 2064512 : return indep_p;
3317 : }
3318 :
3319 : class ref_in_loop_hot_body
3320 : {
3321 : public:
3322 42484 : ref_in_loop_hot_body (class loop *loop_) : l (loop_) {}
3323 : bool operator () (mem_ref_loc *loc);
3324 : class loop *l;
3325 : };
3326 :
3327 : /* Check the coldest loop between loop L and innermost loop. If there is one
3328 : cold loop between L and INNER_LOOP, store motion can be performed, otherwise
3329 : no cold loop means no store motion. get_coldest_out_loop also handles cases
3330 : when l is inner_loop. */
3331 : bool
3332 43254 : ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
3333 : {
3334 43254 : basic_block curr_bb = gimple_bb (loc->stmt);
3335 43254 : class loop *inner_loop = curr_bb->loop_father;
3336 43254 : return get_coldest_out_loop (l, inner_loop, curr_bb);
3337 : }
3338 :
3339 :
3340 : /* Returns true if we can perform store motion of REF from LOOP. */
3341 :
3342 : static bool
3343 2883527 : can_sm_ref_p (class loop *loop, im_mem_ref *ref)
3344 : {
3345 2883527 : tree base;
3346 :
3347 : /* Can't hoist unanalyzable refs. */
3348 2883527 : if (!MEM_ANALYZABLE (ref))
3349 : return false;
3350 :
3351 : /* Can't hoist/sink aggregate copies. */
3352 2527295 : if (ref->mem.ref == error_mark_node)
3353 : return false;
3354 :
3355 : /* It should be movable. */
3356 2100913 : if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
3357 2100687 : || TREE_THIS_VOLATILE (ref->mem.ref)
3358 4184733 : || !for_each_index (&ref->mem.ref, may_move_till, loop))
3359 1150159 : return false;
3360 :
3361 : /* If it can throw fail, we do not properly update EH info. */
3362 950754 : if (tree_could_throw_p (ref->mem.ref))
3363 : return false;
3364 :
3365 : /* If it can trap, it must be always executed in LOOP.
3366 : Readonly memory locations may trap when storing to them, but
3367 : tree_could_trap_p is a predicate for rvalues, so check that
3368 : explicitly. */
3369 927453 : base = get_base_address (ref->mem.ref);
3370 927453 : if ((tree_could_trap_p (ref->mem.ref)
3371 882766 : || (DECL_P (base) && TREE_READONLY (base))
3372 882433 : || TREE_CODE (base) == STRING_CST)
3373 : /* ??? We can at least use false here, allowing loads? We
3374 : are forcing conditional stores if the ref is not always
3375 : stored to later anyway. So this would only guard
3376 : the load we need to emit. Thus when the ref is not
3377 : loaded we can elide this completely? */
3378 972481 : && !ref_always_stored_p (loop, ref))
3379 : return false;
3380 :
3381 : /* Verify all loads of ref can be hoisted. */
3382 892378 : if (ref->loaded
3383 101873 : && bitmap_bit_p (ref->loaded, loop->num)
3384 989109 : && !ref_indep_loop_p (loop, ref, lim_raw))
3385 : return false;
3386 :
3387 : /* Verify the candidate can be disambiguated against all loads,
3388 : that is, we can elide all in-loop stores. Disambiguation
3389 : against stores is done later when we cannot guarantee preserving
3390 : the order of stores. */
3391 819964 : if (!ref_indep_loop_p (loop, ref, sm_war))
3392 : return false;
3393 :
3394 : /* Verify whether the candidate is hot for LOOP. Only do store motion if the
3395 : candidate's profile count is hot. Statement in cold BB shouldn't be moved
3396 : out of it's loop_father. */
3397 42484 : if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
3398 : return false;
3399 :
3400 : return true;
3401 : }
3402 :
3403 : /* Marks the references in LOOP for that store motion should be performed
3404 : in REFS_TO_SM. SM_EXECUTED is the set of references for that store
3405 : motion was performed in one of the outer loops. */
3406 :
3407 : static void
3408 1270311 : find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
3409 : {
3410 1270311 : bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
3411 1270311 : unsigned i;
3412 1270311 : bitmap_iterator bi;
3413 1270311 : im_mem_ref *ref;
3414 :
3415 4153838 : EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
3416 : {
3417 2883527 : ref = memory_accesses.refs_list[i];
3418 2883527 : if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
3419 41515 : bitmap_set_bit (refs_to_sm, i);
3420 : }
3421 1270311 : }
3422 :
3423 : /* Checks whether LOOP (with exits stored in EXITS array) is suitable
3424 : for a store motion optimization (i.e. whether we can insert statement
3425 : on its exits). */
3426 :
3427 : static bool
3428 1312364 : loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
3429 : const vec<edge> &exits)
3430 : {
3431 1312364 : unsigned i;
3432 1312364 : edge ex;
3433 :
3434 3394419 : FOR_EACH_VEC_ELT (exits, i, ex)
3435 2124108 : if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
3436 : return false;
3437 :
3438 : return true;
3439 : }
3440 :
3441 : /* Try to perform store motion for all memory references modified inside
3442 : LOOP. SM_EXECUTED is the bitmap of the memory references for that
3443 : store motion was executed in one of the outer loops. */
3444 :
3445 : static void
3446 1312364 : store_motion_loop (class loop *loop, bitmap sm_executed)
3447 : {
3448 1312364 : auto_vec<edge> exits = get_loop_exit_edges (loop);
3449 1312364 : class loop *subloop;
3450 1312364 : bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
3451 :
3452 1312364 : if (loop_suitable_for_sm (loop, exits))
3453 : {
3454 1270311 : find_refs_for_sm (loop, sm_executed, sm_in_loop);
3455 1270311 : if (!bitmap_empty_p (sm_in_loop))
3456 21333 : hoist_memory_references (loop, sm_in_loop, exits);
3457 : }
3458 :
3459 1312364 : bitmap_ior_into (sm_executed, sm_in_loop);
3460 1610600 : for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3461 298236 : store_motion_loop (subloop, sm_executed);
3462 1312364 : bitmap_and_compl_into (sm_executed, sm_in_loop);
3463 1312364 : BITMAP_FREE (sm_in_loop);
3464 1312364 : }
3465 :
3466 : /* Try to perform store motion for all memory references modified inside
3467 : loops. */
3468 :
3469 : static void
3470 484801 : do_store_motion (void)
3471 : {
3472 484801 : class loop *loop;
3473 484801 : bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3474 :
3475 1498929 : for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3476 1014128 : store_motion_loop (loop, sm_executed);
3477 :
3478 484801 : BITMAP_FREE (sm_executed);
3479 484801 : }
3480 :
3481 : /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
3482 : for each such basic block bb records the outermost loop for that execution
3483 : of its header implies execution of bb. CONTAINS_CALL is the bitmap of
3484 : blocks that contain a nonpure call. */
3485 :
3486 : static void
3487 1312889 : fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
3488 : {
3489 1312889 : basic_block bb = NULL, last = NULL;
3490 1312889 : edge e;
3491 1312889 : class loop *inn_loop = loop;
3492 :
3493 1611382 : for (class loop *inner = loop->inner; inner; inner = inner->next)
3494 298493 : fill_always_executed_in_1 (inner, contains_call);
3495 :
3496 1312889 : auto_vec<basic_block, 64> worklist;
3497 1312889 : worklist.reserve_exact (loop->num_nodes);
3498 1312889 : worklist.quick_push (loop->header);
3499 2914305 : do
3500 : {
3501 2914305 : edge_iterator ei;
3502 2914305 : bb = worklist.pop ();
3503 :
3504 2914305 : if (!flow_bb_inside_loop_p (inn_loop, bb))
3505 : {
3506 : /* When we are leaving a possibly infinite inner loop
3507 : we have to stop processing. */
3508 172484 : if (!finite_loop_p (inn_loop))
3509 : break;
3510 : /* If the loop was finite we can continue with processing
3511 : the loop we exited to. */
3512 164162 : inn_loop = bb->loop_father;
3513 : }
3514 :
3515 2905983 : if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
3516 1705687 : last = bb;
3517 :
3518 2905983 : if (bitmap_bit_p (contains_call, bb->index))
3519 : break;
3520 :
3521 : /* If LOOP exits from this BB stop processing. */
3522 5595720 : FOR_EACH_EDGE (e, ei, bb->succs)
3523 3984519 : if (!flow_bb_inside_loop_p (loop, e->dest))
3524 : break;
3525 2572674 : if (e)
3526 : break;
3527 :
3528 : /* A loop might be infinite (TODO use simple loop analysis
3529 : to disprove this if possible). */
3530 1611201 : if (bb->flags & BB_IRREDUCIBLE_LOOP)
3531 : break;
3532 :
3533 1611085 : if (bb->loop_father->header == bb)
3534 : /* Record that we enter into a subloop since it might not
3535 : be finite. */
3536 : /* ??? Entering into a not always executed subloop makes
3537 : fill_always_executed_in quadratic in loop depth since
3538 : we walk those loops N times. This is not a problem
3539 : in practice though, see PR102253 for a worst-case testcase. */
3540 606000 : inn_loop = bb->loop_father;
3541 :
3542 : /* Walk the body of LOOP sorted by dominance relation. Additionally,
3543 : if a basic block S dominates the latch, then only blocks dominated
3544 : by S are after it.
3545 : This is get_loop_body_in_dom_order using a worklist algorithm and
3546 : stopping once we are no longer interested in visiting further
3547 : blocks. */
3548 1611085 : unsigned old_len = worklist.length ();
3549 1611085 : unsigned postpone = 0;
3550 1611085 : for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
3551 3476987 : son;
3552 1865902 : son = next_dom_son (CDI_DOMINATORS, son))
3553 : {
3554 1865902 : if (!flow_bb_inside_loop_p (loop, son))
3555 20298 : continue;
3556 1845604 : if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
3557 531667 : postpone = worklist.length ();
3558 1845604 : worklist.quick_push (son);
3559 : }
3560 1611085 : if (postpone)
3561 : /* Postponing the block that dominates the latch means
3562 : processing it last and thus putting it earliest in the
3563 : worklist. */
3564 156697 : std::swap (worklist[old_len], worklist[postpone]);
3565 : }
3566 3222170 : while (!worklist.is_empty ());
3567 :
3568 2098485 : while (1)
3569 : {
3570 1705687 : if (last->loop_father == loop
3571 1705687 : || (ALWAYS_EXECUTED_IN (last)
3572 156105 : && loop_outer (ALWAYS_EXECUTED_IN (last)) == loop))
3573 : {
3574 1705010 : if (dump_enabled_p ())
3575 1903 : dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
3576 : last->index, loop->num);
3577 1705010 : SET_ALWAYS_EXECUTED_IN (last, loop);
3578 : }
3579 1705687 : if (last == loop->header)
3580 : break;
3581 392798 : last = get_immediate_dominator (CDI_DOMINATORS, last);
3582 : }
3583 1312889 : }
3584 :
3585 : /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3586 : for each such basic block bb records the outermost loop for that execution
3587 : of its header implies execution of bb. */
3588 :
3589 : static void
3590 484912 : fill_always_executed_in (void)
3591 : {
3592 484912 : basic_block bb;
3593 484912 : class loop *loop;
3594 :
3595 484912 : auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3596 484912 : bitmap_clear (contains_call);
3597 14749325 : FOR_EACH_BB_FN (bb, cfun)
3598 : {
3599 14264413 : if (loop_depth (bb->loop_father) == 0)
3600 : {
3601 : /* Outside of loops we can skip scanning all stmts. */
3602 8236701 : bitmap_set_bit (contains_call, bb->index);
3603 8236701 : continue;
3604 : }
3605 6027712 : gimple_stmt_iterator gsi;
3606 50443574 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3607 : {
3608 40427973 : if (nonpure_call_p (gsi_stmt (gsi)))
3609 : break;
3610 : }
3611 :
3612 6027712 : if (!gsi_end_p (gsi))
3613 958148 : bitmap_set_bit (contains_call, bb->index);
3614 : }
3615 :
3616 1499308 : for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3617 1014396 : fill_always_executed_in_1 (loop, contains_call);
3618 484912 : }
3619 :
3620 : /* Find the coldest loop preheader for LOOP, also find the nearest hotter loop
3621 : to LOOP. Then recursively iterate each inner loop. */
3622 :
3623 : void
3624 1312889 : fill_coldest_and_hotter_out_loop (class loop *coldest_loop,
3625 : class loop *hotter_loop, class loop *loop)
3626 : {
3627 1312889 : if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3628 : coldest_loop))
3629 27555 : coldest_loop = loop;
3630 :
3631 1312889 : coldest_outermost_loop[loop->num] = coldest_loop;
3632 :
3633 1312889 : hotter_than_inner_loop[loop->num] = NULL;
3634 1312889 : class loop *outer_loop = loop_outer (loop);
3635 1312889 : if (hotter_loop
3636 1312889 : && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3637 : hotter_loop))
3638 2154 : hotter_than_inner_loop[loop->num] = hotter_loop;
3639 :
3640 1312889 : if (outer_loop && outer_loop != current_loops->tree_root
3641 1611382 : && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3642 : outer_loop))
3643 33784 : hotter_than_inner_loop[loop->num] = outer_loop;
3644 :
3645 1312889 : if (dump_enabled_p ())
3646 : {
3647 1470 : dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ",
3648 : loop->num, coldest_loop->num);
3649 1470 : if (hotter_than_inner_loop[loop->num])
3650 245 : dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n",
3651 245 : hotter_than_inner_loop[loop->num]->num);
3652 : else
3653 1225 : dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n");
3654 : }
3655 :
3656 1312889 : class loop *inner_loop;
3657 1611382 : for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next)
3658 596986 : fill_coldest_and_hotter_out_loop (coldest_loop,
3659 298493 : hotter_than_inner_loop[loop->num],
3660 : inner_loop);
3661 1312889 : }
3662 :
3663 : /* Compute the global information needed by the loop invariant motion pass. */
3664 :
3665 : static void
3666 484912 : tree_ssa_lim_initialize (bool store_motion)
3667 : {
3668 484912 : unsigned i;
3669 :
3670 484912 : bitmap_obstack_initialize (&lim_bitmap_obstack);
3671 484912 : gcc_obstack_init (&mem_ref_obstack);
3672 484912 : lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
3673 :
3674 484912 : if (flag_tm)
3675 104 : compute_transaction_bits ();
3676 :
3677 484912 : memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
3678 484912 : memory_accesses.refs_list.create (100);
3679 : /* Allocate a special, unanalyzable mem-ref with ID zero. */
3680 484912 : memory_accesses.refs_list.quick_push
3681 484912 : (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3682 :
3683 969824 : memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
3684 969824 : memory_accesses.refs_loaded_in_loop.quick_grow_cleared (number_of_loops (cfun));
3685 969824 : memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
3686 969824 : memory_accesses.refs_stored_in_loop.quick_grow_cleared (number_of_loops (cfun));
3687 484912 : if (store_motion)
3688 : {
3689 969602 : memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
3690 484801 : memory_accesses.all_refs_stored_in_loop.quick_grow_cleared
3691 969602 : (number_of_loops (cfun));
3692 : }
3693 :
3694 6011146 : for (i = 0; i < number_of_loops (cfun); i++)
3695 : {
3696 2520661 : bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
3697 : &lim_bitmap_obstack);
3698 2520661 : bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
3699 : &lim_bitmap_obstack);
3700 2520661 : if (store_motion)
3701 2519560 : bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
3702 : &lim_bitmap_obstack);
3703 : }
3704 :
3705 484912 : memory_accesses.ttae_cache = NULL;
3706 :
3707 : /* Initialize bb_loop_postorder with a mapping from loop->num to
3708 : its postorder index. */
3709 484912 : i = 0;
3710 969824 : bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
3711 2767625 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
3712 1312889 : bb_loop_postorder[loop->num] = i++;
3713 484912 : }
3714 :
3715 : /* Cleans up after the invariant motion pass. */
3716 :
3717 : static void
3718 484912 : tree_ssa_lim_finalize (void)
3719 : {
3720 484912 : basic_block bb;
3721 484912 : unsigned i;
3722 484912 : im_mem_ref *ref;
3723 :
3724 14794581 : FOR_EACH_BB_FN (bb, cfun)
3725 14309669 : SET_ALWAYS_EXECUTED_IN (bb, NULL);
3726 :
3727 484912 : bitmap_obstack_release (&lim_bitmap_obstack);
3728 969824 : delete lim_aux_data_map;
3729 :
3730 484912 : delete memory_accesses.refs;
3731 484912 : memory_accesses.refs = NULL;
3732 :
3733 5716088 : FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
3734 5231176 : memref_free (ref);
3735 484912 : memory_accesses.refs_list.release ();
3736 484912 : obstack_free (&mem_ref_obstack, NULL);
3737 :
3738 484912 : memory_accesses.refs_loaded_in_loop.release ();
3739 484912 : memory_accesses.refs_stored_in_loop.release ();
3740 484912 : memory_accesses.all_refs_stored_in_loop.release ();
3741 :
3742 484912 : if (memory_accesses.ttae_cache)
3743 4570 : free_affine_expand_cache (&memory_accesses.ttae_cache);
3744 :
3745 484912 : free (bb_loop_postorder);
3746 :
3747 484912 : coldest_outermost_loop.release ();
3748 484912 : hotter_than_inner_loop.release ();
3749 484912 : }
3750 :
3751 : /* Moves invariants from loops. Only "expensive" invariants are moved out --
3752 : i.e. those that are likely to be win regardless of the register pressure.
3753 : Only perform store motion if STORE_MOTION is true. */
3754 :
3755 : unsigned int
3756 484912 : loop_invariant_motion_in_fun (function *fun, bool store_motion)
3757 : {
3758 484912 : unsigned int todo = 0;
3759 :
3760 484912 : tree_ssa_lim_initialize (store_motion);
3761 :
3762 484912 : mark_ssa_maybe_undefs ();
3763 :
3764 : /* Gathers information about memory accesses in the loops. */
3765 484912 : analyze_memory_references (store_motion);
3766 :
3767 : /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
3768 484912 : fill_always_executed_in ();
3769 :
3770 : /* Pre-compute coldest outermost loop and nearest hotter loop of each loop.
3771 : */
3772 484912 : class loop *loop;
3773 969824 : coldest_outermost_loop.create (number_of_loops (cfun));
3774 969824 : coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
3775 969824 : hotter_than_inner_loop.create (number_of_loops (cfun));
3776 969824 : hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
3777 1499308 : for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3778 1014396 : fill_coldest_and_hotter_out_loop (loop, NULL, loop);
3779 :
3780 484912 : int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3781 484912 : int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3782 :
3783 : /* For each statement determine the outermost loop in that it is
3784 : invariant and cost for computing the invariant. */
3785 14749325 : for (int i = 0; i < n; ++i)
3786 14264413 : compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3787 :
3788 : /* Execute store motion. Force the necessary invariants to be moved
3789 : out of the loops as well. */
3790 484912 : if (store_motion)
3791 484801 : do_store_motion ();
3792 :
3793 484912 : free (rpo);
3794 484912 : rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3795 484912 : n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3796 :
3797 : /* Move the expressions that are expensive enough. */
3798 14794581 : for (int i = 0; i < n; ++i)
3799 14309669 : todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3800 :
3801 484912 : free (rpo);
3802 :
3803 484912 : gsi_commit_edge_inserts ();
3804 484912 : if (need_ssa_update_p (fun))
3805 14872 : rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3806 :
3807 484912 : tree_ssa_lim_finalize ();
3808 :
3809 484912 : return todo;
3810 : }
3811 :
3812 : /* Loop invariant motion pass. */
3813 :
3814 : namespace {
3815 :
3816 : const pass_data pass_data_lim =
3817 : {
3818 : GIMPLE_PASS, /* type */
3819 : "lim", /* name */
3820 : OPTGROUP_LOOP, /* optinfo_flags */
3821 : TV_LIM, /* tv_id */
3822 : PROP_cfg, /* properties_required */
3823 : 0, /* properties_provided */
3824 : 0, /* properties_destroyed */
3825 : 0, /* todo_flags_start */
3826 : 0, /* todo_flags_finish */
3827 : };
3828 :
3829 : class pass_lim : public gimple_opt_pass
3830 : {
3831 : public:
3832 1155068 : pass_lim (gcc::context *ctxt)
3833 2310136 : : gimple_opt_pass (pass_data_lim, ctxt)
3834 : {}
3835 :
3836 : /* opt_pass methods: */
3837 866301 : opt_pass * clone () final override { return new pass_lim (m_ctxt); }
3838 1287690 : bool gate (function *) final override { return flag_tree_loop_im != 0; }
3839 : unsigned int execute (function *) final override;
3840 :
3841 : }; // class pass_lim
3842 :
3843 : unsigned int
3844 1287362 : pass_lim::execute (function *fun)
3845 : {
3846 1287362 : in_loop_pipeline = scev_initialized_p ();
3847 1287362 : if (!in_loop_pipeline)
3848 1045170 : loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3849 :
3850 2574724 : if (number_of_loops (fun) <= 1)
3851 : return 0;
3852 484817 : unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
3853 :
3854 484817 : if (!in_loop_pipeline)
3855 242625 : loop_optimizer_finalize ();
3856 : else
3857 242192 : scev_reset ();
3858 : return todo;
3859 : }
3860 :
3861 : } // anon namespace
3862 :
3863 : gimple_opt_pass *
3864 288767 : make_pass_lim (gcc::context *ctxt)
3865 : {
3866 288767 : return new pass_lim (ctxt);
3867 : }
3868 :
3869 :
|