Line data Source code
1 : /* Loop invariant motion.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "tree.h"
25 : #include "gimple.h"
26 : #include "cfghooks.h"
27 : #include "tree-pass.h"
28 : #include "ssa.h"
29 : #include "gimple-pretty-print.h"
30 : #include "fold-const.h"
31 : #include "cfganal.h"
32 : #include "tree-eh.h"
33 : #include "gimplify.h"
34 : #include "gimple-iterator.h"
35 : #include "tree-cfg.h"
36 : #include "tree-ssa-loop-manip.h"
37 : #include "tree-ssa-loop.h"
38 : #include "tree-into-ssa.h"
39 : #include "cfgloop.h"
40 : #include "tree-affine.h"
41 : #include "tree-ssa-propagate.h"
42 : #include "trans-mem.h"
43 : #include "gimple-fold.h"
44 : #include "tree-scalar-evolution.h"
45 : #include "tree-ssa-loop-niter.h"
46 : #include "alias.h"
47 : #include "builtins.h"
48 : #include "tree-dfa.h"
49 : #include "tree-ssa.h"
50 : #include "dbgcnt.h"
51 : #include "insn-codes.h"
52 : #include "optabs-tree.h"
53 :
54 : /* TODO: Support for predicated code motion. I.e.
55 :
56 : while (1)
57 : {
58 : if (cond)
59 : {
60 : a = inv;
61 : something;
62 : }
63 : }
64 :
65 : Where COND and INV are invariants, but evaluating INV may trap or be
66 : invalid from some other reason if !COND. This may be transformed to
67 :
68 : if (cond)
69 : a = inv;
70 : while (1)
71 : {
72 : if (cond)
73 : something;
74 : } */
75 :
76 : /* The auxiliary data kept for each statement. */
77 :
struct lim_aux_data
{
  class loop *max_loop;		/* The outermost loop in that the statement
				   is invariant.  */

  class loop *tgt_loop;		/* The loop out of that we want to move the
				   invariant.  */

  class loop *always_executed_in;
				/* The outermost loop for that we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  unsigned ref;			/* The simple_mem_ref in this stmt or 0
				   (0 is UNANALYZABLE_MEM_ID, i.e. no
				   analyzable reference).  */

  vec<gimple *> depends;	/* Vector of statements that must be also
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  */
};
102 :
103 : /* Maps statements to their lim_aux_data. */
104 :
105 : static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
106 :
107 : /* Description of a memory reference location. */
108 :
struct mem_ref_loc
{
  tree *ref;			/* The reference itself; points at the
				   operand slot inside STMT (see
				   simple_mem_ref_in_stmt), so the operand
				   can be replaced in place.  */
  gimple *stmt;			/* The statement in that it occurs.  */
};
114 :
115 :
116 : /* Description of a memory reference. */
117 :
class im_mem_ref
{
public:
  unsigned id : 30;		/* ID assigned to the memory reference
				   (its index in memory_accesses.refs_list);
				   id 0 (UNANALYZABLE_MEM_ID) marks the shared
				   unanalyzable reference.  */
  unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
  unsigned ref_decomposed : 1;  /* Whether the ref was hashed from mem.  */
  hashval_t hash;		/* Its hash value (precomputed; returned
				   as-is by mem_ref_hasher::hash).  */

  /* The memory access itself and associated caching of alias-oracle
     query meta-data.  We are using mem.ref == error_mark_node for the
     case the reference is represented by its single access stmt
     in accesses_in_loop[0].  */
  ao_ref mem;

  bitmap stored;		/* The set of loops in that this memory location
				   is stored to.  */
  bitmap loaded;		/* The set of loops in that this memory location
				   is loaded from.  */
  vec<mem_ref_loc>		accesses_in_loop;
				/* The locations of the accesses.  */

  /* The following set is computed on demand.  Six bits per loop cache
     the dep_kind x dep_state answers; see record_loop_dependence /
     query_loop_dependence.  */
  bitmap_head dep_loop;		/* The set of loops in that the memory
				   reference is {in,}dependent in
				   different modes.  */
};
145 :
/* Whether we are running as part of the loop optimizer pipeline.
   NOTE(review): set by code outside this chunk -- confirm against the
   pass entry points.  */
static bool in_loop_pipeline;

/* We use six bits per loop in the ref->dep_loop bitmap to record
   the dep_kind x dep_state combinations.  */

/* Kinds of dependences that are cached.  lim_raw is used for the
   invariant-motion (RAW) queries in this chunk; sm_war/sm_waw are
   presumably the store-motion write-after-read/write-after-write
   variants -- confirm against their users later in the file.  */
enum dep_kind { lim_raw, sm_war, sm_waw };
/* Tri-state query result; dep_unknown means "not cached yet".  */
enum dep_state { dep_unknown, dep_independent, dep_dependent };

/* coldest outermost loop for given loop, indexed by loop->num.  */
vec<class loop *> coldest_outermost_loop;
/* hotter outer loop nearest to given loop, indexed by loop->num.  */
vec<class loop *> hotter_than_inner_loop;
158 :
159 : /* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */
160 :
161 : static void
162 2060584 : record_loop_dependence (class loop *loop, im_mem_ref *ref,
163 : dep_kind kind, dep_state state)
164 : {
165 2060584 : gcc_assert (state != dep_unknown);
166 2060584 : unsigned bit = 6 * loop->num + kind * 2 + state == dep_dependent ? 1 : 0;
167 2060584 : bitmap_set_bit (&ref->dep_loop, bit);
168 2060584 : }
169 :
170 : /* Query the loop dependence cache of REF for LOOP, KIND. */
171 :
172 : static dep_state
173 661357 : query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
174 : {
175 661357 : unsigned first_bit = 6 * loop->num + kind * 2;
176 661357 : if (bitmap_bit_p (&ref->dep_loop, first_bit))
177 : return dep_independent;
178 661357 : else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
179 0 : return dep_dependent;
180 : return dep_unknown;
181 : }
182 :
183 : /* Mem_ref hashtable helpers. */
184 :
struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
{
  /* Lookups compare a table entry against an ao_ref key rather than
     against another im_mem_ref.  */
  typedef ao_ref *compare_type;
  static inline hashval_t hash (const im_mem_ref *);
  static inline bool equal (const im_mem_ref *, const ao_ref *);
};
191 :
192 : /* A hash function for class im_mem_ref object OBJ. */
193 :
194 : inline hashval_t
195 10726967 : mem_ref_hasher::hash (const im_mem_ref *mem)
196 : {
197 10726967 : return mem->hash;
198 : }
199 :
200 : /* An equality function for class im_mem_ref object MEM1 with
201 : memory reference OBJ2. */
202 :
inline bool
mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
{
  /* When the key has a known extent compare the decomposed
     base/offset/size representation; otherwise fall back to structural
     equality of the reference trees.  */
  if (obj2->max_size_known_p ())
    return (mem1->ref_decomposed
	    /* Bases match either as MEM_REFs with equal pointer operand
	       and equal combined (base + ref) bit offset, or as
	       structurally equal bases with equal offsets.  */
	    && ((TREE_CODE (mem1->mem.base) == MEM_REF
		 && TREE_CODE (obj2->base) == MEM_REF
		 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
				     TREE_OPERAND (obj2->base, 0), 0)
		 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
			      mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
		|| (operand_equal_p (mem1->mem.base, obj2->base, 0)
		    && known_eq (mem1->mem.offset, obj2->offset)))
	    && known_eq (mem1->mem.size, obj2->size)
	    && known_eq (mem1->mem.max_size, obj2->max_size)
	    && mem1->mem.volatile_p == obj2->volatile_p
	    && (mem1->mem.ref_alias_set == obj2->ref_alias_set
		/* We are not canonicalizing alias-sets but for the
		   special-case we didn't canonicalize yet and the
		   incoming ref is a alias-set zero MEM we pick
		   the correct one already.  */
		|| (!mem1->ref_canonical
		    && (TREE_CODE (obj2->ref) == MEM_REF
			|| TREE_CODE (obj2->ref) == TARGET_MEM_REF)
		    && obj2->ref_alias_set == 0)
		/* Likewise if there's a canonical ref with alias-set zero.  */
		|| (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
				   TREE_TYPE (obj2->ref)));
  else
    return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
}
235 :
236 :
237 : /* Description of memory accesses in loops. */
238 :
static struct
{
  /* The hash table of memory references accessed in loops.  */
  hash_table<mem_ref_hasher> *refs;

  /* The list of memory references, indexed by im_mem_ref::id.  */
  vec<im_mem_ref *> refs_list;

  /* The set of memory references accessed in each loop.
     NOTE(review): presumably indexed by loop->num -- confirm against
     the analysis code later in the file.  */
  vec<bitmap_head> refs_loaded_in_loop;

  /* The set of memory references stored in each loop.  */
  vec<bitmap_head> refs_stored_in_loop;

  /* The set of memory references stored in each loop, including
     subloops.  */
  vec<bitmap_head> all_refs_stored_in_loop;

  /* Cache for expanding memory addresses.  */
  hash_map<tree, name_expansion *> *ttae_cache;
} memory_accesses;
259 :
/* Obstack for the bitmaps in the above data structures.  */
static bitmap_obstack lim_bitmap_obstack;
static obstack mem_ref_obstack;

/* Forward declarations of dependence/access queries defined later in
   this file.  */
static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) param_lim_expensive)

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  Cached in the basic block's aux field.  */
#define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))

/* ID of the shared unanalyzable mem.  */
#define UNANALYZABLE_MEM_ID 0

/* Whether the reference was analyzable.  */
#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
281 :
282 : static struct lim_aux_data *
283 17428542 : init_lim_data (gimple *stmt)
284 : {
285 17428542 : lim_aux_data *p = XCNEW (struct lim_aux_data);
286 17428542 : lim_aux_data_map->put (stmt, p);
287 :
288 17428542 : return p;
289 : }
290 :
291 : static struct lim_aux_data *
292 92622373 : get_lim_data (gimple *stmt)
293 : {
294 92622373 : lim_aux_data **p = lim_aux_data_map->get (stmt);
295 92622373 : if (!p)
296 : return NULL;
297 :
298 47408506 : return *p;
299 : }
300 :
301 : /* Releases the memory occupied by DATA. */
302 :
static void
free_lim_aux_data (struct lim_aux_data *data)
{
  /* Release the dependency vector's storage before freeing DATA
     itself.  */
  data->depends.release ();
  free (data);
}
309 :
310 : static void
311 17428365 : clear_lim_data (gimple *stmt)
312 : {
313 17428365 : lim_aux_data **p = lim_aux_data_map->get (stmt);
314 17428365 : if (!p)
315 : return;
316 :
317 17428365 : free_lim_aux_data (*p);
318 17428365 : *p = NULL;
319 : }
320 :
321 :
322 : /* The possibilities of statement movement. */
enum move_pos
{
  MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
  MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
				   become executed -- memory accesses, ...  */
  MOVE_POSSIBLE			/* Unlimited movement.  */
};
330 :
331 :
332 : /* If it is possible to hoist the statement STMT unconditionally,
333 : returns MOVE_POSSIBLE.
334 : If it is possible to hoist the statement STMT, but we must avoid making
335 : it executed if it would not be executed in the original program (e.g.
336 : because it may trap), return MOVE_PRESERVE_EXECUTION.
337 : Otherwise return MOVE_IMPOSSIBLE. */
338 :
339 : static enum move_pos
340 45622561 : movement_possibility_1 (gimple *stmt)
341 : {
342 45622561 : tree lhs;
343 45622561 : enum move_pos ret = MOVE_POSSIBLE;
344 :
345 45622561 : if (flag_unswitch_loops
346 45622561 : && gimple_code (stmt) == GIMPLE_COND)
347 : {
348 : /* If we perform unswitching, force the operands of the invariant
349 : condition to be moved out of the loop. */
350 : return MOVE_POSSIBLE;
351 : }
352 :
353 45274202 : if (gimple_code (stmt) == GIMPLE_PHI
354 2593474 : && gimple_phi_num_args (stmt) <= 2
355 4836728 : && !virtual_operand_p (gimple_phi_result (stmt))
356 46717787 : && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
357 : return MOVE_POSSIBLE;
358 :
359 43830872 : if (gimple_get_lhs (stmt) == NULL_TREE)
360 : return MOVE_IMPOSSIBLE;
361 :
362 31007036 : if (gimple_vdef (stmt))
363 : return MOVE_IMPOSSIBLE;
364 :
365 13101150 : if (stmt_ends_bb_p (stmt)
366 11923255 : || gimple_has_volatile_ops (stmt)
367 13046889 : || gimple_has_side_effects (stmt)
368 26142151 : || stmt_could_throw_p (cfun, stmt))
369 458096 : return MOVE_IMPOSSIBLE;
370 :
371 12643054 : if (is_gimple_call (stmt))
372 : {
373 : /* While pure or const call is guaranteed to have no side effects, we
374 : cannot move it arbitrarily. Consider code like
375 :
376 : char *s = something ();
377 :
378 : while (1)
379 : {
380 : if (s)
381 : t = strlen (s);
382 : else
383 : t = 0;
384 : }
385 :
386 : Here the strlen call cannot be moved out of the loop, even though
387 : s is invariant. In addition to possibly creating a call with
388 : invalid arguments, moving out a function call that is not executed
389 : may cause performance regressions in case the call is costly and
390 : not executed at all. */
391 128285 : ret = MOVE_PRESERVE_EXECUTION;
392 128285 : lhs = gimple_call_lhs (stmt);
393 : }
394 12514769 : else if (is_gimple_assign (stmt))
395 11364625 : lhs = gimple_assign_lhs (stmt);
396 : else
397 : return MOVE_IMPOSSIBLE;
398 :
399 11492910 : if (TREE_CODE (lhs) == SSA_NAME
400 11492910 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
401 : return MOVE_IMPOSSIBLE;
402 :
403 11492702 : if (TREE_CODE (lhs) != SSA_NAME
404 11492702 : || gimple_could_trap_p (stmt))
405 2371563 : return MOVE_PRESERVE_EXECUTION;
406 :
407 9121139 : if (is_gimple_assign (stmt))
408 : {
409 9000193 : auto code = gimple_assign_rhs_code (stmt);
410 9000193 : tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
411 : /* For shifts and rotates and possibly out-of-bound shift operands
412 : we currently cannot rewrite them into something unconditionally
413 : well-defined. */
414 9000193 : if ((code == LSHIFT_EXPR
415 : || code == RSHIFT_EXPR
416 : || code == LROTATE_EXPR
417 9000193 : || code == RROTATE_EXPR)
418 9000193 : && (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
419 : /* We cannot use ranges at 'stmt' here. */
420 117586 : || wi::ltu_p (wi::to_wide (gimple_assign_rhs2 (stmt)),
421 8958832 : element_precision (type))))
422 158947 : ret = MOVE_PRESERVE_EXECUTION;
423 : }
424 :
425 : /* Non local loads in a transaction cannot be hoisted out. Well,
426 : unless the load happens on every path out of the loop, but we
427 : don't take this into account yet. */
428 9121139 : if (flag_tm
429 434 : && gimple_in_transaction (stmt)
430 9121242 : && gimple_assign_single_p (stmt))
431 : {
432 21 : tree rhs = gimple_assign_rhs1 (stmt);
433 21 : if (DECL_P (rhs) && is_global_var (rhs))
434 : {
435 18 : if (dump_file)
436 : {
437 2 : fprintf (dump_file, "Cannot hoist conditional load of ");
438 2 : print_generic_expr (dump_file, rhs, TDF_SLIM);
439 2 : fprintf (dump_file, " because it is in a transaction.\n");
440 : }
441 18 : return MOVE_IMPOSSIBLE;
442 : }
443 : }
444 :
445 : return ret;
446 : }
447 :
/* Wrapper around movement_possibility_1 that additionally refuses to
   speculate uses of maybe-undefined SSA names: executing such a use
   unconditionally could introduce undefined behavior, so downgrade to
   MOVE_PRESERVE_EXECUTION.  */

static enum move_pos
movement_possibility (gimple *stmt)
{
  enum move_pos pos = movement_possibility_1 (stmt);
  if (pos == MOVE_POSSIBLE)
    {
      use_operand_p use_p;
      ssa_op_iter ssa_iter;
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE)
	if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME
	    && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p)))
	  return MOVE_PRESERVE_EXECUTION;
    }
  return pos;
}
463 :
464 :
465 : /* Compare the profile count inequality of bb and loop's preheader, it is
466 : three-state as stated in profile-count.h, FALSE is returned if inequality
467 : cannot be decided. */
468 : bool
469 13777975 : bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
470 : {
471 13777975 : gcc_assert (bb && loop);
472 13777975 : return bb->count < loop_preheader_edge (loop)->src->count;
473 : }
474 :
475 : /* Check coldest loop between OUTERMOST_LOOP and LOOP by comparing profile
476 : count.
477 : It does three steps check:
478 : 1) Check whether CURR_BB is cold in it's own loop_father, if it is cold, just
479 : return NULL which means it should not be moved out at all;
480 : 2) CURR_BB is NOT cold, check if pre-computed COLDEST_LOOP is outside of
481 : OUTERMOST_LOOP, if it is inside of OUTERMOST_LOOP, return the COLDEST_LOOP;
482 : 3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
483 : hotter loop between OUTERMOST_LOOP and loop in pre-computed
484 : HOTTER_THAN_INNER_LOOP, return it's nested inner loop, otherwise return
485 : OUTERMOST_LOOP.
486 : At last, the coldest_loop is inside of OUTERMOST_LOOP, just return it as
487 : the hoist target. */
488 :
static class loop *
get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
		      basic_block curr_bb)
{
  gcc_assert (outermost_loop == loop
	      || flow_loop_nested_p (outermost_loop, loop));

  /* If bb_colder_than_loop_preheader returns false due to three-state
     comparision, OUTERMOST_LOOP is returned finally to preserve the behavior.
     Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP.  */
  if (curr_bb && bb_colder_than_loop_preheader (curr_bb, loop))
    return NULL;

  /* Pre-computed per-loop answer; only usable if it does not lie
     outside OUTERMOST_LOOP.  */
  class loop *coldest_loop = coldest_outermost_loop[loop->num];
  if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
    {
      class loop *hotter_loop = hotter_than_inner_loop[loop->num];
      if (!hotter_loop
	  || loop_depth (hotter_loop) < loop_depth (outermost_loop))
	return outermost_loop;

      /* hotter_loop is between OUTERMOST_LOOP and LOOP like:
	 [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
	 hotter_loop, second_coldest_loop, ..., loop]
	 return second_coldest_loop to be the hoist target.  */
      class loop *aloop;
      for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
	if (aloop == loop || flow_loop_nested_p (aloop, loop))
	  return aloop;
    }
  return coldest_loop;
}
521 :
522 : /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
523 : loop to that we could move the expression using DEF if it did not have
524 : other operands, i.e. the outermost loop enclosing LOOP in that the value
525 : of DEF is invariant. */
526 :
527 : static class loop *
528 19525092 : outermost_invariant_loop (tree def, class loop *loop)
529 : {
530 19525092 : gimple *def_stmt;
531 19525092 : basic_block def_bb;
532 19525092 : class loop *max_loop;
533 19525092 : struct lim_aux_data *lim_data;
534 :
535 19525092 : if (!def)
536 944489 : return superloop_at_depth (loop, 1);
537 :
538 18580603 : if (TREE_CODE (def) != SSA_NAME)
539 : {
540 3898966 : gcc_assert (is_gimple_min_invariant (def));
541 3898966 : return superloop_at_depth (loop, 1);
542 : }
543 :
544 14681637 : def_stmt = SSA_NAME_DEF_STMT (def);
545 14681637 : def_bb = gimple_bb (def_stmt);
546 14681637 : if (!def_bb)
547 72579 : return superloop_at_depth (loop, 1);
548 :
549 14609058 : max_loop = find_common_loop (loop, def_bb->loop_father);
550 :
551 14609058 : lim_data = get_lim_data (def_stmt);
552 14609058 : if (lim_data != NULL && lim_data->max_loop != NULL)
553 686925 : max_loop = find_common_loop (max_loop,
554 : loop_outer (lim_data->max_loop));
555 14609058 : if (max_loop == loop)
556 : return NULL;
557 1832403 : max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
558 :
559 1832403 : return max_loop;
560 : }
561 :
562 : /* DATA is a structure containing information associated with a statement
563 : inside LOOP. DEF is one of the operands of this statement.
564 :
565 : Find the outermost loop enclosing LOOP in that value of DEF is invariant
566 : and record this in DATA->max_loop field. If DEF itself is defined inside
567 : this loop as well (i.e. we need to hoist it out of the loop if we want
568 : to hoist the statement represented by DATA), record the statement in that
569 : DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
570 : add the cost of the computation of DEF to the DATA->cost.
571 :
572 : If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
573 :
static bool
add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
		bool add_cost)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = gimple_bb (def_stmt);
  class loop *max_loop;
  struct lim_aux_data *def_data;

  /* DEF without a definition place (default def) is invariant
     everywhere; nothing constrains DATA.  */
  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  /* Tighten DATA->max_loop if DEF is invariant in a smaller region.  */
  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  /* No aux data means DEF's statement is not itself a candidate for
     motion, so there is nothing to hoist along.  */
  def_data = get_lim_data (def_stmt);
  if (!def_data)
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;

  data->depends.safe_push (def_stmt);

  return true;
}
609 :
610 : /* Returns an estimate for a cost of statement STMT. The values here
611 : are just ad-hoc constants, similar to costs for inlining. */
612 :
613 : static unsigned
614 767049 : stmt_cost (gimple *stmt)
615 : {
616 : /* Always try to create possibilities for unswitching. */
617 767049 : if (gimple_code (stmt) == GIMPLE_COND
618 767049 : || gimple_code (stmt) == GIMPLE_PHI)
619 12760 : return LIM_EXPENSIVE;
620 :
621 : /* We should be hoisting calls if possible. */
622 754289 : if (is_gimple_call (stmt))
623 : {
624 827 : tree fndecl;
625 :
626 : /* Unless the call is a builtin_constant_p; this always folds to a
627 : constant, so moving it is useless. */
628 827 : fndecl = gimple_call_fndecl (stmt);
629 827 : if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
630 : return 0;
631 :
632 827 : return LIM_EXPENSIVE;
633 : }
634 :
635 : /* Hoisting memory references out should almost surely be a win. */
636 753462 : if (gimple_references_memory_p (stmt))
637 65155 : return LIM_EXPENSIVE;
638 :
639 688307 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
640 : return 1;
641 :
642 688307 : enum tree_code code = gimple_assign_rhs_code (stmt);
643 688307 : switch (code)
644 : {
645 117975 : case MULT_EXPR:
646 117975 : case WIDEN_MULT_EXPR:
647 117975 : case WIDEN_MULT_PLUS_EXPR:
648 117975 : case WIDEN_MULT_MINUS_EXPR:
649 117975 : case DOT_PROD_EXPR:
650 117975 : case TRUNC_DIV_EXPR:
651 117975 : case CEIL_DIV_EXPR:
652 117975 : case FLOOR_DIV_EXPR:
653 117975 : case ROUND_DIV_EXPR:
654 117975 : case EXACT_DIV_EXPR:
655 117975 : case CEIL_MOD_EXPR:
656 117975 : case FLOOR_MOD_EXPR:
657 117975 : case ROUND_MOD_EXPR:
658 117975 : case TRUNC_MOD_EXPR:
659 117975 : case RDIV_EXPR:
660 : /* Division and multiplication are usually expensive. */
661 117975 : return LIM_EXPENSIVE;
662 :
663 4361 : case LSHIFT_EXPR:
664 4361 : case RSHIFT_EXPR:
665 4361 : case WIDEN_LSHIFT_EXPR:
666 4361 : case LROTATE_EXPR:
667 4361 : case RROTATE_EXPR:
668 : /* Shifts and rotates are usually expensive. */
669 4361 : return LIM_EXPENSIVE;
670 :
671 461 : case COND_EXPR:
672 461 : case VEC_COND_EXPR:
673 : /* Conditionals are expensive. */
674 461 : return LIM_EXPENSIVE;
675 :
676 1328 : case CONSTRUCTOR:
677 : /* Make vector construction cost proportional to the number
678 : of elements. */
679 1328 : return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
680 :
681 : case SSA_NAME:
682 : case PAREN_EXPR:
683 : /* Whether or not something is wrapped inside a PAREN_EXPR
684 : should not change move cost. Nor should an intermediate
685 : unpropagated SSA name copy. */
686 : return 0;
687 :
688 526572 : default:
689 : /* Comparisons are usually expensive. */
690 526572 : if (TREE_CODE_CLASS (code) == tcc_comparison)
691 7515 : return LIM_EXPENSIVE;
692 : return 1;
693 : }
694 : }
695 :
696 : /* Finds the outermost loop between OUTER and LOOP in that the memory reference
697 : REF is independent. If REF is not independent in LOOP, NULL is returned
698 : instead. */
699 :
static class loop *
outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
{
  class loop *aloop;

  /* If REF is stored inside LOOP itself it cannot be hoisted at all.  */
  if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
    return NULL;

  /* Walk inwards from OUTER towards LOOP and return the first loop in
     which REF is neither stored nor RAW-dependent.  */
  for (aloop = outer;
       aloop != loop;
       aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
    if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
	&& ref_indep_loop_p (aloop, ref, lim_raw))
      return aloop;

  if (ref_indep_loop_p (loop, ref, lim_raw))
    return loop;
  else
    return NULL;
}
720 :
721 : /* If there is a simple load or store to a memory reference in STMT, returns
722 : the location of the memory reference, and sets IS_STORE according to whether
723 : it is a store or load. Otherwise, returns NULL. */
724 :
725 : static tree *
726 6900028 : simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
727 : {
728 6900028 : tree *lhs, *rhs;
729 :
730 : /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
731 6900028 : if (!gimple_assign_single_p (stmt))
732 : return NULL;
733 :
734 5654678 : lhs = gimple_assign_lhs_ptr (stmt);
735 5654678 : rhs = gimple_assign_rhs1_ptr (stmt);
736 :
737 8819896 : if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
738 : {
739 3165218 : *is_store = false;
740 3165218 : return rhs;
741 : }
742 2489460 : else if (gimple_vdef (stmt)
743 2489460 : && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
744 : {
745 2043535 : *is_store = true;
746 2043535 : return lhs;
747 : }
748 : else
749 445925 : return NULL;
750 : }
751 :
752 : /* From a controlling predicate in DOM determine the arguments from
753 : the PHI node PHI that are chosen if the predicate evaluates to
754 : true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
755 : they are non-NULL. Returns true if the arguments can be determined,
756 : else return false. */
757 :
758 : static bool
759 11454 : extract_true_false_args_from_phi (basic_block dom, gphi *phi,
760 : tree *true_arg_p, tree *false_arg_p)
761 : {
762 11454 : edge te, fe;
763 11454 : if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
764 : &te, &fe))
765 : return false;
766 :
767 10565 : if (true_arg_p)
768 915 : *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
769 10565 : if (false_arg_p)
770 915 : *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
771 :
772 : return true;
773 : }
774 :
775 : /* Determine the outermost loop to that it is possible to hoist a statement
776 : STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
777 : the outermost loop in that the value computed by STMT is invariant.
778 : If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
779 : we preserve the fact whether STMT is executed. It also fills other related
780 : information to LIM_DATA (STMT).
781 :
782 : The function returns false if STMT cannot be hoisted outside of the loop it
783 : is defined in, and true otherwise. */
784 :
static bool
determine_max_movement (gimple *stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (stmt);
  class loop *loop = bb->loop_father;
  class loop *level;
  struct lim_aux_data *lim_data = get_lim_data (stmt);
  tree val;
  ssa_op_iter iter;

  /* Statements that must preserve execution can only go as far as the
     region in which BB is always executed; others may leave any loop.  */
  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = get_coldest_out_loop (level, loop, bb);
  if (!lim_data->max_loop)
    return false;

  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
      unsigned total_cost = 0;
      struct lim_aux_data *def_data;

      /* We will end up promoting dependencies to be unconditionally
	 evaluated.  For this reason the PHI cost (and thus the
	 cost we remove from the loop by doing the invariant motion)
	 is that of the cheapest PHI argument dependency chain.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);

	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      /* Assign const 1 to constants.  */
	      min_cost = MIN (min_cost, 1);
	      total_cost += 1;
	      continue;
	    }
	  if (!add_dependency (val, lim_data, loop, false))
	    return false;

	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
	  if (gimple_bb (def_stmt)
	      && gimple_bb (def_stmt)->loop_father == loop)
	    {
	      def_data = get_lim_data (def_stmt);
	      if (def_data)
		{
		  min_cost = MIN (min_cost, def_data->cost);
		  total_cost += def_data->cost;
		}
	    }
	}

      min_cost = MIN (min_cost, total_cost);
      lim_data->cost += min_cost;

      if (gimple_phi_num_args (phi) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond;
	  /* The immediate dominator must end in a GIMPLE_COND for the
	     PHI to be rewritable into a conditional expression.  */
	  if (gsi_end_p (gsi_last_bb (dom)))
	    return false;
	  cond = gsi_stmt (gsi_last_bb (dom));
	  if (gimple_code (cond) != GIMPLE_COND)
	    return false;
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
	    return false;

	  /* Check if one of the depedent statement is a vector compare whether
	     the target supports it,  otherwise it's invalid to hoist it out of
	     the gcond it belonged to.  */
	  if (VECTOR_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond))))
	    {
	      tree type = TREE_TYPE (gimple_cond_lhs (cond));
	      auto code = gimple_cond_code (cond);
	      if (!target_supports_op_p (type, code, optab_vector))
		return false;
	    }

	  /* Fold in dependencies and cost of the condition.  */
	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
	    {
	      if (!add_dependency (val, lim_data, loop, false))
		return false;
	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	      if (def_data)
		lim_data->cost += def_data->cost;
	    }

	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative just claim we are not invariand for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
	      && !(min_cost != 0
		   && total_cost / min_cost <= 2))
	    return false;

	  /* Assume that the control-flow in the loop will vanish.
	     ??? We should verify this and not artificially increase
	     the cost if that is not the case.  */
	  lim_data->cost += stmt_cost (stmt);
	}

      return true;
    }

  /* A stmt that receives abnormal edges cannot be hoisted.  */
  if (is_a <gcall *> (stmt)
      && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE))
    return false;

  /* All scalar operands must be invariant in (some superloop of)
     LOOP; their costs accumulate into this statement's cost.  */
  FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
    if (!add_dependency (val, lim_data, loop, true))
      return false;

  if (gimple_vuse (stmt))
    {
      im_mem_ref *ref
	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
      if (ref
	  && MEM_ANALYZABLE (ref))
	{
	  /* For an analyzable load, further restrict the hoist target
	     to a loop in which the reference is independent.  */
	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
						     loop, ref);
	  if (!lim_data->max_loop)
	    return false;
	}
      else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
	return false;
    }

  lim_data->cost += stmt_cost (stmt);

  return true;
}
928 :
929 : /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
930 : and that one of the operands of this statement is computed by STMT.
931 : Ensure that STMT (together with all the statements that define its
932 : operands) is hoisted at least out of the loop LEVEL. */
933 :
934 : static void
935 688449 : set_level (gimple *stmt, class loop *orig_loop, class loop *level)
936 : {
937 688449 : class loop *stmt_loop = gimple_bb (stmt)->loop_father;
938 688449 : struct lim_aux_data *lim_data;
939 688449 : gimple *dep_stmt;
940 688449 : unsigned i;
941 :
942 688449 : stmt_loop = find_common_loop (orig_loop, stmt_loop);
943 688449 : lim_data = get_lim_data (stmt);
944 688449 : if (lim_data != NULL && lim_data->tgt_loop != NULL)
945 170766 : stmt_loop = find_common_loop (stmt_loop,
946 : loop_outer (lim_data->tgt_loop));
947 688449 : if (flow_loop_nested_p (stmt_loop, level))
948 688449 : return;
949 :
950 456228 : gcc_assert (level == lim_data->max_loop
951 : || flow_loop_nested_p (lim_data->max_loop, level));
952 :
953 456228 : lim_data->tgt_loop = level;
954 788111 : FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
955 331883 : set_level (dep_stmt, orig_loop, level);
956 : }
957 :
958 : /* Determines an outermost loop from that we want to hoist the statement STMT.
959 : For now we chose the outermost possible loop. TODO -- use profiling
960 : information to set it more sanely. */
961 :
962 : static void
963 293944 : set_profitable_level (gimple *stmt)
964 : {
965 293944 : set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
966 293944 : }
967 :
968 : /* Returns true if STMT is a call that has side effects. */
969 :
970 : static bool
971 69589430 : nonpure_call_p (gimple *stmt)
972 : {
973 0 : if (gimple_code (stmt) != GIMPLE_CALL)
974 : return false;
975 :
976 2250138 : return gimple_has_side_effects (stmt);
977 : }
978 :
979 : /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
980 :
981 : static gimple *
982 35 : rewrite_reciprocal (gimple_stmt_iterator *bsi)
983 : {
984 35 : gassign *stmt, *stmt1, *stmt2;
985 35 : tree name, lhs, type;
986 35 : tree real_one;
987 35 : gimple_stmt_iterator gsi;
988 :
989 35 : stmt = as_a <gassign *> (gsi_stmt (*bsi));
990 35 : lhs = gimple_assign_lhs (stmt);
991 35 : type = TREE_TYPE (lhs);
992 :
993 35 : real_one = build_one_cst (type);
994 :
995 35 : name = make_temp_ssa_name (type, NULL, "reciptmp");
996 70 : stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
997 : gimple_assign_rhs2 (stmt));
998 35 : stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
999 : gimple_assign_rhs1 (stmt));
1000 :
1001 : /* Replace division stmt with reciprocal and multiply stmts.
1002 : The multiply stmt is not invariant, so update iterator
1003 : and avoid rescanning. */
1004 35 : gsi = *bsi;
1005 35 : gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1006 35 : gsi_replace (&gsi, stmt2, true);
1007 :
1008 : /* Continue processing with invariant reciprocal statement. */
1009 35 : return stmt1;
1010 : }
1011 :
1012 : /* Check if the pattern at *BSI is a bittest of the form
1013 : (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
1014 :
1015 : static gimple *
1016 9023 : rewrite_bittest (gimple_stmt_iterator *bsi)
1017 : {
1018 9023 : gassign *stmt;
1019 9023 : gimple *stmt1;
1020 9023 : gassign *stmt2;
1021 9023 : gimple *use_stmt;
1022 9023 : gcond *cond_stmt;
1023 9023 : tree lhs, name, t, a, b;
1024 9023 : use_operand_p use;
1025 :
1026 9023 : stmt = as_a <gassign *> (gsi_stmt (*bsi));
1027 9023 : lhs = gimple_assign_lhs (stmt);
1028 :
1029 : /* Verify that the single use of lhs is a comparison against zero. */
1030 9023 : if (TREE_CODE (lhs) != SSA_NAME
1031 9023 : || !single_imm_use (lhs, &use, &use_stmt))
1032 455 : return stmt;
1033 14019 : cond_stmt = dyn_cast <gcond *> (use_stmt);
1034 5025 : if (!cond_stmt)
1035 : return stmt;
1036 5025 : if (gimple_cond_lhs (cond_stmt) != lhs
1037 4849 : || (gimple_cond_code (cond_stmt) != NE_EXPR
1038 2221 : && gimple_cond_code (cond_stmt) != EQ_EXPR)
1039 9874 : || !integer_zerop (gimple_cond_rhs (cond_stmt)))
1040 206 : return stmt;
1041 :
1042 : /* Get at the operands of the shift. The rhs is TMP1 & 1. */
1043 4819 : stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
1044 4819 : if (gimple_code (stmt1) != GIMPLE_ASSIGN)
1045 : return stmt;
1046 :
1047 : /* There is a conversion in between possibly inserted by fold. */
1048 4748 : if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
1049 : {
1050 256 : t = gimple_assign_rhs1 (stmt1);
1051 256 : if (TREE_CODE (t) != SSA_NAME
1052 256 : || !has_single_use (t))
1053 : return stmt;
1054 180 : stmt1 = SSA_NAME_DEF_STMT (t);
1055 180 : if (gimple_code (stmt1) != GIMPLE_ASSIGN)
1056 : return stmt;
1057 : }
1058 :
1059 : /* Verify that B is loop invariant but A is not. Verify that with
1060 : all the stmt walking we are still in the same loop. */
1061 4630 : if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
1062 10984 : || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
1063 : return stmt;
1064 :
1065 3177 : a = gimple_assign_rhs1 (stmt1);
1066 3177 : b = gimple_assign_rhs2 (stmt1);
1067 :
1068 3177 : if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
1069 3206 : && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
1070 : {
1071 29 : gimple_stmt_iterator rsi;
1072 :
1073 : /* 1 << B */
1074 29 : t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
1075 : build_int_cst (TREE_TYPE (a), 1), b);
1076 29 : name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1077 29 : stmt1 = gimple_build_assign (name, t);
1078 :
1079 : /* A & (1 << B) */
1080 29 : t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
1081 29 : name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1082 29 : stmt2 = gimple_build_assign (name, t);
1083 :
1084 : /* Replace the SSA_NAME we compare against zero. Adjust
1085 : the type of zero accordingly. */
1086 29 : SET_USE (use, name);
1087 29 : gimple_cond_set_rhs (cond_stmt,
1088 29 : build_int_cst_type (TREE_TYPE (name),
1089 : 0));
1090 :
1091 : /* Don't use gsi_replace here, none of the new assignments sets
1092 : the variable originally set in stmt. Move bsi to stmt1, and
1093 : then remove the original stmt, so that we get a chance to
1094 : retain debug info for it. */
1095 29 : rsi = *bsi;
1096 29 : gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1097 29 : gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1098 29 : gimple *to_release = gsi_stmt (rsi);
1099 29 : gsi_remove (&rsi, true);
1100 29 : release_defs (to_release);
1101 :
1102 29 : return stmt1;
1103 : }
1104 :
1105 : return stmt;
1106 : }
1107 :
1108 : /* Determine the outermost loops in that statements in basic block BB are
1109 : invariant, and record them to the LIM_DATA associated with the
1110 : statements. */
1111 :
1112 : static void
1113 14161142 : compute_invariantness (basic_block bb)
1114 : {
1115 14161142 : enum move_pos pos;
1116 14161142 : gimple_stmt_iterator bsi;
1117 14161142 : gimple *stmt;
1118 14161142 : bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1119 14161142 : class loop *outermost = ALWAYS_EXECUTED_IN (bb);
1120 14161142 : struct lim_aux_data *lim_data;
1121 :
1122 14161142 : if (!loop_outer (bb->loop_father))
1123 8157756 : return;
1124 :
1125 6003386 : if (dump_file && (dump_flags & TDF_DETAILS))
1126 435 : fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1127 435 : bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1128 :
1129 : /* Look at PHI nodes, but only if there is at most two.
1130 : ??? We could relax this further by post-processing the inserted
1131 : code and transforming adjacent cond-exprs with the same predicate
1132 : to control flow again. */
1133 6003386 : bsi = gsi_start_phis (bb);
1134 6003386 : if (!gsi_end_p (bsi)
1135 6003386 : && ((gsi_next (&bsi), gsi_end_p (bsi))
1136 1357614 : || (gsi_next (&bsi), gsi_end_p (bsi))))
1137 4302797 : for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1138 : {
1139 2593474 : stmt = gsi_stmt (bsi);
1140 :
1141 2593474 : pos = movement_possibility (stmt);
1142 2593474 : if (pos == MOVE_IMPOSSIBLE)
1143 1150144 : continue;
1144 :
1145 1443330 : lim_data = get_lim_data (stmt);
1146 1443330 : if (! lim_data)
1147 1443330 : lim_data = init_lim_data (stmt);
1148 1443330 : lim_data->always_executed_in = outermost;
1149 :
1150 1443330 : if (!determine_max_movement (stmt, false))
1151 : {
1152 1442241 : lim_data->max_loop = NULL;
1153 1442241 : continue;
1154 : }
1155 :
1156 1089 : if (dump_file && (dump_flags & TDF_DETAILS))
1157 : {
1158 2 : print_gimple_stmt (dump_file, stmt, 2);
1159 2 : fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1160 2 : loop_depth (lim_data->max_loop),
1161 : lim_data->cost);
1162 : }
1163 :
1164 1089 : if (lim_data->cost >= LIM_EXPENSIVE)
1165 918 : set_profitable_level (stmt);
1166 : }
1167 :
1168 55035859 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1169 : {
1170 43029087 : stmt = gsi_stmt (bsi);
1171 :
1172 43029087 : pos = movement_possibility (stmt);
1173 43029087 : if (pos == MOVE_IMPOSSIBLE)
1174 : {
1175 32363368 : if (nonpure_call_p (stmt))
1176 : {
1177 : maybe_never = true;
1178 : outermost = NULL;
1179 : }
1180 : /* Make sure to note always_executed_in for stores to make
1181 : store-motion work. */
1182 30016756 : else if (stmt_makes_single_store (stmt))
1183 : {
1184 2477996 : struct lim_aux_data *lim_data = get_lim_data (stmt);
1185 2477996 : if (! lim_data)
1186 0 : lim_data = init_lim_data (stmt);
1187 2477996 : lim_data->always_executed_in = outermost;
1188 : }
1189 31188044 : continue;
1190 31188044 : }
1191 :
1192 11841043 : if (is_gimple_assign (stmt)
1193 11841043 : && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1194 : == GIMPLE_BINARY_RHS))
1195 : {
1196 5705201 : tree op0 = gimple_assign_rhs1 (stmt);
1197 5705201 : tree op1 = gimple_assign_rhs2 (stmt);
1198 11410402 : class loop *ol1 = outermost_invariant_loop (op1,
1199 : loop_containing_stmt (stmt));
1200 :
1201 : /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1202 : to be hoisted out of loop, saving expensive divide. */
1203 5705201 : if (pos == MOVE_POSSIBLE
1204 5138219 : && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1205 940 : && flag_unsafe_math_optimizations
1206 380 : && !flag_trapping_math
1207 380 : && ol1 != NULL
1208 5705239 : && outermost_invariant_loop (op0, ol1) == NULL)
1209 35 : stmt = rewrite_reciprocal (&bsi);
1210 :
1211 : /* If the shift count is invariant, convert (A >> B) & 1 to
1212 : A & (1 << B) allowing the bit mask to be hoisted out of the loop
1213 : saving an expensive shift. */
1214 5705201 : if (pos == MOVE_POSSIBLE
1215 5138219 : && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1216 185480 : && integer_onep (op1)
1217 18478 : && TREE_CODE (op0) == SSA_NAME
1218 5723679 : && has_single_use (op0))
1219 9023 : stmt = rewrite_bittest (&bsi);
1220 : }
1221 :
1222 11841043 : lim_data = get_lim_data (stmt);
1223 11841043 : if (! lim_data)
1224 9042605 : lim_data = init_lim_data (stmt);
1225 11841043 : lim_data->always_executed_in = outermost;
1226 :
1227 11841043 : if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1228 1160790 : continue;
1229 :
1230 10680253 : if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1231 : {
1232 9914119 : lim_data->max_loop = NULL;
1233 9914119 : continue;
1234 : }
1235 :
1236 766134 : if (dump_file && (dump_flags & TDF_DETAILS))
1237 : {
1238 280 : print_gimple_stmt (dump_file, stmt, 2);
1239 280 : fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1240 280 : loop_depth (lim_data->max_loop),
1241 : lim_data->cost);
1242 : }
1243 :
1244 766134 : if (lim_data->cost >= LIM_EXPENSIVE)
1245 293026 : set_profitable_level (stmt);
1246 : /* When we run before PRE and PRE is active hoist all expressions
1247 : to the always executed loop since PRE would do so anyway
1248 : and we can preserve range info while PRE cannot. */
1249 473108 : else if (flag_tree_pre && !in_loop_pipeline
1250 101439 : && outermost)
1251 : {
1252 59713 : class loop *mloop = lim_data->max_loop;
1253 179139 : if (loop_depth (outermost) > loop_depth (mloop))
1254 : {
1255 8481 : mloop = outermost;
1256 8481 : if (dump_file && (dump_flags & TDF_DETAILS))
1257 1 : fprintf (dump_file, " constraining to loop depth %d\n\n\n",
1258 : loop_depth (mloop));
1259 : }
1260 59713 : set_level (stmt, bb->loop_father, mloop);
1261 : }
1262 : }
1263 : }
1264 :
1265 : /* Hoist the statements in basic block BB out of the loops prescribed by
1266 : data stored in LIM_DATA structures associated with each statement. Callback
1267 : for walk_dominator_tree. */
1268 :
1269 : unsigned int
1270 14206319 : move_computations_worker (basic_block bb)
1271 : {
1272 14206319 : class loop *level;
1273 14206319 : unsigned cost = 0;
1274 14206319 : struct lim_aux_data *lim_data;
1275 14206319 : unsigned int todo = 0;
1276 :
1277 14206319 : if (!loop_outer (bb->loop_father))
1278 : return todo;
1279 :
1280 10483116 : for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1281 : {
1282 4473522 : gassign *new_stmt;
1283 4473522 : gphi *stmt = bsi.phi ();
1284 :
1285 4473522 : lim_data = get_lim_data (stmt);
1286 4473522 : if (lim_data == NULL)
1287 : {
1288 3030192 : gsi_next (&bsi);
1289 3030192 : continue;
1290 : }
1291 :
1292 1443330 : cost = lim_data->cost;
1293 1443330 : level = lim_data->tgt_loop;
1294 1443330 : clear_lim_data (stmt);
1295 :
1296 1443330 : if (!level)
1297 : {
1298 1442410 : gsi_next (&bsi);
1299 1442410 : continue;
1300 : }
1301 :
1302 920 : if (dump_file && (dump_flags & TDF_DETAILS))
1303 : {
1304 2 : fprintf (dump_file, "Moving PHI node\n");
1305 2 : print_gimple_stmt (dump_file, stmt, 0);
1306 2 : fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1307 : cost, level->num);
1308 : }
1309 :
1310 920 : if (gimple_phi_num_args (stmt) == 1)
1311 : {
1312 5 : tree arg = PHI_ARG_DEF (stmt, 0);
1313 5 : new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1314 5 : TREE_CODE (arg), arg);
1315 : }
1316 : else
1317 : {
1318 915 : basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1319 915 : gimple *cond = gsi_stmt (gsi_last_bb (dom));
1320 915 : tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1321 : /* Get the PHI arguments corresponding to the true and false
1322 : edges of COND. */
1323 915 : extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1324 915 : gcc_assert (arg0 && arg1);
1325 : /* For `bool_val != 0`, reuse bool_val. */
1326 915 : if (gimple_cond_code (cond) == NE_EXPR
1327 536 : && integer_zerop (gimple_cond_rhs (cond))
1328 1377 : && types_compatible_p (TREE_TYPE (gimple_cond_lhs (cond)),
1329 : boolean_type_node))
1330 : {
1331 53 : t = gimple_cond_lhs (cond);
1332 : }
1333 : else
1334 : {
1335 862 : t = make_ssa_name (boolean_type_node);
1336 862 : new_stmt = gimple_build_assign (t, gimple_cond_code (cond),
1337 : gimple_cond_lhs (cond),
1338 : gimple_cond_rhs (cond));
1339 862 : gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1340 : }
1341 915 : new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1342 : COND_EXPR, t, arg0, arg1);
1343 915 : todo |= TODO_cleanup_cfg;
1344 : }
1345 920 : if (!ALWAYS_EXECUTED_IN (bb)
1346 920 : || (ALWAYS_EXECUTED_IN (bb) != level
1347 90 : && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
1348 425 : reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
1349 920 : gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1350 920 : remove_phi_node (&bsi, false);
1351 : }
1352 :
1353 55113532 : for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1354 : {
1355 43094344 : edge e;
1356 :
1357 43094344 : gimple *stmt = gsi_stmt (bsi);
1358 :
1359 43094344 : lim_data = get_lim_data (stmt);
1360 43094344 : if (lim_data == NULL)
1361 : {
1362 27109309 : gsi_next (&bsi);
1363 27109309 : continue;
1364 : }
1365 :
1366 15985035 : cost = lim_data->cost;
1367 15985035 : level = lim_data->tgt_loop;
1368 15985035 : clear_lim_data (stmt);
1369 :
1370 15985035 : if (!level)
1371 : {
1372 15490256 : gsi_next (&bsi);
1373 15490256 : continue;
1374 : }
1375 :
1376 : /* We do not really want to move conditionals out of the loop; we just
1377 : placed it here to force its operands to be moved if necessary. */
1378 494779 : if (gimple_code (stmt) == GIMPLE_COND)
1379 : {
1380 11845 : gsi_next (&bsi);
1381 11845 : continue;
1382 : }
1383 :
1384 482934 : if (dump_file && (dump_flags & TDF_DETAILS))
1385 : {
1386 350 : fprintf (dump_file, "Moving statement ");
1387 350 : print_gimple_stmt (dump_file, stmt, 0);
1388 350 : fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1389 : cost, level->num);
1390 : }
1391 :
1392 482934 : e = loop_preheader_edge (level);
1393 965868 : gcc_assert (!gimple_vdef (stmt));
1394 965868 : if (gimple_vuse (stmt))
1395 : {
1396 : /* The new VUSE is the one from the virtual PHI in the loop
1397 : header or the one already present. */
1398 83159 : gphi_iterator gsi2;
1399 83159 : for (gsi2 = gsi_start_phis (e->dest);
1400 194396 : !gsi_end_p (gsi2); gsi_next (&gsi2))
1401 : {
1402 177235 : gphi *phi = gsi2.phi ();
1403 354470 : if (virtual_operand_p (gimple_phi_result (phi)))
1404 : {
1405 65998 : SET_USE (gimple_vuse_op (stmt),
1406 : PHI_ARG_DEF_FROM_EDGE (phi, e));
1407 65998 : break;
1408 : }
1409 : }
1410 : }
1411 482934 : gsi_remove (&bsi, false);
1412 482934 : if (gimple_has_lhs (stmt)
1413 482934 : && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1414 441182 : && (!ALWAYS_EXECUTED_IN (bb)
1415 374839 : || !(ALWAYS_EXECUTED_IN (bb) == level
1416 97867 : || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1417 203992 : reset_flow_sensitive_info (gimple_get_lhs (stmt));
1418 : /* In case this is a stmt that is not unconditionally executed
1419 : when the target loop header is executed and the stmt may
1420 : invoke undefined integer or pointer overflow rewrite it to
1421 : unsigned arithmetic. */
1422 482934 : if (gimple_needing_rewrite_undefined (stmt)
1423 482934 : && (!ALWAYS_EXECUTED_IN (bb)
1424 80793 : || !(ALWAYS_EXECUTED_IN (bb) == level
1425 18974 : || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1426 20257 : gsi_insert_seq_on_edge (e, rewrite_to_defined_unconditional (stmt));
1427 : else
1428 462677 : gsi_insert_on_edge (e, stmt);
1429 : }
1430 :
1431 6009594 : return todo;
1432 : }
1433 :
1434 : /* Checks whether the statement defining variable *INDEX can be hoisted
1435 : out of the loop passed in DATA. Callback for for_each_index. */
1436 :
1437 : static bool
1438 1776144 : may_move_till (tree ref, tree *index, void *data)
1439 : {
1440 1776144 : class loop *loop = (class loop *) data, *max_loop;
1441 :
1442 : /* If REF is an array reference, check also that the step and the lower
1443 : bound is invariant in LOOP. */
1444 1776144 : if (TREE_CODE (ref) == ARRAY_REF)
1445 : {
1446 473683 : tree step = TREE_OPERAND (ref, 3);
1447 473683 : tree lbound = TREE_OPERAND (ref, 2);
1448 :
1449 473683 : max_loop = outermost_invariant_loop (step, loop);
1450 473683 : if (!max_loop)
1451 : return false;
1452 :
1453 473683 : max_loop = outermost_invariant_loop (lbound, loop);
1454 473683 : if (!max_loop)
1455 : return false;
1456 : }
1457 :
1458 1776144 : max_loop = outermost_invariant_loop (*index, loop);
1459 1776144 : if (!max_loop)
1460 : return false;
1461 :
1462 : return true;
1463 : }
1464 :
1465 : /* If OP is SSA NAME, force the statement that defines it to be
1466 : moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
1467 :
1468 : static void
1469 38010 : force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1470 : {
1471 38010 : gimple *stmt;
1472 :
1473 38010 : if (!op
1474 38010 : || is_gimple_min_invariant (op))
1475 33494 : return;
1476 :
1477 4516 : gcc_assert (TREE_CODE (op) == SSA_NAME);
1478 :
1479 4516 : stmt = SSA_NAME_DEF_STMT (op);
1480 4516 : if (gimple_nop_p (stmt))
1481 : return;
1482 :
1483 2909 : set_level (stmt, orig_loop, loop);
1484 : }
1485 :
1486 : /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1487 : the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1488 : for_each_index. */
1489 :
1490 : struct fmt_data
1491 : {
1492 : class loop *loop;
1493 : class loop *orig_loop;
1494 : };
1495 :
1496 : static bool
1497 20134 : force_move_till (tree ref, tree *index, void *data)
1498 : {
1499 20134 : struct fmt_data *fmt_data = (struct fmt_data *) data;
1500 :
1501 20134 : if (TREE_CODE (ref) == ARRAY_REF)
1502 : {
1503 8938 : tree step = TREE_OPERAND (ref, 3);
1504 8938 : tree lbound = TREE_OPERAND (ref, 2);
1505 :
1506 8938 : force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1507 8938 : force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1508 : }
1509 :
1510 20134 : force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1511 :
1512 20134 : return true;
1513 : }
1514 :
1515 : /* A function to free the mem_ref object OBJ. */
1516 :
1517 : static void
1518 5204315 : memref_free (class im_mem_ref *mem)
1519 : {
1520 0 : mem->accesses_in_loop.release ();
1521 0 : }
1522 :
1523 : /* Allocates and returns a memory reference description for MEM whose hash
1524 : value is HASH and id is ID. */
1525 :
1526 : static im_mem_ref *
1527 5204315 : mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1528 : {
1529 5204315 : im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1530 5204315 : if (mem)
1531 4276876 : ref->mem = *mem;
1532 : else
1533 927439 : ao_ref_init (&ref->mem, error_mark_node);
1534 5204315 : ref->id = id;
1535 5204315 : ref->ref_canonical = false;
1536 5204315 : ref->ref_decomposed = false;
1537 5204315 : ref->hash = hash;
1538 5204315 : ref->stored = NULL;
1539 5204315 : ref->loaded = NULL;
1540 5204315 : bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1541 5204315 : ref->accesses_in_loop.create (1);
1542 :
1543 5204315 : return ref;
1544 : }
1545 :
1546 : /* Records memory reference location *LOC in LOOP to the memory reference
1547 : description REF. The reference occurs in statement STMT. */
1548 :
1549 : static void
1550 5654678 : record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1551 : {
1552 5654678 : mem_ref_loc aref;
1553 5654678 : aref.stmt = stmt;
1554 5654678 : aref.ref = loc;
1555 0 : ref->accesses_in_loop.safe_push (aref);
1556 0 : }
1557 :
1558 : /* Set the LOOP bit in REF stored bitmap and allocate that if
1559 : necessary. Return whether a bit was changed. */
1560 :
1561 : static bool
1562 4388941 : set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1563 : {
1564 4388941 : if (!ref->stored)
1565 2491831 : ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1566 4388941 : return bitmap_set_bit (ref->stored, loop->num);
1567 : }
1568 :
1569 : /* Marks reference REF as stored in LOOP. */
1570 :
1571 : static void
1572 3654559 : mark_ref_stored (im_mem_ref *ref, class loop *loop)
1573 : {
1574 3654559 : while (loop != current_loops->tree_root
1575 7046650 : && set_ref_stored_in_loop (ref, loop))
1576 3392091 : loop = loop_outer (loop);
1577 3654559 : }
1578 :
1579 : /* Set the LOOP bit in REF loaded bitmap and allocate that if
1580 : necessary. Return whether a bit was changed. */
1581 :
1582 : static bool
1583 6111144 : set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1584 : {
1585 6111144 : if (!ref->loaded)
1586 3344703 : ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1587 6111144 : return bitmap_set_bit (ref->loaded, loop->num);
1588 : }
1589 :
1590 : /* Marks reference REF as loaded in LOOP. */
1591 :
1592 : static void
1593 4856493 : mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1594 : {
1595 4856493 : while (loop != current_loops->tree_root
1596 9677323 : && set_ref_loaded_in_loop (ref, loop))
1597 4820830 : loop = loop_outer (loop);
1598 4856493 : }
1599 :
1600 : /* Gathers memory references in statement STMT in LOOP, storing the
1601 : information about them in the memory_accesses structure. Marks
1602 : the vops accessed through unrecognized statements there as
1603 : well. */
1604 :
1605 : static void
1606 43029023 : gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1607 : {
1608 43029023 : tree *mem = NULL;
1609 43029023 : hashval_t hash;
1610 43029023 : im_mem_ref **slot;
1611 43029023 : im_mem_ref *ref;
1612 43029023 : bool is_stored;
1613 43029023 : unsigned id;
1614 :
1615 58646706 : if (!gimple_vuse (stmt))
1616 : return;
1617 :
1618 6900028 : mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1619 6900028 : if (!mem && is_gimple_assign (stmt))
1620 : {
1621 : /* For aggregate copies record distinct references but use them
1622 : only for disambiguation purposes. */
1623 445925 : id = memory_accesses.refs_list.length ();
1624 445925 : ref = mem_ref_alloc (NULL, 0, id);
1625 445925 : memory_accesses.refs_list.safe_push (ref);
1626 445925 : if (dump_file && (dump_flags & TDF_DETAILS))
1627 : {
1628 24 : fprintf (dump_file, "Unhandled memory reference %u: ", id);
1629 24 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1630 : }
1631 445925 : record_mem_ref_loc (ref, stmt, mem);
1632 891850 : is_stored = gimple_vdef (stmt);
1633 : }
1634 6454103 : else if (!mem)
1635 : {
1636 : /* We use the shared mem_ref for all unanalyzable refs. */
1637 1245350 : id = UNANALYZABLE_MEM_ID;
1638 1245350 : ref = memory_accesses.refs_list[id];
1639 1245350 : if (dump_file && (dump_flags & TDF_DETAILS))
1640 : {
1641 9 : fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1642 9 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1643 : }
1644 2490700 : is_stored = gimple_vdef (stmt);
1645 : }
1646 : else
1647 : {
1648 : /* We are looking for equal refs that might differ in structure
1649 : such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1650 : make sure we can canonicalize the ref in the hashtable if
1651 : non-operand_equal_p refs are found. For the lookup we mark
1652 : the case we want strict equality with aor.max_size == -1. */
1653 5208753 : ao_ref aor;
1654 5208753 : ao_ref_init (&aor, *mem);
1655 5208753 : ao_ref_base (&aor);
1656 5208753 : ao_ref_alias_set (&aor);
1657 5208753 : HOST_WIDE_INT offset, size, max_size;
1658 5208753 : poly_int64 saved_maxsize = aor.max_size, mem_off;
1659 5208753 : tree mem_base;
1660 5208753 : bool ref_decomposed;
1661 5208753 : if (aor.max_size_known_p ()
1662 5031462 : && aor.offset.is_constant (&offset)
1663 5031462 : && aor.size.is_constant (&size)
1664 5031462 : && aor.max_size.is_constant (&max_size)
1665 5031462 : && size == max_size
1666 4428192 : && (size % BITS_PER_UNIT) == 0
1667 : /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1668 : size. Make sure this is consistent with the extraction. */
1669 4417549 : && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1670 4417549 : && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1671 : aor.size)
1672 9626210 : && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1673 : {
1674 4416405 : ref_decomposed = true;
1675 4416405 : tree base = ao_ref_base (&aor);
1676 4416405 : poly_int64 moffset;
1677 4416405 : HOST_WIDE_INT mcoffset;
1678 4416405 : if (TREE_CODE (base) == MEM_REF
1679 1988051 : && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
1680 4416405 : && moffset.is_constant (&mcoffset))
1681 : {
1682 1988051 : hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
1683 1988051 : hash = iterative_hash_host_wide_int (mcoffset, hash);
1684 : }
1685 : else
1686 : {
1687 2428354 : hash = iterative_hash_expr (base, 0);
1688 2428354 : hash = iterative_hash_host_wide_int (offset, hash);
1689 : }
1690 4416405 : hash = iterative_hash_host_wide_int (size, hash);
1691 : }
1692 : else
1693 : {
1694 792348 : ref_decomposed = false;
1695 792348 : hash = iterative_hash_expr (aor.ref, 0);
1696 792348 : aor.max_size = -1;
1697 : }
1698 5208753 : slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1699 5208753 : aor.max_size = saved_maxsize;
1700 5208753 : if (*slot)
1701 : {
1702 931877 : if (!(*slot)->ref_canonical
1703 931877 : && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1704 : {
1705 : /* If we didn't yet canonicalize the hashtable ref (which
1706 : we'll end up using for code insertion) and hit a second
1707 : equal ref that is not structurally equivalent create
1708 : a canonical ref which is a bare MEM_REF. */
1709 49871 : if (TREE_CODE (*mem) == MEM_REF
1710 49871 : || TREE_CODE (*mem) == TARGET_MEM_REF)
1711 : {
1712 14133 : (*slot)->mem.ref = *mem;
1713 14133 : (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1714 : }
1715 : else
1716 : {
1717 35738 : tree ref_alias_type = reference_alias_ptr_type (*mem);
1718 35738 : unsigned int ref_align = get_object_alignment (*mem);
1719 35738 : tree ref_type = TREE_TYPE (*mem);
1720 35738 : tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1721 : unshare_expr (mem_base));
1722 35738 : if (TYPE_ALIGN (ref_type) != ref_align)
1723 8951 : ref_type = build_aligned_type (ref_type, ref_align);
1724 35738 : tree new_ref
1725 35738 : = fold_build2 (MEM_REF, ref_type, tmp,
1726 : build_int_cst (ref_alias_type, mem_off));
1727 35738 : if ((*slot)->mem.volatile_p)
1728 22 : TREE_THIS_VOLATILE (new_ref) = 1;
1729 35738 : (*slot)->mem.ref = new_ref;
1730 : /* Make sure the recorded base and offset are consistent
1731 : with the newly built ref. */
1732 35738 : if (TREE_CODE (TREE_OPERAND (new_ref, 0)) == ADDR_EXPR)
1733 : ;
1734 : else
1735 : {
1736 20449 : (*slot)->mem.base = new_ref;
1737 20449 : (*slot)->mem.offset = 0;
1738 : }
1739 35738 : gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1740 : && is_gimple_mem_ref_addr
1741 : (TREE_OPERAND ((*slot)->mem.ref,
1742 : 0)));
1743 35738 : (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1744 : }
1745 49871 : (*slot)->ref_canonical = true;
1746 : }
1747 931877 : ref = *slot;
1748 931877 : id = ref->id;
1749 : }
1750 : else
1751 : {
1752 4276876 : id = memory_accesses.refs_list.length ();
1753 4276876 : ref = mem_ref_alloc (&aor, hash, id);
1754 4276876 : ref->ref_decomposed = ref_decomposed;
1755 4276876 : memory_accesses.refs_list.safe_push (ref);
1756 4276876 : *slot = ref;
1757 :
1758 4276876 : if (dump_file && (dump_flags & TDF_DETAILS))
1759 : {
1760 498 : fprintf (dump_file, "Memory reference %u: ", id);
1761 498 : print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1762 498 : fprintf (dump_file, "\n");
1763 : }
1764 : }
1765 :
1766 5208753 : record_mem_ref_loc (ref, stmt, mem);
1767 : }
1768 6900028 : if (is_stored)
1769 : {
1770 3654559 : bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1771 3654559 : mark_ref_stored (ref, loop);
1772 : }
1773 : /* A not simple memory op is also a read when it is a write. */
1774 6900028 : if (!is_stored || id == UNANALYZABLE_MEM_ID
1775 2489460 : || ref->mem.ref == error_mark_node)
1776 : {
1777 4856493 : bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1778 4856493 : mark_ref_loaded (ref, loop);
1779 : }
1780 6900028 : init_lim_data (stmt)->ref = ref->id;
1781 6900028 : return;
1782 : }
1783 :
1784 : static unsigned *bb_loop_postorder;
1785 :
1786 : /* qsort sort function to sort blocks after their loop fathers postorder. */
1787 :
1788                 : static int
1789       156706530 : sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1790                 :                              void *bb_loop_postorder_)
1791                 : {
1792       156706530 :   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1793       156706530 :   basic_block bb1 = *(const basic_block *)bb1_;
1794       156706530 :   basic_block bb2 = *(const basic_block *)bb2_;
1795       156706530 :   class loop *loop1 = bb1->loop_father;
1796       156706530 :   class loop *loop2 = bb2->loop_father;
1797       156706530 :   if (loop1->num == loop2->num)
                 :     /* Break ties inside the same loop by block index so the overall
                 :        ordering is total and the qsort result deterministic.  */
1798        73634986 :     return bb1->index - bb2->index;
1799        83071544 :   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1800                 : }
1801 :
1802 : /* qsort sort function to sort ref locs after their loop fathers postorder. */
1803 :
1804                 : static int
1805          931862 : sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1806                 :                               void *bb_loop_postorder_)
1807                 : {
1808          931862 :   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1809          931862 :   const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1810          931862 :   const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1811          931862 :   class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1812          931862 :   class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1813          931862 :   if (loop1->num == loop2->num)
                 :     /* Locations inside the same loop compare equal; their relative
                 :        order is irrelevant for the checking use in
                 :        analyze_memory_references.  */
1814                 :     return 0;
1815          140409 :   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1816                 : }
1817 :
1818 : /* Gathers memory references in loops. */
1819 :
1820                 : static void
1821          481514 : analyze_memory_references (bool store_motion)
1822                 : {
1823          481514 :   gimple_stmt_iterator bsi;
1824          481514 :   basic_block bb, *bbs;
1825          481514 :   class loop *outer;
1826          481514 :   unsigned i, n;
1827                 : 
1828                 :   /* Collect all basic-blocks in loops and sort them after their
1829                 :      loops postorder.  */
1830          481514 :   i = 0;
1831          481514 :   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1832        14642656 :   FOR_EACH_BB_FN (bb, cfun)
                 :     /* Only blocks that belong to some loop are of interest; blocks
                 :        directly under the loop tree root are outside all loops.  */
1833        14161142 :     if (bb->loop_father != current_loops->tree_root)
1834         6003386 :       bbs[i++] = bb;
1835          481514 :   n = i;
1836          481514 :   gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1837                 :            bb_loop_postorder);
1838                 : 
1839                 :   /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1840                 :      That results in better locality for all the bitmaps.  It also
1841                 :      automatically sorts the location list of gathered memory references
1842                 :      after their loop postorder number allowing to binary-search it.  */
1843         6484900 :   for (i = 0; i < n; ++i)
1844                 :     {
1845         6003386 :       basic_block bb = bbs[i];
1846        55035795 :       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1847        43029023 :      gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1848                 :     }
1849                 : 
1850                 :   /* Verify the list of gathered memory references is sorted after their
1851                 :      loop postorder number.  */
1852          481514 :   if (flag_checking)
1853                 :     {
1854                 :       im_mem_ref *ref;
1855        10890072 :       FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1856         6136147 :      for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1857          931862 :        gcc_assert (sort_locs_in_loop_postorder_cmp
1858                 :                      (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1859                 :                       bb_loop_postorder) <= 0);
1860                 :     }
1861                 : 
1862          481514 :   free (bbs);
1863                 : 
                 :   /* The stored-in-loop bitmaps are only consumed by store motion;
                 :      skip the propagation when it is not going to run.  */
1864          481514 :   if (!store_motion)
1865          481514 :     return;
1866                 : 
1867                 :   /* Propagate the information about accessed memory references up
1868                 :      the loop hierarchy.  */
1869         2752166 :   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1870                 :     {
1871                 :       /* Finalize the overall touched references (including subloops).  */
1872         1307957 :       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1873         1307957 :                     &memory_accesses.refs_stored_in_loop[loop->num]);
1874                 : 
1875                 :       /* Propagate the information about accessed memory references up
1876                 :       the loop hierarchy.  */
1877         1307957 :       outer = loop_outer (loop);
1878         1307957 :       if (outer == current_loops->tree_root)
1879         1009783 :      continue;
1880                 : 
1881          298174 :       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1882          298174 :                     &memory_accesses.all_refs_stored_in_loop[loop->num]);
1883          481403 :     }
1884                 : }
1885 :
1886 : /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1887 : tree_to_aff_combination_expand. */
1888 :
1889                 : static bool
1890          541582 : mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1891                 :                    hash_map<tree, name_expansion *> **ttae_cache,
1892                 :                    bool tbaa_p)
1893                 : {
1894          541582 :   gcc_checking_assert (mem1->mem.ref != error_mark_node
1895                 :                     && mem2->mem.ref != error_mark_node);
1896                 : 
1897                 :   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1898                 :      object and their offset differ in such a way that the locations cannot
1899                 :      overlap, then they cannot alias.  */
1900                 :   poly_widest_int size1, size2;
1901                 :   aff_tree off1, off2;
1902                 : 
1903                 :   /* Perform basic offset and type-based disambiguation.  */
1904          541582 :   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1905                 :     return false;
1906                 : 
1907                 :   /* The expansion of addresses may be a bit expensive, thus we only do
1908                 :      the check at -O2 and higher optimization levels.  */
1909           66879 :   if (optimize < 2)
                 :     /* Conservatively answer "may alias" when skipping the affine check.  */
1910                 :     return true;
1911                 : 
1912           57252 :   get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1913           57252 :   get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1914           57252 :   aff_combination_expand (&off1, ttae_cache);
1915           57252 :   aff_combination_expand (&off2, ttae_cache);
                 :   /* Compute OFF2 - OFF1 by scaling OFF1 with -1 and adding; a provably
                 :      non-overlapping difference means no alias.  */
1916           57252 :   aff_combination_scale (&off1, -1);
1917           57252 :   aff_combination_add (&off2, &off1);
1918                 : 
1919           57252 :   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1920                 :     return false;
1921                 : 
1922                 :   return true;
1923          541582 : }
1924 :
1925 : /* Compare function for bsearch searching for reference locations
1926 : in a loop. */
1927 :
1928                 : static int
1929          259338 : find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1930                 :                        void *bb_loop_postorder_)
1931                 : {
1932          259338 :   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1933          259338 :   class loop *loop = (class loop *)const_cast<void *>(loop_);
1934          259338 :   mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1935          259338 :   class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
                 :   /* A location in LOOP itself or in any of its subloops counts as a
                 :      match, so bsearch lands somewhere inside the cluster.  */
1936          259338 :   if (loop->num == loc_loop->num
1937          259338 :       || flow_loop_nested_p (loop, loc_loop))
1938          199770 :     return 0;
1939           59568 :   return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1940           59568 :        ? -1 : 1);
1941                 : }
1942 :
1943 : /* Iterates over all locations of REF in LOOP and its subloops calling
1944 : fn.operator() with the location as argument. When that operator
1945 : returns true the iteration is stopped and true is returned.
1946 : Otherwise false is returned. */
1947 :
1948                 : template <typename FN>
1949                 : static bool
1950          199770 : for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1951                 : {
1952                 :   unsigned i;
1953                 :   mem_ref_loc *loc;
1954                 : 
1955                 :   /* Search for the cluster of locs in the accesses_in_loop vector
1956                 :      which is sorted after postorder index of the loop father.  */
1957          199770 :   loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1958                 :                                     bb_loop_postorder);
1959          199770 :   if (!loc)
1960               0 :     return false;
1961                 : 
1962                 :   /* We have found one location inside loop or its sub-loops.  Iterate
1963                 :      both forward and backward to cover the whole cluster.  */
                 :   /* bsearch may land anywhere inside the cluster, hence the two scans.  */
1964          199770 :   i = loc - ref->accesses_in_loop.address ();
1965          273766 :   while (i > 0)
1966                 :     {
1967          132249 :       --i;
1968          132249 :       mem_ref_loc *l = &ref->accesses_in_loop[i];
1969          132249 :       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1970                 :      break;
1971          109066 :       if (fn (l))
1972                 :      return true;
1973                 :     }
1974          445224 :   for (i = loc - ref->accesses_in_loop.address ();
1975          280524 :        i < ref->accesses_in_loop.length (); ++i)
1976                 :     {
1977          208602 :       mem_ref_loc *l = &ref->accesses_in_loop[i];
1978          208602 :       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1979                 :      break;
1980          190201 :       if (fn (l))
1981                 :      return true;
1982                 :     }
1983                 : 
1984                 :   return false;
1985                 : }
1986 :
1987 : /* Rewrites location LOC by TMP_VAR. */
1988                 : /* Rewrites location LOC by TMP_VAR.  */
1989                 : 
1990                 : class rewrite_mem_ref_loc
1991                 : {
1992                 : public:
1993           34884 :   rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1994                 :   bool operator () (mem_ref_loc *loc);
                 :   /* The scalar temporary that replaces each memory reference.  */
1995                 :   tree tmp_var;
1996                 : };
1997                 : 
1998                 : bool
1999           55648 : rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
2000                 : {
                 :   /* Replace the ref in-place in the statement and re-fold operands.  */
2001           55648 :   *loc->ref = tmp_var;
2002           55648 :   update_stmt (loc->stmt);
                 :   /* Always continue iterating over all locations.  */
2003           55648 :   return false;
2004                 : }
2004 :
2005 : /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
2006                 : /* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
2007                 : 
2008                 : static void
2009           34884 : rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
2010                 : {
2011               0 :   for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
2012               0 : }
2012 :
2013 : /* Stores the first reference location in LOCP. */
2014                 : /* Stores the first reference location in LOCP.  */
2015                 : 
2016                 : class first_mem_ref_loc_1
2017                 : {
2018                 : public:
2019           34884 :   first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
2020                 :   bool operator () (mem_ref_loc *loc);
                 :   /* Output slot receiving the first location visited.  */
2021                 :   mem_ref_loc **locp;
2022                 : };
2023                 : 
2024                 : bool
2025           34884 : first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
2026                 : {
2027           34884 :   *locp = loc;
                 :   /* Returning true stops the iteration after the first hit.  */
2028           34884 :   return true;
2029                 : }
2029 :
2030 : /* Returns the first reference location to REF in LOOP. */
2031                 : /* Returns the first reference location to REF in LOOP.  */
2032                 : 
2033                 : static mem_ref_loc *
2034           34884 : first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
2035                 : {
2036           34884 :   mem_ref_loc *locp = NULL;
2037               0 :   for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
2038           34884 :   return locp;
2039                 : }
2039 :
2040 : /* Helper function for execute_sm. Emit code to store TMP_VAR into
2041 : MEM along edge EX.
2042 :
2043 : The store is only done if MEM has changed. We do this so no
2044 : changes to MEM occur on code paths that did not originally store
2045 : into it.
2046 :
2047 : The common case for execute_sm will transform:
2048 :
2049 : for (...) {
2050 : if (foo)
2051 : stuff;
2052 : else
2053 : MEM = TMP_VAR;
2054 : }
2055 :
2056 : into:
2057 :
2058 : lsm = MEM;
2059 : for (...) {
2060 : if (foo)
2061 : stuff;
2062 : else
2063 : lsm = TMP_VAR;
2064 : }
2065 : MEM = lsm;
2066 :
2067 : This function will generate:
2068 :
2069 : lsm = MEM;
2070 :
2071 : lsm_flag = false;
2072 : ...
2073 : for (...) {
2074 : if (foo)
2075 : stuff;
2076 : else {
2077 : lsm = TMP_VAR;
2078 : lsm_flag = true;
2079 : }
2080 : }
2081 : if (lsm_flag) <--
2082 : MEM = lsm; <-- (X)
2083 :
2084 : In case MEM and TMP_VAR are NULL the function will return the then
2085 : block so the caller can insert (X) and other related stmts.
2086 : */
2087 :
2088                 : 
2089                 : static basic_block
2090           14469 : execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
2091                 :                     edge preheader, hash_set <basic_block> *flag_bbs,
2092                 :                     edge &append_cond_position, edge &last_cond_fallthru)
2093                 : {
2094           14469 :   basic_block new_bb, then_bb, old_dest;
2095           14469 :   bool loop_has_only_one_exit;
2096           14469 :   edge then_old_edge;
2097           14469 :   gimple_stmt_iterator gsi;
2098           14469 :   gimple *stmt;
2099           14469 :   bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
2100                 : 
2101           14469 :   profile_count count_sum = profile_count::zero ();
2102           14469 :   int nbbs = 0, ncount = 0;
2103           14469 :   profile_probability flag_probability = profile_probability::uninitialized ();
2104                 : 
2105                 :   /* Flag is set in FLAG_BBS. Determine probability that flag will be true
2106                 :      at loop exit.
2107                 : 
2108                 :      This code may look fancy, but it cannot update profile very realistically
2109                 :      because we do not know the probability that flag will be true at given
2110                 :      loop exit.
2111                 : 
2112                 :      We look for two interesting extremes
2113                 :        - when exit is dominated by block setting the flag, we know it will
2114                 :       always be true.  This is a common case.
2115                 :        - when all blocks setting the flag have very low frequency we know
2116                 :       it will likely be false.
2117                 :      In all other cases we default to 2/3 for flag being true.  */
2118                 : 
2119           14469 :   for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
2120           50187 :        it != flag_bbs->end (); ++it)
2121                 :     {
2122           17859 :       if ((*it)->count.initialized_p ())
2123           17846 :      count_sum += (*it)->count, ncount ++;
2124           17859 :       if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
2125            2435 :      flag_probability = profile_probability::always ();
2126           17859 :       nbbs++;
2127                 :     }
2128                 : 
                 :   /* Cap the estimated probability at 2/3 (the default guess).  */
2129           14469 :   profile_probability cap
2130           14469 :     = profile_probability::guessed_always ().apply_scale (2, 3);
2131                 : 
2132           14469 :   if (flag_probability.initialized_p ())
2133                 :     ;
2134           12135 :   else if (ncount == nbbs
2135           24479 :         && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
2136                 :     {
2137             184 :       flag_probability = count_sum.probability_in (preheader->count ());
2138             184 :       if (flag_probability > cap)
2139              71 :      flag_probability = cap;
2140                 :     }
2141                 : 
2142           14469 :   if (!flag_probability.initialized_p ())
2143           11951 :     flag_probability = cap;
2144                 : 
2145                 :   /* ?? Insert store after previous store if applicable.  See note
2146                 :      below.  */
2147           14469 :   if (append_cond_position)
2148            5492 :     ex = append_cond_position;
2149                 : 
2150           14469 :   loop_has_only_one_exit = single_pred_p (ex->dest);
2151                 : 
2152           14469 :   if (loop_has_only_one_exit)
2153            5960 :     ex = split_block_after_labels (ex->dest);
2154                 :   else
2155                 :     {
2156            8509 :       for (gphi_iterator gpi = gsi_start_phis (ex->dest);
2157           12617 :         !gsi_end_p (gpi); gsi_next (&gpi))
2158                 :      {
2159            4933 :        gphi *phi = gpi.phi ();
2160            9866 :        if (virtual_operand_p (gimple_phi_result (phi)))
2161            4108 :          continue;
2162                 : 
2163                 :        /* When the destination has a non-virtual PHI node with multiple
2164                 :           predecessors make sure we preserve the PHI structure by
2165                 :           forcing a forwarder block so that hoisting of that PHI will
2166                 :           still work.  */
2167             825 :        split_edge (ex);
2168             825 :        break;
2169                 :      }
2170                 :     }
2171                 : 
                 :   /* Build the diamond:  NEW_BB conditionally branches to THEN_BB
                 :      (holding the store) which falls through to OLD_DEST.  */
2172           14469 :   old_dest = ex->dest;
2173           14469 :   new_bb = split_edge (ex);
2174           14469 :   if (append_cond_position)
2175            5492 :     new_bb->count += last_cond_fallthru->count ();
2176           14469 :   then_bb = create_empty_bb (new_bb);
2177           14469 :   then_bb->count = new_bb->count.apply_probability (flag_probability);
2178           14469 :   if (irr)
2179              49 :     then_bb->flags = BB_IRREDUCIBLE_LOOP;
2180           14469 :   add_bb_to_loop (then_bb, new_bb->loop_father);
2181                 : 
2182           14469 :   gsi = gsi_start_bb (new_bb);
2183           14469 :   stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2184                 :                          NULL_TREE, NULL_TREE);
2185           14469 :   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2186                 : 
2187                 :   /* Insert actual store.  */
2188           14469 :   if (mem)
2189                 :     {
2190           12061 :       gsi = gsi_start_bb (then_bb);
2191           12061 :       stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2192           12061 :       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2193                 :     }
2194                 : 
2195           14469 :   edge e1 = single_succ_edge (new_bb);
2196           28889 :   edge e2 = make_edge (new_bb, then_bb,
2197                 :                     EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2198           14469 :   e2->probability = flag_probability;
2199                 : 
2200           14469 :   e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2201           14469 :   e1->flags &= ~EDGE_FALLTHRU;
2202                 : 
2203           14469 :   e1->probability = flag_probability.invert ();
2204                 : 
2205           28889 :   then_old_edge = make_single_succ_edge (then_bb, old_dest,
2206                 :                      EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2207                 : 
2208           14469 :   set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2209                 : 
2210           14469 :   if (append_cond_position)
2211                 :     {
2212            5492 :       basic_block prevbb = last_cond_fallthru->src;
2213            5492 :       redirect_edge_succ (last_cond_fallthru, new_bb);
2214            5492 :       set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2215            5492 :       set_immediate_dominator (CDI_DOMINATORS, old_dest,
2216                 :                             recompute_dominator (CDI_DOMINATORS, old_dest));
2217                 :     }
2218                 : 
2219                 :   /* ?? Because stores may alias, they must happen in the exact
2220                 :      sequence they originally happened.  Save the position right after
2221                 :      the (_lsm) store we just created so we can continue appending after
2222                 :      it and maintain the original order.  */
2223           14469 :   append_cond_position = then_old_edge;
2224           14469 :   last_cond_fallthru = find_edge (new_bb, old_dest);
2225                 : 
                 :   /* The new THEN_BB edge into OLD_DEST needs PHI arguments; copy them
                 :      from the argument already present for NEW_BB's edge.  */
2226           14469 :   if (!loop_has_only_one_exit)
2227            8509 :     for (gphi_iterator gpi = gsi_start_phis (old_dest);
2228           12461 :       !gsi_end_p (gpi); gsi_next (&gpi))
2229                 :       {
2230            3952 :      gphi *phi = gpi.phi ();
2231            3952 :      unsigned i;
2232                 : 
2233           22591 :      for (i = 0; i < gimple_phi_num_args (phi); i++)
2234           18639 :        if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2235                 :          {
2236            3952 :            tree arg = gimple_phi_arg_def (phi, i);
2237            3952 :            add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2238            3952 :            update_stmt (phi);
2239                 :          }
2240                 :       }
2241                 : 
2242           14469 :   return then_bb;
2243                 : }
2243 :
2244 : /* When REF is set on the location, set flag indicating the store. */
2245                 : /* When REF is set on the location, set flag indicating the store.  */
2246                 : 
2247                 : class sm_set_flag_if_changed
2248                 : {
2249                 : public:
2250            7695 :   sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2251            7695 :       : flag (flag_), bbs (bbs_) {}
2252                 :   bool operator () (mem_ref_loc *loc);
                 :   /* Boolean temporary set to true after each store to the ref.  */
2253                 :   tree flag;
                 :   /* Collects the blocks where the flag gets set, for later profile
                 :      estimation in execute_sm_if_changed.  */
2254                 :   hash_set <basic_block> *bbs;
2255                 : };
2256                 : 
2257                 : bool
2258           14655 : sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2259                 : {
2260                 :   /* Only set the flag for writes.  */
2261           14655 :   if (is_gimple_assign (loc->stmt)
2262           14655 :       && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2263                 :     {
2264            8812 :       gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2265            8812 :       gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2266            8812 :       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2267            8812 :       bbs->add (gimple_bb (stmt));
2268                 :     }
                 :   /* Never stop the iteration; all store locations must be flagged.  */
2269           14655 :   return false;
2270                 : }
2270 :
2271 : /* Helper function for execute_sm. On every location where REF is
2272 : set, set an appropriate flag indicating the store. */
2273 :
2274                 : 
2275                 : static tree
2276            7695 : execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2277                 :                              hash_set <basic_block> *bbs)
2278                 : {
2279            7695 :   tree flag;
2280            7695 :   char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2281            7695 :   flag = create_tmp_reg (boolean_type_node, str);
2282            7695 :   for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2283            7695 :   return flag;
2284                 : }
2284 :
2285 34884 : struct sm_aux
2286 : {
2287 : tree tmp_var;
2288 : tree store_flag;
2289 : hash_set <basic_block> flag_bbs;
2290 : };
2291 :
2292 : /* Executes store motion of memory reference REF from LOOP.
2293 : Exits from the LOOP are stored in EXITS. The initialization of the
2294 : temporary variable is put to the preheader of the loop, and assignments
2295 : to the reference from the temporary variable are emitted to exits. */
2296                 : 
2297                 : static sm_aux *
2298           34884 : execute_sm (class loop *loop, im_mem_ref *ref,
2299                 :          hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
2300                 :          bool use_other_flag_var)
2301                 : {
2302           34884 :   gassign *load;
2303           34884 :   struct fmt_data fmt_data;
2304           34884 :   struct lim_aux_data *lim_data;
2305           34884 :   bool multi_threaded_model_p = false;
2306           34884 :   gimple_stmt_iterator gsi;
2307           34884 :   sm_aux *aux = new sm_aux;
2308                 : 
2309           34884 :   if (dump_file && (dump_flags & TDF_DETAILS))
2310                 :     {
2311              82 :       fprintf (dump_file, "Executing store motion of ");
2312              82 :       print_generic_expr (dump_file, ref->mem.ref);
2313              82 :       fprintf (dump_file, " from loop %d\n", loop->num);
2314                 :     }
2315                 : 
2316           34884 :   aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2317           34884 :                               get_lsm_tmp_name (ref->mem.ref, ~0));
2318                 : 
2319           34884 :   fmt_data.loop = loop;
2320           34884 :   fmt_data.orig_loop = loop;
2321           34884 :   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2322                 : 
2323           69768 :   bool always_stored = ref_always_accessed_p (loop, ref, true);
                 :   /* Use the conditional-store (multi-threaded safe) model when
                 :      speculating the store is not provably safe: inside transactions,
                 :      for possibly-racy refs not stored on every iteration, or when the
                 :      value cannot round-trip through the scalar temporary.  */
2324           34884 :   if (maybe_mt
2325           11726 :       && (bb_in_transaction (loop_preheader_edge (loop)->src)
2326           11725 :        || (ref_can_have_store_data_races (ref->mem.ref) && ! always_stored)
2327                 :        /* Do not speculate a load/store when that's not a noop, either
2328                 :           because the mode cannot be transferred or because there's
2329                 :           UB involved for out-of-bound values.  */
2330            3762 :        || !mode_can_transfer_bits (TYPE_MODE (TREE_TYPE (ref->mem.ref)))
2331            3762 :        || TREE_CODE (TREE_TYPE (ref->mem.ref)) == BOOLEAN_TYPE
2332            3748 :        || (TREE_CODE (ref->mem.ref) == COMPONENT_REF
2333             880 :            && DECL_BIT_FIELD (TREE_OPERAND (ref->mem.ref, 1)))))
2334                 :     multi_threaded_model_p = true;
2335                 : 
2336           34884 :   if (multi_threaded_model_p && !use_other_flag_var)
2337            7695 :     aux->store_flag
2338            7695 :       = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2339                 :   else
2340           27189 :     aux->store_flag = NULL_TREE;
2341                 : 
2342                 :   /* Remember variable setup.  */
2343           34884 :   aux_map.put (ref, aux);
2344                 : 
2345           34884 :   rewrite_mem_refs (loop, ref, aux->tmp_var);
2346                 : 
2347                 :   /* Emit the load code on a random exit edge or into the latch if
2348                 :      the loop does not exit, so that we are sure it will be processed
2349                 :      by move_computations after all dependencies.  */
2350           34884 :   gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2351                 : 
2352                 :   /* Avoid doing a load if there was no load of the ref in the loop.
2353                 :      Esp. when the ref is not always stored we cannot optimize it
2354                 :      away later.  But when it is not always stored we must use a conditional
2355                 :      store then.  */
2356           34884 :   if ((!always_stored && !multi_threaded_model_p)
2357           34884 :       || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2358           18198 :     load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2359                 :   else
2360                 :     {
2361                 :       /* If not emitting a load mark the uninitialized state on the
2362                 :       loop entry as not to be warned for.  */
2363           16686 :       tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2364           16686 :       suppress_warning (uninit, OPT_Wuninitialized);
2365           16686 :       uninit = get_or_create_ssa_default_def (cfun, uninit);
2366           16686 :       load = gimple_build_assign (aux->tmp_var, uninit);
2367                 :     }
2368           34884 :   lim_data = init_lim_data (load);
2369           34884 :   lim_data->max_loop = loop;
2370           34884 :   lim_data->tgt_loop = loop;
2371           34884 :   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2372                 : 
                 :   /* Initialize the store flag to false before the loop; it becomes
                 :      true only on paths that actually store.  */
2373           34884 :   if (aux->store_flag)
2374                 :     {
2375            7695 :       load = gimple_build_assign (aux->store_flag, boolean_false_node);
2376            7695 :       lim_data = init_lim_data (load);
2377            7695 :       lim_data->max_loop = loop;
2378            7695 :       lim_data->tgt_loop = loop;
2379            7695 :       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2380                 :     }
2381                 : 
2382           34884 :   return aux;
2383                 : }
2384 :
2385 : /* sm_ord is used for ordinary stores we can retain order with respect
2386 : to other stores
2387 : sm_unord is used for conditional executed stores which need to be
2388 : able to execute in arbitrary order with respect to other stores
2389 : sm_other is used for stores we do not try to apply store motion to. */
2390                 : enum sm_kind { sm_ord, sm_unord, sm_other };
2391                 : struct seq_entry
2392                 : {
2393                 :   seq_entry () = default;
2394           51978 :   seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2395           51978 :     : first (f), second (k), from (fr) {}
                 :   /* Index of the ref in memory_accesses.refs_list.  */
2396                 :   unsigned first;
                 :   /* Classification of the store for store-motion purposes.  */
2397                 :   sm_kind second;
                 :   /* For sm_other entries, the stored value to re-issue on exits.  */
2398                 :   tree from;
2399                 : };
2400 :
                 : /* Materialize the stores of SEQ on exit edge EX of LOOP, in the
                 :    original program order (SEQ is walked back-to-front).  */
2401                 : 
2402                 : static void
2403           32997 : execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2404                 :               hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2405                 :               edge &append_cond_position, edge &last_cond_fallthru,
2406                 :               bitmap clobbers_to_prune)
2407                 : {
2408                 :   /* Sink the stores to exit from the loop.  */
2409          116123 :   for (unsigned i = seq.length (); i > 0; --i)
2410                 :     {
2411           50129 :       im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2412           50129 :       if (seq[i-1].second == sm_other)
2413                 :      {
2414            4920 :        gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2415            4920 :        gassign *store;
                 :        /* error_mark_node marks a ref represented only by a clobber;
                 :           re-issue the store to the clobbered lhs instead.  */
2416            4920 :        if (ref->mem.ref == error_mark_node)
2417                 :          {
2418             177 :            tree lhs = gimple_assign_lhs (ref->accesses_in_loop[0].stmt);
2419             177 :            if (dump_file && (dump_flags & TDF_DETAILS))
2420                 :              {
2421               3 :                fprintf (dump_file, "Re-issueing dependent ");
2422               3 :                print_generic_expr (dump_file, unshare_expr (seq[i-1].from));
2423               3 :                fprintf (dump_file, " of ");
2424               3 :                print_generic_expr (dump_file, lhs);
2425               3 :                fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2426               3 :                         loop->num, ex->src->index, ex->dest->index);
2427                 :              }
2428             177 :            store = gimple_build_assign (unshare_expr (lhs),
2429             177 :                                         unshare_expr (seq[i-1].from));
2430             177 :            bitmap_set_bit (clobbers_to_prune, seq[i-1].first);
2431                 :          }
2432                 :        else
2433                 :          {
2434            4743 :            if (dump_file && (dump_flags & TDF_DETAILS))
2435                 :              {
2436               0 :                fprintf (dump_file, "Re-issueing dependent store of ");
2437               0 :                print_generic_expr (dump_file, ref->mem.ref);
2438               0 :                fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2439               0 :                         loop->num, ex->src->index, ex->dest->index);
2440                 :              }
2441            4743 :            store = gimple_build_assign (unshare_expr (ref->mem.ref),
2442            4743 :                                         seq[i-1].from);
2443                 :          }
2444            4920 :        gsi_insert_on_edge (ex, store);
2445                 :      }
2446                 :       else
2447                 :      {
2448           45209 :        sm_aux *aux = *aux_map.get (ref);
                 :        /* Unconditional store of the temporary, unless a store flag
                 :           requires the conditional execute_sm_if_changed form.  */
2449           45209 :        if (!aux->store_flag || kind == sm_ord)
2450                 :          {
2451           33148 :            gassign *store;
2452           33148 :            store = gimple_build_assign (unshare_expr (ref->mem.ref),
2453                 :                                         aux->tmp_var);
2454           33148 :            gsi_insert_on_edge (ex, store);
2455           33148 :          }
2456                 :        else
2457           12061 :          execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2458                 :                                 aux->store_flag,
2459                 :                                 loop_preheader_edge (loop), &aux->flag_bbs,
2460                 :                                 append_cond_position, last_cond_fallthru);
2461                 :      }
2462                 :     }
2463           32997 : }
2463 :
2464 : /* Push the SM candidate at index PTR in the sequence SEQ down until
2465 : we hit the next SM candidate. Return true if that went OK and
2466                 :    false if we could not disambiguate against another unrelated ref.
2467 : Update *AT to the index where the candidate now resides. */
2468                 : 
2469                 : static bool
2470           37547 : sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2471                 : {
2472           37547 :   *at = ptr;
2473           38323 :   for (; ptr > 0; --ptr)
2474                 :     {
2475           17686 :       seq_entry &new_cand = seq[ptr];
2476           17686 :       seq_entry &against = seq[ptr-1];
2477           17686 :       if (against.second == sm_ord
2478            5797 :        || (against.second == sm_other && against.from != NULL_TREE))
2479                 :      /* Found the tail of the sequence.  */
2480                 :      break;
2481                 :       /* We may not ignore self-dependences here.  */
2482            1077 :       if (new_cand.first == against.first
2483                 :        /* ??? We could actually handle clobbers here, but not easily
2484                 :           with LIMs dependence analysis.  */
2485             855 :        || (memory_accesses.refs_list[new_cand.first]->mem.ref
2486             855 :            == error_mark_node)
2487             850 :        || (memory_accesses.refs_list[against.first]->mem.ref
2488                 :            == error_mark_node)
2489            1903 :        || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2490             826 :                                memory_accesses.refs_list[against.first],
2491                 :                                false))
2492                 :      /* ??? Prune new_cand from the list of refs to apply SM to.  */
2493             301 :      return false;
                 :       /* Independent of the neighbor below; bubble the candidate down
                 :          one slot and continue.  */
2494             776 :       std::swap (new_cand, against);
2495             776 :       *at = ptr - 1;
2496                 :     }
2497                 :   return true;
2498                 : }
2499 :
2500 : /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2501 : walking backwards from VDEF (or the end of BB if VDEF is NULL). */
2502 :
2503 : static int
2504 35442 : sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2505 : vec<seq_entry> &seq, bitmap refs_not_in_seq,
2506 : bitmap refs_not_supported, bool forked,
2507 : bitmap fully_visited)
2508 : {
2509 35442 : if (!vdef)
2510 26359 : for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2511 113325 : gsi_prev (&gsi))
2512 : {
2513 139415 : vdef = gimple_vdef (gsi_stmt (gsi));
2514 52449 : if (vdef)
2515 : break;
2516 : }
2517 26359 : if (!vdef)
2518 : {
2519 8585 : gphi *vphi = get_virtual_phi (bb);
2520 8585 : if (vphi)
2521 4271 : vdef = gimple_phi_result (vphi);
2522 : }
2523 35442 : if (!vdef)
2524 : {
2525 4314 : if (single_pred_p (bb))
2526 : /* This handles the perfect nest case. */
2527 3522 : return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2528 : seq, refs_not_in_seq, refs_not_supported,
2529 3522 : forked, fully_visited);
2530 : return 0;
2531 : }
2532 55108 : do
2533 : {
2534 110216 : gimple *def = SSA_NAME_DEF_STMT (vdef);
2535 55108 : if (gimple_bb (def) != bb)
2536 : {
2537 : /* If we forked by processing a PHI do not allow our walk to
2538 : merge again until we handle that robustly. */
2539 3407 : if (forked)
2540 : {
2541 : /* Mark refs_not_in_seq as unsupported. */
2542 1694 : bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2543 1694 : return 1;
2544 : }
2545 : /* Otherwise it doesn't really matter if we end up in different
2546 : BBs. */
2547 : bb = gimple_bb (def);
2548 : }
2549 53414 : if (gphi *phi = dyn_cast <gphi *> (def))
2550 : {
2551 : /* Handle CFG merges. Until we handle forks (gimple_bb (def) != bb)
2552 : this is still linear.
2553 : Eventually we want to cache intermediate results per BB
2554 : (but we can't easily cache for different exits?). */
2555 : /* Stop at PHIs with possible backedges. */
2556 10457 : if (bb == bb->loop_father->header
2557 5351 : || bb->flags & BB_IRREDUCIBLE_LOOP)
2558 : {
2559 : /* Mark refs_not_in_seq as unsupported. */
2560 5111 : bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2561 5111 : return 1;
2562 : }
2563 5346 : if (gimple_phi_num_args (phi) == 1)
2564 1522 : return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2565 : gimple_phi_arg_def (phi, 0), seq,
2566 : refs_not_in_seq, refs_not_supported,
2567 1522 : false, fully_visited);
2568 7648 : if (bitmap_bit_p (fully_visited,
2569 3824 : SSA_NAME_VERSION (gimple_phi_result (phi))))
2570 : return 1;
2571 3720 : auto_vec<seq_entry> first_edge_seq;
2572 3720 : auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2573 3720 : int eret;
2574 3720 : bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2575 3720 : eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2576 : gimple_phi_arg_def (phi, 0),
2577 : first_edge_seq,
2578 : tem_refs_not_in_seq, refs_not_supported,
2579 : true, fully_visited);
2580 3720 : if (eret != 1)
2581 : return -1;
2582 : /* Simplify our lives by pruning the sequence of !sm_ord. */
2583 3510 : while (!first_edge_seq.is_empty ()
2584 8405 : && first_edge_seq.last ().second != sm_ord)
2585 1175 : first_edge_seq.pop ();
2586 7561 : for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2587 : {
2588 5819 : tree vuse = gimple_phi_arg_def (phi, i);
2589 5819 : edge e = gimple_phi_arg_edge (phi, i);
2590 5819 : auto_vec<seq_entry> edge_seq;
2591 5819 : bitmap_and_compl (tem_refs_not_in_seq,
2592 : refs_not_in_seq, refs_not_supported);
2593 : /* If we've marked all refs we search for as unsupported
2594 : we can stop processing and use the sequence as before
2595 : the PHI. */
2596 5819 : if (bitmap_empty_p (tem_refs_not_in_seq))
2597 : return 1;
2598 3841 : eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2599 : tem_refs_not_in_seq, refs_not_supported,
2600 : true, fully_visited);
2601 3841 : if (eret != 1)
2602 : return -1;
2603 : /* Simplify our lives by pruning the sequence of !sm_ord. */
2604 3355 : while (!edge_seq.is_empty ()
2605 8411 : && edge_seq.last ().second != sm_ord)
2606 1215 : edge_seq.pop ();
2607 7682 : unsigned min_len = MIN(first_edge_seq.length (),
2608 : edge_seq.length ());
2609 : /* Incrementally merge seqs into first_edge_seq. */
2610 3841 : int first_uneq = -1;
2611 3841 : auto_vec<seq_entry, 2> extra_refs;
2612 6032 : for (unsigned int i = 0; i < min_len; ++i)
2613 : {
2614 : /* ??? We can more intelligently merge when we face different
2615 : order by additional sinking operations in one sequence.
2616 : For now we simply mark them as to be processed by the
2617 : not order-preserving SM code. */
2618 2191 : if (first_edge_seq[i].first != edge_seq[i].first)
2619 : {
2620 35 : if (first_edge_seq[i].second == sm_ord)
2621 22 : bitmap_set_bit (refs_not_supported,
2622 22 : first_edge_seq[i].first);
2623 35 : if (edge_seq[i].second == sm_ord)
2624 24 : bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2625 35 : first_edge_seq[i].second = sm_other;
2626 35 : first_edge_seq[i].from = NULL_TREE;
2627 : /* Record the dropped refs for later processing. */
2628 35 : if (first_uneq == -1)
2629 33 : first_uneq = i;
2630 70 : extra_refs.safe_push (seq_entry (edge_seq[i].first,
2631 35 : sm_other, NULL_TREE));
2632 : }
2633 : /* sm_other prevails. */
2634 2156 : else if (first_edge_seq[i].second != edge_seq[i].second)
2635 : {
2636 : /* Make sure the ref is marked as not supported. */
2637 0 : bitmap_set_bit (refs_not_supported,
2638 0 : first_edge_seq[i].first);
2639 0 : first_edge_seq[i].second = sm_other;
2640 0 : first_edge_seq[i].from = NULL_TREE;
2641 : }
2642 2156 : else if (first_edge_seq[i].second == sm_other
2643 7 : && first_edge_seq[i].from != NULL_TREE
2644 2163 : && (edge_seq[i].from == NULL_TREE
2645 7 : || !operand_equal_p (first_edge_seq[i].from,
2646 7 : edge_seq[i].from, 0)))
2647 7 : first_edge_seq[i].from = NULL_TREE;
2648 : }
2649 : /* Any excess elements become sm_other since they are now
2650 : coonditionally executed. */
2651 10563 : if (first_edge_seq.length () > edge_seq.length ())
2652 : {
2653 6708 : for (unsigned i = edge_seq.length ();
2654 8720 : i < first_edge_seq.length (); ++i)
2655 : {
2656 6708 : if (first_edge_seq[i].second == sm_ord)
2657 3566 : bitmap_set_bit (refs_not_supported,
2658 3566 : first_edge_seq[i].first);
2659 6708 : first_edge_seq[i].second = sm_other;
2660 : }
2661 : }
2662 1829 : else if (edge_seq.length () > first_edge_seq.length ())
2663 : {
2664 8 : if (first_uneq == -1)
2665 0 : first_uneq = first_edge_seq.length ();
2666 8 : for (unsigned i = first_edge_seq.length ();
2667 26 : i < edge_seq.length (); ++i)
2668 : {
2669 18 : if (edge_seq[i].second == sm_ord)
2670 8 : bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2671 36 : extra_refs.safe_push (seq_entry (edge_seq[i].first,
2672 18 : sm_other, NULL_TREE));
2673 : }
2674 : }
2675 : /* Put unmerged refs at first_uneq to force dependence checking
2676 : on them. */
2677 3841 : if (first_uneq != -1)
2678 : {
2679 : /* Missing ordered_splice_at. */
2680 66 : if ((unsigned)first_uneq == first_edge_seq.length ())
2681 0 : first_edge_seq.safe_splice (extra_refs);
2682 : else
2683 : {
2684 33 : unsigned fes_length = first_edge_seq.length ();
2685 33 : first_edge_seq.safe_grow (fes_length
2686 33 : + extra_refs.length ());
2687 33 : memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
2688 33 : &first_edge_seq[first_uneq],
2689 33 : (fes_length - first_uneq) * sizeof (seq_entry));
2690 66 : memcpy (&first_edge_seq[first_uneq],
2691 33 : extra_refs.address (),
2692 33 : extra_refs.length () * sizeof (seq_entry));
2693 : }
2694 : }
2695 5819 : }
2696 : /* Use the sequence from the first edge and push SMs down. */
2697 4741 : for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2698 : {
2699 2999 : unsigned id = first_edge_seq[i].first;
2700 2999 : seq.safe_push (first_edge_seq[i]);
2701 2999 : unsigned new_idx;
2702 2999 : if ((first_edge_seq[i].second == sm_ord
2703 2356 : || (first_edge_seq[i].second == sm_other
2704 2356 : && first_edge_seq[i].from != NULL_TREE))
2705 4006 : && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2706 : {
2707 125 : if (first_edge_seq[i].second == sm_ord)
2708 0 : bitmap_set_bit (refs_not_supported, id);
2709 : /* Mark it sm_other. */
2710 125 : seq[new_idx].second = sm_other;
2711 125 : seq[new_idx].from = NULL_TREE;
2712 : }
2713 : }
2714 1742 : bitmap_set_bit (fully_visited,
2715 1742 : SSA_NAME_VERSION (gimple_phi_result (phi)));
2716 1742 : return 1;
2717 3720 : }
2718 42957 : lim_aux_data *data = get_lim_data (def);
2719 42957 : im_mem_ref *ref = memory_accesses.refs_list[data->ref];
2720 42957 : if (data->ref == UNANALYZABLE_MEM_ID)
2721 : return -1;
2722 : /* Stop at memory references which we can't move. */
2723 42957 : else if ((ref->mem.ref == error_mark_node
2724 : /* We can move end-of-storage/object down. */
2725 421 : && !gimple_clobber_p (ref->accesses_in_loop[0].stmt,
2726 : CLOBBER_STORAGE_END)
2727 291 : && !gimple_clobber_p (ref->accesses_in_loop[0].stmt,
2728 : CLOBBER_OBJECT_END))
2729 43271 : || TREE_THIS_VOLATILE (ref->mem.ref))
2730 : {
2731 : /* Mark refs_not_in_seq as unsupported. */
2732 139 : bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2733 139 : return 1;
2734 : }
2735 : /* One of the stores we want to apply SM to and we've not yet seen. */
2736 42818 : else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2737 : {
2738 33515 : seq.safe_push (seq_entry (data->ref, sm_ord));
2739 :
2740 : /* 1) push it down the queue until a SMed
2741 : and not ignored ref is reached, skipping all not SMed refs
2742 : and ignored refs via non-TBAA disambiguation. */
2743 33515 : unsigned new_idx;
2744 67030 : if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2745 : /* If that fails but we did not fork yet continue, we'll see
2746 : to re-materialize all of the stores in the sequence then.
2747 : Further stores will only be pushed up to this one. */
2748 33515 : && forked)
2749 : {
2750 0 : bitmap_set_bit (refs_not_supported, data->ref);
2751 : /* Mark it sm_other. */
2752 0 : seq[new_idx].second = sm_other;
2753 : }
2754 :
2755 : /* 2) check whether we've seen all refs we want to SM and if so
2756 : declare success for the active exit */
2757 33515 : if (bitmap_empty_p (refs_not_in_seq))
2758 18838 : return 1;
2759 : }
2760 : else
2761 : /* Another store not part of the final sequence. Simply push it. */
2762 9303 : seq.safe_push (seq_entry (data->ref, sm_other,
2763 9303 : gimple_assign_rhs1 (def)));
2764 :
2765 79088 : vdef = gimple_vuse (def);
2766 : }
2767 : while (1);
2768 : }
2769 :
/* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
   edges of the LOOP.  Refs that turn out not to be supportable are
   removed from MEM_REFS on the fly.  */

static void
hoist_memory_references (class loop *loop, bitmap mem_refs,
			 const vec<edge> &exits)
{
  im_mem_ref *ref;
  unsigned  i;
  bitmap_iterator bi;

  /* There's a special case we can use ordered re-materialization for
     conditionally executed stores which is when all stores in the loop
     happen in the same basic-block.  In that case we know we'll reach
     all stores and thus can simply process that BB and emit a single
     conditional block of ordered materializations.  See PR102436.  */
  basic_block single_store_bb = NULL;
  EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
			    0, i, bi)
    {
      bool fail = false;
      ref = memory_accesses.refs_list[i];
      for (auto loc : ref->accesses_in_loop)
	if (!gimple_vdef (loc.stmt))
	  /* Loads do not constrain the single-store block.  */
	  ;
	else if (!single_store_bb)
	  {
	    /* First store seen determines the candidate block.  */
	    single_store_bb = gimple_bb (loc.stmt);
	    bool conditional = false;
	    for (edge e : exits)
	      if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
		{
		  /* Conditional as seen from e.  */
		  conditional = true;
		  break;
		}
	    /* The special-case path only pays off for conditionally
	       executed stores; unconditional ones are handled by the
	       regular machinery below.  */
	    if (!conditional)
	      {
		fail = true;
		break;
	      }
	  }
	else if (single_store_bb != gimple_bb (loc.stmt))
	  {
	    /* Stores in more than one block - give up on the special case.  */
	    fail = true;
	    break;
	  }
      if (fail)
	{
	  single_store_bb = NULL;
	  break;
	}
    }
  if (single_store_bb)
    {
      /* Analyze the single block with stores.  */
      auto_bitmap fully_visited;
      auto_bitmap refs_not_supported;
      auto_bitmap refs_not_in_seq;
      auto_vec<seq_entry> seq;
      bitmap_copy (refs_not_in_seq, mem_refs);
      int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
				 seq, refs_not_in_seq, refs_not_supported,
				 false, fully_visited);
      if (res != 1)
	{
	  /* Unhandled refs can still fail this.  */
	  bitmap_clear (mem_refs);
	  return;
	}

      /* We cannot handle sm_other since we neither remember the
	 stored location nor the value at the point we execute them.  */
      for (unsigned i = 0; i < seq.length (); ++i)
	{
	  unsigned new_i;
	  if (seq[i].second == sm_other
	      && seq[i].from != NULL_TREE)
	    seq[i].from = NULL_TREE;
	  else if ((seq[i].second == sm_ord
		    || (seq[i].second == sm_other
			&& seq[i].from != NULL_TREE))
		   && !sm_seq_push_down (seq, i, &new_i))
	    {
	      /* Sinking failed - demote the entry to sm_other and mark
		 the ref unsupported.  */
	      bitmap_set_bit (refs_not_supported, seq[new_i].first);
	      seq[new_i].second = sm_other;
	      seq[new_i].from = NULL_TREE;
	    }
	}
      bitmap_and_compl_into (mem_refs, refs_not_supported);
      if (bitmap_empty_p (mem_refs))
	return;

      /* Prune seq.  */
      while (seq.last ().second == sm_other
	     && seq.last ().from == NULL_TREE)
	seq.pop ();

      hash_map<im_mem_ref *, sm_aux *> aux_map;

      /* Execute SM but delay the store materialization for ordered
	 sequences on exit.  Remember a created flag var and make
	 sure to re-use it.  */
      sm_aux *flag_var_aux = nullptr;
      EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
	{
	  ref = memory_accesses.refs_list[i];
	  sm_aux *aux = execute_sm (loop, ref, aux_map, true,
				    flag_var_aux != nullptr);
	  if (aux->store_flag)
	    flag_var_aux = aux;
	}

      /* Materialize ordered store sequences on exits.  */
      edge e;
      auto_bitmap clobbers_to_prune;
      FOR_EACH_VEC_ELT (exits, i, e)
	{
	  edge append_cond_position = NULL;
	  edge last_cond_fallthru = NULL;
	  edge insert_e = e;
	  /* Construct the single flag variable control flow and insert
	     the ordered seq of stores in the then block.  With
	     -fstore-data-races we can do the stores unconditionally.  */
	  if (flag_var_aux)
	    insert_e
	      = single_pred_edge
		  (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
					  flag_var_aux->store_flag,
					  loop_preheader_edge (loop),
					  &flag_var_aux->flag_bbs,
					  append_cond_position,
					  last_cond_fallthru));
	  execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
			   append_cond_position, last_cond_fallthru,
			   clobbers_to_prune);
	  gsi_commit_one_edge_insert (insert_e, NULL);
	}

      /* Remove clobbers inside the loop we re-materialized on exits.  */
      EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
	{
	  gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unlink_stmt_vdef (stmt);
	  release_defs (stmt);
	  gimple_set_vdef (stmt, NULL_TREE);
	  gsi_remove (&gsi, true);
	}

      /* Release the per-ref auxiliary data.  */
      for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
	   iter != aux_map.end (); ++iter)
	delete (*iter).second;

      return;
    }

  /* To address PR57359 before actually applying store-motion check
     the candidates found for validity with regards to reordering
     relative to other stores which we until here disambiguated using
     TBAA which isn't valid.
     What matters is the order of the last stores to the mem_refs
     with respect to the other stores of the loop at the point of the
     loop exits.  */

  /* For each exit compute the store order, pruning from mem_refs
     on the fly.  */
  /* The complexity of this is at least
     O(number of exits * number of SM refs) but more approaching
     O(number of exits * number of SM refs * number of stores).  */
  /* ??? Somehow do this in a single sweep over the loop body.  */
  auto_vec<std::pair<edge, vec<seq_entry> > > sms;
  auto_bitmap refs_not_supported (&lim_bitmap_obstack);
  edge e;
  FOR_EACH_VEC_ELT (exits, i, e)
    {
      vec<seq_entry> seq;
      seq.create (4);
      auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
      bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
      /* Nothing left to verify - stop processing further exits.  */
      if (bitmap_empty_p (refs_not_in_seq))
	{
	  seq.release ();
	  break;
	}
      auto_bitmap fully_visited;
      int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
				 seq, refs_not_in_seq,
				 refs_not_supported, false,
				 fully_visited);
      if (res != 1)
	{
	  /* Validation failed hard - declare everything unsupported.  */
	  bitmap_copy (refs_not_supported, mem_refs);
	  seq.release ();
	  break;
	}
      sms.safe_push (std::make_pair (e, seq));
    }

  /* Prune pruned mem_refs from earlier processed exits.  Iterate until
     no further refs become unsupported since demoting one entry can
     invalidate entries in other exit sequences.  */
  bool changed = !bitmap_empty_p (refs_not_supported);
  while (changed)
    {
      changed = false;
      std::pair<edge, vec<seq_entry> > *seq;
      FOR_EACH_VEC_ELT (sms, i, seq)
	{
	  bool need_to_push = false;
	  for (unsigned i = 0; i < seq->second.length (); ++i)
	    {
	      sm_kind kind = seq->second[i].second;
	      if (kind == sm_other && seq->second[i].from == NULL_TREE)
		break;
	      unsigned id = seq->second[i].first;
	      unsigned new_idx;
	      if (kind == sm_ord
		  && bitmap_bit_p (refs_not_supported, id))
		{
		  seq->second[i].second = sm_other;
		  gcc_assert (seq->second[i].from == NULL_TREE);
		  need_to_push = true;
		}
	      else if (need_to_push
		       && !sm_seq_push_down (seq->second, i, &new_idx))
		{
		  /* We need to push down both sm_ord and sm_other
		     but for the latter we need to disqualify all
		     following refs.  */
		  if (kind == sm_ord)
		    {
		      if (bitmap_set_bit (refs_not_supported, id))
			changed = true;
		      seq->second[new_idx].second = sm_other;
		    }
		  else
		    {
		      for (unsigned j = seq->second.length () - 1;
			   j > new_idx; --j)
			if (seq->second[j].second == sm_ord
			    && bitmap_set_bit (refs_not_supported,
					       seq->second[j].first))
			  changed = true;
		      seq->second.truncate (new_idx);
		      break;
		    }
		}
	    }
	}
    }
  std::pair<edge, vec<seq_entry> > *seq;
  FOR_EACH_VEC_ELT (sms, i, seq)
    {
      /* Prune sm_other from the end.  */
      while (!seq->second.is_empty ()
	     && seq->second.last ().second == sm_other)
	seq->second.pop ();
      /* Prune duplicates from the start.  */
      auto_bitmap seen (&lim_bitmap_obstack);
      unsigned j, k;
      for (j = k = 0; j < seq->second.length (); ++j)
	if (bitmap_set_bit (seen, seq->second[j].first))
	  {
	    if (k != j)
	      seq->second[k] = seq->second[j];
	    ++k;
	  }
      seq->second.truncate (k);
      /* And verify.  */
      seq_entry *e;
      FOR_EACH_VEC_ELT (seq->second, j, e)
	gcc_assert (e->second == sm_ord
		    || (e->second == sm_other && e->from != NULL_TREE));
    }

  /* Verify dependence for refs we cannot handle with the order preserving
     code (refs_not_supported) or prune them from mem_refs.  */
  auto_vec<seq_entry> unord_refs;
  EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      if (!ref_indep_loop_p (loop, ref, sm_waw))
	bitmap_clear_bit (mem_refs, i);
      /* We've now verified store order for ref with respect to all other
	 stores in the loop does not matter.  */
      else
	unord_refs.safe_push (seq_entry (i, sm_unord));
    }

  hash_map<im_mem_ref *, sm_aux *> aux_map;

  /* Execute SM but delay the store materialization for ordered
     sequences on exit.  */
  EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
		  false);
    }

  /* Materialize ordered store sequences on exits.  */
  auto_bitmap clobbers_to_prune;
  FOR_EACH_VEC_ELT (exits, i, e)
    {
      edge append_cond_position = NULL;
      edge last_cond_fallthru = NULL;
      if (i < sms.length ())
	{
	  gcc_assert (sms[i].first == e);
	  execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
			   append_cond_position, last_cond_fallthru,
			   clobbers_to_prune);
	  sms[i].second.release ();
	}
      if (!unord_refs.is_empty ())
	execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
			 append_cond_position, last_cond_fallthru,
			 clobbers_to_prune);
      /* Commit edge inserts here to preserve the order of stores
	 when an exit exits multiple loops.  */
      gsi_commit_one_edge_insert (e, NULL);
    }

  /* Remove clobbers inside the loop we re-materialized on exits.  */
  EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
    {
      gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      unlink_stmt_vdef (stmt);
      release_defs (stmt);
      gimple_set_vdef (stmt, NULL_TREE);
      gsi_remove (&gsi, true);
    }

  /* Release the per-ref auxiliary data.  */
  for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
       iter != aux_map.end (); ++iter)
    delete (*iter).second;
}
3107 :
/* Functor for for_all_locs_in_loop, returning whether an access location
   is always executed in LOOP (and, if STORED_P, is a store).  The call
   operator is defined below.  */

class ref_always_accessed
{
public:
  ref_always_accessed (class loop *loop_, bool stored_p_)
    : loop (loop_), stored_p (stored_p_) {}
  bool operator () (mem_ref_loc *loc);
  /* The loop the query is relative to.  */
  class loop *loop;
  /* Whether only stores should qualify.  */
  bool stored_p;
};
3117 :
3118 : bool
3119 150842 : ref_always_accessed::operator () (mem_ref_loc *loc)
3120 : {
3121 150842 : class loop *must_exec;
3122 :
3123 150842 : struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
3124 150842 : if (!lim_data)
3125 : return false;
3126 :
3127 : /* If we require an always executed store make sure the statement
3128 : is a store. */
3129 150842 : if (stored_p)
3130 : {
3131 150842 : tree lhs = gimple_get_lhs (loc->stmt);
3132 150842 : if (!lhs
3133 150842 : || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
3134 : return false;
3135 : }
3136 :
3137 95588 : must_exec = lim_data->always_executed_in;
3138 95588 : if (!must_exec)
3139 : return false;
3140 :
3141 36375 : if (must_exec == loop
3142 36375 : || flow_loop_nested_p (must_exec, loop))
3143 33077 : return true;
3144 :
3145 : return false;
3146 : }
3147 :
3148 : /* Returns true if REF is always accessed in LOOP. If STORED_P is true
3149 : make sure REF is always stored to in LOOP. */
3150 :
3151 : static bool
3152 79839 : ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
3153 : {
3154 34884 : return for_all_locs_in_loop (loop, ref,
3155 79839 : ref_always_accessed (loop, stored_p));
3156 : }
3157 :
3158 : /* Returns true if LOAD_REF and STORE_REF form a "self write" pattern
3159 : where the stored value comes from the loaded value via SSA.
3160 : Example: a[i] = a[0] is safe to hoist a[0] even when i==0. */
3161 :
3162 : static bool
3163 318063 : is_self_write (im_mem_ref *load_ref, im_mem_ref *store_ref)
3164 : {
3165 : /* Only handle the simple case with a single access per ref.
3166 : Bail out on multiple accesses to be conservative. */
3167 318063 : if (load_ref->accesses_in_loop.length () != 1
3168 408484 : || store_ref->accesses_in_loop.length () != 1)
3169 : return false;
3170 :
3171 90438 : gimple *load_stmt = load_ref->accesses_in_loop[0].stmt;
3172 90438 : gimple *store_stmt = store_ref->accesses_in_loop[0].stmt;
3173 :
3174 90438 : if (!is_gimple_assign (load_stmt) || !is_gimple_assign (store_stmt))
3175 : return false;
3176 :
3177 90438 : tree loaded_val = gimple_assign_lhs (load_stmt);
3178 90438 : tree stored_val = gimple_assign_rhs1 (store_stmt);
3179 :
3180 90438 : if (TREE_CODE (loaded_val) != SSA_NAME || TREE_CODE (stored_val) != SSA_NAME)
3181 : return false;
3182 :
3183 : /* Self write: stored value is the loaded value. */
3184 64573 : if (stored_val != loaded_val)
3185 : return false;
3186 :
3187 :
3188 : /* TODO: Try to factor this out with mem_ref_hasher::equal
3189 : into im_compare_access_position_and_size
3190 : or a similar helper to centralize this delicate handling
3191 : complete for MEM_REF offsets and base pointer equality.
3192 :
3193 : TODO: Also ensure max_size_known_p agrees or resort to
3194 : alignment considerations to rule out partial overlaps.
3195 :
3196 : See:
3197 : https://gcc.gnu.org/pipermail/gcc-patches/2025-December/704155.html
3198 : For more context on TODOs above. */
3199 :
3200 : /* They may alias. Verify exact same location. */
3201 1651 : return (operand_equal_p (load_ref->mem.base, store_ref->mem.base, 0)
3202 46 : && known_eq (load_ref->mem.size, store_ref->mem.size)
3203 1697 : && known_eq (load_ref->mem.offset, store_ref->mem.offset));
3204 :
3205 : }
3206 :
3207 : /* Returns true if REF1 and REF2 are independent. */
3208 :
3209 : static bool
3210 603805 : refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
3211 : {
3212 603805 : if (ref1 == ref2)
3213 : return true;
3214 :
3215 541582 : if (dump_file && (dump_flags & TDF_DETAILS))
3216 3040 : fprintf (dump_file, "Querying dependency of refs %u and %u: ",
3217 3040 : ref1->id, ref2->id);
3218 :
3219 541582 : if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
3220 : {
3221 64476 : if (dump_file && (dump_flags & TDF_DETAILS))
3222 99 : fprintf (dump_file, "dependent.\n");
3223 64476 : return false;
3224 : }
3225 : else
3226 : {
3227 477106 : if (dump_file && (dump_flags & TDF_DETAILS))
3228 2941 : fprintf (dump_file, "independent.\n");
3229 477106 : return true;
3230 : }
3231 : }
3232 :
/* Returns true if REF is independent on all other accesses in LOOP.
   KIND specifies the kind of dependence to consider.
     lim_raw assumes REF is not stored in LOOP and disambiguates RAW
	     dependences so if true REF can be hoisted out of LOOP
     sm_war disambiguates a store REF against all other loads to see
	     whether the store can be sunk across loads out of LOOP
     sm_waw disambiguates a store REF against all other stores to see
	     whether the store can be sunk across stores out of LOOP.  */

static bool
ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
{
  bool indep_p = true;
  bitmap refs_to_check;

  /* WAR queries are against loads, the others against stores.  */
  if (kind == sm_war)
    refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
  else
    refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];

  /* An unanalyzable access in the loop or an aggregate-copy REF makes
     the result conservatively dependent.  */
  if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
      || ref->mem.ref == error_mark_node)
    indep_p = false;
  else
    {
      /* tri-state, { unknown, independent, dependent }  */
      dep_state state = query_loop_dependence (loop, ref, kind);
      if (state != dep_unknown)
	return state == dep_independent ? true : false;

      /* Recurse into inner loops first; any dependence there makes
	 REF dependent in LOOP as well.  */
      class loop *inner = loop->inner;
      while (inner)
	{
	  if (!ref_indep_loop_p (inner, ref, kind))
	    {
	      indep_p = false;
	      break;
	    }
	  inner = inner->next;
	}

      if (indep_p)
	{
	  unsigned i;
	  bitmap_iterator bi;
	  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
	    {
	      im_mem_ref *aref = memory_accesses.refs_list[i];
	      if (aref->mem.ref == error_mark_node)
		{
		  /* Aggregate copy - fall back to querying the stmt
		     against REF directly.  */
		  gimple *stmt = aref->accesses_in_loop[0].stmt;
		  if ((kind == sm_war
		       && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
						    kind != sm_waw))
		      || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
						   kind != sm_waw))
		    {
		      indep_p = false;
		      break;
		    }
		}
	      /* For hoisting loads (lim_raw), allow "self write": the store
		 writes back the loaded value.  Example: a[i] = a[0]
		 is safe even when i==0 causes aliasing.  */
	      else if (kind == lim_raw
		       && ref->loaded && aref->stored
		       && is_self_write (ref, aref))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "Dependency of refs %u and %u: "
			     "independent (self write).\n",
			     ref->id, aref->id);
		}

	      else if (!refs_independent_p (ref, aref, kind != sm_waw))
		{
		  indep_p = false;
		  break;
		}
	    }
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
	     kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
	     ref->id, loop->num, indep_p ? "independent" : "dependent");

  /* Record the computed result in the cache.  */
  record_loop_dependence (loop, ref, kind,
			  indep_p ? dep_independent : dep_dependent);

  return indep_p;
}
3328 :
/* Functor for for_all_locs_in_loop checking that an access location is
   in a hot enough block for store motion out of loop L.  The call
   operator is defined below.  */

class ref_in_loop_hot_body
{
public:
  ref_in_loop_hot_body (class loop *loop_) : l (loop_) {}
  bool operator () (mem_ref_loc *loc);
  /* The loop store motion would move the access out of.  */
  class loop *l;
};
3336 :
3337 : /* Check the coldest loop between loop L and innermost loop. If there is one
3338 : cold loop between L and INNER_LOOP, store motion can be performed, otherwise
3339 : no cold loop means no store motion. get_coldest_out_loop also handles cases
3340 : when l is inner_loop. */
3341 : bool
3342 43238 : ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
3343 : {
3344 43238 : basic_block curr_bb = gimple_bb (loc->stmt);
3345 43238 : class loop *inner_loop = curr_bb->loop_father;
3346 43238 : return get_coldest_out_loop (l, inner_loop, curr_bb);
3347 : }
3348 :
3349 :
/* Returns true if we can perform store motion of REF from LOOP.
   This is a chain of legality checks; any failing check rejects REF.  */

static bool
can_sm_ref_p (class loop *loop, im_mem_ref *ref)
{
  tree base;

  /* Can't hoist unanalyzable refs.  */
  if (!MEM_ANALYZABLE (ref))
    return false;

  /* Can't hoist/sink aggregate copies.  */
  if (ref->mem.ref == error_mark_node)
    return false;

  /* It should be movable.  */
  if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
      || TREE_THIS_VOLATILE (ref->mem.ref)
      || !for_each_index (&ref->mem.ref, may_move_till, loop))
    return false;

  /* If it can throw fail, we do not properly update EH info.  */
  if (tree_could_throw_p (ref->mem.ref))
    return false;

  /* If it can trap, it must be always executed in LOOP.
     Readonly memory locations may trap when storing to them, but
     tree_could_trap_p is a predicate for rvalues, so check that
     explicitly.  */
  base = get_base_address (ref->mem.ref);
  if ((tree_could_trap_p (ref->mem.ref)
       || (DECL_P (base) && TREE_READONLY (base))
       || TREE_CODE (base) == STRING_CST)
      /* ??? We can at least use false here, allowing loads?  We
	 are forcing conditional stores if the ref is not always
	 stored to later anyway.  So this would only guard
	 the load we need to emit.  Thus when the ref is not
	 loaded we can elide this completely?  */
      && !ref_always_accessed_p (loop, ref, true))
    return false;

  /* Verify all loads of ref can be hoisted.  */
  if (ref->loaded
      && bitmap_bit_p (ref->loaded, loop->num)
      && !ref_indep_loop_p (loop, ref, lim_raw))
    return false;

  /* Verify the candidate can be disambiguated against all loads,
     that is, we can elide all in-loop stores.  Disambiguation
     against stores is done later when we cannot guarantee preserving
     the order of stores.  */
  if (!ref_indep_loop_p (loop, ref, sm_war))
    return false;

  /* Verify whether the candidate is hot for LOOP.  Only do store motion if the
     candidate's profile count is hot.  Statement in cold BB shouldn't be moved
     out of its loop_father.  */
  if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
    return false;

  return true;
}
3412 :
/* Marks the references in LOOP for that store motion should be performed
   in REFS_TO_SM.  SM_EXECUTED is the set of references for that store
   motion was performed in one of the outer loops (those are skipped
   here since the outer-loop SM already covers them).  */

static void
find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
{
  bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
  unsigned i;
  bitmap_iterator bi;
  im_mem_ref *ref;

  /* Walk all stored refs of LOOP not yet handled by an outer loop and
     record the legal candidates (subject to the lim debug counter).  */
  EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
	bitmap_set_bit (refs_to_sm, i);
    }
}
3432 :
3433 : /* Checks whether LOOP (with exits stored in EXITS array) is suitable
3434 : for a store motion optimization (i.e. whether we can insert statement
3435 : on its exits). */
3436 :
3437 : static bool
3438 1307957 : loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
3439 : const vec<edge> &exits)
3440 : {
3441 1307957 : unsigned i;
3442 1307957 : edge ex;
3443 :
3444 3379954 : FOR_EACH_VEC_ELT (exits, i, ex)
3445 2113393 : if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
3446 : return false;
3447 :
3448 : return true;
3449 : }
3450 :
/* Try to perform store motion for all memory references modified inside
   LOOP.  SM_EXECUTED is the bitmap of the memory references for that
   store motion was executed in one of the outer loops.  */

static void
store_motion_loop (class loop *loop, bitmap sm_executed)
{
  auto_vec<edge> exits = get_loop_exit_edges (loop);
  class loop *subloop;
  bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);

  if (loop_suitable_for_sm (loop, exits))
    {
      find_refs_for_sm (loop, sm_executed, sm_in_loop);
      if (!bitmap_empty_p (sm_in_loop))
	hoist_memory_references (loop, sm_in_loop, exits);
    }

  /* Refs handled here count as executed for the subloops, then the
     addition is undone after recursing so siblings are unaffected.  */
  bitmap_ior_into (sm_executed, sm_in_loop);
  for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
    store_motion_loop (subloop, sm_executed);
  bitmap_and_compl_into (sm_executed, sm_in_loop);
  BITMAP_FREE (sm_in_loop);
}
3475 :
3476 : /* Try to perform store motion for all memory references modified inside
3477 : loops. */
3478 :
3479 : static void
3480 481403 : do_store_motion (void)
3481 : {
3482 481403 : class loop *loop;
3483 481403 : bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3484 :
3485 1491186 : for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3486 1009783 : store_motion_loop (loop, sm_executed);
3487 :
3488 481403 : BITMAP_FREE (sm_executed);
3489 481403 : }
3490 :
3491 : /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
3492 : for each such basic block bb records the outermost loop for that execution
3493 : of its header implies execution of bb. CONTAINS_CALL is the bitmap of
3494 : blocks that contain a nonpure call. */
3495 :
static void
fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, last = NULL;
  edge e;
  /* Innermost loop we are currently walking blocks of; used to detect
     when we leave a (possibly infinite) subloop.  */
  class loop *inn_loop = loop;

  /* Process subloops first so their ALWAYS_EXECUTED_IN info is available
     when we look at LOOP's own blocks below.  */
  for (class loop *inner = loop->inner; inner; inner = inner->next)
    fill_always_executed_in_1 (inner, contains_call);

  auto_vec<basic_block, 64> worklist;
  worklist.reserve_exact (loop->num_nodes);
  worklist.quick_push (loop->header);
  do
    {
      edge_iterator ei;
      bb = worklist.pop ();

      if (!flow_bb_inside_loop_p (inn_loop, bb))
	{
	  /* When we are leaving a possibly infinite inner loop
	     we have to stop processing.  */
	  if (!finite_loop_p (inn_loop))
	    break;
	  /* If the loop was finite we can continue with processing
	     the loop we exited to.  */
	  inn_loop = bb->loop_father;
	}

      /* Remember the farthest block on the dominator chain towards the
	 latch that we reached without hitting a stopper; the final loop
	 below marks that chain as always executed.  */
      if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	last = bb;

      /* A nonpure call may not return (exit, longjmp, infinite loop in
	 the callee), so blocks after it are not always executed.  */
      if (bitmap_bit_p (contains_call, bb->index))
	break;

      /* If LOOP exits from this BB stop processing.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!flow_bb_inside_loop_p (loop, e->dest))
	  break;
      /* E is non-NULL exactly when the inner break above fired, i.e.
	 when BB has an edge leaving LOOP.  */
      if (e)
	break;

      /* A loop might be infinite (TODO use simple loop analysis
	 to disprove this if possible).  */
      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	break;

      if (bb->loop_father->header == bb)
	/* Record that we enter into a subloop since it might not
	   be finite.  */
	/* ??? Entering into a not always executed subloop makes
	   fill_always_executed_in quadratic in loop depth since
	   we walk those loops N times.  This is not a problem
	   in practice though, see PR102253 for a worst-case testcase.  */
	inn_loop = bb->loop_father;

      /* Walk the body of LOOP sorted by dominance relation.  Additionally,
	 if a basic block S dominates the latch, then only blocks dominated
	 by S are after it.
	 This is get_loop_body_in_dom_order using a worklist algorithm and
	 stopping once we are no longer interested in visiting further
	 blocks.  */
      unsigned old_len = worklist.length ();
      unsigned postpone = 0;
      for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
	   son;
	   son = next_dom_son (CDI_DOMINATORS, son))
	{
	  if (!flow_bb_inside_loop_p (loop, son))
	    continue;
	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
	    postpone = worklist.length ();
	  worklist.quick_push (son);
	}
      if (postpone)
	/* Postponing the block that dominates the latch means
	   processing it last and thus putting it earliest in the
	   worklist.  */
	std::swap (worklist[old_len], worklist[postpone]);
    }
  while (!worklist.is_empty ());

  /* LAST and all its dominators up to (and including) LOOP's header are
     executed on every iteration; record LOOP for each of them unless an
     inner loop already recorded itself (then only extend to LOOP if the
     recorded loop is directly inside it).  */
  while (1)
    {
      if (last->loop_father == loop
	  || (ALWAYS_EXECUTED_IN (last)
	      && loop_outer (ALWAYS_EXECUTED_IN (last)) == loop))
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
			 last->index, loop->num);
	  SET_ALWAYS_EXECUTED_IN (last, loop);
	}
      if (last == loop->header)
	break;
      last = get_immediate_dominator (CDI_DOMINATORS, last);
    }
}
3594 :
3595 : /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3596 : for each such basic block bb records the outermost loop for that execution
3597 : of its header implies execution of bb. */
3598 :
3599 : static void
3600 481514 : fill_always_executed_in (void)
3601 : {
3602 481514 : basic_block bb;
3603 481514 : class loop *loop;
3604 :
3605 481514 : auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3606 481514 : bitmap_clear (contains_call);
3607 14642656 : FOR_EACH_BB_FN (bb, cfun)
3608 : {
3609 14161142 : if (loop_depth (bb->loop_father) == 0)
3610 : {
3611 : /* Outside of loops we can skip scanning all stmts. */
3612 8157756 : bitmap_set_bit (contains_call, bb->index);
3613 8157756 : continue;
3614 : }
3615 6003386 : gimple_stmt_iterator gsi;
3616 49455816 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3617 : {
3618 39476200 : if (nonpure_call_p (gsi_stmt (gsi)))
3619 : break;
3620 : }
3621 :
3622 6003386 : if (!gsi_end_p (gsi))
3623 952342 : bitmap_set_bit (contains_call, bb->index);
3624 : }
3625 :
3626 1491565 : for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3627 1010051 : fill_always_executed_in_1 (loop, contains_call);
3628 481514 : }
3629 :
3630 : /* Find the coldest loop preheader for LOOP, also find the nearest hotter loop
3631 : to LOOP. Then recursively iterate each inner loop. */
3632 :
3633 : void
3634 1308482 : fill_coldest_and_hotter_out_loop (class loop *coldest_loop,
3635 : class loop *hotter_loop, class loop *loop)
3636 : {
3637 1308482 : if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3638 : coldest_loop))
3639 27734 : coldest_loop = loop;
3640 :
3641 1308482 : coldest_outermost_loop[loop->num] = coldest_loop;
3642 :
3643 1308482 : hotter_than_inner_loop[loop->num] = NULL;
3644 1308482 : class loop *outer_loop = loop_outer (loop);
3645 1308482 : if (hotter_loop
3646 1308482 : && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3647 : hotter_loop))
3648 2147 : hotter_than_inner_loop[loop->num] = hotter_loop;
3649 :
3650 1308482 : if (outer_loop && outer_loop != current_loops->tree_root
3651 1606913 : && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3652 : outer_loop))
3653 33959 : hotter_than_inner_loop[loop->num] = outer_loop;
3654 :
3655 1308482 : if (dump_enabled_p ())
3656 : {
3657 1482 : dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ",
3658 : loop->num, coldest_loop->num);
3659 1482 : if (hotter_than_inner_loop[loop->num])
3660 245 : dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n",
3661 245 : hotter_than_inner_loop[loop->num]->num);
3662 : else
3663 1237 : dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n");
3664 : }
3665 :
3666 1308482 : class loop *inner_loop;
3667 1606913 : for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next)
3668 596862 : fill_coldest_and_hotter_out_loop (coldest_loop,
3669 298431 : hotter_than_inner_loop[loop->num],
3670 : inner_loop);
3671 1308482 : }
3672 :
3673 : /* Compute the global information needed by the loop invariant motion pass. */
3674 :
static void
tree_ssa_lim_initialize (bool store_motion)
{
  unsigned i;

  /* Obstacks backing all bitmaps and mem-ref records for this pass.  */
  bitmap_obstack_initialize (&lim_bitmap_obstack);
  gcc_obstack_init (&mem_ref_obstack);
  lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;

  if (flag_tm)
    compute_transaction_bits ();

  memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
  memory_accesses.refs_list.create (100);
  /* Allocate a special, unanalyzable mem-ref with ID zero.  */
  memory_accesses.refs_list.quick_push
    (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));

  /* Per-loop bitmaps of ref IDs loaded/stored in each loop, indexed by
     loop->num.  */
  memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_loaded_in_loop.quick_grow_cleared (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.quick_grow_cleared (number_of_loops (cfun));
  /* The all_refs_stored_in_loop map is only needed for store motion.  */
  if (store_motion)
    {
      memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
      memory_accesses.all_refs_stored_in_loop.quick_grow_cleared
	(number_of_loops (cfun));
    }

  for (i = 0; i < number_of_loops (cfun); i++)
    {
      bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
			 &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
			 &lim_bitmap_obstack);
      if (store_motion)
	bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
			   &lim_bitmap_obstack);
    }

  /* The affine-expansion cache is allocated lazily on first use.  */
  memory_accesses.ttae_cache = NULL;

  /* Initialize bb_loop_postorder with a mapping from loop->num to
     its postorder index.  */
  i = 0;
  bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    bb_loop_postorder[loop->num] = i++;
}
3724 :
3725 : /* Cleans up after the invariant motion pass. */
3726 :
3727 : static void
3728 481514 : tree_ssa_lim_finalize (void)
3729 : {
3730 481514 : basic_block bb;
3731 481514 : unsigned i;
3732 481514 : im_mem_ref *ref;
3733 :
3734 14687833 : FOR_EACH_BB_FN (bb, cfun)
3735 14206319 : SET_ALWAYS_EXECUTED_IN (bb, NULL);
3736 :
3737 481514 : bitmap_obstack_release (&lim_bitmap_obstack);
3738 963028 : delete lim_aux_data_map;
3739 :
3740 481514 : delete memory_accesses.refs;
3741 481514 : memory_accesses.refs = NULL;
3742 :
3743 5685829 : FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
3744 5204315 : memref_free (ref);
3745 481514 : memory_accesses.refs_list.release ();
3746 481514 : obstack_free (&mem_ref_obstack, NULL);
3747 :
3748 481514 : memory_accesses.refs_loaded_in_loop.release ();
3749 481514 : memory_accesses.refs_stored_in_loop.release ();
3750 481514 : memory_accesses.all_refs_stored_in_loop.release ();
3751 :
3752 481514 : if (memory_accesses.ttae_cache)
3753 4534 : free_affine_expand_cache (&memory_accesses.ttae_cache);
3754 :
3755 481514 : free (bb_loop_postorder);
3756 :
3757 481514 : coldest_outermost_loop.release ();
3758 481514 : hotter_than_inner_loop.release ();
3759 481514 : }
3760 :
3761 : /* Moves invariants from loops. Only "expensive" invariants are moved out --
3762 : i.e. those that are likely to be win regardless of the register pressure.
3763 : Only perform store motion if STORE_MOTION is true. */
3764 :
unsigned int
loop_invariant_motion_in_fun (function *fun, bool store_motion)
{
  unsigned int todo = 0;

  tree_ssa_lim_initialize (store_motion);

  /* Identify SSA names that may be undefined so they are not hoisted.  */
  mark_ssa_maybe_undefs ();

  /* Gathers information about memory accesses in the loops.  */
  analyze_memory_references (store_motion);

  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
  fill_always_executed_in ();

  /* Pre-compute coldest outermost loop and nearest hotter loop of each loop.
   */
  class loop *loop;
  coldest_outermost_loop.create (number_of_loops (cfun));
  coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
  hotter_than_inner_loop.create (number_of_loops (cfun));
  hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
  for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    fill_coldest_and_hotter_out_loop (loop, NULL, loop);

  int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
  int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);

  /* For each statement determine the outermost loop in that it is
     invariant and cost for computing the invariant.  */
  for (int i = 0; i < n; ++i)
    compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));

  /* Execute store motion.  Force the necessary invariants to be moved
     out of the loops as well.  */
  if (store_motion)
    do_store_motion ();

  /* NOTE(review): the RPO order is recomputed from scratch here,
     presumably because store motion can alter the CFG — confirm.  */
  free (rpo);
  rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
  n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);

  /* Move the expressions that are expensive enough.  */
  for (int i = 0; i < n; ++i)
    todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));

  free (rpo);

  /* Commit statements queued on edges and repair loop-closed SSA form
     if moving computations invalidated it.  */
  gsi_commit_edge_inserts ();
  if (need_ssa_update_p (fun))
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);

  tree_ssa_lim_finalize ();

  return todo;
}
3821 :
3822 : /* Loop invariant motion pass. */
3823 :
3824 : namespace {
3825 :
/* Pass descriptor; "lim" is the dump-file suffix (-fdump-tree-lim).  */
const pass_data pass_data_lim =
{
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
3838 :
class pass_lim : public gimple_opt_pass
{
public:
  pass_lim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lim, ctxt)
  {}

  /* opt_pass methods: */
  /* The pass is cloned because it runs several times in the pipeline.  */
  opt_pass * clone () final override { return new pass_lim (m_ctxt); }
  /* Gated on -ftree-loop-im.  */
  bool gate (function *) final override { return flag_tree_loop_im != 0; }
  unsigned int execute (function *) final override;

}; // class pass_lim
3852 :
3853 : unsigned int
3854 1280877 : pass_lim::execute (function *fun)
3855 : {
3856 1280877 : in_loop_pipeline = scev_initialized_p ();
3857 1280877 : if (!in_loop_pipeline)
3858 1040386 : loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3859 :
3860 2561754 : if (number_of_loops (fun) <= 1)
3861 : return 0;
3862 481419 : unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
3863 :
3864 481419 : if (!in_loop_pipeline)
3865 240928 : loop_optimizer_finalize ();
3866 : else
3867 240491 : scev_reset ();
3868 : return todo;
3869 : }
3870 :
3871 : } // anon namespace
3872 :
3873 : gimple_opt_pass *
3874 288047 : make_pass_lim (gcc::context *ctxt)
3875 : {
3876 288047 : return new pass_lim (ctxt);
3877 : }
3878 :
3879 :
|