LCOV - code coverage report
Current view: top level - gcc - tree-ssa-loop-im.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 97.8 % 1676 1639
Test Date: 2026-05-30 15:37:04 Functions: 100.0 % 79 79
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Loop invariant motion.
       2              :    Copyright (C) 2003-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it
       7              : under the terms of the GNU General Public License as published by the
       8              : Free Software Foundation; either version 3, or (at your option) any
       9              : later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT
      12              : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "backend.h"
      24              : #include "tree.h"
      25              : #include "gimple.h"
      26              : #include "cfghooks.h"
      27              : #include "tree-pass.h"
      28              : #include "ssa.h"
      29              : #include "gimple-pretty-print.h"
      30              : #include "fold-const.h"
      31              : #include "cfganal.h"
      32              : #include "tree-eh.h"
      33              : #include "gimple-iterator.h"
      34              : #include "tree-cfg.h"
      35              : #include "tree-ssa-loop-manip.h"
      36              : #include "tree-ssa-loop.h"
      37              : #include "tree-into-ssa.h"
      38              : #include "cfgloop.h"
      39              : #include "tree-affine.h"
      40              : #include "tree-ssa-propagate.h"
      41              : #include "trans-mem.h"
      42              : #include "gimple-fold.h"
      43              : #include "tree-scalar-evolution.h"
      44              : #include "tree-ssa-loop-niter.h"
      45              : #include "alias.h"
      46              : #include "builtins.h"
      47              : #include "tree-dfa.h"
      48              : #include "tree-ssa.h"
      49              : #include "dbgcnt.h"
      50              : #include "insn-codes.h"
      51              : #include "optabs-tree.h"
      52              : 
      53              : /* TODO:  Support for predicated code motion.  I.e.
      54              : 
      55              :    while (1)
      56              :      {
      57              :        if (cond)
      58              :          {
      59              :            a = inv;
      60              :            something;
      61              :          }
      62              :      }
      63              : 
      64              :    Where COND and INV are invariants, but evaluating INV may trap or be
      65              :    invalid from some other reason if !COND.  This may be transformed to
      66              : 
      67              :    if (cond)
      68              :      a = inv;
      69              :    while (1)
      70              :      {
      71              :        if (cond)
      72              :          something;
      73              :      }  */
      74              : 
      75              : /* The auxiliary data kept for each statement.  */
      76              : 
      77              : struct lim_aux_data
      78              : {
      79              :   class loop *max_loop; /* The outermost loop in that the statement
      80              :                                    is invariant.  */
      81              : 
      82              :   class loop *tgt_loop; /* The loop out of that we want to move the
      83              :                                    invariant.  */
      84              : 
      85              :   class loop *always_executed_in;
      86              :                                 /* The outermost loop for that we are sure
      87              :                                    the statement is executed if the loop
      88              :                                    is entered.  */
      89              : 
      90              :   unsigned cost;                /* Cost of the computation performed by the
      91              :                                    statement.  */
      92              : 
      93              :   unsigned ref;                 /* The simple_mem_ref in this stmt or 0.  */
      94              : 
      95              :   vec<gimple *> depends;  /* Vector of statements that must be also
      96              :                                    hoisted out of the loop when this statement
      97              :                                    is hoisted; i.e. those that define the
      98              :                                    operands of the statement and are inside of
      99              :                                    the MAX_LOOP loop.  */
     100              : };
     101              : 
     102              : /* Maps statements to their lim_aux_data.  */
     103              : 
     104              : static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
     105              : 
     106              : /* Description of a memory reference location.  */
     107              : 
     108              : struct mem_ref_loc
     109              : {
     110              :   tree *ref;                    /* The reference itself.  */
     111              :   gimple *stmt;                 /* The statement in that it occurs.  */
     112              : };
     113              : 
     114              : 
     115              : /* Description of a memory reference.  */
     116              : 
     117              : class im_mem_ref
     118              : {
     119              : public:
     120              :   unsigned id : 30;             /* ID assigned to the memory reference
     121              :                                    (its index in memory_accesses.refs_list)  */
     122              :   unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
     123              :   unsigned ref_decomposed : 1;  /* Whether the ref was hashed from mem.  */
     124              :   hashval_t hash;               /* Its hash value.  */
     125              : 
     126              :   /* The memory access itself and associated caching of alias-oracle
     127              :      query meta-data.  We are using mem.ref == error_mark_node for the
     128              :      case the reference is represented by its single access stmt
     129              :      in accesses_in_loop[0].  */
     130              :   ao_ref mem;
     131              : 
     132              :   bitmap stored;                /* The set of loops in that this memory location
     133              :                                    is stored to.  */
     134              :   bitmap loaded;                /* The set of loops in that this memory location
     135              :                                    is loaded from.  */
     136              :   vec<mem_ref_loc>                accesses_in_loop;
     137              :                                 /* The locations of the accesses.  */
     138              : 
     139              :   /* The following set is computed on demand.  */
     140              :   bitmap_head dep_loop;         /* The set of loops in that the memory
     141              :                                    reference is {in,}dependent in
     142              :                                    different modes.  */
     143              : };
     144              : 
     145              : static bool in_loop_pipeline;
     146              : 
     147              : /* We use six bits per loop in the ref->dep_loop bitmap to record
     148              :    the dep_kind x dep_state combinations.  */
     149              : 
     150              : enum dep_kind { lim_raw, sm_war, sm_waw };
     151              : enum dep_state { dep_unknown, dep_independent, dep_dependent };
     152              : 
     153              : /* coldest outermost loop for given loop.  */
     154              : vec<class loop *> coldest_outermost_loop;
     155              : /* hotter outer loop nearest to given loop.  */
     156              : vec<class loop *> hotter_than_inner_loop;
     157              : 
     158              : /* Populate the loop dependence cache of REF for LOOP, KIND with STATE.  */
     159              : 
     160              : static void
     161      2064512 : record_loop_dependence (class loop *loop, im_mem_ref *ref,
     162              :                         dep_kind kind, dep_state state)
     163              : {
     164      2064512 :   gcc_assert (state != dep_unknown);
     165      2064512 :   unsigned bit = 6 * loop->num + kind * 2 + state == dep_dependent ? 1 : 0;
     166      2064512 :   bitmap_set_bit (&ref->dep_loop, bit);
     167      2064512 : }
     168              : 
     169              : /* Query the loop dependence cache of REF for LOOP, KIND.  */
     170              : 
     171              : static dep_state
     172       662016 : query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
     173              : {
     174       662016 :   unsigned first_bit = 6 * loop->num + kind * 2;
     175       662016 :   if (bitmap_bit_p (&ref->dep_loop, first_bit))
     176              :     return dep_independent;
     177       662016 :   else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
     178            0 :     return dep_dependent;
     179              :   return dep_unknown;
     180              : }
     181              : 
     182              : /* Mem_ref hashtable helpers.  */
     183              : 
     184              : struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
     185              : {
     186              :   typedef ao_ref *compare_type;
     187              :   static inline hashval_t hash (const im_mem_ref *);
     188              :   static inline bool equal (const im_mem_ref *, const ao_ref *);
     189              : };
     190              : 
     191              : /* A hash function for class im_mem_ref object OBJ.  */
     192              : 
     193              : inline hashval_t
     194     10731821 : mem_ref_hasher::hash (const im_mem_ref *mem)
     195              : {
     196     10731821 :   return mem->hash;
     197              : }
     198              : 
     199              : /* An equality function for class im_mem_ref object MEM1 with
     200              :    memory reference OBJ2.  */
     201              : 
     202              : inline bool
     203     12488348 : mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
     204              : {
     205     12488348 :   if (obj2->max_size_known_p ())
     206     11013113 :     return (mem1->ref_decomposed
     207     10547045 :             && ((TREE_CODE (mem1->mem.base) == MEM_REF
     208      4606593 :                  && TREE_CODE (obj2->base) == MEM_REF
     209      3059018 :                  && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
     210      3059018 :                                      TREE_OPERAND (obj2->base, 0), 0)
     211     12863291 :                  && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
     212              :                               mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
     213     10120027 :                 || (operand_equal_p (mem1->mem.base, obj2->base, 0)
     214       801686 :                     && known_eq (mem1->mem.offset, obj2->offset)))
     215       885800 :             && known_eq (mem1->mem.size, obj2->size)
     216       883038 :             && known_eq (mem1->mem.max_size, obj2->max_size)
     217       883038 :             && mem1->mem.volatile_p == obj2->volatile_p
     218       883022 :             && (mem1->mem.ref_alias_set == obj2->ref_alias_set
     219              :                 /* We are not canonicalizing alias-sets but for the
     220              :                    special-case we didn't canonicalize yet and the
     221              :                    incoming ref is a alias-set zero MEM we pick
     222              :                    the correct one already.  */
     223        14595 :                 || (!mem1->ref_canonical
     224        13718 :                     && (TREE_CODE (obj2->ref) == MEM_REF
     225        13718 :                         || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
     226         6779 :                     && obj2->ref_alias_set == 0)
     227              :                 /* Likewise if there's a canonical ref with alias-set zero.  */
     228        11940 :                 || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
     229     11884223 :             && types_compatible_p (TREE_TYPE (mem1->mem.ref),
     230       871110 :                                    TREE_TYPE (obj2->ref)));
     231              :   else
     232      1475235 :     return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
     233              : }
     234              : 
     235              : 
     236              : /* Description of memory accesses in loops.  */
     237              : 
     238              : static struct
     239              : {
     240              :   /* The hash table of memory references accessed in loops.  */
     241              :   hash_table<mem_ref_hasher> *refs;
     242              : 
     243              :   /* The list of memory references.  */
     244              :   vec<im_mem_ref *> refs_list;
     245              : 
     246              :   /* The set of memory references accessed in each loop.  */
     247              :   vec<bitmap_head> refs_loaded_in_loop;
     248              : 
     249              :   /* The set of memory references stored in each loop.  */
     250              :   vec<bitmap_head> refs_stored_in_loop;
     251              : 
     252              :   /* The set of memory references stored in each loop, including subloops .  */
     253              :   vec<bitmap_head> all_refs_stored_in_loop;
     254              : 
     255              :   /* Cache for expanding memory addresses.  */
     256              :   hash_map<tree, name_expansion *> *ttae_cache;
     257              : } memory_accesses;
     258              : 
     259              : /* Obstack for the bitmaps in the above data structures.  */
     260              : static bitmap_obstack lim_bitmap_obstack;
     261              : static obstack mem_ref_obstack;
     262              : 
     263              : static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
     264              : static bool ref_always_stored_p (class loop *, im_mem_ref *);
     265              : static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
     266              : 
     267              : /* Minimum cost of an expensive expression.  */
     268              : #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
     269              : 
     270              : /* The outermost loop for which execution of the header guarantees that the
     271              :    block will be executed.  */
     272              : #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
     273              : #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
     274              : 
     275              : /* ID of the shared unanalyzable mem.  */
     276              : #define UNANALYZABLE_MEM_ID 0
     277              : 
     278              : /* Whether the reference was analyzable.  */
     279              : #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
     280              : 
     281              : static struct lim_aux_data *
     282     17537367 : init_lim_data (gimple *stmt)
     283              : {
     284     17537367 :   lim_aux_data *p = XCNEW (struct lim_aux_data);
     285     17537367 :   lim_aux_data_map->put (stmt, p);
     286              : 
     287     17537367 :   return p;
     288              : }
     289              : 
     290              : static struct lim_aux_data *
     291     93834447 : get_lim_data (gimple *stmt)
     292              : {
     293     93834447 :   lim_aux_data **p = lim_aux_data_map->get (stmt);
     294     93834447 :   if (!p)
     295              :     return NULL;
     296              : 
     297     47670420 :   return *p;
     298              : }
     299              : 
     300              : /* Releases the memory occupied by DATA.  */
     301              : 
     302              : static void
     303     17537188 : free_lim_aux_data (struct lim_aux_data *data)
     304              : {
     305            0 :   data->depends.release ();
     306     17537188 :   free (data);
     307            0 : }
     308              : 
     309              : static void
     310     17537188 : clear_lim_data (gimple *stmt)
     311              : {
     312     17537188 :   lim_aux_data **p = lim_aux_data_map->get (stmt);
     313     17537188 :   if (!p)
     314              :     return;
     315              : 
     316     17537188 :   free_lim_aux_data (*p);
     317     17537188 :   *p = NULL;
     318              : }
     319              : 
     320              : 
     321              : /* The possibilities of statement movement.  */
     322              : enum move_pos
     323              :   {
     324              :     MOVE_IMPOSSIBLE,            /* No movement -- side effect expression.  */
     325              :     MOVE_PRESERVE_EXECUTION,    /* Must not cause the non-executed statement
     326              :                                    become executed -- memory accesses, ... */
     327              :     MOVE_POSSIBLE               /* Unlimited movement.  */
     328              :   };
     329              : 
     330              : 
     331              : /* If it is possible to hoist the statement STMT unconditionally,
     332              :    returns MOVE_POSSIBLE.
     333              :    If it is possible to hoist the statement STMT, but we must avoid making
     334              :    it executed if it would not be executed in the original program (e.g.
     335              :    because it may trap), return MOVE_PRESERVE_EXECUTION.
     336              :    Otherwise return MOVE_IMPOSSIBLE.  */
     337              : 
     338              : static enum move_pos
     339     46560663 : movement_possibility_1 (gimple *stmt)
     340              : {
     341     46560663 :   tree lhs;
     342     46560663 :   enum move_pos ret = MOVE_POSSIBLE;
     343              : 
     344     46560663 :   if (flag_unswitch_loops
     345     46560663 :       && gimple_code (stmt) == GIMPLE_COND)
     346              :     {
     347              :       /* If we perform unswitching, force the operands of the invariant
     348              :          condition to be moved out of the loop.  */
     349              :       return MOVE_POSSIBLE;
     350              :     }
     351              : 
     352     46211861 :   if (gimple_code (stmt) == GIMPLE_PHI
     353      2600781 :       && gimple_phi_num_args (stmt) <= 2
     354      4863636 :       && !virtual_operand_p (gimple_phi_result (stmt))
     355     47663408 :       && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
     356              :     return MOVE_POSSIBLE;
     357              : 
     358     44760571 :   if (gimple_get_lhs (stmt) == NULL_TREE)
     359              :     return MOVE_IMPOSSIBLE;
     360              : 
     361     31197787 :   if (gimple_vdef (stmt))
     362              :     return MOVE_IMPOSSIBLE;
     363              : 
     364     13190669 :   if (stmt_ends_bb_p (stmt)
     365     12013427 :       || gimple_has_volatile_ops (stmt)
     366     13136366 :       || gimple_has_side_effects (stmt)
     367     26321155 :       || stmt_could_throw_p (cfun, stmt))
     368       458133 :     return MOVE_IMPOSSIBLE;
     369              : 
     370     12732536 :   if (is_gimple_call (stmt))
     371              :     {
     372              :       /* While pure or const call is guaranteed to have no side effects, we
     373              :          cannot move it arbitrarily.  Consider code like
     374              : 
     375              :          char *s = something ();
     376              : 
     377              :          while (1)
     378              :            {
     379              :              if (s)
     380              :                t = strlen (s);
     381              :              else
     382              :                t = 0;
     383              :            }
     384              : 
     385              :          Here the strlen call cannot be moved out of the loop, even though
     386              :          s is invariant.  In addition to possibly creating a call with
     387              :          invalid arguments, moving out a function call that is not executed
     388              :          may cause performance regressions in case the call is costly and
     389              :          not executed at all.  */
     390       129317 :       ret = MOVE_PRESERVE_EXECUTION;
     391       129317 :       lhs = gimple_call_lhs (stmt);
     392              :     }
     393     12603219 :   else if (is_gimple_assign (stmt))
     394     11453728 :     lhs = gimple_assign_lhs (stmt);
     395              :   else
     396              :     return MOVE_IMPOSSIBLE;
     397              : 
     398     11583045 :   if (TREE_CODE (lhs) == SSA_NAME
     399     11583045 :       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
     400              :     return MOVE_IMPOSSIBLE;
     401              : 
     402     11582813 :   if (TREE_CODE (lhs) != SSA_NAME
     403     11582813 :       || gimple_could_trap_p (stmt))
     404      2390166 :     return MOVE_PRESERVE_EXECUTION;
     405              : 
     406      9192647 :   if (is_gimple_assign (stmt))
     407              :     {
     408      9070665 :       auto code = gimple_assign_rhs_code (stmt);
     409      9070665 :       tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
     410              :       /* For shifts and rotates and possibly out-of-bound shift operands
     411              :          we currently cannot rewrite them into something unconditionally
     412              :          well-defined.  */
     413      9070665 :       if ((code == LSHIFT_EXPR
     414              :            || code == RSHIFT_EXPR
     415              :            || code == LROTATE_EXPR
     416      9070665 :            || code == RROTATE_EXPR)
     417      9070665 :           && (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
     418              :               /* We cannot use ranges at 'stmt' here.  */
     419       119694 :               || wi::ltu_p (wi::to_wide (gimple_assign_rhs2 (stmt)),
     420      9029301 :                             element_precision (type))))
     421       161058 :         ret = MOVE_PRESERVE_EXECUTION;
     422              :     }
     423              : 
     424              :   /* Non local loads in a transaction cannot be hoisted out.  Well,
     425              :      unless the load happens on every path out of the loop, but we
     426              :      don't take this into account yet.  */
     427      9192647 :   if (flag_tm
     428          434 :       && gimple_in_transaction (stmt)
     429      9192750 :       && gimple_assign_single_p (stmt))
     430              :     {
     431           21 :       tree rhs = gimple_assign_rhs1 (stmt);
     432           21 :       if (DECL_P (rhs) && is_global_var (rhs))
     433              :         {
     434           18 :           if (dump_file)
     435              :             {
     436            2 :               fprintf (dump_file, "Cannot hoist conditional load of ");
     437            2 :               print_generic_expr (dump_file, rhs, TDF_SLIM);
     438            2 :               fprintf (dump_file, " because it is in a transaction.\n");
     439              :             }
     440           18 :           return MOVE_IMPOSSIBLE;
     441              :         }
     442              :     }
     443              : 
     444              :   return ret;
     445              : }
     446              : 
     447              : static enum move_pos
     448     46560663 : movement_possibility (gimple *stmt)
     449              : {
     450     46560663 :   enum move_pos pos = movement_possibility_1 (stmt);
     451     46560663 :   if (pos == MOVE_POSSIBLE)
     452              :     {
     453     10709681 :       use_operand_p use_p;
     454     10709681 :       ssa_op_iter ssa_iter;
     455     34771077 :       FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE)
     456     13370143 :         if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME
     457     13370143 :             && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p)))
     458        18428 :           return MOVE_PRESERVE_EXECUTION;
     459              :     }
     460              :   return pos;
     461              : }
     462              : 
     463              : 
     464              : /* Compare the profile count inequality of bb and loop's preheader, it is
     465              :    three-state as stated in profile-count.h, FALSE is returned if inequality
     466              :    cannot be decided.  */
     467              : bool
     468     13864907 : bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
     469              : {
     470     13864907 :   gcc_assert (bb && loop);
     471     13864907 :   return bb->count < loop_preheader_edge (loop)->src->count;
     472              : }
     473              : 
     474              : /* Check coldest loop between OUTERMOST_LOOP and LOOP by comparing profile
     475              :    count.
     476              :   It does three steps check:
     477              :   1) Check whether CURR_BB is cold in it's own loop_father, if it is cold, just
     478              :   return NULL which means it should not be moved out at all;
     479              :   2) CURR_BB is NOT cold, check if pre-computed COLDEST_LOOP is outside of
     480              :   OUTERMOST_LOOP, if it is inside of OUTERMOST_LOOP, return the COLDEST_LOOP;
     481              :   3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
     482              :   hotter loop between OUTERMOST_LOOP and loop in pre-computed
     483              :   HOTTER_THAN_INNER_LOOP, return it's nested inner loop, otherwise return
     484              :   OUTERMOST_LOOP.
     485              :   At last, the coldest_loop is inside of OUTERMOST_LOOP, just return it as
     486              :   the hoist target.  */
     487              : 
     488              : static class loop *
     489     12249299 : get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
     490              :                       basic_block curr_bb)
     491              : {
     492     12249299 :   gcc_assert (outermost_loop == loop
     493              :               || flow_loop_nested_p (outermost_loop, loop));
     494              : 
     495              :   /* If bb_colder_than_loop_preheader returns false due to three-state
     496              :     comparision, OUTERMOST_LOOP is returned finally to preserve the behavior.
     497              :     Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP.  */
     498     12249299 :   if (curr_bb && bb_colder_than_loop_preheader (curr_bb, loop))
     499              :     return NULL;
     500              : 
     501     11300364 :   class loop *coldest_loop = coldest_outermost_loop[loop->num];
     502     22600728 :   if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
     503              :     {
     504       246192 :       class loop *hotter_loop = hotter_than_inner_loop[loop->num];
     505       246192 :       if (!hotter_loop
     506       252864 :           || loop_depth (hotter_loop) < loop_depth (outermost_loop))
     507              :         return outermost_loop;
     508              : 
     509              :       /*  hotter_loop is between OUTERMOST_LOOP and LOOP like:
     510              :         [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
     511              :         hotter_loop, second_coldest_loop, ..., loop]
     512              :         return second_coldest_loop to be the hoist target.  */
     513           23 :       class loop *aloop;
     514           23 :       for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
     515           23 :         if (aloop == loop || flow_loop_nested_p (aloop, loop))
     516           23 :           return aloop;
     517              :     }
     518              :   return coldest_loop;
     519              : }
     520              : 
     521              : /* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
     522              :    loop to that we could move the expression using DEF if it did not have
     523              :    other operands, i.e. the outermost loop enclosing LOOP in that the value
     524              :    of DEF is invariant.  */
     525              : 
     526              : static class loop *
     527     19657295 : outermost_invariant_loop (tree def, class loop *loop)
     528              : {
     529     19657295 :   gimple *def_stmt;
     530     19657295 :   basic_block def_bb;
     531     19657295 :   class loop *max_loop;
     532     19657295 :   struct lim_aux_data *lim_data;
     533              : 
     534     19657295 :   if (!def)
     535       945273 :     return superloop_at_depth (loop, 1);
     536              : 
     537     18712022 :   if (TREE_CODE (def) != SSA_NAME)
     538              :     {
     539      3921940 :       gcc_assert (is_gimple_min_invariant (def));
     540      3921940 :       return superloop_at_depth (loop, 1);
     541              :     }
     542              : 
     543     14790082 :   def_stmt = SSA_NAME_DEF_STMT (def);
     544     14790082 :   def_bb = gimple_bb (def_stmt);
     545     14790082 :   if (!def_bb)
     546        72777 :     return superloop_at_depth (loop, 1);
     547              : 
     548     14717305 :   max_loop = find_common_loop (loop, def_bb->loop_father);
     549              : 
     550     14717305 :   lim_data = get_lim_data (def_stmt);
     551     14717305 :   if (lim_data != NULL && lim_data->max_loop != NULL)
     552       690781 :     max_loop = find_common_loop (max_loop,
     553              :                                  loop_outer (lim_data->max_loop));
     554     14717305 :   if (max_loop == loop)
     555              :     return NULL;
     556      1843036 :   max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
     557              : 
     558      1843036 :   return max_loop;
     559              : }
     560              : 
     561              : /* DATA is a structure containing information associated with a statement
     562              :    inside LOOP.  DEF is one of the operands of this statement.
     563              : 
     564              :    Find the outermost loop enclosing LOOP in that value of DEF is invariant
     565              :    and record this in DATA->max_loop field.  If DEF itself is defined inside
     566              :    this loop as well (i.e. we need to hoist it out of the loop if we want
     567              :    to hoist the statement represented by DATA), record the statement in that
     568              :    DEF is defined to the DATA->depends list.  Additionally if ADD_COST is true,
     569              :    add the cost of the computation of DEF to the DATA->cost.
     570              : 
     571              :    If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */
     572              : 
     573              : static bool
     574     11497716 : add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
     575              :                 bool add_cost)
     576              : {
     577     11497716 :   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
     578     11497716 :   basic_block def_bb = gimple_bb (def_stmt);
     579     11497716 :   class loop *max_loop;
     580     11497716 :   struct lim_aux_data *def_data;
     581              : 
     582     11497716 :   if (!def_bb)
     583              :     return true;
     584              : 
     585     11176543 :   max_loop = outermost_invariant_loop (def, loop);
     586     11176543 :   if (!max_loop)
     587              :     return false;
     588              : 
     589      1386958 :   if (flow_loop_nested_p (data->max_loop, max_loop))
     590       422800 :     data->max_loop = max_loop;
     591              : 
     592      1386958 :   def_data = get_lim_data (def_stmt);
     593      1386958 :   if (!def_data)
     594              :     return true;
     595              : 
     596       679043 :   if (add_cost
     597              :       /* Only add the cost if the statement defining DEF is inside LOOP,
     598              :          i.e. if it is likely that by moving the invariants dependent
     599              :          on it, we will be able to avoid creating a new register for
     600              :          it (since it will be only used in these dependent invariants).  */
     601       658428 :       && def_bb->loop_father == loop)
     602       461306 :     data->cost += def_data->cost;
     603              : 
     604       679043 :   data->depends.safe_push (def_stmt);
     605              : 
     606       679043 :   return true;
     607              : }
     608              : 
     609              : /* Returns an estimate for a cost of statement STMT.  The values here
     610              :    are just ad-hoc constants, similar to costs for inlining.  */
     611              : 
     612              : static unsigned
     613       770402 : stmt_cost (gimple *stmt)
     614              : {
     615              :   /* Always try to create possibilities for unswitching.  */
     616       770402 :   if (gimple_code (stmt) == GIMPLE_COND
     617       770402 :       || gimple_code (stmt) == GIMPLE_PHI)
     618        12726 :     return LIM_EXPENSIVE;
     619              : 
     620              :   /* We should be hoisting calls if possible.  */
     621       757676 :   if (is_gimple_call (stmt))
     622              :     {
     623          834 :       tree fndecl;
     624              : 
     625              :       /* Unless the call is a builtin_constant_p; this always folds to a
     626              :          constant, so moving it is useless.  */
     627          834 :       fndecl = gimple_call_fndecl (stmt);
     628          834 :       if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
     629              :         return 0;
     630              : 
     631          834 :       return LIM_EXPENSIVE;
     632              :     }
     633              : 
     634              :   /* Hoisting memory references out should almost surely be a win.  */
     635       756842 :   if (gimple_references_memory_p (stmt))
     636        66180 :     return LIM_EXPENSIVE;
     637              : 
     638       690662 :   if (gimple_code (stmt) != GIMPLE_ASSIGN)
     639              :     return 1;
     640              : 
     641       690662 :   enum tree_code code = gimple_assign_rhs_code (stmt);
     642       690662 :   switch (code)
     643              :     {
     644       118593 :     case MULT_EXPR:
     645       118593 :     case WIDEN_MULT_EXPR:
     646       118593 :     case WIDEN_MULT_PLUS_EXPR:
     647       118593 :     case WIDEN_MULT_MINUS_EXPR:
     648       118593 :     case DOT_PROD_EXPR:
     649       118593 :     case TRUNC_DIV_EXPR:
     650       118593 :     case CEIL_DIV_EXPR:
     651       118593 :     case FLOOR_DIV_EXPR:
     652       118593 :     case ROUND_DIV_EXPR:
     653       118593 :     case EXACT_DIV_EXPR:
     654       118593 :     case CEIL_MOD_EXPR:
     655       118593 :     case FLOOR_MOD_EXPR:
     656       118593 :     case ROUND_MOD_EXPR:
     657       118593 :     case TRUNC_MOD_EXPR:
     658       118593 :     case RDIV_EXPR:
     659              :       /* Division and multiplication are usually expensive.  */
     660       118593 :       return LIM_EXPENSIVE;
     661              : 
     662         4328 :     case LSHIFT_EXPR:
     663         4328 :     case RSHIFT_EXPR:
     664         4328 :     case WIDEN_LSHIFT_EXPR:
     665         4328 :     case LROTATE_EXPR:
     666         4328 :     case RROTATE_EXPR:
     667              :       /* Shifts and rotates are usually expensive.  */
     668         4328 :       return LIM_EXPENSIVE;
     669              : 
     670          461 :     case COND_EXPR:
     671          461 :     case VEC_COND_EXPR:
     672              :       /* Conditionals are expensive.  */
     673          461 :       return LIM_EXPENSIVE;
     674              : 
     675         1329 :     case CONSTRUCTOR:
     676              :       /* Make vector construction cost proportional to the number
     677              :          of elements.  */
     678         1329 :       return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
     679              : 
     680              :     case SSA_NAME:
     681              :     case PAREN_EXPR:
     682              :       /* Whether or not something is wrapped inside a PAREN_EXPR
     683              :          should not change move cost.  Nor should an intermediate
     684              :          unpropagated SSA name copy.  */
     685              :       return 0;
     686              : 
     687       528349 :     default:
     688              :       /* Comparisons are usually expensive.  */
     689       528349 :       if (TREE_CODE_CLASS (code) == tcc_comparison)
     690         7499 :         return LIM_EXPENSIVE;
     691              :       return 1;
     692              :     }
     693              : }
     694              : 
     695              : /* Finds the outermost loop between OUTER and LOOP in that the memory reference
     696              :    REF is independent.  If REF is not independent in LOOP, NULL is returned
     697              :    instead.  */
     698              : 
     699              : static class loop *
     700       750914 : outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
     701              : {
     702       750914 :   class loop *aloop;
     703              : 
     704       750914 :   if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
     705              :     return NULL;
     706              : 
     707        84447 :   for (aloop = outer;
     708       689481 :        aloop != loop;
     709        84447 :        aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
     710        10034 :     if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
     711       100222 :         && ref_indep_loop_p (aloop, ref, lim_raw))
     712              :       return aloop;
     713              : 
     714       590947 :   if (ref_indep_loop_p (loop, ref, lim_raw))
     715              :     return loop;
     716              :   else
     717              :     return NULL;
     718              : }
     719              : 
     720              : /* If there is a simple load or store to a memory reference in STMT, returns
     721              :    the location of the memory reference, and sets IS_STORE according to whether
     722              :    it is a store or load.  Otherwise, returns NULL.  */
     723              : 
     724              : static tree *
     725      6936931 : simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
     726              : {
     727      6936931 :   tree *lhs, *rhs;
     728              : 
     729              :   /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
     730      6936931 :   if (!gimple_assign_single_p (stmt))
     731              :     return NULL;
     732              : 
     733      5685216 :   lhs = gimple_assign_lhs_ptr (stmt);
     734      5685216 :   rhs = gimple_assign_rhs1_ptr (stmt);
     735              : 
     736      8876258 :   if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
     737              :     {
     738      3191042 :       *is_store = false;
     739      3191042 :       return rhs;
     740              :     }
     741      2494174 :   else if (gimple_vdef (stmt)
     742      2494174 :            && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
     743              :     {
     744      2048248 :       *is_store = true;
     745      2048248 :       return lhs;
     746              :     }
     747              :   else
     748       445926 :     return NULL;
     749              : }
     750              : 
     751              : /* From a controlling predicate in DOM determine the arguments from
     752              :    the PHI node PHI that are chosen if the predicate evaluates to
     753              :    true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
     754              :    they are non-NULL.  Returns true if the arguments can be determined,
     755              :    else return false.  */
     756              : 
     757              : static bool
     758        11430 : extract_true_false_args_from_phi (basic_block dom, gphi *phi,
     759              :                                   tree *true_arg_p, tree *false_arg_p)
     760              : {
     761        11430 :   edge te, fe;
     762        11430 :   if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
     763              :                                              &te, &fe))
     764              :     return false;
     765              : 
     766        10541 :   if (true_arg_p)
     767          917 :     *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
     768        10541 :   if (false_arg_p)
     769          917 :     *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
     770              : 
     771              :   return true;
     772              : }
     773              : 
     774              : /* Determine the outermost loop to that it is possible to hoist a statement
     775              :    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
     776              :    the outermost loop in that the value computed by STMT is invariant.
     777              :    If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
     778              :    we preserve the fact whether STMT is executed.  It also fills other related
     779              :    information to LIM_DATA (STMT).
     780              : 
     781              :    The function returns false if STMT cannot be hoisted outside of the loop it
     782              :    is defined in, and true otherwise.  */
     783              : 
     784              : static bool
     785     12206045 : determine_max_movement (gimple *stmt, bool must_preserve_exec)
     786              : {
     787     12206045 :   basic_block bb = gimple_bb (stmt);
     788     12206045 :   class loop *loop = bb->loop_father;
     789     12206045 :   class loop *level;
     790     12206045 :   struct lim_aux_data *lim_data = get_lim_data (stmt);
     791     12206045 :   tree val;
     792     12206045 :   ssa_op_iter iter;
     793              : 
     794     12206045 :   if (must_preserve_exec)
     795      1509168 :     level = ALWAYS_EXECUTED_IN (bb);
     796              :   else
     797     10696877 :     level = superloop_at_depth (loop, 1);
     798     12206045 :   lim_data->max_loop = get_coldest_out_loop (level, loop, bb);
     799     12206045 :   if (!lim_data->max_loop)
     800              :     return false;
     801              : 
     802     11258849 :   if (gphi *phi = dyn_cast <gphi *> (stmt))
     803              :     {
     804      1400672 :       use_operand_p use_p;
     805      1400672 :       unsigned min_cost = UINT_MAX;
     806      1400672 :       unsigned total_cost = 0;
     807      1400672 :       struct lim_aux_data *def_data;
     808              : 
     809              :       /* We will end up promoting dependencies to be unconditionally
     810              :          evaluated.  For this reason the PHI cost (and thus the
     811              :          cost we remove from the loop by doing the invariant motion)
     812              :          is that of the cheapest PHI argument dependency chain.  */
     813      1607408 :       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
     814              :         {
     815      1583779 :           val = USE_FROM_PTR (use_p);
     816              : 
     817      1583779 :           if (TREE_CODE (val) != SSA_NAME)
     818              :             {
     819              :               /* Assign const 1 to constants.  */
     820       129051 :               min_cost = MIN (min_cost, 1);
     821       129051 :               total_cost += 1;
     822       129051 :               continue;
     823              :             }
     824      1454728 :           if (!add_dependency (val, lim_data, loop, false))
     825              :             return false;
     826              : 
     827        77685 :           gimple *def_stmt = SSA_NAME_DEF_STMT (val);
     828        77685 :           if (gimple_bb (def_stmt)
     829        77685 :               && gimple_bb (def_stmt)->loop_father == loop)
     830              :             {
     831         1412 :               def_data = get_lim_data (def_stmt);
     832         1412 :               if (def_data)
     833              :                 {
     834         1412 :                   min_cost = MIN (min_cost, def_data->cost);
     835         1412 :                   total_cost += def_data->cost;
     836              :                 }
     837              :             }
     838              :         }
     839              : 
     840        23629 :       min_cost = MIN (min_cost, total_cost);
     841        23629 :       lim_data->cost += min_cost;
     842              : 
     843        23629 :       if (gimple_phi_num_args (phi) > 1)
     844              :         {
     845        23455 :           basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
     846        23455 :           gimple *cond;
     847        46910 :           if (gsi_end_p (gsi_last_bb (dom)))
     848              :             return false;
     849        15458 :           cond = gsi_stmt (gsi_last_bb (dom));
     850        15458 :           if (gimple_code (cond) != GIMPLE_COND)
     851              :             return false;
     852              :           /* Verify that this is an extended form of a diamond and
     853              :              the PHI arguments are completely controlled by the
     854              :              predicate in DOM.  */
     855        10513 :           if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
     856              :             return false;
     857              : 
     858              :         /* Check if one of the depedent statement is a vector compare whether
     859              :            the target supports it,  otherwise it's invalid to hoist it out of
     860              :            the gcond it belonged to.  */
     861         9624 :         if (VECTOR_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond))))
     862              :           {
     863            5 :             tree type = TREE_TYPE (gimple_cond_lhs (cond));
     864            5 :             auto code = gimple_cond_code (cond);
     865            5 :             if (!target_supports_op_p (type, code, optab_vector))
     866              :               return false;
     867              :           }
     868              : 
     869              :           /* Fold in dependencies and cost of the condition.  */
     870        11327 :           FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
     871              :             {
     872        10294 :               if (!add_dependency (val, lim_data, loop, false))
     873              :                 return false;
     874         1708 :               def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
     875         1708 :               if (def_data)
     876          922 :                 lim_data->cost += def_data->cost;
     877              :             }
     878              : 
     879              :           /* We want to avoid unconditionally executing very expensive
     880              :              operations.  As costs for our dependencies cannot be
     881              :              negative just claim we are not invariand for this case.
     882              :              We also are not sure whether the control-flow inside the
     883              :              loop will vanish.  */
     884         1033 :           if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
     885          137 :               && !(min_cost != 0
     886          137 :                    && total_cost / min_cost <= 2))
     887              :             return false;
     888              : 
     889              :           /* Assume that the control-flow in the loop will vanish.
     890              :              ???  We should verify this and not artificially increase
     891              :              the cost if that is not the case.  */
     892          917 :           lim_data->cost += stmt_cost (stmt);
     893              :         }
     894              : 
     895         1091 :       return true;
     896              :     }
     897              : 
     898              :   /* A stmt that receives abnormal edges cannot be hoisted.  */
     899      9858177 :   if (is_a <gcall *> (stmt)
     900      9858177 :       && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE))
     901              :     return false;
     902              : 
     903     11486888 :   FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
     904     10031670 :     if (!add_dependency (val, lim_data, loop, true))
     905              :       return false;
     906              : 
     907      2898627 :   if (gimple_vuse (stmt))
     908              :     {
     909       751938 :       im_mem_ref *ref
     910       751938 :         = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
     911       751938 :       if (ref
     912       751938 :           && MEM_ANALYZABLE (ref))
     913              :         {
     914       750914 :           lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
     915              :                                                      loop, ref);
     916       750914 :           if (!lim_data->max_loop)
     917              :             return false;
     918              :         }
     919         1024 :       else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
     920              :         return false;
     921              :     }
     922              : 
     923       769485 :   lim_data->cost += stmt_cost (stmt);
     924              : 
     925       769485 :   return true;
     926              : }
     927              : 
     928              : /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
     929              :    and that one of the operands of this statement is computed by STMT.
     930              :    Ensure that STMT (together with all the statements that define its
     931              :    operands) is hoisted at least out of the loop LEVEL.  */
     932              : 
     933              : static void
     934       692131 : set_level (gimple *stmt, class loop *orig_loop, class loop *level)
     935              : {
     936       692131 :   class loop *stmt_loop = gimple_bb (stmt)->loop_father;
     937       692131 :   struct lim_aux_data *lim_data;
     938       692131 :   gimple *dep_stmt;
     939       692131 :   unsigned i;
     940              : 
     941       692131 :   stmt_loop = find_common_loop (orig_loop, stmt_loop);
     942       692131 :   lim_data = get_lim_data (stmt);
     943       692131 :   if (lim_data != NULL && lim_data->tgt_loop != NULL)
     944       171545 :     stmt_loop = find_common_loop (stmt_loop,
     945              :                                   loop_outer (lim_data->tgt_loop));
     946       692131 :   if (flow_loop_nested_p (stmt_loop, level))
     947       692131 :     return;
     948              : 
     949       459148 :   gcc_assert (level == lim_data->max_loop
     950              :               || flow_loop_nested_p (lim_data->max_loop, level));
     951              : 
     952       459148 :   lim_data->tgt_loop = level;
     953       792479 :   FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
     954       333331 :     set_level (dep_stmt, orig_loop, level);
     955              : }
     956              : 
     957              : /* Determines an outermost loop from that we want to hoist the statement STMT.
     958              :    For now we chose the outermost possible loop.  TODO -- use profiling
     959              :    information to set it more sanely.  */
     960              : 
     961              : static void
     962       296205 : set_profitable_level (gimple *stmt)
     963              : {
     964       296205 :   set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
     965       296205 : }
     966              : 
     967              : /* Returns true if STMT is a call that has side effects.  */
     968              : 
     969              : static bool
     970     71374583 : nonpure_call_p (gimple *stmt)
     971              : {
     972            0 :   if (gimple_code (stmt) != GIMPLE_CALL)
     973              :     return false;
     974              : 
     975      2262512 :   return gimple_has_side_effects (stmt);
     976              : }
     977              : 
     978              : /* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
     979              : 
     980              : static gimple *
     981           35 : rewrite_reciprocal (gimple_stmt_iterator *bsi)
     982              : {
     983           35 :   gassign *stmt, *stmt1, *stmt2;
     984           35 :   tree name, lhs, type;
     985           35 :   tree real_one;
     986           35 :   gimple_stmt_iterator gsi;
     987              : 
     988           35 :   stmt = as_a <gassign *> (gsi_stmt (*bsi));
     989           35 :   lhs = gimple_assign_lhs (stmt);
     990           35 :   type = TREE_TYPE (lhs);
     991              : 
     992           35 :   real_one = build_one_cst (type);
     993              : 
     994           35 :   name = make_temp_ssa_name (type, NULL, "reciptmp");
     995           70 :   stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
     996              :                                gimple_assign_rhs2 (stmt));
     997           35 :   stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
     998              :                                gimple_assign_rhs1 (stmt));
     999              : 
    1000              :   /* Replace division stmt with reciprocal and multiply stmts.
    1001              :      The multiply stmt is not invariant, so update iterator
    1002              :      and avoid rescanning.  */
    1003           35 :   gsi = *bsi;
    1004           35 :   gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
    1005           35 :   gsi_replace (&gsi, stmt2, true);
    1006              : 
    1007              :   /* Continue processing with invariant reciprocal statement.  */
    1008           35 :   return stmt1;
    1009              : }
    1010              : 
    1011              : /* Check if the pattern at *BSI is a bittest of the form
    1012              :    (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
    1013              : 
    1014              : static gimple *
    1015         9013 : rewrite_bittest (gimple_stmt_iterator *bsi)
    1016              : {
    1017         9013 :   gassign *stmt;
    1018         9013 :   gimple *stmt1;
    1019         9013 :   gassign *stmt2;
    1020         9013 :   gimple *use_stmt;
    1021         9013 :   gcond *cond_stmt;
    1022         9013 :   tree lhs, name, t, a, b;
    1023         9013 :   use_operand_p use;
    1024              : 
    1025         9013 :   stmt = as_a <gassign *> (gsi_stmt (*bsi));
    1026         9013 :   lhs = gimple_assign_lhs (stmt);
    1027              : 
    1028              :   /* Verify that the single use of lhs is a comparison against zero.  */
    1029         9013 :   if (TREE_CODE (lhs) != SSA_NAME
    1030         9013 :       || !single_imm_use (lhs, &use, &use_stmt))
    1031          455 :     return stmt;
    1032        13999 :   cond_stmt = dyn_cast <gcond *> (use_stmt);
    1033         5015 :   if (!cond_stmt)
    1034              :     return stmt;
    1035         5015 :   if (gimple_cond_lhs (cond_stmt) != lhs
    1036         4839 :       || (gimple_cond_code (cond_stmt) != NE_EXPR
    1037         2205 :           && gimple_cond_code (cond_stmt) != EQ_EXPR)
    1038         9854 :       || !integer_zerop (gimple_cond_rhs (cond_stmt)))
    1039          206 :     return stmt;
    1040              : 
    1041              :   /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
    1042         4809 :   stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    1043         4809 :   if (gimple_code (stmt1) != GIMPLE_ASSIGN)
    1044              :     return stmt;
    1045              : 
    1046              :   /* There is a conversion in between possibly inserted by fold.  */
    1047         4738 :   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
    1048              :     {
    1049          256 :       t = gimple_assign_rhs1 (stmt1);
    1050          256 :       if (TREE_CODE (t) != SSA_NAME
    1051          256 :           || !has_single_use (t))
    1052              :         return stmt;
    1053          180 :       stmt1 = SSA_NAME_DEF_STMT (t);
    1054          180 :       if (gimple_code (stmt1) != GIMPLE_ASSIGN)
    1055              :         return stmt;
    1056              :     }
    1057              : 
    1058              :   /* Verify that B is loop invariant but A is not.  Verify that with
    1059              :      all the stmt walking we are still in the same loop.  */
    1060         4620 :   if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
    1061        10954 :       || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
    1062              :     return stmt;
    1063              : 
    1064         3167 :   a = gimple_assign_rhs1 (stmt1);
    1065         3167 :   b = gimple_assign_rhs2 (stmt1);
    1066              : 
    1067         3167 :   if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
    1068         3196 :       && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
    1069              :     {
    1070           29 :       gimple_stmt_iterator rsi;
    1071              : 
    1072              :       /* 1 << B */
    1073           29 :       t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
    1074              :                        build_int_cst (TREE_TYPE (a), 1), b);
    1075           29 :       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
    1076           29 :       stmt1 = gimple_build_assign (name, t);
    1077              : 
    1078              :       /* A & (1 << B) */
    1079           29 :       t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
    1080           29 :       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
    1081           29 :       stmt2 = gimple_build_assign (name, t);
    1082              : 
    1083              :       /* Replace the SSA_NAME we compare against zero.  Adjust
    1084              :          the type of zero accordingly.  */
    1085           29 :       SET_USE (use, name);
    1086           29 :       gimple_cond_set_rhs (cond_stmt,
    1087           29 :                            build_int_cst_type (TREE_TYPE (name),
    1088              :                                                0));
    1089              : 
    1090              :       /* Don't use gsi_replace here, none of the new assignments sets
    1091              :          the variable originally set in stmt.  Move bsi to stmt1, and
    1092              :          then remove the original stmt, so that we get a chance to
    1093              :          retain debug info for it.  */
    1094           29 :       rsi = *bsi;
    1095           29 :       gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
    1096           29 :       gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
    1097           29 :       gimple *to_release = gsi_stmt (rsi);
    1098           29 :       gsi_remove (&rsi, true);
    1099           29 :       release_defs (to_release);
    1100              : 
    1101           29 :       return stmt1;
    1102              :     }
    1103              : 
    1104              :   return stmt;
    1105              : }
    1106              : 
    1107              : /* Determine the outermost loops in that statements in basic block BB are
    1108              :    invariant, and record them to the LIM_DATA associated with the
    1109              :    statements.  */
    1110              : 
    1111              : static void
    1112     14264413 : compute_invariantness (basic_block bb)
    1113              : {
    1114     14264413 :   enum move_pos pos;
    1115     14264413 :   gimple_stmt_iterator bsi;
    1116     14264413 :   gimple *stmt;
    1117     14264413 :   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
    1118     14264413 :   class loop *outermost = ALWAYS_EXECUTED_IN (bb);
    1119     14264413 :   struct lim_aux_data *lim_data;
    1120              : 
    1121     14264413 :   if (!loop_outer (bb->loop_father))
    1122      8236701 :     return;
    1123              : 
    1124      6027712 :   if (dump_file && (dump_flags & TDF_DETAILS))
    1125          435 :     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
    1126          435 :              bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
    1127              : 
    1128              :   /* Look at PHI nodes, but only if there is at most two.
    1129              :      ???  We could relax this further by post-processing the inserted
    1130              :      code and transforming adjacent cond-exprs with the same predicate
    1131              :      to control flow again.  */
    1132      6027712 :   bsi = gsi_start_phis (bb);
    1133      6027712 :   if (!gsi_end_p (bsi)
    1134      6027712 :       && ((gsi_next (&bsi), gsi_end_p (bsi))
    1135      1361729 :           || (gsi_next (&bsi), gsi_end_p (bsi))))
    1136      4318011 :     for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    1137              :       {
    1138      2600781 :         stmt = gsi_stmt (bsi);
    1139              : 
    1140      2600781 :         pos = movement_possibility (stmt);
    1141      2600781 :         if (pos == MOVE_IMPOSSIBLE)
    1142      1149491 :           continue;
    1143              : 
    1144      1451290 :         lim_data = get_lim_data (stmt);
    1145      1451290 :         if (! lim_data)
    1146      1451290 :           lim_data = init_lim_data (stmt);
    1147      1451290 :         lim_data->always_executed_in = outermost;
    1148              : 
    1149      1451290 :         if (!determine_max_movement (stmt, false))
    1150              :           {
    1151      1450199 :             lim_data->max_loop = NULL;
    1152      1450199 :             continue;
    1153              :           }
    1154              : 
    1155         1091 :         if (dump_file && (dump_flags & TDF_DETAILS))
    1156              :           {
    1157            2 :             print_gimple_stmt (dump_file, stmt, 2);
    1158            2 :             fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
    1159            2 :                      loop_depth (lim_data->max_loop),
    1160              :                      lim_data->cost);
    1161              :           }
    1162              : 
    1163         1091 :         if (lim_data->cost >= LIM_EXPENSIVE)
    1164          920 :           set_profitable_level (stmt);
    1165              :       }
    1166              : 
    1167     56015306 :   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    1168              :     {
    1169     43959882 :       stmt = gsi_stmt (bsi);
    1170              : 
    1171     43959882 :       pos = movement_possibility (stmt);
    1172     43959882 :       if (pos == MOVE_IMPOSSIBLE)
    1173              :         {
    1174     33209122 :           if (nonpure_call_p (stmt))
    1175              :             {
    1176              :               maybe_never = true;
    1177              :               outermost = NULL;
    1178              :             }
    1179              :           /* Make sure to note always_executed_in for stores to make
    1180              :              store-motion work.  */
    1181     30851536 :           else if (stmt_makes_single_store (stmt))
    1182              :             {
    1183      2482720 :               struct lim_aux_data *lim_data = get_lim_data (stmt);
    1184      2482720 :               if (! lim_data)
    1185            0 :                 lim_data = init_lim_data (stmt);
    1186      2482720 :               lim_data->always_executed_in = outermost;
    1187              :             }
    1188     32028285 :           continue;
    1189     32028285 :         }
    1190              : 
    1191     11931597 :       if (is_gimple_assign (stmt)
    1192     11931597 :           && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
    1193              :               == GIMPLE_BINARY_RHS))
    1194              :         {
    1195      5749084 :           tree op0 = gimple_assign_rhs1 (stmt);
    1196      5749084 :           tree op1 = gimple_assign_rhs2 (stmt);
    1197     11498168 :           class loop *ol1 = outermost_invariant_loop (op1,
    1198              :                                         loop_containing_stmt (stmt));
    1199              : 
    1200              :           /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
    1201              :              to be hoisted out of loop, saving expensive divide.  */
    1202      5749084 :           if (pos == MOVE_POSSIBLE
    1203      5178373 :               && gimple_assign_rhs_code (stmt) == RDIV_EXPR
    1204          940 :               && flag_unsafe_math_optimizations
    1205          380 :               && !flag_trapping_math
    1206          380 :               && ol1 != NULL
    1207      5749122 :               && outermost_invariant_loop (op0, ol1) == NULL)
    1208           35 :             stmt = rewrite_reciprocal (&bsi);
    1209              : 
    1210              :           /* If the shift count is invariant, convert (A >> B) & 1 to
    1211              :              A & (1 << B) allowing the bit mask to be hoisted out of the loop
    1212              :              saving an expensive shift.  */
    1213      5749084 :           if (pos == MOVE_POSSIBLE
    1214      5178373 :               && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
    1215       186330 :               && integer_onep (op1)
    1216        18450 :               && TREE_CODE (op0) == SSA_NAME
    1217      5767534 :               && has_single_use (op0))
    1218         9013 :             stmt = rewrite_bittest (&bsi);
    1219              :         }
    1220              : 
    1221     11931597 :       lim_data = get_lim_data (stmt);
    1222     11931597 :       if (! lim_data)
    1223      9106526 :         lim_data = init_lim_data (stmt);
    1224     11931597 :       lim_data->always_executed_in = outermost;
    1225              : 
    1226     11931597 :       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
    1227      1176842 :         continue;
    1228              : 
    1229     10754755 :       if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
    1230              :         {
    1231      9985270 :           lim_data->max_loop = NULL;
    1232      9985270 :           continue;
    1233              :         }
    1234              : 
    1235       769485 :       if (dump_file && (dump_flags & TDF_DETAILS))
    1236              :         {
    1237          283 :           print_gimple_stmt (dump_file, stmt, 2);
    1238          283 :           fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
    1239          283 :                    loop_depth (lim_data->max_loop),
    1240              :                    lim_data->cost);
    1241              :         }
    1242              : 
    1243       769485 :       if (lim_data->cost >= LIM_EXPENSIVE)
    1244       295285 :         set_profitable_level (stmt);
    1245              :       /* When we run before PRE and PRE is active hoist all expressions
    1246              :          to the always executed loop since PRE would do so anyway
    1247              :          and we can preserve range info while PRE cannot.  */
    1248       474200 :       else if (flag_tree_pre && !in_loop_pipeline
    1249       101409 :                && outermost)
    1250              :         {
    1251        59681 :           class loop *mloop = lim_data->max_loop;
    1252       179043 :           if (loop_depth (outermost) > loop_depth (mloop))
    1253              :             {
    1254         8481 :               mloop = outermost;
    1255         8481 :               if (dump_file && (dump_flags & TDF_DETAILS))
    1256            1 :                 fprintf (dump_file, "  constraining to loop depth %d\n\n\n",
    1257              :                          loop_depth (mloop));
    1258              :             }
    1259        59681 :           set_level (stmt, bb->loop_father, mloop);
    1260              :         }
    1261              :     }
    1262              : }
    1263              : 
    1264              : /* Hoist the statements in basic block BB out of the loops prescribed by
    1265              :    data stored in LIM_DATA structures associated with each statement.  Callback
    1266              :    for walk_dominator_tree.  */
    1267              : 
    1268              : unsigned int
    1269     14309669 : move_computations_worker (basic_block bb)
    1270              : {
    1271     14309669 :   class loop *level;
    1272     14309669 :   unsigned cost = 0;
    1273     14309669 :   struct lim_aux_data *lim_data;
    1274     14309669 :   unsigned int todo = 0;
    1275              : 
    1276     14309669 :   if (!loop_outer (bb->loop_father))
    1277              :     return todo;
    1278              : 
    1279     10537119 :   for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
    1280              :     {
    1281      4503197 :       gassign *new_stmt;
    1282      4503197 :       gphi *stmt = bsi.phi ();
    1283              : 
    1284      4503197 :       lim_data = get_lim_data (stmt);
    1285      4503197 :       if (lim_data == NULL)
    1286              :         {
    1287      3051907 :           gsi_next (&bsi);
    1288      3051907 :           continue;
    1289              :         }
    1290              : 
    1291      1451290 :       cost = lim_data->cost;
    1292      1451290 :       level = lim_data->tgt_loop;
    1293      1451290 :       clear_lim_data (stmt);
    1294              : 
    1295      1451290 :       if (!level)
    1296              :         {
    1297      1450368 :           gsi_next (&bsi);
    1298      1450368 :           continue;
    1299              :         }
    1300              : 
    1301          922 :       if (dump_file && (dump_flags & TDF_DETAILS))
    1302              :         {
    1303            2 :           fprintf (dump_file, "Moving PHI node\n");
    1304            2 :           print_gimple_stmt (dump_file, stmt, 0);
    1305            2 :           fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
    1306              :                    cost, level->num);
    1307              :         }
    1308              : 
    1309          922 :       if (gimple_phi_num_args (stmt) == 1)
    1310              :         {
    1311            5 :           tree arg = PHI_ARG_DEF (stmt, 0);
    1312            5 :           new_stmt = gimple_build_assign (gimple_phi_result (stmt),
    1313            5 :                                           TREE_CODE (arg), arg);
    1314              :         }
    1315              :       else
    1316              :         {
    1317          917 :           basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
    1318          917 :           gimple *cond = gsi_stmt (gsi_last_bb (dom));
    1319          917 :           tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
    1320              :           /* Get the PHI arguments corresponding to the true and false
    1321              :              edges of COND.  */
    1322          917 :           extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
    1323          917 :           gcc_assert (arg0 && arg1);
    1324              :           /* For `bool_val != 0`, reuse bool_val. */
    1325          917 :           if (gimple_cond_code (cond) == NE_EXPR
    1326          536 :               && integer_zerop (gimple_cond_rhs (cond))
    1327         1379 :               && types_compatible_p (TREE_TYPE (gimple_cond_lhs (cond)),
    1328              :                                      boolean_type_node))
    1329              :             {
    1330           53 :               t = gimple_cond_lhs (cond);
    1331              :             }
    1332              :           else
    1333              :             {
    1334          864 :               t = make_ssa_name (boolean_type_node);
    1335          864 :               new_stmt = gimple_build_assign (t, gimple_cond_code (cond),
    1336              :                                               gimple_cond_lhs (cond),
    1337              :                                               gimple_cond_rhs (cond));
    1338          864 :               gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
    1339              :             }
    1340          917 :           new_stmt = gimple_build_assign (gimple_phi_result (stmt),
    1341              :                                           COND_EXPR, t, arg0, arg1);
    1342          917 :           todo |= TODO_cleanup_cfg;
    1343              :         }
    1344          922 :       if (!ALWAYS_EXECUTED_IN (bb)
    1345          922 :           || (ALWAYS_EXECUTED_IN (bb) != level
    1346           92 :               && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
    1347          427 :         reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
    1348          922 :       gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
    1349          922 :       remove_phi_node (&bsi, false);
    1350              :     }
    1351              : 
    1352     56093041 :   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
    1353              :     {
    1354     44025197 :       edge e;
    1355              : 
    1356     44025197 :       gimple *stmt = gsi_stmt (bsi);
    1357              : 
    1358     44025197 :       lim_data = get_lim_data (stmt);
    1359     44025197 :       if (lim_data == NULL)
    1360              :         {
    1361     27939299 :           gsi_next (&bsi);
    1362     27939299 :           continue;
    1363              :         }
    1364              : 
    1365     16085898 :       cost = lim_data->cost;
    1366     16085898 :       level = lim_data->tgt_loop;
    1367     16085898 :       clear_lim_data (stmt);
    1368              : 
    1369     16085898 :       if (!level)
    1370              :         {
    1371     15588160 :           gsi_next (&bsi);
    1372     15588160 :           continue;
    1373              :         }
    1374              : 
    1375              :       /* We do not really want to move conditionals out of the loop; we just
    1376              :          placed it here to force its operands to be moved if necessary.  */
    1377       497738 :       if (gimple_code (stmt) == GIMPLE_COND)
    1378              :         {
    1379        11809 :           gsi_next (&bsi);
    1380        11809 :           continue;
    1381              :         }
    1382              : 
    1383       485929 :       if (dump_file && (dump_flags & TDF_DETAILS))
    1384              :         {
    1385          353 :           fprintf (dump_file, "Moving statement ");
    1386          353 :           print_gimple_stmt (dump_file, stmt, 0);
    1387          353 :           fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
    1388              :                    cost, level->num);
    1389              :         }
    1390              : 
    1391       485929 :       e = loop_preheader_edge (level);
    1392       971858 :       gcc_assert (!gimple_vdef (stmt));
    1393       971858 :       if (gimple_vuse (stmt))
    1394              :         {
    1395              :           /* The new VUSE is the one from the virtual PHI in the loop
    1396              :              header or the one already present.  */
    1397        84196 :           gphi_iterator gsi2;
    1398        84196 :           for (gsi2 = gsi_start_phis (e->dest);
    1399       196490 :                !gsi_end_p (gsi2); gsi_next (&gsi2))
    1400              :             {
    1401       178880 :               gphi *phi = gsi2.phi ();
    1402       357760 :               if (virtual_operand_p (gimple_phi_result (phi)))
    1403              :                 {
    1404        66586 :                   SET_USE (gimple_vuse_op (stmt),
    1405              :                            PHI_ARG_DEF_FROM_EDGE (phi, e));
    1406        66586 :                   break;
    1407              :                 }
    1408              :             }
    1409              :         }
    1410       485929 :       gsi_remove (&bsi, false);
    1411       485929 :       if (gimple_has_lhs (stmt)
    1412       485929 :           && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
    1413       444143 :           && (!ALWAYS_EXECUTED_IN (bb)
    1414       377558 :               || !(ALWAYS_EXECUTED_IN (bb) == level
    1415        97878 :                    || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
    1416       204277 :         reset_flow_sensitive_info (gimple_get_lhs (stmt));
    1417              :       /* In case this is a stmt that is not unconditionally executed
    1418              :          when the target loop header is executed and the stmt may
    1419              :          invoke undefined integer or pointer overflow rewrite it to
    1420              :          unsigned arithmetic.  */
    1421       485929 :       if (gimple_needing_rewrite_undefined (stmt)
    1422       485929 :           && (!ALWAYS_EXECUTED_IN (bb)
    1423        81347 :               || !(ALWAYS_EXECUTED_IN (bb) == level
    1424        18950 :                    || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
    1425        20292 :         gsi_insert_seq_on_edge (e, rewrite_to_defined_unconditional (stmt));
    1426              :       else
    1427       465637 :         gsi_insert_on_edge (e, stmt);
    1428              :     }
    1429              : 
    1430      6033922 :   return todo;
    1431              : }
    1432              : 
    1433              : /* Checks whether the statement defining variable *INDEX can be hoisted
    1434              :    out of the loop passed in DATA.  Callback for for_each_index.  */
    1435              : 
    1436              : static bool
    1437      1780276 : may_move_till (tree ref, tree *index, void *data)
    1438              : {
    1439      1780276 :   class loop *loop = (class loop *) data, *max_loop;
    1440              : 
    1441              :   /* If REF is an array reference, check also that the step and the lower
    1442              :      bound is invariant in LOOP.  */
    1443      1780276 :   if (TREE_CODE (ref) == ARRAY_REF)
    1444              :     {
    1445       474079 :       tree step = TREE_OPERAND (ref, 3);
    1446       474079 :       tree lbound = TREE_OPERAND (ref, 2);
    1447              : 
    1448       474079 :       max_loop = outermost_invariant_loop (step, loop);
    1449       474079 :       if (!max_loop)
    1450              :         return false;
    1451              : 
    1452       474079 :       max_loop = outermost_invariant_loop (lbound, loop);
    1453       474079 :       if (!max_loop)
    1454              :         return false;
    1455              :     }
    1456              : 
    1457      1780276 :   max_loop = outermost_invariant_loop (*index, loop);
    1458      1780276 :   if (!max_loop)
    1459              :     return false;
    1460              : 
    1461              :   return true;
    1462              : }
    1463              : 
    1464              : /* If OP is SSA NAME, force the statement that defines it to be
    1465              :    moved out of the LOOP.  ORIG_LOOP is the loop in that EXPR is used.  */
    1466              : 
    1467              : static void
    1468        37965 : force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
    1469              : {
    1470        37965 :   gimple *stmt;
    1471              : 
    1472        37965 :   if (!op
    1473        37965 :       || is_gimple_min_invariant (op))
    1474        33445 :     return;
    1475              : 
    1476         4520 :   gcc_assert (TREE_CODE (op) == SSA_NAME);
    1477              : 
    1478         4520 :   stmt = SSA_NAME_DEF_STMT (op);
    1479         4520 :   if (gimple_nop_p (stmt))
    1480              :     return;
    1481              : 
    1482         2914 :   set_level (stmt, orig_loop, loop);
    1483              : }
    1484              : 
    1485              : /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
    1486              :    the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
    1487              :    for_each_index.  */
    1488              : 
    1489              : struct fmt_data
    1490              : {
    1491              :   class loop *loop;
    1492              :   class loop *orig_loop;
    1493              : };
    1494              : 
    1495              : static bool
    1496        20121 : force_move_till (tree ref, tree *index, void *data)
    1497              : {
    1498        20121 :   struct fmt_data *fmt_data = (struct fmt_data *) data;
    1499              : 
    1500        20121 :   if (TREE_CODE (ref) == ARRAY_REF)
    1501              :     {
    1502         8922 :       tree step = TREE_OPERAND (ref, 3);
    1503         8922 :       tree lbound = TREE_OPERAND (ref, 2);
    1504              : 
    1505         8922 :       force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
    1506         8922 :       force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
    1507              :     }
    1508              : 
    1509        20121 :   force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
    1510              : 
    1511        20121 :   return true;
    1512              : }
    1513              : 
    1514              : /* A function to free the mem_ref object OBJ.  */
    1515              : 
    1516              : static void
    1517      5231176 : memref_free (class im_mem_ref *mem)
    1518              : {
    1519            0 :   mem->accesses_in_loop.release ();
    1520            0 : }
    1521              : 
    1522              : /* Allocates and returns a memory reference description for MEM whose hash
    1523              :    value is HASH and id is ID.  */
    1524              : 
    1525              : static im_mem_ref *
    1526      5231176 : mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
    1527              : {
    1528      5231176 :   im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
    1529      5231176 :   if (mem)
    1530      4300338 :     ref->mem = *mem;
    1531              :   else
    1532       930838 :     ao_ref_init (&ref->mem, error_mark_node);
    1533      5231176 :   ref->id = id;
    1534      5231176 :   ref->ref_canonical = false;
    1535      5231176 :   ref->ref_decomposed = false;
    1536      5231176 :   ref->hash = hash;
    1537      5231176 :   ref->stored = NULL;
    1538      5231176 :   ref->loaded = NULL;
    1539      5231176 :   bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
    1540      5231176 :   ref->accesses_in_loop.create (1);
    1541              : 
    1542      5231176 :   return ref;
    1543              : }
    1544              : 
    1545              : /* Records memory reference location *LOC in LOOP to the memory reference
    1546              :    description REF.  The reference occurs in statement STMT.  */
    1547              : 
    1548              : static void
    1549      5685216 : record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
    1550              : {
    1551      5685216 :   mem_ref_loc aref;
    1552      5685216 :   aref.stmt = stmt;
    1553      5685216 :   aref.ref = loc;
    1554            0 :   ref->accesses_in_loop.safe_push (aref);
    1555            0 : }
    1556              : 
    1557              : /* Set the LOOP bit in REF stored bitmap and allocate that if
    1558              :    necessary.  Return whether a bit was changed.  */
    1559              : 
    1560              : static bool
    1561      4399828 : set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
    1562              : {
    1563      4399828 :   if (!ref->stored)
    1564      2497085 :     ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
    1565      4399828 :   return bitmap_set_bit (ref->stored, loop->num);
    1566              : }
    1567              : 
    1568              : /* Marks reference REF as stored in LOOP.  */
    1569              : 
    1570              : static void
    1571      3664896 : mark_ref_stored (im_mem_ref *ref, class loop *loop)
    1572              : {
    1573      3664896 :   while (loop != current_loops->tree_root
    1574      7063252 :          && set_ref_stored_in_loop (ref, loop))
    1575      3398356 :     loop = loop_outer (loop);
    1576      3664896 : }
    1577              : 
    1578              : /* Set the LOOP bit in REF loaded bitmap and allocate that if
    1579              :    necessary.  Return whether a bit was changed.  */
    1580              : 
    1581              : static bool
    1582      6143712 : set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
    1583              : {
    1584      6143712 :   if (!ref->loaded)
    1585      3366980 :     ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
    1586      6143712 :   return bitmap_set_bit (ref->loaded, loop->num);
    1587              : }
    1588              : 
    1589              : /* Marks reference REF as loaded in LOOP.  */
    1590              : 
    1591              : static void
    1592      4888683 : mark_ref_loaded (im_mem_ref *ref, class loop *loop)
    1593              : {
    1594      4888683 :   while (loop != current_loops->tree_root
    1595      9733296 :          && set_ref_loaded_in_loop (ref, loop))
    1596      4844613 :     loop = loop_outer (loop);
    1597      4888683 : }
    1598              : 
    1599              : /* Gathers memory references in statement STMT in LOOP, storing the
    1600              :    information about them in the memory_accesses structure.  Marks
    1601              :    the vops accessed through unrecognized statements there as
    1602              :    well.  */
    1603              : 
    1604              : static void
    1605     43959818 : gather_mem_refs_stmt (class loop *loop, gimple *stmt)
    1606              : {
    1607     43959818 :   tree *mem = NULL;
    1608     43959818 :   hashval_t hash;
    1609     43959818 :   im_mem_ref **slot;
    1610     43959818 :   im_mem_ref *ref;
    1611     43959818 :   bool is_stored;
    1612     43959818 :   unsigned id;
    1613              : 
    1614     59677908 :   if (!gimple_vuse (stmt))
    1615              :     return;
    1616              : 
    1617      6936931 :   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
    1618      6936931 :   if (!mem && is_gimple_assign (stmt))
    1619              :     {
    1620              :       /* For aggregate copies record distinct references but use them
    1621              :          only for disambiguation purposes.  */
    1622       445926 :       id = memory_accesses.refs_list.length ();
    1623       445926 :       ref = mem_ref_alloc (NULL, 0, id);
    1624       445926 :       memory_accesses.refs_list.safe_push (ref);
    1625       445926 :       if (dump_file && (dump_flags & TDF_DETAILS))
    1626              :         {
    1627           24 :           fprintf (dump_file, "Unhandled memory reference %u: ", id);
    1628           24 :           print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    1629              :         }
    1630       445926 :       record_mem_ref_loc (ref, stmt, mem);
    1631       891852 :       is_stored = gimple_vdef (stmt);
    1632              :     }
    1633      6491005 :   else if (!mem)
    1634              :     {
    1635              :       /* We use the shared mem_ref for all unanalyzable refs.  */
    1636      1251715 :       id = UNANALYZABLE_MEM_ID;
    1637      1251715 :       ref = memory_accesses.refs_list[id];
    1638      1251715 :       if (dump_file && (dump_flags & TDF_DETAILS))
    1639              :         {
    1640            9 :           fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
    1641            9 :           print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    1642              :         }
    1643      2503430 :       is_stored = gimple_vdef (stmt);
    1644              :     }
    1645              :   else
    1646              :     {
    1647              :       /* We are looking for equal refs that might differ in structure
    1648              :          such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
    1649              :          make sure we can canonicalize the ref in the hashtable if
    1650              :          non-operand_equal_p refs are found.  For the lookup we mark
    1651              :          the case we want strict equality with aor.max_size == -1.  */
    1652      5239290 :       ao_ref aor;
    1653      5239290 :       ao_ref_init (&aor, *mem);
    1654      5239290 :       ao_ref_base (&aor);
    1655      5239290 :       ao_ref_alias_set (&aor);
    1656      5239290 :       HOST_WIDE_INT offset, size, max_size;
    1657      5239290 :       poly_int64 saved_maxsize = aor.max_size, mem_off;
    1658      5239290 :       tree mem_base;
    1659      5239290 :       bool ref_decomposed;
    1660      5239290 :       if (aor.max_size_known_p ()
    1661      5061523 :           && aor.offset.is_constant (&offset)
    1662      5061523 :           && aor.size.is_constant (&size)
    1663      5061523 :           && aor.max_size.is_constant (&max_size)
    1664      5061523 :           && size == max_size
    1665      4458731 :           && (size % BITS_PER_UNIT) == 0
    1666              :           /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
    1667              :              size.  Make sure this is consistent with the extraction.  */
    1668      4448004 :           && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
    1669      4448004 :           && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
    1670              :                        aor.size)
    1671      9687202 :           && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
    1672              :         {
    1673      4446860 :           ref_decomposed = true;
    1674      4446860 :           tree base = ao_ref_base (&aor);
    1675      4446860 :           poly_int64 moffset;
    1676      4446860 :           HOST_WIDE_INT mcoffset;
    1677      4446860 :           if (TREE_CODE (base) == MEM_REF
    1678      2005324 :               && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
    1679      4446860 :               && moffset.is_constant (&mcoffset))
    1680              :             {
    1681      2005324 :               hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
    1682      2005324 :               hash = iterative_hash_host_wide_int (mcoffset, hash);
    1683              :             }
    1684              :           else
    1685              :             {
    1686      2441536 :               hash = iterative_hash_expr (base, 0);
    1687      2441536 :               hash = iterative_hash_host_wide_int (offset, hash);
    1688              :             }
    1689      4446860 :           hash = iterative_hash_host_wide_int (size, hash);
    1690              :         }
    1691              :       else
    1692              :         {
    1693       792430 :           ref_decomposed = false;
    1694       792430 :           hash = iterative_hash_expr (aor.ref, 0);
    1695       792430 :           aor.max_size = -1;
    1696              :         }
    1697      5239290 :       slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
    1698      5239290 :       aor.max_size = saved_maxsize;
    1699      5239290 :       if (*slot)
    1700              :         {
    1701       938952 :           if (!(*slot)->ref_canonical
    1702       938952 :               && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
    1703              :             {
    1704              :               /* If we didn't yet canonicalize the hashtable ref (which
    1705              :                  we'll end up using for code insertion) and hit a second
    1706              :                  equal ref that is not structurally equivalent create
    1707              :                  a canonical ref which is a bare MEM_REF.  */
    1708        50523 :               if (TREE_CODE (*mem) == MEM_REF
    1709        50523 :                   || TREE_CODE (*mem) == TARGET_MEM_REF)
    1710              :                 {
    1711        14987 :                   (*slot)->mem.ref = *mem;
    1712        14987 :                   (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
    1713              :                 }
    1714              :               else
    1715              :                 {
    1716        35536 :                   tree ref_alias_type = reference_alias_ptr_type (*mem);
    1717        35536 :                   unsigned int ref_align = get_object_alignment (*mem);
    1718        35536 :                   tree ref_type = TREE_TYPE (*mem);
    1719        35536 :                   tree tmp = build1 (ADDR_EXPR, ptr_type_node,
    1720              :                                      unshare_expr (mem_base));
    1721        35536 :                   if (TYPE_ALIGN (ref_type) != ref_align)
    1722         9194 :                     ref_type = build_aligned_type (ref_type, ref_align);
    1723        35536 :                   tree new_ref
    1724        35536 :                     = fold_build2 (MEM_REF, ref_type, tmp,
    1725              :                                    build_int_cst (ref_alias_type, mem_off));
    1726        35536 :                   if ((*slot)->mem.volatile_p)
    1727           22 :                     TREE_THIS_VOLATILE (new_ref) = 1;
    1728        35536 :                   (*slot)->mem.ref = new_ref;
    1729              :                   /* Make sure the recorded base and offset are consistent
    1730              :                      with the newly built ref.  */
    1731        35536 :                   if (TREE_CODE (TREE_OPERAND (new_ref, 0)) == ADDR_EXPR)
    1732              :                     ;
    1733              :                   else
    1734              :                     {
    1735        19736 :                       (*slot)->mem.base = new_ref;
    1736        19736 :                       (*slot)->mem.offset = 0;
    1737              :                     }
    1738        35536 :                   gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
    1739              :                                        && is_gimple_mem_ref_addr
    1740              :                                             (TREE_OPERAND ((*slot)->mem.ref,
    1741              :                                                            0)));
    1742        35536 :                   (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
    1743              :                 }
    1744        50523 :               (*slot)->ref_canonical = true;
    1745              :             }
    1746       938952 :           ref = *slot;
    1747       938952 :           id = ref->id;
    1748              :         }
    1749              :       else
    1750              :         {
    1751      4300338 :           id = memory_accesses.refs_list.length ();
    1752      4300338 :           ref = mem_ref_alloc (&aor, hash, id);
    1753      4300338 :           ref->ref_decomposed = ref_decomposed;
    1754      4300338 :           memory_accesses.refs_list.safe_push (ref);
    1755      4300338 :           *slot = ref;
    1756              : 
    1757      4300338 :           if (dump_file && (dump_flags & TDF_DETAILS))
    1758              :             {
    1759          499 :               fprintf (dump_file, "Memory reference %u: ", id);
    1760          499 :               print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
    1761          499 :               fprintf (dump_file, "\n");
    1762              :             }
    1763              :         }
    1764              : 
    1765      5239290 :       record_mem_ref_loc (ref, stmt, mem);
    1766              :     }
    1767      6936931 :   if (is_stored)
    1768              :     {
    1769      3664896 :       bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
    1770      3664896 :       mark_ref_stored (ref, loop);
    1771              :     }
    1772              :   /* A not simple memory op is also a read when it is a write.  */
    1773      6936931 :   if (!is_stored || id == UNANALYZABLE_MEM_ID
    1774      2494174 :       || ref->mem.ref == error_mark_node)
    1775              :     {
    1776      4888683 :       bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
    1777      4888683 :       mark_ref_loaded (ref, loop);
    1778              :     }
    1779      6936931 :   init_lim_data (stmt)->ref = ref->id;
    1780      6936931 :   return;
    1781              : }
    1782              : 
    1783              : static unsigned *bb_loop_postorder;
    1784              : 
    1785              : /* qsort sort function to sort blocks after their loop fathers postorder.  */
    1786              : 
    1787              : static int
    1788    157158310 : sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
    1789              :                                 void *bb_loop_postorder_)
    1790              : {
    1791    157158310 :   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
    1792    157158310 :   basic_block bb1 = *(const basic_block *)bb1_;
    1793    157158310 :   basic_block bb2 = *(const basic_block *)bb2_;
    1794    157158310 :   class loop *loop1 = bb1->loop_father;
    1795    157158310 :   class loop *loop2 = bb2->loop_father;
    1796    157158310 :   if (loop1->num == loop2->num)
    1797     73837195 :     return bb1->index - bb2->index;
    1798     83321115 :   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
    1799              : }
    1800              : 
    1801              : /* qsort sort function to sort ref locs after their loop fathers postorder.  */
    1802              : 
    1803              : static int
    1804       938937 : sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
    1805              :                                  void *bb_loop_postorder_)
    1806              : {
    1807       938937 :   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
    1808       938937 :   const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
    1809       938937 :   const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
    1810       938937 :   class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
    1811       938937 :   class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
    1812       938937 :   if (loop1->num == loop2->num)
    1813              :     return 0;
    1814       140854 :   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
    1815              : }
    1816              : 
    1817              : /* Gathers memory references in loops.  */
    1818              : 
    1819              : static void
    1820       484912 : analyze_memory_references (bool store_motion)
    1821              : {
    1822       484912 :   gimple_stmt_iterator bsi;
    1823       484912 :   basic_block bb, *bbs;
    1824       484912 :   class loop *outer;
    1825       484912 :   unsigned i, n;
    1826              : 
    1827              :   /* Collect all basic-blocks in loops and sort them after their
    1828              :      loops postorder.  */
    1829       484912 :   i = 0;
    1830       484912 :   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
    1831     14749325 :   FOR_EACH_BB_FN (bb, cfun)
    1832     14264413 :     if (bb->loop_father != current_loops->tree_root)
    1833      6027712 :       bbs[i++] = bb;
    1834       484912 :   n = i;
    1835       484912 :   gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
    1836              :               bb_loop_postorder);
    1837              : 
    1838              :   /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
    1839              :      That results in better locality for all the bitmaps.  It also
    1840              :      automatically sorts the location list of gathered memory references
    1841              :      after their loop postorder number allowing to binary-search it.  */
    1842      6512624 :   for (i = 0; i < n; ++i)
    1843              :     {
    1844      6027712 :       basic_block bb = bbs[i];
    1845     56015242 :       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    1846     43959818 :         gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
    1847              :     }
    1848              : 
    1849              :   /* Verify the list of gathered memory references is sorted after their
    1850              :      loop postorder number.  */
    1851       484912 :   if (flag_checking)
    1852              :     {
    1853              :       im_mem_ref *ref;
    1854     10947192 :       FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    1855      6170083 :         for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
    1856       938937 :           gcc_assert (sort_locs_in_loop_postorder_cmp
    1857              :                         (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
    1858              :                          bb_loop_postorder) <= 0);
    1859              :     }
    1860              : 
    1861       484912 :   free (bbs);
    1862              : 
    1863       484912 :   if (!store_motion)
    1864       484912 :     return;
    1865              : 
    1866              :   /* Propagate the information about accessed memory references up
    1867              :      the loop hierarchy.  */
    1868      2766767 :   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    1869              :     {
    1870              :       /* Finalize the overall touched references (including subloops).  */
    1871      1312364 :       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
    1872      1312364 :                        &memory_accesses.refs_stored_in_loop[loop->num]);
    1873              : 
    1874              :       /* Propagate the information about accessed memory references up
    1875              :          the loop hierarchy.  */
    1876      1312364 :       outer = loop_outer (loop);
    1877      1312364 :       if (outer == current_loops->tree_root)
    1878      1014128 :         continue;
    1879              : 
    1880       298236 :       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
    1881       298236 :                        &memory_accesses.all_refs_stored_in_loop[loop->num]);
    1882       484801 :     }
    1883              : }
    1884              : 
    1885              : /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
    1886              :    tree_to_aff_combination_expand.  */
    1887              : 
    1888              : static bool
    1889       541364 : mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
    1890              :                       hash_map<tree, name_expansion *> **ttae_cache,
    1891              :                       bool tbaa_p)
    1892              : {
    1893       541364 :   gcc_checking_assert (mem1->mem.ref != error_mark_node
    1894              :                        && mem2->mem.ref != error_mark_node);
    1895              : 
    1896              :   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
    1897              :      object and their offset differ in such a way that the locations cannot
    1898              :      overlap, then they cannot alias.  */
    1899              :   poly_widest_int size1, size2;
    1900      1082728 :   aff_tree off1, off2;
    1901              : 
    1902              :   /* Perform basic offset and type-based disambiguation.  */
    1903       541364 :   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
    1904              :     return false;
    1905              : 
    1906              :   /* The expansion of addresses may be a bit expensive, thus we only do
    1907              :      the check at -O2 and higher optimization levels.  */
    1908        66953 :   if (optimize < 2)
    1909              :     return true;
    1910              : 
    1911        57272 :   get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
    1912        57272 :   get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
    1913        57272 :   aff_combination_expand (&off1, ttae_cache);
    1914        57272 :   aff_combination_expand (&off2, ttae_cache);
    1915        57272 :   aff_combination_scale (&off1, -1);
    1916        57272 :   aff_combination_add (&off2, &off1);
    1917              : 
    1918        57272 :   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
    1919              :     return false;
    1920              : 
    1921              :   return true;
    1922       541364 : }
    1923              : 
    1924              : /* Compare function for bsearch searching for reference locations
    1925              :    in a loop.  */
    1926              : 
    1927              : static int
    1928       259441 : find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
    1929              :                           void *bb_loop_postorder_)
    1930              : {
    1931       259441 :   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
    1932       259441 :   class loop *loop = (class loop *)const_cast<void *>(loop_);
    1933       259441 :   mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
    1934       259441 :   class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
    1935       259441 :   if (loop->num  == loc_loop->num
    1936       259441 :       || flow_loop_nested_p (loop, loc_loop))
    1937       199948 :     return 0;
    1938        59493 :   return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
    1939        59493 :           ? -1 : 1);
    1940              : }
    1941              : 
    1942              : /* Iterates over all locations of REF in LOOP and its subloops calling
    1943              :    fn.operator() with the location as argument.  When that operator
    1944              :    returns true the iteration is stopped and true is returned.
    1945              :    Otherwise false is returned.  */
    1946              : 
    1947              : template <typename FN>
    1948              : static bool
    1949       199948 : for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
    1950              : {
    1951              :   unsigned i;
    1952              :   mem_ref_loc *loc;
    1953              : 
    1954              :   /* Search for the cluster of locs in the accesses_in_loop vector
    1955              :      which is sorted after postorder index of the loop father.  */
    1956       199948 :   loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
    1957              :                                        bb_loop_postorder);
    1958       199948 :   if (!loc)
    1959            0 :     return false;
    1960              : 
    1961              :   /* We have found one location inside loop or its sub-loops.  Iterate
    1962              :      both forward and backward to cover the whole cluster.  */
    1963       199948 :   i = loc - ref->accesses_in_loop.address ();
    1964       274155 :   while (i > 0)
    1965              :     {
    1966       132394 :       --i;
    1967       132394 :       mem_ref_loc *l = &ref->accesses_in_loop[i];
    1968       132394 :       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
    1969              :         break;
    1970       109288 :       if (fn (l))
    1971              :         return true;
    1972              :     }
    1973       445705 :   for (i = loc - ref->accesses_in_loop.address ();
    1974       280838 :        i < ref->accesses_in_loop.length (); ++i)
    1975              :     {
    1976       208700 :       mem_ref_loc *l = &ref->accesses_in_loop[i];
    1977       208700 :       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
    1978              :         break;
    1979       190353 :       if (fn (l))
    1980              :         return true;
    1981              :     }
    1982              : 
    1983              :   return false;
    1984              : }
    1985              : 
    1986              : /* Rewrites location LOC by TMP_VAR.  */
    1987              : 
    1988              : class rewrite_mem_ref_loc
    1989              : {
    1990              : public:
    1991        34908 :   rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
    1992              :   bool operator () (mem_ref_loc *loc);
    1993              :   tree tmp_var;
    1994              : };
    1995              : 
    1996              : bool
    1997        55665 : rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
    1998              : {
    1999        55665 :   *loc->ref = tmp_var;
    2000        55665 :   update_stmt (loc->stmt);
    2001        55665 :   return false;
    2002              : }
    2003              : 
    2004              : /* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
    2005              : 
    2006              : static void
    2007        34908 : rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
    2008              : {
    2009            0 :   for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
    2010            0 : }
    2011              : 
    2012              : /* Stores the first reference location in LOCP.  */
    2013              : 
    2014              : class first_mem_ref_loc_1
    2015              : {
    2016              : public:
    2017        34908 :   first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
    2018              :   bool operator () (mem_ref_loc *loc);
    2019              :   mem_ref_loc **locp;
    2020              : };
    2021              : 
    2022              : bool
    2023        34908 : first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
    2024              : {
    2025        34908 :   *locp = loc;
    2026        34908 :   return true;
    2027              : }
    2028              : 
    2029              : /* Returns the first reference location to REF in LOOP.  */
    2030              : 
    2031              : static mem_ref_loc *
    2032        34908 : first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
    2033              : {
    2034        34908 :   mem_ref_loc *locp = NULL;
    2035            0 :   for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
    2036        34908 :   return locp;
    2037              : }
    2038              : 
    2039              : /* Helper function for execute_sm.  Emit code to store TMP_VAR into
    2040              :    MEM along edge EX.
    2041              : 
    2042              :    The store is only done if MEM has changed.  We do this so no
    2043              :    changes to MEM occur on code paths that did not originally store
    2044              :    into it.
    2045              : 
    2046              :    The common case for execute_sm will transform:
    2047              : 
    2048              :      for (...) {
    2049              :        if (foo)
    2050              :          stuff;
    2051              :        else
    2052              :          MEM = TMP_VAR;
    2053              :      }
    2054              : 
    2055              :    into:
    2056              : 
    2057              :      lsm = MEM;
    2058              :      for (...) {
    2059              :        if (foo)
    2060              :          stuff;
    2061              :        else
    2062              :          lsm = TMP_VAR;
    2063              :      }
    2064              :      MEM = lsm;
    2065              : 
    2066              :   This function will generate:
    2067              : 
    2068              :      lsm = MEM;
    2069              : 
    2070              :      lsm_flag = false;
    2071              :      ...
    2072              :      for (...) {
    2073              :        if (foo)
    2074              :          stuff;
    2075              :        else {
    2076              :          lsm = TMP_VAR;
    2077              :          lsm_flag = true;
    2078              :        }
    2079              :      }
    2080              :      if (lsm_flag)      <--
    2081              :        MEM = lsm;       <-- (X)
    2082              : 
    2083              :   In case MEM and TMP_VAR are NULL the function will return the then
    2084              :   block so the caller can insert (X) and other related stmts.
    2085              : */
    2086              : 
    2087              : static basic_block
    2088        14495 : execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
    2089              :                        edge preheader, hash_set <basic_block> *flag_bbs,
    2090              :                        edge &append_cond_position, edge &last_cond_fallthru)
    2091              : {
    2092        14495 :   basic_block new_bb, then_bb, old_dest;
    2093        14495 :   bool loop_has_only_one_exit;
    2094        14495 :   edge then_old_edge;
    2095        14495 :   gimple_stmt_iterator gsi;
    2096        14495 :   gimple *stmt;
    2097        14495 :   bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
    2098              : 
    2099        14495 :   profile_count count_sum = profile_count::zero ();
    2100        14495 :   int nbbs = 0, ncount = 0;
    2101        14495 :   profile_probability flag_probability = profile_probability::uninitialized ();
    2102              : 
    2103              :   /* Flag is set in FLAG_BBS. Determine probability that flag will be true
    2104              :      at loop exit.
    2105              : 
    2106              :      This code may look fancy, but it cannot update profile very realistically
    2107              :      because we do not know the probability that flag will be true at given
    2108              :      loop exit.
    2109              : 
    2110              :      We look for two interesting extremes
    2111              :        - when exit is dominated by block setting the flag, we know it will
    2112              :          always be true.  This is a common case.
    2113              :        - when all blocks setting the flag have very low frequency we know
    2114              :          it will likely be false.
    2115              :      In all other cases we default to 2/3 for flag being true.  */
    2116              : 
    2117        14495 :   for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
    2118        50265 :        it != flag_bbs->end (); ++it)
    2119              :     {
    2120        17885 :        if ((*it)->count.initialized_p ())
    2121        17872 :          count_sum += (*it)->count, ncount ++;
    2122        17885 :        if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
    2123         2438 :          flag_probability = profile_probability::always ();
    2124        17885 :        nbbs++;
    2125              :     }
    2126              : 
    2127        14495 :   profile_probability cap
    2128        14495 :           = profile_probability::guessed_always ().apply_scale (2, 3);
    2129              : 
    2130        14495 :   if (flag_probability.initialized_p ())
    2131              :     ;
    2132        12158 :   else if (ncount == nbbs
    2133        24525 :            && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
    2134              :     {
    2135          184 :       flag_probability = count_sum.probability_in (preheader->count ());
    2136          184 :       if (flag_probability > cap)
    2137           71 :         flag_probability = cap;
    2138              :     }
    2139              : 
    2140        14495 :   if (!flag_probability.initialized_p ())
    2141        11974 :     flag_probability = cap;
    2142              : 
    2143              :   /* ?? Insert store after previous store if applicable.  See note
    2144              :      below.  */
    2145        14495 :   if (append_cond_position)
    2146         5496 :     ex = append_cond_position;
    2147              : 
    2148        14495 :   loop_has_only_one_exit = single_pred_p (ex->dest);
    2149              : 
    2150        14495 :   if (loop_has_only_one_exit)
    2151         5974 :     ex = split_block_after_labels (ex->dest);
    2152              :   else
    2153              :     {
    2154         8521 :       for (gphi_iterator gpi = gsi_start_phis (ex->dest);
    2155        12647 :            !gsi_end_p (gpi); gsi_next (&gpi))
    2156              :         {
    2157         4951 :           gphi *phi = gpi.phi ();
    2158         9902 :           if (virtual_operand_p (gimple_phi_result (phi)))
    2159         4126 :             continue;
    2160              : 
    2161              :           /* When the destination has a non-virtual PHI node with multiple
    2162              :              predecessors make sure we preserve the PHI structure by
    2163              :              forcing a forwarder block so that hoisting of that PHI will
    2164              :              still work.  */
    2165          825 :           split_edge (ex);
    2166          825 :           break;
    2167              :         }
    2168              :     }
    2169              : 
    2170        14495 :   old_dest = ex->dest;
    2171        14495 :   new_bb = split_edge (ex);
    2172        14495 :   if (append_cond_position)
    2173         5496 :     new_bb->count += last_cond_fallthru->count ();
    2174        14495 :   then_bb = create_empty_bb (new_bb);
    2175        14495 :   then_bb->count = new_bb->count.apply_probability (flag_probability);
    2176        14495 :   if (irr)
    2177           49 :     then_bb->flags = BB_IRREDUCIBLE_LOOP;
    2178        14495 :   add_bb_to_loop (then_bb, new_bb->loop_father);
    2179              : 
    2180        14495 :   gsi = gsi_start_bb (new_bb);
    2181        14495 :   stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
    2182              :                             NULL_TREE, NULL_TREE);
    2183        14495 :   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    2184              : 
    2185              :   /* Insert actual store.  */
    2186        14495 :   if (mem)
    2187              :     {
    2188        12087 :       gsi = gsi_start_bb (then_bb);
    2189        12087 :       stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
    2190        12087 :       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    2191              :     }
    2192              : 
    2193        14495 :   edge e1 = single_succ_edge (new_bb);
    2194        28941 :   edge e2 = make_edge (new_bb, then_bb,
    2195              :                        EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
    2196        14495 :   e2->probability = flag_probability;
    2197              : 
    2198        14495 :   e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
    2199        14495 :   e1->flags &= ~EDGE_FALLTHRU;
    2200              : 
    2201        14495 :   e1->probability = flag_probability.invert ();
    2202              : 
    2203        28941 :   then_old_edge = make_single_succ_edge (then_bb, old_dest,
    2204              :                              EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
    2205              : 
    2206        14495 :   set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
    2207              : 
    2208        14495 :   if (append_cond_position)
    2209              :     {
    2210         5496 :       basic_block prevbb = last_cond_fallthru->src;
    2211         5496 :       redirect_edge_succ (last_cond_fallthru, new_bb);
    2212         5496 :       set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
    2213         5496 :       set_immediate_dominator (CDI_DOMINATORS, old_dest,
    2214              :                                recompute_dominator (CDI_DOMINATORS, old_dest));
    2215              :     }
    2216              : 
    2217              :   /* ?? Because stores may alias, they must happen in the exact
    2218              :      sequence they originally happened.  Save the position right after
    2219              :      the (_lsm) store we just created so we can continue appending after
    2220              :      it and maintain the original order.  */
    2221        14495 :   append_cond_position = then_old_edge;
    2222        14495 :   last_cond_fallthru = find_edge (new_bb, old_dest);
    2223              : 
    2224        14495 :   if (!loop_has_only_one_exit)
    2225         8521 :     for (gphi_iterator gpi = gsi_start_phis (old_dest);
    2226        12485 :          !gsi_end_p (gpi); gsi_next (&gpi))
    2227              :       {
    2228         3964 :         gphi *phi = gpi.phi ();
    2229         3964 :         unsigned i;
    2230              : 
    2231        22673 :         for (i = 0; i < gimple_phi_num_args (phi); i++)
    2232        18709 :           if (gimple_phi_arg_edge (phi, i)->src == new_bb)
    2233              :             {
    2234         3964 :               tree arg = gimple_phi_arg_def (phi, i);
    2235         3964 :               add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
    2236         3964 :               update_stmt (phi);
    2237              :             }
    2238              :       }
    2239              : 
    2240        14495 :   return then_bb;
    2241              : }
    2242              : 
    2243              : /* When REF is set on the location, set flag indicating the store.  */
    2244              : 
    2245              : class sm_set_flag_if_changed
    2246              : {
    2247              : public:
    2248         7712 :   sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
    2249         7712 :          : flag (flag_), bbs (bbs_) {}
    2250              :   bool operator () (mem_ref_loc *loc);
    2251              :   tree flag;
    2252              :   hash_set <basic_block> *bbs;
    2253              : };
    2254              : 
    2255              : bool
    2256        14680 : sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
    2257              : {
    2258              :   /* Only set the flag for writes.  */
    2259        14680 :   if (is_gimple_assign (loc->stmt)
    2260        14680 :       && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
    2261              :     {
    2262         8829 :       gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
    2263         8829 :       gimple *stmt = gimple_build_assign (flag, boolean_true_node);
    2264         8829 :       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    2265         8829 :       bbs->add (gimple_bb (stmt));
    2266              :     }
    2267        14680 :   return false;
    2268              : }
    2269              : 
    2270              : /* Helper function for execute_sm.  On every location where REF is
    2271              :    set, set an appropriate flag indicating the store.  */
    2272              : 
    2273              : static tree
    2274         7712 : execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
    2275              :                                 hash_set <basic_block> *bbs)
    2276              : {
    2277         7712 :   tree flag;
    2278         7712 :   char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
    2279         7712 :   flag = create_tmp_reg (boolean_type_node, str);
    2280         7712 :   for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
    2281         7712 :   return flag;
    2282              : }
    2283              : 
    2284        34908 : struct sm_aux
    2285              : {
    2286              :   tree tmp_var;
    2287              :   tree store_flag;
    2288              :   hash_set <basic_block> flag_bbs;
    2289              : };
    2290              : 
    2291              : /* Executes store motion of memory reference REF from LOOP.
    2292              :    Exits from the LOOP are stored in EXITS.  The initialization of the
    2293              :    temporary variable is put to the preheader of the loop, and assignments
    2294              :    to the reference from the temporary variable are emitted to exits.  */
    2295              : 
    2296              : static sm_aux *
    2297        34908 : execute_sm (class loop *loop, im_mem_ref *ref,
    2298              :             hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
    2299              :             bool use_other_flag_var)
    2300              : {
    2301        34908 :   gassign *load;
    2302        34908 :   struct fmt_data fmt_data;
    2303        34908 :   struct lim_aux_data *lim_data;
    2304        34908 :   bool multi_threaded_model_p = false;
    2305        34908 :   gimple_stmt_iterator gsi;
    2306        34908 :   sm_aux *aux = new sm_aux;
    2307              : 
    2308        34908 :   if (dump_file && (dump_flags & TDF_DETAILS))
    2309              :     {
    2310           82 :       fprintf (dump_file, "Executing store motion of ");
    2311           82 :       print_generic_expr (dump_file, ref->mem.ref);
    2312           82 :       fprintf (dump_file, " from loop %d\n", loop->num);
    2313              :     }
    2314              : 
    2315        34908 :   aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
    2316        34908 :                                  get_lsm_tmp_name (ref->mem.ref, ~0));
    2317              : 
    2318        34908 :   fmt_data.loop = loop;
    2319        34908 :   fmt_data.orig_loop = loop;
    2320        34908 :   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
    2321              : 
    2322        69816 :   bool always_stored = ref_always_stored_p (loop, ref);
    2323        34908 :   if (maybe_mt
    2324        11734 :       && (bb_in_transaction (loop_preheader_edge (loop)->src)
    2325        11733 :           || (ref_can_have_store_data_races (ref->mem.ref) && ! always_stored)
    2326              :           /* Do not speculate a load/store when that's not a noop, either
    2327              :              because the mode cannot be transferred or because there's
    2328              :              UB involved for out-of-bound values.  */
    2329         3753 :           || !mode_can_transfer_bits (TYPE_MODE (TREE_TYPE (ref->mem.ref)))
    2330         3753 :           || TREE_CODE (TREE_TYPE (ref->mem.ref)) == BOOLEAN_TYPE
    2331         3739 :           || (TREE_CODE (ref->mem.ref) == COMPONENT_REF
    2332          879 :               && DECL_BIT_FIELD (TREE_OPERAND (ref->mem.ref, 1)))))
    2333              :     multi_threaded_model_p = true;
    2334              : 
    2335        34908 :   if (multi_threaded_model_p && !use_other_flag_var)
    2336         7712 :     aux->store_flag
    2337         7712 :       = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
    2338              :   else
    2339        27196 :     aux->store_flag = NULL_TREE;
    2340              : 
    2341              :   /* Remember variable setup.  */
    2342        34908 :   aux_map.put (ref, aux);
    2343              : 
    2344        34908 :   rewrite_mem_refs (loop, ref, aux->tmp_var);
    2345              : 
    2346              :   /* Emit the load code on a random exit edge or into the latch if
    2347              :      the loop does not exit, so that we are sure it will be processed
    2348              :      by move_computations after all dependencies.  */
    2349        34908 :   gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
    2350              : 
    2351              :   /* Avoid doing a load if there was no load of the ref in the loop.
    2352              :      Esp. when the ref is not always stored we cannot optimize it
    2353              :      away later.  But when it is not always stored we must use a conditional
    2354              :      store then.  */
    2355        34908 :   if ((!always_stored && !multi_threaded_model_p)
    2356        34908 :       || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
    2357        18210 :     load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
    2358              :   else
    2359              :     {
    2360              :       /* If not emitting a load mark the uninitialized state on the
    2361              :          loop entry as not to be warned for.  */
    2362        16698 :       tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
    2363        16698 :       suppress_warning (uninit, OPT_Wuninitialized);
    2364        16698 :       uninit = get_or_create_ssa_default_def (cfun, uninit);
    2365        16698 :       load = gimple_build_assign (aux->tmp_var, uninit);
    2366              :     }
    2367        34908 :   lim_data = init_lim_data (load);
    2368        34908 :   lim_data->max_loop = loop;
    2369        34908 :   lim_data->tgt_loop = loop;
    2370        34908 :   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
    2371              : 
    2372        34908 :   if (aux->store_flag)
    2373              :     {
    2374         7712 :       load = gimple_build_assign (aux->store_flag, boolean_false_node);
    2375         7712 :       lim_data = init_lim_data (load);
    2376         7712 :       lim_data->max_loop = loop;
    2377         7712 :       lim_data->tgt_loop = loop;
    2378         7712 :       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
    2379              :     }
    2380              : 
    2381        34908 :   return aux;
    2382              : }
    2383              : 
    2384              : /* sm_ord is used for ordinary stores we can retain order with respect
    2385              :        to other stores
    2386              :    sm_unord is used for conditional executed stores which need to be
    2387              :        able to execute in arbitrary order with respect to other stores
    2388              :    sm_other is used for stores we do not try to apply store motion to.  */
    2389              : enum sm_kind { sm_ord, sm_unord, sm_other };
    2390              : struct seq_entry
    2391              : {
    2392              :   seq_entry () = default;
    2393        52008 :   seq_entry (unsigned f, sm_kind k, tree fr = NULL)
    2394        52008 :     : first (f), second (k), from (fr) {}
    2395              :   unsigned first;
    2396              :   sm_kind second;
    2397              :   tree from;
    2398              : };
    2399              : 
    2400              : static void
    2401        33046 : execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
    2402              :                  hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
    2403              :                  edge &append_cond_position, edge &last_cond_fallthru,
    2404              :                  bitmap clobbers_to_prune)
    2405              : {
    2406              :   /* Sink the stores to exit from the loop.  */
    2407       116255 :   for (unsigned i = seq.length (); i > 0; --i)
    2408              :     {
    2409        50163 :       im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
    2410        50163 :       if (seq[i-1].second == sm_other)
    2411              :         {
    2412         4923 :           gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
    2413         4923 :           gassign *store;
    2414         4923 :           if (ref->mem.ref == error_mark_node)
    2415              :             {
    2416          179 :               tree lhs = gimple_assign_lhs (ref->accesses_in_loop[0].stmt);
    2417          179 :               if (dump_file && (dump_flags & TDF_DETAILS))
    2418              :                 {
    2419            3 :                   fprintf (dump_file, "Re-issueing dependent ");
    2420            3 :                   print_generic_expr (dump_file, unshare_expr (seq[i-1].from));
    2421            3 :                   fprintf (dump_file, " of ");
    2422            3 :                   print_generic_expr (dump_file, lhs);
    2423            3 :                   fprintf (dump_file, " from loop %d on exit %d -> %d\n",
    2424            3 :                            loop->num, ex->src->index, ex->dest->index);
    2425              :                 }
    2426          179 :               store = gimple_build_assign (unshare_expr (lhs),
    2427          179 :                                            unshare_expr (seq[i-1].from));
    2428          179 :               bitmap_set_bit (clobbers_to_prune, seq[i-1].first);
    2429              :             }
    2430              :           else
    2431              :             {
    2432         4744 :               if (dump_file && (dump_flags & TDF_DETAILS))
    2433              :                 {
    2434            0 :                   fprintf (dump_file, "Re-issueing dependent store of ");
    2435            0 :                   print_generic_expr (dump_file, ref->mem.ref);
    2436            0 :                   fprintf (dump_file, " from loop %d on exit %d -> %d\n",
    2437            0 :                            loop->num, ex->src->index, ex->dest->index);
    2438              :                 }
    2439         4744 :               store = gimple_build_assign (unshare_expr (ref->mem.ref),
    2440         4744 :                                            seq[i-1].from);
    2441              :             }
    2442         4923 :           gsi_insert_on_edge (ex, store);
    2443              :         }
    2444              :       else
    2445              :         {
    2446        45240 :           sm_aux *aux = *aux_map.get (ref);
    2447        45240 :           if (!aux->store_flag || kind == sm_ord)
    2448              :             {
    2449        33153 :               gassign *store;
    2450        33153 :               store = gimple_build_assign (unshare_expr (ref->mem.ref),
    2451              :                                            aux->tmp_var);
    2452        33153 :               gsi_insert_on_edge (ex, store);
    2453        33153 :             }
    2454              :           else
    2455        12087 :             execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
    2456              :                                    aux->store_flag,
    2457              :                                    loop_preheader_edge (loop), &aux->flag_bbs,
    2458              :                                    append_cond_position, last_cond_fallthru);
    2459              :         }
    2460              :     }
    2461        33046 : }
    2462              : 
    2463              : /* Push the SM candidate at index PTR in the sequence SEQ down until
    2464              :    we hit the next SM candidate.  Return true if that went OK and
    2465              :    false if we could not disambiguate agains another unrelated ref.
    2466              :    Update *AT to the index where the candidate now resides.  */
    2467              : 
    2468              : static bool
    2469        37564 : sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
    2470              : {
    2471        37564 :   *at = ptr;
    2472        38340 :   for (; ptr > 0; --ptr)
    2473              :     {
    2474        17686 :       seq_entry &new_cand = seq[ptr];
    2475        17686 :       seq_entry &against = seq[ptr-1];
    2476        17686 :       if (against.second == sm_ord
    2477         5800 :           || (against.second == sm_other && against.from != NULL_TREE))
    2478              :         /* Found the tail of the sequence.  */
    2479              :         break;
    2480              :       /* We may not ignore self-dependences here.  */
    2481         1077 :       if (new_cand.first == against.first
    2482              :           /* ???  We could actually handle clobbers here, but not easily
    2483              :              with LIMs dependence analysis.  */
    2484          855 :           || (memory_accesses.refs_list[new_cand.first]->mem.ref
    2485          855 :               == error_mark_node)
    2486          850 :           || (memory_accesses.refs_list[against.first]->mem.ref
    2487              :               == error_mark_node)
    2488         1903 :           || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
    2489          826 :                                   memory_accesses.refs_list[against.first],
    2490              :                                   false))
    2491              :         /* ???  Prune new_cand from the list of refs to apply SM to.  */
    2492          301 :         return false;
    2493          776 :       std::swap (new_cand, against);
    2494          776 :       *at = ptr - 1;
    2495              :     }
    2496              :   return true;
    2497              : }
    2498              : 
    2499              : /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
    2500              :    walking backwards from VDEF (or the end of BB if VDEF is NULL).  */
    2501              : 
    2502              : static int
    2503        35480 : sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
    2504              :                  vec<seq_entry> &seq, bitmap refs_not_in_seq,
    2505              :                  bitmap refs_not_supported, bool forked,
    2506              :                  bitmap fully_visited)
    2507              : {
    2508        35480 :   if (!vdef)
    2509        26382 :     for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
    2510       113441 :          gsi_prev (&gsi))
    2511              :       {
    2512       139569 :         vdef = gimple_vdef (gsi_stmt (gsi));
    2513        52510 :         if (vdef)
    2514              :           break;
    2515              :       }
    2516        26382 :   if (!vdef)
    2517              :     {
    2518         8589 :       gphi *vphi = get_virtual_phi (bb);
    2519         8589 :       if (vphi)
    2520         4281 :         vdef = gimple_phi_result (vphi);
    2521              :     }
    2522        35480 :   if (!vdef)
    2523              :     {
    2524         4308 :       if (single_pred_p (bb))
    2525              :         /* This handles the perfect nest case.  */
    2526         3516 :         return sm_seq_valid_bb (loop, single_pred (bb), vdef,
    2527              :                                 seq, refs_not_in_seq, refs_not_supported,
    2528         3516 :                                 forked, fully_visited);
    2529              :       return 0;
    2530              :     }
    2531        55159 :   do
    2532              :     {
    2533       110318 :       gimple *def = SSA_NAME_DEF_STMT (vdef);
    2534        55159 :       if (gimple_bb (def) != bb)
    2535              :         {
    2536              :           /* If we forked by processing a PHI do not allow our walk to
    2537              :              merge again until we handle that robustly.  */
    2538         3407 :           if (forked)
    2539              :             {
    2540              :               /* Mark refs_not_in_seq as unsupported.  */
    2541         1694 :               bitmap_ior_into (refs_not_supported, refs_not_in_seq);
    2542         1694 :               return 1;
    2543              :             }
    2544              :           /* Otherwise it doesn't really matter if we end up in different
    2545              :              BBs.  */
    2546              :           bb = gimple_bb (def);
    2547              :         }
    2548        53465 :       if (gphi *phi = dyn_cast <gphi *> (def))
    2549              :         {
    2550              :           /* Handle CFG merges.  Until we handle forks (gimple_bb (def) != bb)
    2551              :              this is still linear.
    2552              :              Eventually we want to cache intermediate results per BB
    2553              :              (but we can't easily cache for different exits?).  */
    2554              :           /* Stop at PHIs with possible backedges.  */
    2555        10486 :           if (bb == bb->loop_father->header
    2556         5366 :               || bb->flags & BB_IRREDUCIBLE_LOOP)
    2557              :             {
    2558              :               /* Mark refs_not_in_seq as unsupported.  */
    2559         5125 :               bitmap_ior_into (refs_not_supported, refs_not_in_seq);
    2560         5125 :               return 1;
    2561              :             }
    2562         5361 :           if (gimple_phi_num_args (phi) == 1)
    2563         1528 :             return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
    2564              :                                     gimple_phi_arg_def (phi, 0), seq,
    2565              :                                     refs_not_in_seq, refs_not_supported,
    2566         1528 :                                     false, fully_visited);
    2567         7666 :           if (bitmap_bit_p (fully_visited,
    2568         3833 :                             SSA_NAME_VERSION (gimple_phi_result (phi))))
    2569              :             return 1;
    2570         3729 :           auto_vec<seq_entry> first_edge_seq;
    2571         3729 :           auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
    2572         3729 :           int eret;
    2573         3729 :           bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
    2574         3729 :           eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
    2575              :                                   gimple_phi_arg_def (phi, 0),
    2576              :                                   first_edge_seq,
    2577              :                                   tem_refs_not_in_seq, refs_not_supported,
    2578              :                                   true, fully_visited);
    2579         3729 :           if (eret != 1)
    2580              :             return -1;
    2581              :           /* Simplify our lives by pruning the sequence of !sm_ord.  */
    2582         3514 :           while (!first_edge_seq.is_empty ()
    2583         8422 :                  && first_edge_seq.last ().second != sm_ord)
    2584         1179 :             first_edge_seq.pop ();
    2585         7570 :           for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
    2586              :             {
    2587         5828 :               tree vuse = gimple_phi_arg_def (phi, i);
    2588         5828 :               edge e = gimple_phi_arg_edge (phi, i);
    2589         5828 :               auto_vec<seq_entry> edge_seq;
    2590         5828 :               bitmap_and_compl (tem_refs_not_in_seq,
    2591              :                                 refs_not_in_seq, refs_not_supported);
    2592              :               /* If we've marked all refs we search for as unsupported
    2593              :                  we can stop processing and use the sequence as before
    2594              :                  the PHI.  */
    2595         5828 :               if (bitmap_empty_p (tem_refs_not_in_seq))
    2596              :                 return 1;
    2597         3841 :               eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
    2598              :                                       tem_refs_not_in_seq, refs_not_supported,
    2599              :                                       true, fully_visited);
    2600         3841 :               if (eret != 1)
    2601              :                 return -1;
    2602              :               /* Simplify our lives by pruning the sequence of !sm_ord.  */
    2603         3355 :               while (!edge_seq.is_empty ()
    2604         8411 :                      && edge_seq.last ().second != sm_ord)
    2605         1215 :                 edge_seq.pop ();
    2606         7682 :               unsigned min_len = MIN(first_edge_seq.length (),
    2607              :                                      edge_seq.length ());
    2608              :               /* Incrementally merge seqs into first_edge_seq.  */
    2609         3841 :               int first_uneq = -1;
    2610         3841 :               auto_vec<seq_entry, 2> extra_refs;
    2611         6032 :               for (unsigned int i = 0; i < min_len; ++i)
    2612              :                 {
    2613              :                   /* ???  We can more intelligently merge when we face different
    2614              :                      order by additional sinking operations in one sequence.
    2615              :                      For now we simply mark them as to be processed by the
    2616              :                      not order-preserving SM code.  */
    2617         2191 :                   if (first_edge_seq[i].first != edge_seq[i].first)
    2618              :                     {
    2619           35 :                       if (first_edge_seq[i].second == sm_ord)
    2620           22 :                         bitmap_set_bit (refs_not_supported,
    2621           22 :                                         first_edge_seq[i].first);
    2622           35 :                       if (edge_seq[i].second == sm_ord)
    2623           24 :                         bitmap_set_bit (refs_not_supported, edge_seq[i].first);
    2624           35 :                       first_edge_seq[i].second = sm_other;
    2625           35 :                       first_edge_seq[i].from = NULL_TREE;
    2626              :                       /* Record the dropped refs for later processing.  */
    2627           35 :                       if (first_uneq == -1)
    2628           33 :                         first_uneq = i;
    2629           70 :                       extra_refs.safe_push (seq_entry (edge_seq[i].first,
    2630           35 :                                                        sm_other, NULL_TREE));
    2631              :                     }
    2632              :                   /* sm_other prevails.  */
    2633         2156 :                   else if (first_edge_seq[i].second != edge_seq[i].second)
    2634              :                     {
    2635              :                       /* Make sure the ref is marked as not supported.  */
    2636            0 :                       bitmap_set_bit (refs_not_supported,
    2637            0 :                                       first_edge_seq[i].first);
    2638            0 :                       first_edge_seq[i].second = sm_other;
    2639            0 :                       first_edge_seq[i].from = NULL_TREE;
    2640              :                     }
    2641         2156 :                   else if (first_edge_seq[i].second == sm_other
    2642            7 :                            && first_edge_seq[i].from != NULL_TREE
    2643         2163 :                            && (edge_seq[i].from == NULL_TREE
    2644            7 :                                || !operand_equal_p (first_edge_seq[i].from,
    2645            7 :                                                     edge_seq[i].from, 0)))
    2646            7 :                     first_edge_seq[i].from = NULL_TREE;
    2647              :                 }
    2648              :               /* Any excess elements become sm_other since they are now
    2649              :                  coonditionally executed.  */
    2650        10563 :               if (first_edge_seq.length () > edge_seq.length ())
    2651              :                 {
    2652         6708 :                   for (unsigned i = edge_seq.length ();
    2653         8720 :                        i < first_edge_seq.length (); ++i)
    2654              :                     {
    2655         6708 :                       if (first_edge_seq[i].second == sm_ord)
    2656         3566 :                         bitmap_set_bit (refs_not_supported,
    2657         3566 :                                         first_edge_seq[i].first);
    2658         6708 :                       first_edge_seq[i].second = sm_other;
    2659              :                     }
    2660              :                 }
    2661         1829 :               else if (edge_seq.length () > first_edge_seq.length ())
    2662              :                 {
    2663            8 :                   if (first_uneq == -1)
    2664            0 :                     first_uneq = first_edge_seq.length ();
    2665            8 :                   for (unsigned i = first_edge_seq.length ();
    2666           26 :                        i < edge_seq.length (); ++i)
    2667              :                     {
    2668           18 :                       if (edge_seq[i].second == sm_ord)
    2669            8 :                         bitmap_set_bit (refs_not_supported, edge_seq[i].first);
    2670           36 :                       extra_refs.safe_push (seq_entry (edge_seq[i].first,
    2671           18 :                                                        sm_other, NULL_TREE));
    2672              :                     }
    2673              :                 }
    2674              :               /* Put unmerged refs at first_uneq to force dependence checking
    2675              :                  on them.  */
    2676         3841 :               if (first_uneq != -1)
    2677              :                 {
    2678              :                   /* Missing ordered_splice_at.  */
    2679           66 :                   if ((unsigned)first_uneq == first_edge_seq.length ())
    2680            0 :                     first_edge_seq.safe_splice (extra_refs);
    2681              :                   else
    2682              :                     {
    2683           33 :                       unsigned fes_length = first_edge_seq.length ();
    2684           33 :                       first_edge_seq.safe_grow (fes_length
    2685           33 :                                                 + extra_refs.length ());
    2686           33 :                       memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
    2687           33 :                                &first_edge_seq[first_uneq],
    2688           33 :                                (fes_length - first_uneq) * sizeof (seq_entry));
    2689           66 :                       memcpy (&first_edge_seq[first_uneq],
    2690           33 :                               extra_refs.address (),
    2691           33 :                               extra_refs.length () * sizeof (seq_entry));
    2692              :                     }
    2693              :                 }
    2694         5828 :             }
    2695              :           /* Use the sequence from the first edge and push SMs down.  */
    2696         4741 :           for (unsigned i = 0; i < first_edge_seq.length (); ++i)
    2697              :             {
    2698         2999 :               unsigned id = first_edge_seq[i].first;
    2699         2999 :               seq.safe_push (first_edge_seq[i]);
    2700         2999 :               unsigned new_idx;
    2701         2999 :               if ((first_edge_seq[i].second == sm_ord
    2702         2356 :                    || (first_edge_seq[i].second == sm_other
    2703         2356 :                        && first_edge_seq[i].from != NULL_TREE))
    2704         4006 :                   && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
    2705              :                 {
    2706          125 :                   if (first_edge_seq[i].second == sm_ord)
    2707            0 :                     bitmap_set_bit (refs_not_supported, id);
    2708              :                   /* Mark it sm_other.  */
    2709          125 :                   seq[new_idx].second = sm_other;
    2710          125 :                   seq[new_idx].from = NULL_TREE;
    2711              :                 }
    2712              :             }
    2713         1742 :           bitmap_set_bit (fully_visited,
    2714         1742 :                           SSA_NAME_VERSION (gimple_phi_result (phi)));
    2715         1742 :           return 1;
    2716         3729 :         }
    2717        42979 :       lim_aux_data *data = get_lim_data (def);
    2718        42979 :       im_mem_ref *ref = memory_accesses.refs_list[data->ref];
    2719        42979 :       if (data->ref == UNANALYZABLE_MEM_ID)
    2720              :         return -1;
    2721              :       /* Stop at memory references which we can't move.  */
    2722        42979 :       else if ((ref->mem.ref == error_mark_node
    2723              :                 /* We can move end-of-storage/object down.  */
    2724          423 :                 && !gimple_clobber_p (ref->accesses_in_loop[0].stmt,
    2725              :                                       CLOBBER_STORAGE_END)
    2726          291 :                 && !gimple_clobber_p (ref->accesses_in_loop[0].stmt,
    2727              :                                       CLOBBER_OBJECT_END))
    2728        43295 :                || TREE_THIS_VOLATILE (ref->mem.ref))
    2729              :         {
    2730              :           /* Mark refs_not_in_seq as unsupported.  */
    2731          139 :           bitmap_ior_into (refs_not_supported, refs_not_in_seq);
    2732          139 :           return 1;
    2733              :         }
    2734              :       /* One of the stores we want to apply SM to and we've not yet seen.  */
    2735        42840 :       else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
    2736              :         {
    2737        33532 :           seq.safe_push (seq_entry (data->ref, sm_ord));
    2738              : 
    2739              :           /* 1) push it down the queue until a SMed
    2740              :              and not ignored ref is reached, skipping all not SMed refs
    2741              :              and ignored refs via non-TBAA disambiguation.  */
    2742        33532 :           unsigned new_idx;
    2743        67064 :           if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
    2744              :               /* If that fails but we did not fork yet continue, we'll see
    2745              :                  to re-materialize all of the stores in the sequence then.
    2746              :                  Further stores will only be pushed up to this one.  */
    2747        33532 :               && forked)
    2748              :             {
    2749            0 :               bitmap_set_bit (refs_not_supported, data->ref);
    2750              :               /* Mark it sm_other.  */
    2751            0 :               seq[new_idx].second = sm_other;
    2752              :             }
    2753              : 
    2754              :           /* 2) check whether we've seen all refs we want to SM and if so
    2755              :              declare success for the active exit  */
    2756        33532 :           if (bitmap_empty_p (refs_not_in_seq))
    2757        18853 :             return 1;
    2758              :         }
    2759              :       else
    2760              :         /* Another store not part of the final sequence.  Simply push it.  */
    2761         9308 :         seq.safe_push (seq_entry (data->ref, sm_other,
    2762         9308 :                                   gimple_assign_rhs1 (def)));
    2763              : 
    2764        79146 :       vdef = gimple_vuse (def);
    2765              :     }
    2766              :   while (1);
    2767              : }
    2768              : 
    2769              : /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
    2770              :    edges of the LOOP.  */
    2771              : 
    2772              : static void
    2773        21333 : hoist_memory_references (class loop *loop, bitmap mem_refs,
    2774              :                          const vec<edge> &exits)
    2775              : {
    2776        21333 :   im_mem_ref *ref;
    2777        21333 :   unsigned  i;
    2778        21333 :   bitmap_iterator bi;
    2779              : 
    2780              :   /* There's a special case we can use ordered re-materialization for
    2781              :      conditionally excuted stores which is when all stores in the loop
    2782              :      happen in the same basic-block.  In that case we know we'll reach
    2783              :      all stores and thus can simply process that BB and emit a single
    2784              :      conditional block of ordered materializations.  See PR102436.  */
    2785        21333 :   basic_block single_store_bb = NULL;
    2786        31447 :   EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
    2787              :                             0, i, bi)
    2788              :     {
    2789        29247 :       bool fail = false;
    2790        29247 :       ref = memory_accesses.refs_list[i];
    2791       133950 :       for (auto loc : ref->accesses_in_loop)
    2792       143594 :         if (!gimple_vdef (loc.stmt))
    2793              :           ;
    2794        32043 :         else if (!single_store_bb)
    2795              :           {
    2796        21333 :             single_store_bb = gimple_bb (loc.stmt);
    2797        21333 :             bool conditional = false;
    2798        76672 :             for (edge e : exits)
    2799        22918 :               if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
    2800              :                 {
    2801              :                   /* Conditional as seen from e.  */
    2802              :                   conditional = true;
    2803              :                   break;
    2804              :                 }
    2805        21333 :             if (!conditional)
    2806              :               {
    2807              :                 fail = true;
    2808              :                 break;
    2809              :               }
    2810              :           }
    2811        10710 :         else if (single_store_bb != gimple_bb (loc.stmt))
    2812              :           {
    2813              :             fail = true;
    2814              :             break;
    2815              :           }
    2816        29247 :       if (fail)
    2817              :         {
    2818              :           single_store_bb = NULL;
    2819              :           break;
    2820              :         }
    2821              :     }
    2822        21333 :   if (single_store_bb)
    2823              :     {
    2824              :       /* Analyze the single block with stores.  */
    2825         2200 :       auto_bitmap fully_visited;
    2826         2200 :       auto_bitmap refs_not_supported;
    2827         2200 :       auto_bitmap refs_not_in_seq;
    2828         2200 :       auto_vec<seq_entry> seq;
    2829         2200 :       bitmap_copy (refs_not_in_seq, mem_refs);
    2830         2200 :       int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
    2831              :                                  seq, refs_not_in_seq, refs_not_supported,
    2832              :                                  false, fully_visited);
    2833         2200 :       if (res != 1)
    2834              :         {
    2835              :           /* Unhandled refs can still fail this.  */
    2836            0 :           bitmap_clear (mem_refs);
    2837            0 :           return;
    2838              :         }
    2839              : 
    2840              :       /* We cannot handle sm_other since we neither remember the
    2841              :          stored location nor the value at the point we execute them.  */
    2842         5320 :       for (unsigned i = 0; i < seq.length (); ++i)
    2843              :         {
    2844         3120 :           unsigned new_i;
    2845         3120 :           if (seq[i].second == sm_other
    2846         3120 :               && seq[i].from != NULL_TREE)
    2847          438 :             seq[i].from = NULL_TREE;
    2848         2682 :           else if ((seq[i].second == sm_ord
    2849            0 :                     || (seq[i].second == sm_other
    2850            0 :                         && seq[i].from != NULL_TREE))
    2851         2682 :                    && !sm_seq_push_down (seq, i, &new_i))
    2852              :             {
    2853           63 :               bitmap_set_bit (refs_not_supported, seq[new_i].first);
    2854           63 :               seq[new_i].second = sm_other;
    2855           63 :               seq[new_i].from = NULL_TREE;
    2856              :             }
    2857              :         }
    2858         2200 :       bitmap_and_compl_into (mem_refs, refs_not_supported);
    2859         2200 :       if (bitmap_empty_p (mem_refs))
    2860              :         return;
    2861              : 
    2862              :       /* Prune seq.  */
    2863         2545 :       while (seq.last ().second == sm_other
    2864         2545 :              && seq.last ().from == NULL_TREE)
    2865          396 :         seq.pop ();
    2866              : 
    2867         2149 :       hash_map<im_mem_ref *, sm_aux *> aux_map;
    2868              : 
    2869              :       /* Execute SM but delay the store materialization for ordered
    2870              :          sequences on exit.  Remember a created flag var and make
    2871              :          sure to re-use it.  */
    2872         2149 :       sm_aux *flag_var_aux = nullptr;
    2873         4768 :       EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
    2874              :         {
    2875         2619 :           ref = memory_accesses.refs_list[i];
    2876         2619 :           sm_aux *aux = execute_sm (loop, ref, aux_map, true,
    2877              :                                     flag_var_aux != nullptr);
    2878         2619 :           if (aux->store_flag)
    2879         1504 :             flag_var_aux = aux;
    2880              :         }
    2881              : 
    2882              :       /* Materialize ordered store sequences on exits.  */
    2883         2149 :       edge e;
    2884         2149 :       auto_bitmap clobbers_to_prune;
    2885         5882 :       FOR_EACH_VEC_ELT (exits, i, e)
    2886              :         {
    2887         3733 :           edge append_cond_position = NULL;
    2888         3733 :           edge last_cond_fallthru = NULL;
    2889         3733 :           edge insert_e = e;
    2890              :           /* Construct the single flag variable control flow and insert
    2891              :              the ordered seq of stores in the then block.  With
    2892              :              -fstore-data-races we can do the stores unconditionally.  */
    2893         3733 :           if (flag_var_aux)
    2894         2408 :             insert_e
    2895              :               = single_pred_edge
    2896         2408 :                   (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
    2897              :                                           flag_var_aux->store_flag,
    2898              :                                           loop_preheader_edge (loop),
    2899              :                                           &flag_var_aux->flag_bbs,
    2900              :                                           append_cond_position,
    2901              :                                           last_cond_fallthru));
    2902         3733 :           execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
    2903              :                            append_cond_position, last_cond_fallthru,
    2904              :                            clobbers_to_prune);
    2905         3733 :           gsi_commit_one_edge_insert (insert_e, NULL);
    2906              :         }
    2907              : 
    2908              :       /* Remove clobbers inside the loop we re-materialized on exits.  */
    2909         2149 :       EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
    2910              :         {
    2911            0 :           gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
    2912            0 :           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
    2913            0 :           unlink_stmt_vdef (stmt);
    2914            0 :           release_defs (stmt);
    2915            0 :           gimple_set_vdef (stmt, NULL_TREE);
    2916            0 :           gsi_remove (&gsi, true);
    2917              :         }
    2918              : 
    2919         4768 :       for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
    2920         7387 :            iter != aux_map.end (); ++iter)
    2921         5238 :         delete (*iter).second;
    2922              : 
    2923         2149 :       return;
    2924         4349 :     }
    2925              : 
    2926              :   /* To address PR57359 before actually applying store-motion check
    2927              :      the candidates found for validity with regards to reordering
    2928              :      relative to other stores which we until here disambiguated using
    2929              :      TBAA which isn't valid.
    2930              :      What matters is the order of the last stores to the mem_refs
    2931              :      with respect to the other stores of the loop at the point of the
    2932              :      loop exits.  */
    2933              : 
    2934              :   /* For each exit compute the store order, pruning from mem_refs
    2935              :      on the fly.  */
    2936              :   /* The complexity of this is at least
    2937              :      O(number of exits * number of SM refs) but more approaching
    2938              :      O(number of exits * number of SM refs * number of stores).  */
    2939              :   /* ???  Somehow do this in a single sweep over the loop body.  */
    2940        19133 :   auto_vec<std::pair<edge, vec<seq_entry> > > sms;
    2941        19133 :   auto_bitmap refs_not_supported (&lim_bitmap_obstack);
    2942        19133 :   edge e;
    2943        39007 :   FOR_EACH_VEC_ELT (exits, i, e)
    2944              :     {
    2945        22124 :       vec<seq_entry> seq;
    2946        22124 :       seq.create (4);
    2947        22124 :       auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
    2948        22124 :       bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
    2949        22124 :       if (bitmap_empty_p (refs_not_in_seq))
    2950              :         {
    2951         1458 :           seq.release ();
    2952         1458 :           break;
    2953              :         }
    2954        20666 :       auto_bitmap fully_visited;
    2955        20666 :       int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
    2956              :                                  seq, refs_not_in_seq,
    2957              :                                  refs_not_supported, false,
    2958              :                                  fully_visited);
    2959        20666 :       if (res != 1)
    2960              :         {
    2961          792 :           bitmap_copy (refs_not_supported, mem_refs);
    2962          792 :           seq.release ();
    2963          792 :           break;
    2964              :         }
    2965        19874 :       sms.safe_push (std::make_pair (e, seq));
    2966        22916 :     }
    2967              : 
    2968              :   /* Prune pruned mem_refs from earlier processed exits.  */
    2969        19133 :   bool changed = !bitmap_empty_p (refs_not_supported);
    2970        19133 :   while (changed)
    2971              :     {
    2972         6073 :       changed = false;
    2973         6073 :       std::pair<edge, vec<seq_entry> > *seq;
    2974        39962 :       FOR_EACH_VEC_ELT (sms, i, seq)
    2975              :         {
    2976              :           bool need_to_push = false;
    2977        13659 :           for (unsigned i = 0; i < seq->second.length (); ++i)
    2978              :             {
    2979         6281 :               sm_kind kind = seq->second[i].second;
    2980         6281 :               if (kind == sm_other && seq->second[i].from == NULL_TREE)
    2981              :                 break;
    2982         5205 :               unsigned id = seq->second[i].first;
    2983         5205 :               unsigned new_idx;
    2984         5205 :               if (kind == sm_ord
    2985         5205 :                   && bitmap_bit_p (refs_not_supported, id))
    2986              :                 {
    2987         2540 :                   seq->second[i].second = sm_other;
    2988         2540 :                   gcc_assert (seq->second[i].from == NULL_TREE);
    2989              :                   need_to_push = true;
    2990              :                 }
    2991         2665 :               else if (need_to_push
    2992         2665 :                        && !sm_seq_push_down (seq->second, i, &new_idx))
    2993              :                 {
    2994              :                   /* We need to push down both sm_ord and sm_other
    2995              :                      but for the latter we need to disqualify all
    2996              :                      following refs.  */
    2997          113 :                   if (kind == sm_ord)
    2998              :                     {
    2999            7 :                       if (bitmap_set_bit (refs_not_supported, id))
    3000            7 :                         changed = true;
    3001            7 :                       seq->second[new_idx].second = sm_other;
    3002              :                     }
    3003              :                   else
    3004              :                     {
    3005          411 :                       for (unsigned j = seq->second.length () - 1;
    3006          305 :                            j > new_idx; --j)
    3007          199 :                         if (seq->second[j].second == sm_ord
    3008          321 :                             && bitmap_set_bit (refs_not_supported,
    3009          122 :                                                seq->second[j].first))
    3010              :                           changed = true;
    3011          106 :                       seq->second.truncate (new_idx);
    3012          106 :                       break;
    3013              :                     }
    3014              :                 }
    3015              :             }
    3016              :         }
    3017              :     }
    3018        19133 :   std::pair<edge, vec<seq_entry> > *seq;
    3019        58881 :   FOR_EACH_VEC_ELT (sms, i, seq)
    3020              :     {
    3021              :       /* Prune sm_other from the end.  */
    3022        63531 :       while (!seq->second.is_empty ()
    3023        43657 :              && seq->second.last ().second == sm_other)
    3024         5136 :         seq->second.pop ();
    3025              :       /* Prune duplicates from the start.  */
    3026        19874 :       auto_bitmap seen (&lim_bitmap_obstack);
    3027        19874 :       unsigned j, k;
    3028        47318 :       for (j = k = 0; j < seq->second.length (); ++j)
    3029        27444 :         if (bitmap_set_bit (seen, seq->second[j].first))
    3030              :           {
    3031        27335 :             if (k != j)
    3032           77 :               seq->second[k] = seq->second[j];
    3033        27335 :             ++k;
    3034              :           }
    3035        19874 :       seq->second.truncate (k);
    3036              :       /* And verify.  */
    3037        19874 :       seq_entry *e;
    3038        86957 :       FOR_EACH_VEC_ELT (seq->second, j, e)
    3039        27335 :         gcc_assert (e->second == sm_ord
    3040              :                     || (e->second == sm_other && e->from != NULL_TREE));
    3041        19874 :     }
    3042              : 
    3043              :   /* Verify dependence for refs we cannot handle with the order preserving
    3044              :      code (refs_not_supported) or prune them from mem_refs.  */
    3045        19133 :   auto_vec<seq_entry> unord_refs;
    3046        34788 :   EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
    3047              :     {
    3048        15655 :       ref = memory_accesses.refs_list[i];
    3049        15655 :       if (!ref_indep_loop_p (loop, ref, sm_waw))
    3050         6540 :         bitmap_clear_bit (mem_refs, i);
    3051              :       /* We've now verified store order for ref with respect to all other
    3052              :          stores in the loop does not matter.  */
    3053              :       else
    3054         9115 :         unord_refs.safe_push (seq_entry (i, sm_unord));
    3055              :     }
    3056              : 
    3057        19133 :   hash_map<im_mem_ref *, sm_aux *> aux_map;
    3058              : 
    3059              :   /* Execute SM but delay the store materialization for ordered
    3060              :      sequences on exit.  */
    3061        51422 :   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
    3062              :     {
    3063        32289 :       ref = memory_accesses.refs_list[i];
    3064        32289 :       execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
    3065              :                   false);
    3066              :     }
    3067              : 
    3068              :   /* Materialize ordered store sequences on exits.  */
    3069        19133 :   auto_bitmap clobbers_to_prune;
    3070        42795 :   FOR_EACH_VEC_ELT (exits, i, e)
    3071              :     {
    3072        23662 :       edge append_cond_position = NULL;
    3073        23662 :       edge last_cond_fallthru = NULL;
    3074        23662 :       if (i < sms.length ())
    3075              :         {
    3076        19874 :           gcc_assert (sms[i].first == e);
    3077        19874 :           execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
    3078              :                            append_cond_position, last_cond_fallthru,
    3079              :                            clobbers_to_prune);
    3080        19874 :           sms[i].second.release ();
    3081              :         }
    3082        23662 :       if (!unord_refs.is_empty ())
    3083         9439 :         execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
    3084              :                          append_cond_position, last_cond_fallthru,
    3085              :                          clobbers_to_prune);
    3086              :       /* Commit edge inserts here to preserve the order of stores
    3087              :          when an exit exits multiple loops.  */
    3088        23662 :       gsi_commit_one_edge_insert (e, NULL);
    3089              :     }
    3090              : 
    3091              :   /* Remove clobbers inside the loop we re-materialized on exits.  */
    3092        19312 :   EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
    3093              :     {
    3094          179 :       gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
    3095          179 :       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
    3096          179 :       unlink_stmt_vdef (stmt);
    3097          179 :       release_defs (stmt);
    3098          179 :       gimple_set_vdef (stmt, NULL_TREE);
    3099          179 :       gsi_remove (&gsi, true);
    3100              :     }
    3101              : 
    3102        51422 :   for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
    3103       102844 :        iter != aux_map.end (); ++iter)
    3104        64578 :     delete (*iter).second;
    3105        19133 : }
    3106              : 
    3107              : class ref_always_stored
    3108              : {
    3109              : public:
    3110        79936 :   ref_always_stored (class loop *loop_)
    3111        79936 :       : loop (loop_) {}
    3112              :   bool operator () (mem_ref_loc *loc);
    3113              :   class loop *loop;
    3114              : };
    3115              : 
    3116              : bool
    3117       151134 : ref_always_stored::operator () (mem_ref_loc *loc)
    3118              : {
    3119              :   /* Make sure the statement is a store.  */
    3120       151134 :   tree lhs = gimple_get_lhs (loc->stmt);
    3121       151134 :   if (!lhs
    3122       151134 :       || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
    3123              :     return false;
    3124              : 
    3125        95703 :   struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
    3126        95703 :   if (!lim_data)
    3127              :     return false;
    3128              : 
    3129        95703 :   class loop *must_exec = lim_data->always_executed_in;
    3130        95703 :   if (!must_exec)
    3131              :     return false;
    3132              : 
    3133        36346 :   if (must_exec == loop
    3134        36346 :       || flow_loop_nested_p (must_exec, loop))
    3135        33040 :     return true;
    3136              : 
    3137              :   return false;
    3138              : }
    3139              : 
    3140              : /* Returns true if REF is always stored in LOOP.  */
    3141              : 
    3142              : static bool
    3143        79936 : ref_always_stored_p (class loop *loop, im_mem_ref *ref)
    3144              : {
    3145        79936 :   return for_all_locs_in_loop (loop, ref, ref_always_stored (loop));
    3146              : }
    3147              : 
    3148              : /* Returns true if LOAD_REF and STORE_REF form a "self write" pattern
    3149              :    where the stored value comes from the loaded value via SSA.
    3150              :    Example: a[i] = a[0] is safe to hoist a[0] even when i==0.  */
    3151              : 
    3152              : static bool
    3153       318250 : is_self_write (im_mem_ref *load_ref, im_mem_ref *store_ref)
    3154              : {
    3155              :   /* Only handle the simple case with a single access per ref.
    3156              :      Bail out on multiple accesses to be conservative.  */
    3157       318250 :   if (load_ref->accesses_in_loop.length () != 1
    3158       409192 :       || store_ref->accesses_in_loop.length () != 1)
    3159              :     return false;
    3160              : 
    3161        90959 :   gimple *load_stmt = load_ref->accesses_in_loop[0].stmt;
    3162        90959 :   gimple *store_stmt = store_ref->accesses_in_loop[0].stmt;
    3163              : 
    3164        90959 :   if (!is_gimple_assign (load_stmt) || !is_gimple_assign (store_stmt))
    3165              :     return false;
    3166              : 
    3167        90959 :   tree loaded_val = gimple_assign_lhs (load_stmt);
    3168        90959 :   tree stored_val = gimple_assign_rhs1 (store_stmt);
    3169              : 
    3170        90959 :   if (TREE_CODE (loaded_val) != SSA_NAME || TREE_CODE (stored_val) != SSA_NAME)
    3171              :     return false;
    3172              : 
    3173              :   /* Self write: stored value is the loaded value.  */
    3174        65031 :   if (stored_val != loaded_val)
    3175              :     return false;
    3176              : 
    3177              : 
    3178              :   /* TODO: Try to factor this out with mem_ref_hasher::equal
    3179              :      into im_compare_access_position_and_size
    3180              :      or a similar helper to centralize this delicate handling
    3181              :      complete for MEM_REF offsets and base pointer equality.
    3182              : 
    3183              :      TODO: Also ensure max_size_known_p agrees or resort to
    3184              :      alignment considerations to rule out partial overlaps.
    3185              : 
    3186              :      See:
    3187              :      https://gcc.gnu.org/pipermail/gcc-patches/2025-December/704155.html
    3188              :      For more context on TODOs above.  */
    3189              : 
    3190              :   /* They may alias.  Verify exact same location.  */
    3191         1672 :   return (operand_equal_p (load_ref->mem.base, store_ref->mem.base, 0)
    3192           46 :           && known_eq (load_ref->mem.size, store_ref->mem.size)
    3193         1718 :           && known_eq (load_ref->mem.offset, store_ref->mem.offset));
    3194              : 
    3195              : }
    3196              : 
    3197              : /* Returns true if REF1 and REF2 are independent.  */
    3198              : 
    3199              : static bool
    3200       603526 : refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
    3201              : {
    3202       603526 :   if (ref1 == ref2)
    3203              :     return true;
    3204              : 
    3205       541364 :   if (dump_file && (dump_flags & TDF_DETAILS))
    3206         3041 :     fprintf (dump_file, "Querying dependency of refs %u and %u: ",
    3207         3041 :              ref1->id, ref2->id);
    3208              : 
    3209       541364 :   if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
    3210              :     {
    3211        64550 :       if (dump_file && (dump_flags & TDF_DETAILS))
    3212           99 :         fprintf (dump_file, "dependent.\n");
    3213        64550 :       return false;
    3214              :     }
    3215              :   else
    3216              :     {
    3217       476814 :       if (dump_file && (dump_flags & TDF_DETAILS))
    3218         2942 :         fprintf (dump_file, "independent.\n");
    3219       476814 :       return true;
    3220              :     }
    3221              : }
    3222              : 
    3223              : /* Returns true if REF is independent on all other accessess in LOOP.
    3224              :    KIND specifies the kind of dependence to consider.
    3225              :      lim_raw assumes REF is not stored in LOOP and disambiguates RAW
    3226              :              dependences so if true REF can be hoisted out of LOOP
    3227              :      sm_war disambiguates a store REF against all other loads to see
    3228              :             whether the store can be sunk across loads out of LOOP
    3229              :      sm_waw disambiguates a store REF against all other stores to see
    3230              :             whether the store can be sunk across stores out of LOOP.  */
    3231              : 
    3232              : static bool
    3233      2064512 : ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
    3234              : {
    3235      2064512 :   bool indep_p = true;
    3236      2064512 :   bitmap refs_to_check;
    3237              : 
    3238      2064512 :   if (kind == sm_war)
    3239       973426 :     refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
    3240              :   else
    3241      1091086 :     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
    3242              : 
    3243      2064512 :   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
    3244      2064512 :       || ref->mem.ref == error_mark_node)
    3245              :     indep_p = false;
    3246              :   else
    3247              :     {
    3248              :       /* tri-state, { unknown, independent, dependent }  */
    3249       662016 :       dep_state state = query_loop_dependence (loop, ref, kind);
    3250       662016 :       if (state != dep_unknown)
    3251            0 :         return state == dep_independent ? true : false;
    3252              : 
    3253       662016 :       class loop *inner = loop->inner;
    3254       811692 :       while (inner)
    3255              :         {
    3256       451027 :           if (!ref_indep_loop_p (inner, ref, kind))
    3257              :             {
    3258              :               indep_p = false;
    3259              :               break;
    3260              :             }
    3261       149676 :           inner = inner->next;
    3262              :         }
    3263              : 
    3264       662016 :       if (indep_p)
    3265              :         {
    3266       360665 :           unsigned i;
    3267       360665 :           bitmap_iterator bi;
    3268       920837 :           EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
    3269              :             {
    3270       629065 :               im_mem_ref *aref = memory_accesses.refs_list[i];
    3271       629065 :               if (aref->mem.ref == error_mark_node)
    3272              :                 {
    3273        26348 :                   gimple *stmt = aref->accesses_in_loop[0].stmt;
    3274        26348 :                   if ((kind == sm_war
    3275         9366 :                        && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
    3276              :                                                     kind != sm_waw))
    3277        35046 :                       || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
    3278              :                                                    kind != sm_waw))
    3279              :                     {
    3280              :                       indep_p = false;
    3281              :                       break;
    3282              :                     }
    3283              :                 }
    3284              :               /* For hoisting loads (lim_raw), allow "self write": the store
    3285              :                  writes back the loaded value.  Example: a[i] = a[0]
    3286              :                  is safe even when i==0 causes aliasing.  */
    3287       602717 :               else if (kind == lim_raw
    3288       318250 :                        && ref->loaded && aref->stored
    3289       920967 :                        && is_self_write (ref, aref))
    3290              :                 {
    3291           17 :                   if (dump_file && (dump_flags & TDF_DETAILS))
    3292            2 :                     fprintf (dump_file,
    3293              :                              "Dependency of refs %u and %u: "
    3294              :                              "independent (self write).\n",
    3295            2 :                              ref->id, aref->id);
    3296              :                 }
    3297              : 
    3298       602700 :               else if (!refs_independent_p (ref, aref, kind != sm_waw))
    3299              :                 {
    3300              :                   indep_p = false;
    3301              :                   break;
    3302              :                 }
    3303              :             }
    3304              :         }
    3305              :     }
    3306              : 
    3307      2064512 :   if (dump_file && (dump_flags & TDF_DETAILS))
    3308          763 :     fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
    3309              :              kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
    3310          371 :              ref->id, loop->num, indep_p ? "independent" : "dependent");
    3311              : 
    3312              :   /* Record the computed result in the cache.  */
    3313      2064512 :   record_loop_dependence (loop, ref, kind,
    3314              :                           indep_p ? dep_independent : dep_dependent);
    3315              : 
    3316      2064512 :   return indep_p;
    3317              : }
    3318              : 
    3319              : class ref_in_loop_hot_body
    3320              : {
    3321              : public:
    3322        42484 :   ref_in_loop_hot_body (class loop *loop_) : l (loop_) {}
    3323              :   bool operator () (mem_ref_loc *loc);
    3324              :   class loop *l;
    3325              : };
    3326              : 
    3327              : /* Check the coldest loop between loop L and innermost loop.  If there is one
    3328              :    cold loop between L and INNER_LOOP, store motion can be performed, otherwise
    3329              :    no cold loop means no store motion.  get_coldest_out_loop also handles cases
    3330              :    when l is inner_loop.  */
    3331              : bool
    3332        43254 : ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
    3333              : {
    3334        43254 :   basic_block curr_bb = gimple_bb (loc->stmt);
    3335        43254 :   class loop *inner_loop = curr_bb->loop_father;
    3336        43254 :   return get_coldest_out_loop (l, inner_loop, curr_bb);
    3337              : }
    3338              : 
    3339              : 
    3340              : /* Returns true if we can perform store motion of REF from LOOP.  */
    3341              : 
    3342              : static bool
    3343      2883527 : can_sm_ref_p (class loop *loop, im_mem_ref *ref)
    3344              : {
    3345      2883527 :   tree base;
    3346              : 
    3347              :   /* Can't hoist unanalyzable refs.  */
    3348      2883527 :   if (!MEM_ANALYZABLE (ref))
    3349              :     return false;
    3350              : 
    3351              :   /* Can't hoist/sink aggregate copies.  */
    3352      2527295 :   if (ref->mem.ref == error_mark_node)
    3353              :     return false;
    3354              : 
    3355              :   /* It should be movable.  */
    3356      2100913 :   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
    3357      2100687 :       || TREE_THIS_VOLATILE (ref->mem.ref)
    3358      4184733 :       || !for_each_index (&ref->mem.ref, may_move_till, loop))
    3359      1150159 :     return false;
    3360              : 
    3361              :   /* If it can throw fail, we do not properly update EH info.  */
    3362       950754 :   if (tree_could_throw_p (ref->mem.ref))
    3363              :     return false;
    3364              : 
    3365              :   /* If it can trap, it must be always executed in LOOP.
    3366              :      Readonly memory locations may trap when storing to them, but
    3367              :      tree_could_trap_p is a predicate for rvalues, so check that
    3368              :      explicitly.  */
    3369       927453 :   base = get_base_address (ref->mem.ref);
    3370       927453 :   if ((tree_could_trap_p (ref->mem.ref)
    3371       882766 :        || (DECL_P (base) && TREE_READONLY (base))
    3372       882433 :        || TREE_CODE (base) == STRING_CST)
    3373              :       /* ???  We can at least use false here, allowing loads?  We
    3374              :          are forcing conditional stores if the ref is not always
    3375              :          stored to later anyway.  So this would only guard
    3376              :          the load we need to emit.  Thus when the ref is not
    3377              :          loaded we can elide this completely?  */
    3378       972481 :       && !ref_always_stored_p (loop, ref))
    3379              :     return false;
    3380              : 
    3381              :   /* Verify all loads of ref can be hoisted.  */
    3382       892378 :   if (ref->loaded
    3383       101873 :       && bitmap_bit_p (ref->loaded, loop->num)
    3384       989109 :       && !ref_indep_loop_p (loop, ref, lim_raw))
    3385              :     return false;
    3386              : 
    3387              :   /* Verify the candidate can be disambiguated against all loads,
    3388              :      that is, we can elide all in-loop stores.  Disambiguation
    3389              :      against stores is done later when we cannot guarantee preserving
    3390              :      the order of stores.  */
    3391       819964 :   if (!ref_indep_loop_p (loop, ref, sm_war))
    3392              :     return false;
    3393              : 
    3394              :   /* Verify whether the candidate is hot for LOOP.  Only do store motion if the
    3395              :     candidate's profile count is hot.  Statement in cold BB shouldn't be moved
    3396              :     out of it's loop_father.  */
    3397        42484 :   if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
    3398              :     return false;
    3399              : 
    3400              :   return true;
    3401              : }
    3402              : 
    3403              : /* Marks the references in LOOP for that store motion should be performed
    3404              :    in REFS_TO_SM.  SM_EXECUTED is the set of references for that store
    3405              :    motion was performed in one of the outer loops.  */
    3406              : 
    3407              : static void
    3408      1270311 : find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
    3409              : {
    3410      1270311 :   bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
    3411      1270311 :   unsigned i;
    3412      1270311 :   bitmap_iterator bi;
    3413      1270311 :   im_mem_ref *ref;
    3414              : 
    3415      4153838 :   EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
    3416              :     {
    3417      2883527 :       ref = memory_accesses.refs_list[i];
    3418      2883527 :       if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
    3419        41515 :         bitmap_set_bit (refs_to_sm, i);
    3420              :     }
    3421      1270311 : }
    3422              : 
    3423              : /* Checks whether LOOP (with exits stored in EXITS array) is suitable
    3424              :    for a store motion optimization (i.e. whether we can insert statement
    3425              :    on its exits).  */
    3426              : 
    3427              : static bool
    3428      1312364 : loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
    3429              :                       const vec<edge> &exits)
    3430              : {
    3431      1312364 :   unsigned i;
    3432      1312364 :   edge ex;
    3433              : 
    3434      3394419 :   FOR_EACH_VEC_ELT (exits, i, ex)
    3435      2124108 :     if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
    3436              :       return false;
    3437              : 
    3438              :   return true;
    3439              : }
    3440              : 
    3441              : /* Try to perform store motion for all memory references modified inside
    3442              :    LOOP.  SM_EXECUTED is the bitmap of the memory references for that
    3443              :    store motion was executed in one of the outer loops.  */
    3444              : 
    3445              : static void
    3446      1312364 : store_motion_loop (class loop *loop, bitmap sm_executed)
    3447              : {
    3448      1312364 :   auto_vec<edge> exits = get_loop_exit_edges (loop);
    3449      1312364 :   class loop *subloop;
    3450      1312364 :   bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
    3451              : 
    3452      1312364 :   if (loop_suitable_for_sm (loop, exits))
    3453              :     {
    3454      1270311 :       find_refs_for_sm (loop, sm_executed, sm_in_loop);
    3455      1270311 :       if (!bitmap_empty_p (sm_in_loop))
    3456        21333 :         hoist_memory_references (loop, sm_in_loop, exits);
    3457              :     }
    3458              : 
    3459      1312364 :   bitmap_ior_into (sm_executed, sm_in_loop);
    3460      1610600 :   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
    3461       298236 :     store_motion_loop (subloop, sm_executed);
    3462      1312364 :   bitmap_and_compl_into (sm_executed, sm_in_loop);
    3463      1312364 :   BITMAP_FREE (sm_in_loop);
    3464      1312364 : }
    3465              : 
    3466              : /* Try to perform store motion for all memory references modified inside
    3467              :    loops.  */
    3468              : 
    3469              : static void
    3470       484801 : do_store_motion (void)
    3471              : {
    3472       484801 :   class loop *loop;
    3473       484801 :   bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
    3474              : 
    3475      1498929 :   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    3476      1014128 :     store_motion_loop (loop, sm_executed);
    3477              : 
    3478       484801 :   BITMAP_FREE (sm_executed);
    3479       484801 : }
    3480              : 
    3481              : /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
    3482              :    for each such basic block bb records the outermost loop for that execution
    3483              :    of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
    3484              :    blocks that contain a nonpure call.  */
    3485              : 
    3486              : static void
    3487      1312889 : fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
    3488              : {
    3489      1312889 :   basic_block bb = NULL, last = NULL;
    3490      1312889 :   edge e;
    3491      1312889 :   class loop *inn_loop = loop;
    3492              : 
    3493      1611382 :   for (class loop *inner = loop->inner; inner; inner = inner->next)
    3494       298493 :     fill_always_executed_in_1 (inner, contains_call);
    3495              : 
    3496      1312889 :   auto_vec<basic_block, 64> worklist;
    3497      1312889 :   worklist.reserve_exact (loop->num_nodes);
    3498      1312889 :   worklist.quick_push (loop->header);
    3499      2914305 :   do
    3500              :     {
    3501      2914305 :       edge_iterator ei;
    3502      2914305 :       bb = worklist.pop ();
    3503              : 
    3504      2914305 :       if (!flow_bb_inside_loop_p (inn_loop, bb))
    3505              :         {
    3506              :           /* When we are leaving a possibly infinite inner loop
    3507              :              we have to stop processing.  */
    3508       172484 :           if (!finite_loop_p (inn_loop))
    3509              :             break;
    3510              :           /* If the loop was finite we can continue with processing
    3511              :              the loop we exited to.  */
    3512       164162 :           inn_loop = bb->loop_father;
    3513              :         }
    3514              : 
    3515      2905983 :       if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    3516      1705687 :         last = bb;
    3517              : 
    3518      2905983 :       if (bitmap_bit_p (contains_call, bb->index))
    3519              :         break;
    3520              : 
    3521              :       /* If LOOP exits from this BB stop processing.  */
    3522      5595720 :       FOR_EACH_EDGE (e, ei, bb->succs)
    3523      3984519 :         if (!flow_bb_inside_loop_p (loop, e->dest))
    3524              :           break;
    3525      2572674 :       if (e)
    3526              :         break;
    3527              : 
    3528              :       /* A loop might be infinite (TODO use simple loop analysis
    3529              :          to disprove this if possible).  */
    3530      1611201 :       if (bb->flags & BB_IRREDUCIBLE_LOOP)
    3531              :         break;
    3532              : 
    3533      1611085 :       if (bb->loop_father->header == bb)
    3534              :         /* Record that we enter into a subloop since it might not
    3535              :            be finite.  */
    3536              :         /* ???  Entering into a not always executed subloop makes
    3537              :            fill_always_executed_in quadratic in loop depth since
    3538              :            we walk those loops N times.  This is not a problem
    3539              :            in practice though, see PR102253 for a worst-case testcase.  */
    3540       606000 :         inn_loop = bb->loop_father;
    3541              : 
    3542              :       /* Walk the body of LOOP sorted by dominance relation.  Additionally,
    3543              :          if a basic block S dominates the latch, then only blocks dominated
    3544              :          by S are after it.
    3545              :          This is get_loop_body_in_dom_order using a worklist algorithm and
    3546              :          stopping once we are no longer interested in visiting further
    3547              :          blocks.  */
    3548      1611085 :       unsigned old_len = worklist.length ();
    3549      1611085 :       unsigned postpone = 0;
    3550      1611085 :       for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
    3551      3476987 :            son;
    3552      1865902 :            son = next_dom_son (CDI_DOMINATORS, son))
    3553              :         {
    3554      1865902 :           if (!flow_bb_inside_loop_p (loop, son))
    3555        20298 :             continue;
    3556      1845604 :           if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
    3557       531667 :             postpone = worklist.length ();
    3558      1845604 :           worklist.quick_push (son);
    3559              :         }
    3560      1611085 :       if (postpone)
    3561              :         /* Postponing the block that dominates the latch means
    3562              :            processing it last and thus putting it earliest in the
    3563              :            worklist.  */
    3564       156697 :         std::swap (worklist[old_len], worklist[postpone]);
    3565              :     }
    3566      3222170 :   while (!worklist.is_empty ());
    3567              : 
    3568      2098485 :   while (1)
    3569              :     {
    3570      1705687 :       if (last->loop_father == loop
    3571      1705687 :           || (ALWAYS_EXECUTED_IN (last)
    3572       156105 :               && loop_outer (ALWAYS_EXECUTED_IN (last)) == loop))
    3573              :         {
    3574      1705010 :           if (dump_enabled_p ())
    3575         1903 :             dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
    3576              :                          last->index, loop->num);
    3577      1705010 :           SET_ALWAYS_EXECUTED_IN (last, loop);
    3578              :         }
    3579      1705687 :       if (last == loop->header)
    3580              :         break;
    3581       392798 :       last = get_immediate_dominator (CDI_DOMINATORS, last);
    3582              :     }
    3583      1312889 : }
    3584              : 
    3585              : /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
    3586              :    for each such basic block bb records the outermost loop for that execution
    3587              :    of its header implies execution of bb.  */
    3588              : 
    3589              : static void
    3590       484912 : fill_always_executed_in (void)
    3591              : {
    3592       484912 :   basic_block bb;
    3593       484912 :   class loop *loop;
    3594              : 
    3595       484912 :   auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
    3596       484912 :   bitmap_clear (contains_call);
    3597     14749325 :   FOR_EACH_BB_FN (bb, cfun)
    3598              :     {
    3599     14264413 :       if (loop_depth (bb->loop_father) == 0)
    3600              :         {
    3601              :           /* Outside of loops we can skip scanning all stmts.  */
    3602      8236701 :           bitmap_set_bit (contains_call, bb->index);
    3603      8236701 :           continue;
    3604              :         }
    3605      6027712 :       gimple_stmt_iterator gsi;
    3606     50443574 :       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    3607              :         {
    3608     40427973 :           if (nonpure_call_p (gsi_stmt (gsi)))
    3609              :             break;
    3610              :         }
    3611              : 
    3612      6027712 :       if (!gsi_end_p (gsi))
    3613       958148 :         bitmap_set_bit (contains_call, bb->index);
    3614              :     }
    3615              : 
    3616      1499308 :   for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
    3617      1014396 :     fill_always_executed_in_1 (loop, contains_call);
    3618       484912 : }
    3619              : 
    3620              : /* Find the coldest loop preheader for LOOP, also find the nearest hotter loop
    3621              :    to LOOP.  Then recursively iterate each inner loop.  */
    3622              : 
    3623              : void
    3624      1312889 : fill_coldest_and_hotter_out_loop (class loop *coldest_loop,
    3625              :                                   class loop *hotter_loop, class loop *loop)
    3626              : {
    3627      1312889 :   if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
    3628              :                                      coldest_loop))
    3629        27555 :     coldest_loop = loop;
    3630              : 
    3631      1312889 :   coldest_outermost_loop[loop->num] = coldest_loop;
    3632              : 
    3633      1312889 :   hotter_than_inner_loop[loop->num] = NULL;
    3634      1312889 :   class loop *outer_loop = loop_outer (loop);
    3635      1312889 :   if (hotter_loop
    3636      1312889 :       && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
    3637              :                                         hotter_loop))
    3638         2154 :     hotter_than_inner_loop[loop->num] = hotter_loop;
    3639              : 
    3640      1312889 :   if (outer_loop && outer_loop != current_loops->tree_root
    3641      1611382 :       && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
    3642              :                                         outer_loop))
    3643        33784 :     hotter_than_inner_loop[loop->num] = outer_loop;
    3644              : 
    3645      1312889 :   if (dump_enabled_p ())
    3646              :     {
    3647         1470 :       dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ",
    3648              :                    loop->num, coldest_loop->num);
    3649         1470 :       if (hotter_than_inner_loop[loop->num])
    3650          245 :         dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n",
    3651          245 :                      hotter_than_inner_loop[loop->num]->num);
    3652              :       else
    3653         1225 :         dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n");
    3654              :     }
    3655              : 
    3656      1312889 :   class loop *inner_loop;
    3657      1611382 :   for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next)
    3658       596986 :     fill_coldest_and_hotter_out_loop (coldest_loop,
    3659       298493 :                                       hotter_than_inner_loop[loop->num],
    3660              :                                       inner_loop);
    3661      1312889 : }
    3662              : 
    3663              : /* Compute the global information needed by the loop invariant motion pass.  */
    3664              : 
    3665              : static void
    3666       484912 : tree_ssa_lim_initialize (bool store_motion)
    3667              : {
    3668       484912 :   unsigned i;
    3669              : 
    3670       484912 :   bitmap_obstack_initialize (&lim_bitmap_obstack);
    3671       484912 :   gcc_obstack_init (&mem_ref_obstack);
    3672       484912 :   lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
    3673              : 
    3674       484912 :   if (flag_tm)
    3675          104 :     compute_transaction_bits ();
    3676              : 
    3677       484912 :   memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
    3678       484912 :   memory_accesses.refs_list.create (100);
    3679              :   /* Allocate a special, unanalyzable mem-ref with ID zero.  */
    3680       484912 :   memory_accesses.refs_list.quick_push
    3681       484912 :     (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
    3682              : 
    3683       969824 :   memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
    3684       969824 :   memory_accesses.refs_loaded_in_loop.quick_grow_cleared (number_of_loops (cfun));
    3685       969824 :   memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
    3686       969824 :   memory_accesses.refs_stored_in_loop.quick_grow_cleared (number_of_loops (cfun));
    3687       484912 :   if (store_motion)
    3688              :     {
    3689       969602 :       memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
    3690       484801 :       memory_accesses.all_refs_stored_in_loop.quick_grow_cleared
    3691       969602 :                                                       (number_of_loops (cfun));
    3692              :     }
    3693              : 
    3694      6011146 :   for (i = 0; i < number_of_loops (cfun); i++)
    3695              :     {
    3696      2520661 :       bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
    3697              :                          &lim_bitmap_obstack);
    3698      2520661 :       bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
    3699              :                          &lim_bitmap_obstack);
    3700      2520661 :       if (store_motion)
    3701      2519560 :         bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
    3702              :                            &lim_bitmap_obstack);
    3703              :     }
    3704              : 
    3705       484912 :   memory_accesses.ttae_cache = NULL;
    3706              : 
    3707              :   /* Initialize bb_loop_postorder with a mapping from loop->num to
    3708              :      its postorder index.  */
    3709       484912 :   i = 0;
    3710       969824 :   bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
    3711      2767625 :   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    3712      1312889 :     bb_loop_postorder[loop->num] = i++;
    3713       484912 : }
    3714              : 
    3715              : /* Cleans up after the invariant motion pass.  */
    3716              : 
    3717              : static void
    3718       484912 : tree_ssa_lim_finalize (void)
    3719              : {
    3720       484912 :   basic_block bb;
    3721       484912 :   unsigned i;
    3722       484912 :   im_mem_ref *ref;
    3723              : 
    3724     14794581 :   FOR_EACH_BB_FN (bb, cfun)
    3725     14309669 :     SET_ALWAYS_EXECUTED_IN (bb, NULL);
    3726              : 
    3727       484912 :   bitmap_obstack_release (&lim_bitmap_obstack);
    3728       969824 :   delete lim_aux_data_map;
    3729              : 
    3730       484912 :   delete memory_accesses.refs;
    3731       484912 :   memory_accesses.refs = NULL;
    3732              : 
    3733      5716088 :   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    3734      5231176 :     memref_free (ref);
    3735       484912 :   memory_accesses.refs_list.release ();
    3736       484912 :   obstack_free (&mem_ref_obstack, NULL);
    3737              : 
    3738       484912 :   memory_accesses.refs_loaded_in_loop.release ();
    3739       484912 :   memory_accesses.refs_stored_in_loop.release ();
    3740       484912 :   memory_accesses.all_refs_stored_in_loop.release ();
    3741              : 
    3742       484912 :   if (memory_accesses.ttae_cache)
    3743         4570 :     free_affine_expand_cache (&memory_accesses.ttae_cache);
    3744              : 
    3745       484912 :   free (bb_loop_postorder);
    3746              : 
    3747       484912 :   coldest_outermost_loop.release ();
    3748       484912 :   hotter_than_inner_loop.release ();
    3749       484912 : }
    3750              : 
    3751              : /* Moves invariants from loops.  Only "expensive" invariants are moved out --
    3752              :    i.e. those that are likely to be win regardless of the register pressure.
    3753              :    Only perform store motion if STORE_MOTION is true.  */
    3754              : 
    3755              : unsigned int
    3756       484912 : loop_invariant_motion_in_fun (function *fun, bool store_motion)
    3757              : {
    3758       484912 :   unsigned int todo = 0;
    3759              : 
    3760       484912 :   tree_ssa_lim_initialize (store_motion);
    3761              : 
    3762       484912 :   mark_ssa_maybe_undefs ();
    3763              : 
    3764              :   /* Gathers information about memory accesses in the loops.  */
    3765       484912 :   analyze_memory_references (store_motion);
    3766              : 
    3767              :   /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
    3768       484912 :   fill_always_executed_in ();
    3769              : 
    3770              :   /* Pre-compute coldest outermost loop and nearest hotter loop of each loop.
    3771              :    */
    3772       484912 :   class loop *loop;
    3773       969824 :   coldest_outermost_loop.create (number_of_loops (cfun));
    3774       969824 :   coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
    3775       969824 :   hotter_than_inner_loop.create (number_of_loops (cfun));
    3776       969824 :   hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
    3777      1499308 :   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    3778      1014396 :     fill_coldest_and_hotter_out_loop (loop, NULL, loop);
    3779              : 
    3780       484912 :   int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
    3781       484912 :   int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
    3782              : 
    3783              :   /* For each statement determine the outermost loop in that it is
    3784              :      invariant and cost for computing the invariant.  */
    3785     14749325 :   for (int i = 0; i < n; ++i)
    3786     14264413 :     compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
    3787              : 
    3788              :   /* Execute store motion.  Force the necessary invariants to be moved
    3789              :      out of the loops as well.  */
    3790       484912 :   if (store_motion)
    3791       484801 :     do_store_motion ();
    3792              : 
    3793       484912 :   free (rpo);
    3794       484912 :   rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
    3795       484912 :   n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
    3796              : 
    3797              :   /* Move the expressions that are expensive enough.  */
    3798     14794581 :   for (int i = 0; i < n; ++i)
    3799     14309669 :     todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
    3800              : 
    3801       484912 :   free (rpo);
    3802              : 
    3803       484912 :   gsi_commit_edge_inserts ();
    3804       484912 :   if (need_ssa_update_p (fun))
    3805        14872 :     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    3806              : 
    3807       484912 :   tree_ssa_lim_finalize ();
    3808              : 
    3809       484912 :   return todo;
    3810              : }
    3811              : 
    3812              : /* Loop invariant motion pass.  */
    3813              : 
    3814              : namespace {
    3815              : 
    3816              : const pass_data pass_data_lim =
    3817              : {
    3818              :   GIMPLE_PASS, /* type */
    3819              :   "lim", /* name */
    3820              :   OPTGROUP_LOOP, /* optinfo_flags */
    3821              :   TV_LIM, /* tv_id */
    3822              :   PROP_cfg, /* properties_required */
    3823              :   0, /* properties_provided */
    3824              :   0, /* properties_destroyed */
    3825              :   0, /* todo_flags_start */
    3826              :   0, /* todo_flags_finish */
    3827              : };
    3828              : 
    3829              : class pass_lim : public gimple_opt_pass
    3830              : {
    3831              : public:
    3832      1155068 :   pass_lim (gcc::context *ctxt)
    3833      2310136 :     : gimple_opt_pass (pass_data_lim, ctxt)
    3834              :   {}
    3835              : 
    3836              :   /* opt_pass methods: */
    3837       866301 :   opt_pass * clone () final override { return new pass_lim (m_ctxt); }
    3838      1287690 :   bool gate (function *) final override { return flag_tree_loop_im != 0; }
    3839              :   unsigned int execute (function *) final override;
    3840              : 
    3841              : }; // class pass_lim
    3842              : 
    3843              : unsigned int
    3844      1287362 : pass_lim::execute (function *fun)
    3845              : {
    3846      1287362 :   in_loop_pipeline = scev_initialized_p ();
    3847      1287362 :   if (!in_loop_pipeline)
    3848      1045170 :     loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
    3849              : 
    3850      2574724 :   if (number_of_loops (fun) <= 1)
    3851              :     return 0;
    3852       484817 :   unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
    3853              : 
    3854       484817 :   if (!in_loop_pipeline)
    3855       242625 :     loop_optimizer_finalize ();
    3856              :   else
    3857       242192 :     scev_reset ();
    3858              :   return todo;
    3859              : }
    3860              : 
    3861              : } // anon namespace
    3862              : 
    3863              : gimple_opt_pass *
    3864       288767 : make_pass_lim (gcc::context *ctxt)
    3865              : {
    3866       288767 :   return new pass_lim (ctxt);
    3867              : }
    3868              : 
    3869              : 
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.