LCOV - code coverage report
Current view: top level - gcc - tree-ssa-loop-im.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 97.8 % 1675 1638
Test Date: 2026-02-28 14:20:25 Functions: 100.0 % 79 79
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Loop invariant motion.
       2              :    Copyright (C) 2003-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it
       7              : under the terms of the GNU General Public License as published by the
       8              : Free Software Foundation; either version 3, or (at your option) any
       9              : later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT
      12              : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "backend.h"
      24              : #include "tree.h"
      25              : #include "gimple.h"
      26              : #include "cfghooks.h"
      27              : #include "tree-pass.h"
      28              : #include "ssa.h"
      29              : #include "gimple-pretty-print.h"
      30              : #include "fold-const.h"
      31              : #include "cfganal.h"
      32              : #include "tree-eh.h"
      33              : #include "gimplify.h"
      34              : #include "gimple-iterator.h"
      35              : #include "tree-cfg.h"
      36              : #include "tree-ssa-loop-manip.h"
      37              : #include "tree-ssa-loop.h"
      38              : #include "tree-into-ssa.h"
      39              : #include "cfgloop.h"
      40              : #include "tree-affine.h"
      41              : #include "tree-ssa-propagate.h"
      42              : #include "trans-mem.h"
      43              : #include "gimple-fold.h"
      44              : #include "tree-scalar-evolution.h"
      45              : #include "tree-ssa-loop-niter.h"
      46              : #include "alias.h"
      47              : #include "builtins.h"
      48              : #include "tree-dfa.h"
      49              : #include "tree-ssa.h"
      50              : #include "dbgcnt.h"
      51              : #include "insn-codes.h"
      52              : #include "optabs-tree.h"
      53              : 
      54              : /* TODO:  Support for predicated code motion.  I.e.
      55              : 
      56              :    while (1)
      57              :      {
      58              :        if (cond)
      59              :          {
      60              :            a = inv;
      61              :            something;
      62              :          }
      63              :      }
      64              : 
      65              :    Where COND and INV are invariants, but evaluating INV may trap or be
      66              :    invalid from some other reason if !COND.  This may be transformed to
      67              : 
      68              :    if (cond)
      69              :      a = inv;
      70              :    while (1)
      71              :      {
      72              :        if (cond)
      73              :          something;
      74              :      }  */
      75              : 
      76              : /* The auxiliary data kept for each statement.  */
      77              : 
      78              : struct lim_aux_data
      79              : {
      80              :   class loop *max_loop; /* The outermost loop in that the statement
      81              :                                    is invariant.  */
      82              : 
      83              :   class loop *tgt_loop; /* The loop out of that we want to move the
      84              :                                    invariant.  */
      85              : 
      86              :   class loop *always_executed_in;
      87              :                                 /* The outermost loop for that we are sure
      88              :                                    the statement is executed if the loop
      89              :                                    is entered.  */
      90              : 
      91              :   unsigned cost;                /* Cost of the computation performed by the
      92              :                                    statement.  */
      93              : 
      94              :   unsigned ref;                 /* The simple_mem_ref in this stmt or 0.  */
      95              : 
      96              :   vec<gimple *> depends;  /* Vector of statements that must be also
      97              :                                    hoisted out of the loop when this statement
      98              :                                    is hoisted; i.e. those that define the
      99              :                                    operands of the statement and are inside of
     100              :                                    the MAX_LOOP loop.  */
     101              : };
     102              : 
     103              : /* Maps statements to their lim_aux_data.  */
     104              : 
     105              : static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
     106              : 
     107              : /* Description of a memory reference location.  */
     108              : 
     109              : struct mem_ref_loc
     110              : {
     111              :   tree *ref;                    /* The reference itself.  */
     112              :   gimple *stmt;                 /* The statement in that it occurs.  */
     113              : };
     114              : 
     115              : 
     116              : /* Description of a memory reference.  */
     117              : 
     118              : class im_mem_ref
     119              : {
     120              : public:
     121              :   unsigned id : 30;             /* ID assigned to the memory reference
     122              :                                    (its index in memory_accesses.refs_list)  */
     123              :   unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
     124              :   unsigned ref_decomposed : 1;  /* Whether the ref was hashed from mem.  */
     125              :   hashval_t hash;               /* Its hash value.  */
     126              : 
     127              :   /* The memory access itself and associated caching of alias-oracle
     128              :      query meta-data.  We are using mem.ref == error_mark_node for the
     129              :      case the reference is represented by its single access stmt
     130              :      in accesses_in_loop[0].  */
     131              :   ao_ref mem;
     132              : 
     133              :   bitmap stored;                /* The set of loops in that this memory location
     134              :                                    is stored to.  */
     135              :   bitmap loaded;                /* The set of loops in that this memory location
     136              :                                    is loaded from.  */
     137              :   vec<mem_ref_loc>                accesses_in_loop;
     138              :                                 /* The locations of the accesses.  */
     139              : 
     140              :   /* The following set is computed on demand.  */
     141              :   bitmap_head dep_loop;         /* The set of loops in that the memory
     142              :                                    reference is {in,}dependent in
     143              :                                    different modes.  */
     144              : };
     145              : 
     146              : static bool in_loop_pipeline;
     147              : 
     148              : /* We use six bits per loop in the ref->dep_loop bitmap to record
     149              :    the dep_kind x dep_state combinations.  */
     150              : 
     151              : enum dep_kind { lim_raw, sm_war, sm_waw };
     152              : enum dep_state { dep_unknown, dep_independent, dep_dependent };
     153              : 
     154              : /* coldest outermost loop for given loop.  */
     155              : vec<class loop *> coldest_outermost_loop;
     156              : /* hotter outer loop nearest to given loop.  */
     157              : vec<class loop *> hotter_than_inner_loop;
     158              : 
     159              : /* Populate the loop dependence cache of REF for LOOP, KIND with STATE.  */
     160              : 
     161              : static void
     162      2036576 : record_loop_dependence (class loop *loop, im_mem_ref *ref,
     163              :                         dep_kind kind, dep_state state)
     164              : {
     165      2036576 :   gcc_assert (state != dep_unknown);
     166      2036576 :   unsigned bit = 6 * loop->num + kind * 2 + state == dep_dependent ? 1 : 0;
     167      2036576 :   bitmap_set_bit (&ref->dep_loop, bit);
     168      2036576 : }
     169              : 
     170              : /* Query the loop dependence cache of REF for LOOP, KIND.  */
     171              : 
     172              : static dep_state
     173       641454 : query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
     174              : {
     175       641454 :   unsigned first_bit = 6 * loop->num + kind * 2;
     176       641454 :   if (bitmap_bit_p (&ref->dep_loop, first_bit))
     177              :     return dep_independent;
     178       641454 :   else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
     179            0 :     return dep_dependent;
     180              :   return dep_unknown;
     181              : }
     182              : 
     183              : /* Mem_ref hashtable helpers.  */
     184              : 
     185              : struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
     186              : {
     187              :   typedef ao_ref *compare_type;
     188              :   static inline hashval_t hash (const im_mem_ref *);
     189              :   static inline bool equal (const im_mem_ref *, const ao_ref *);
     190              : };
     191              : 
     192              : /* A hash function for class im_mem_ref object OBJ.  */
     193              : 
     194              : inline hashval_t
     195     10419200 : mem_ref_hasher::hash (const im_mem_ref *mem)
     196              : {
     197     10419200 :   return mem->hash;
     198              : }
     199              : 
     200              : /* An equality function for class im_mem_ref object MEM1 with
     201              :    memory reference OBJ2.  */
     202              : 
     203              : inline bool
     204     12166202 : mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
     205              : {
     206     12166202 :   if (obj2->max_size_known_p ())
     207     10729255 :     return (mem1->ref_decomposed
     208     10273327 :             && ((TREE_CODE (mem1->mem.base) == MEM_REF
     209      4575970 :                  && TREE_CODE (obj2->base) == MEM_REF
     210      3023571 :                  && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
     211      3023571 :                                      TREE_OPERAND (obj2->base, 0), 0)
     212     12590008 :                  && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
     213              :                               mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
     214      9851957 :                 || (operand_equal_p (mem1->mem.base, obj2->base, 0)
     215       797284 :                     && known_eq (mem1->mem.offset, obj2->offset)))
     216       876999 :             && known_eq (mem1->mem.size, obj2->size)
     217       874626 :             && known_eq (mem1->mem.max_size, obj2->max_size)
     218       874626 :             && mem1->mem.volatile_p == obj2->volatile_p
     219       874610 :             && (mem1->mem.ref_alias_set == obj2->ref_alias_set
     220              :                 /* We are not canonicalizing alias-sets but for the
     221              :                    special-case we didn't canonicalize yet and the
     222              :                    incoming ref is a alias-set zero MEM we pick
     223              :                    the correct one already.  */
     224        16005 :                 || (!mem1->ref_canonical
     225        14852 :                     && (TREE_CODE (obj2->ref) == MEM_REF
     226        14852 :                         || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
     227         6873 :                     && obj2->ref_alias_set == 0)
     228              :                 /* Likewise if there's a canonical ref with alias-set zero.  */
     229        13112 :                 || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
     230     11590780 :             && types_compatible_p (TREE_TYPE (mem1->mem.ref),
     231       861525 :                                    TREE_TYPE (obj2->ref)));
     232              :   else
     233      1436947 :     return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
     234              : }
     235              : 
     236              : 
     237              : /* Description of memory accesses in loops.  */
     238              : 
     239              : static struct
     240              : {
     241              :   /* The hash table of memory references accessed in loops.  */
     242              :   hash_table<mem_ref_hasher> *refs;
     243              : 
     244              :   /* The list of memory references.  */
     245              :   vec<im_mem_ref *> refs_list;
     246              : 
     247              :   /* The set of memory references accessed in each loop.  */
     248              :   vec<bitmap_head> refs_loaded_in_loop;
     249              : 
     250              :   /* The set of memory references stored in each loop.  */
     251              :   vec<bitmap_head> refs_stored_in_loop;
     252              : 
     253              :   /* The set of memory references stored in each loop, including subloops .  */
     254              :   vec<bitmap_head> all_refs_stored_in_loop;
     255              : 
     256              :   /* Cache for expanding memory addresses.  */
     257              :   hash_map<tree, name_expansion *> *ttae_cache;
     258              : } memory_accesses;
     259              : 
     260              : /* Obstack for the bitmaps in the above data structures.  */
     261              : static bitmap_obstack lim_bitmap_obstack;
     262              : static obstack mem_ref_obstack;
     263              : 
     264              : static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
     265              : static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
     266              : static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
     267              : 
     268              : /* Minimum cost of an expensive expression.  */
     269              : #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
     270              : 
     271              : /* The outermost loop for which execution of the header guarantees that the
     272              :    block will be executed.  */
     273              : #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
     274              : #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
     275              : 
     276              : /* ID of the shared unanalyzable mem.  */
     277              : #define UNANALYZABLE_MEM_ID 0
     278              : 
     279              : /* Whether the reference was analyzable.  */
     280              : #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
     281              : 
     282              : static struct lim_aux_data *
     283     17483060 : init_lim_data (gimple *stmt)
     284              : {
     285     17483060 :   lim_aux_data *p = XCNEW (struct lim_aux_data);
     286     17483060 :   lim_aux_data_map->put (stmt, p);
     287              : 
     288     17483060 :   return p;
     289              : }
     290              : 
     291              : static struct lim_aux_data *
     292     93018227 : get_lim_data (gimple *stmt)
     293              : {
     294     93018227 :   lim_aux_data **p = lim_aux_data_map->get (stmt);
     295     93018227 :   if (!p)
     296              :     return NULL;
     297              : 
     298     47566915 :   return *p;
     299              : }
     300              : 
     301              : /* Releases the memory occupied by DATA.  */
     302              : 
     303              : static void
     304     17482883 : free_lim_aux_data (struct lim_aux_data *data)
     305              : {
     306            0 :   data->depends.release ();
     307     17482883 :   free (data);
     308            0 : }
     309              : 
     310              : static void
     311     17482883 : clear_lim_data (gimple *stmt)
     312              : {
     313     17482883 :   lim_aux_data **p = lim_aux_data_map->get (stmt);
     314     17482883 :   if (!p)
     315              :     return;
     316              : 
     317     17482883 :   free_lim_aux_data (*p);
     318     17482883 :   *p = NULL;
     319              : }
     320              : 
     321              : 
     322              : /* The possibilities of statement movement.  */
     323              : enum move_pos
     324              :   {
     325              :     MOVE_IMPOSSIBLE,            /* No movement -- side effect expression.  */
     326              :     MOVE_PRESERVE_EXECUTION,    /* Must not cause the non-executed statement
     327              :                                    become executed -- memory accesses, ... */
     328              :     MOVE_POSSIBLE               /* Unlimited movement.  */
     329              :   };
     330              : 
     331              : 
     332              : /* If it is possible to hoist the statement STMT unconditionally,
     333              :    returns MOVE_POSSIBLE.
     334              :    If it is possible to hoist the statement STMT, but we must avoid making
     335              :    it executed if it would not be executed in the original program (e.g.
     336              :    because it may trap), return MOVE_PRESERVE_EXECUTION.
     337              :    Otherwise return MOVE_IMPOSSIBLE.  */
     338              : 
     339              : static enum move_pos
     340     45765181 : movement_possibility_1 (gimple *stmt)
     341              : {
     342     45765181 :   tree lhs;
     343     45765181 :   enum move_pos ret = MOVE_POSSIBLE;
     344              : 
     345     45765181 :   if (flag_unswitch_loops
     346     45765181 :       && gimple_code (stmt) == GIMPLE_COND)
     347              :     {
     348              :       /* If we perform unswitching, force the operands of the invariant
     349              :          condition to be moved out of the loop.  */
     350              :       return MOVE_POSSIBLE;
     351              :     }
     352              : 
     353     45417969 :   if (gimple_code (stmt) == GIMPLE_PHI
     354      2592481 :       && gimple_phi_num_args (stmt) <= 2
     355      4851916 :       && !virtual_operand_p (gimple_phi_result (stmt))
     356     46865547 :       && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
     357              :     return MOVE_POSSIBLE;
     358              : 
     359     43970646 :   if (gimple_get_lhs (stmt) == NULL_TREE)
     360              :     return MOVE_IMPOSSIBLE;
     361              : 
     362     31102190 :   if (gimple_vdef (stmt))
     363              :     return MOVE_IMPOSSIBLE;
     364              : 
     365     13164440 :   if (stmt_ends_bb_p (stmt)
     366     11991531 :       || gimple_has_volatile_ops (stmt)
     367     13110563 :       || gimple_has_side_effects (stmt)
     368     26269120 :       || stmt_could_throw_p (cfun, stmt))
     369       457804 :     return MOVE_IMPOSSIBLE;
     370              : 
     371     12706636 :   if (is_gimple_call (stmt))
     372              :     {
     373              :       /* While pure or const call is guaranteed to have no side effects, we
     374              :          cannot move it arbitrarily.  Consider code like
     375              : 
     376              :          char *s = something ();
     377              : 
     378              :          while (1)
     379              :            {
     380              :              if (s)
     381              :                t = strlen (s);
     382              :              else
     383              :                t = 0;
     384              :            }
     385              : 
     386              :          Here the strlen call cannot be moved out of the loop, even though
     387              :          s is invariant.  In addition to possibly creating a call with
     388              :          invalid arguments, moving out a function call that is not executed
     389              :          may cause performance regressions in case the call is costly and
     390              :          not executed at all.  */
     391       130102 :       ret = MOVE_PRESERVE_EXECUTION;
     392       130102 :       lhs = gimple_call_lhs (stmt);
     393              :     }
     394     12576534 :   else if (is_gimple_assign (stmt))
     395     11431376 :     lhs = gimple_assign_lhs (stmt);
     396              :   else
     397              :     return MOVE_IMPOSSIBLE;
     398              : 
     399     11561478 :   if (TREE_CODE (lhs) == SSA_NAME
     400     11561478 :       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
     401              :     return MOVE_IMPOSSIBLE;
     402              : 
     403     11561270 :   if (TREE_CODE (lhs) != SSA_NAME
     404     11561270 :       || gimple_could_trap_p (stmt))
     405      2382287 :     return MOVE_PRESERVE_EXECUTION;
     406              : 
     407      9178983 :   if (is_gimple_assign (stmt))
     408              :     {
     409      9055983 :       auto code = gimple_assign_rhs_code (stmt);
     410      9055983 :       tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
     411              :       /* For shifts and rotates and possibly out-of-bound shift operands
     412              :          we currently cannot rewrite them into something unconditionally
     413              :          well-defined.  */
     414      9055983 :       if ((code == LSHIFT_EXPR
     415              :            || code == RSHIFT_EXPR
     416              :            || code == LROTATE_EXPR
     417      9055983 :            || code == RROTATE_EXPR)
     418      9055983 :           && (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
     419              :               /* We cannot use ranges at 'stmt' here.  */
     420       121094 :               || wi::ltu_p (wi::to_wide (gimple_assign_rhs2 (stmt)),
     421      9015608 :                             element_precision (type))))
     422       161469 :         ret = MOVE_PRESERVE_EXECUTION;
     423              :     }
     424              : 
     425              :   /* Non local loads in a transaction cannot be hoisted out.  Well,
     426              :      unless the load happens on every path out of the loop, but we
     427              :      don't take this into account yet.  */
     428      9178983 :   if (flag_tm
     429          434 :       && gimple_in_transaction (stmt)
     430      9179086 :       && gimple_assign_single_p (stmt))
     431              :     {
     432           21 :       tree rhs = gimple_assign_rhs1 (stmt);
     433           21 :       if (DECL_P (rhs) && is_global_var (rhs))
     434              :         {
     435           18 :           if (dump_file)
     436              :             {
     437            2 :               fprintf (dump_file, "Cannot hoist conditional load of ");
     438            2 :               print_generic_expr (dump_file, rhs, TDF_SLIM);
     439            2 :               fprintf (dump_file, " because it is in a transaction.\n");
     440              :             }
     441           18 :           return MOVE_IMPOSSIBLE;
     442              :         }
     443              :     }
     444              : 
     445              :   return ret;
     446              : }
     447              : 
     448              : static enum move_pos
     449     45765181 : movement_possibility (gimple *stmt)
     450              : {
     451     45765181 :   enum move_pos pos = movement_possibility_1 (stmt);
     452     45765181 :   if (pos == MOVE_POSSIBLE)
     453              :     {
     454     10689031 :       use_operand_p use_p;
     455     10689031 :       ssa_op_iter ssa_iter;
     456     34706789 :       FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE)
     457     13346737 :         if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME
     458     13346737 :             && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p)))
     459        18010 :           return MOVE_PRESERVE_EXECUTION;
     460              :     }
     461              :   return pos;
     462              : }
     463              : 
     464              : 
     465              : /* Compare the profile count inequality of bb and loop's preheader, it is
     466              :    three-state as stated in profile-count.h, FALSE is returned if inequality
     467              :    cannot be decided.  */
     468              : bool
     469     13849886 : bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
     470              : {
     471     13849886 :   gcc_assert (bb && loop);
     472     13849886 :   return bb->count < loop_preheader_edge (loop)->src->count;
     473              : }
     474              : 
     475              : /* Check coldest loop between OUTERMOST_LOOP and LOOP by comparing profile
     476              :    count.
     477              :   It does three steps check:
     478              :   1) Check whether CURR_BB is cold in it's own loop_father, if it is cold, just
     479              :   return NULL which means it should not be moved out at all;
     480              :   2) CURR_BB is NOT cold, check if pre-computed COLDEST_LOOP is outside of
     481              :   OUTERMOST_LOOP, if it is inside of OUTERMOST_LOOP, return the COLDEST_LOOP;
     482              :   3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
     483              :   hotter loop between OUTERMOST_LOOP and loop in pre-computed
     484              :   HOTTER_THAN_INNER_LOOP, return it's nested inner loop, otherwise return
     485              :   OUTERMOST_LOOP.
     486              :   At last, the coldest_loop is inside of OUTERMOST_LOOP, just return it as
     487              :   the hoist target.  */
     488              : 
     489              : static class loop *
     490     12226859 : get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
     491              :                       basic_block curr_bb)
     492              : {
     493     12226859 :   gcc_assert (outermost_loop == loop
     494              :               || flow_loop_nested_p (outermost_loop, loop));
     495              : 
     496              :   /* If bb_colder_than_loop_preheader returns false due to three-state
     497              :     comparision, OUTERMOST_LOOP is returned finally to preserve the behavior.
     498              :     Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP.  */
     499     12226859 :   if (curr_bb && bb_colder_than_loop_preheader (curr_bb, loop))
     500              :     return NULL;
     501              : 
     502     11273292 :   class loop *coldest_loop = coldest_outermost_loop[loop->num];
     503     22546584 :   if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
     504              :     {
     505       245922 :       class loop *hotter_loop = hotter_than_inner_loop[loop->num];
     506       245922 :       if (!hotter_loop
     507       252571 :           || loop_depth (hotter_loop) < loop_depth (outermost_loop))
     508              :         return outermost_loop;
     509              : 
     510              :       /*  hotter_loop is between OUTERMOST_LOOP and LOOP like:
     511              :         [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
     512              :         hotter_loop, second_coldest_loop, ..., loop]
     513              :         return second_coldest_loop to be the hoist target.  */
     514            3 :       class loop *aloop;
     515            3 :       for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
     516            3 :         if (aloop == loop || flow_loop_nested_p (aloop, loop))
     517            3 :           return aloop;
     518              :     }
     519              :   return coldest_loop;
     520              : }
     521              : 
     522              : /* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
     523              :    loop to that we could move the expression using DEF if it did not have
     524              :    other operands, i.e. the outermost loop enclosing LOOP in that the value
     525              :    of DEF is invariant.  */
     526              : 
     527              : static class loop *
     528     19642059 : outermost_invariant_loop (tree def, class loop *loop)
     529              : {
     530     19642059 :   gimple *def_stmt;
     531     19642059 :   basic_block def_bb;
     532     19642059 :   class loop *max_loop;
     533     19642059 :   struct lim_aux_data *lim_data;
     534              : 
     535     19642059 :   if (!def)
     536       955927 :     return superloop_at_depth (loop, 1);
     537              : 
     538     18686132 :   if (TREE_CODE (def) != SSA_NAME)
     539              :     {
     540      3924227 :       gcc_assert (is_gimple_min_invariant (def));
     541      3924227 :       return superloop_at_depth (loop, 1);
     542              :     }
     543              : 
     544     14761905 :   def_stmt = SSA_NAME_DEF_STMT (def);
     545     14761905 :   def_bb = gimple_bb (def_stmt);
     546     14761905 :   if (!def_bb)
     547        71150 :     return superloop_at_depth (loop, 1);
     548              : 
     549     14690755 :   max_loop = find_common_loop (loop, def_bb->loop_father);
     550              : 
     551     14690755 :   lim_data = get_lim_data (def_stmt);
     552     14690755 :   if (lim_data != NULL && lim_data->max_loop != NULL)
     553       686603 :     max_loop = find_common_loop (max_loop,
     554              :                                  loop_outer (lim_data->max_loop));
     555     14690755 :   if (max_loop == loop)
     556              :     return NULL;
     557      1836249 :   max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
     558              : 
     559      1836249 :   return max_loop;
     560              : }
     561              : 
     562              : /* DATA is a structure containing information associated with a statement
     563              :    inside LOOP.  DEF is one of the operands of this statement.
     564              : 
     565              :    Find the outermost loop enclosing LOOP in that value of DEF is invariant
     566              :    and record this in DATA->max_loop field.  If DEF itself is defined inside
     567              :    this loop as well (i.e. we need to hoist it out of the loop if we want
     568              :    to hoist the statement represented by DATA), record the statement in that
     569              :    DEF is defined to the DATA->depends list.  Additionally if ADD_COST is true,
     570              :    add the cost of the computation of DEF to the DATA->cost.
     571              : 
     572              :    If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */
     573              : 
     574              : static bool
     575     11477216 : add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
     576              :                 bool add_cost)
     577              : {
     578     11477216 :   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
     579     11477216 :   basic_block def_bb = gimple_bb (def_stmt);
     580     11477216 :   class loop *max_loop;
     581     11477216 :   struct lim_aux_data *def_data;
     582              : 
     583     11477216 :   if (!def_bb)
     584              :     return true;
     585              : 
     586     11159296 :   max_loop = outermost_invariant_loop (def, loop);
     587     11159296 :   if (!max_loop)
     588              :     return false;
     589              : 
     590      1381943 :   if (flow_loop_nested_p (data->max_loop, max_loop))
     591       423413 :     data->max_loop = max_loop;
     592              : 
     593      1381943 :   def_data = get_lim_data (def_stmt);
     594      1381943 :   if (!def_data)
     595              :     return true;
     596              : 
     597       675696 :   if (add_cost
     598              :       /* Only add the cost if the statement defining DEF is inside LOOP,
     599              :          i.e. if it is likely that by moving the invariants dependent
     600              :          on it, we will be able to avoid creating a new register for
     601              :          it (since it will be only used in these dependent invariants).  */
     602       655032 :       && def_bb->loop_father == loop)
     603       458910 :     data->cost += def_data->cost;
     604              : 
     605       675696 :   data->depends.safe_push (def_stmt);
     606              : 
     607       675696 :   return true;
     608              : }
     609              : 
     610              : /* Returns an estimate for a cost of statement STMT.  The values here
     611              :    are just ad-hoc constants, similar to costs for inlining.  */
     612              : 
     613              : static unsigned
     614       766458 : stmt_cost (gimple *stmt)
     615              : {
     616              :   /* Always try to create possibilities for unswitching.  */
     617       766458 :   if (gimple_code (stmt) == GIMPLE_COND
     618       766458 :       || gimple_code (stmt) == GIMPLE_PHI)
     619        12725 :     return LIM_EXPENSIVE;
     620              : 
     621              :   /* We should be hoisting calls if possible.  */
     622       753733 :   if (is_gimple_call (stmt))
     623              :     {
     624          812 :       tree fndecl;
     625              : 
     626              :       /* Unless the call is a builtin_constant_p; this always folds to a
     627              :          constant, so moving it is useless.  */
     628          812 :       fndecl = gimple_call_fndecl (stmt);
     629          812 :       if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
     630              :         return 0;
     631              : 
     632          812 :       return LIM_EXPENSIVE;
     633              :     }
     634              : 
     635              :   /* Hoisting memory references out should almost surely be a win.  */
     636       752921 :   if (gimple_references_memory_p (stmt))
     637        64804 :     return LIM_EXPENSIVE;
     638              : 
     639       688117 :   if (gimple_code (stmt) != GIMPLE_ASSIGN)
     640              :     return 1;
     641              : 
     642       688117 :   enum tree_code code = gimple_assign_rhs_code (stmt);
     643       688117 :   switch (code)
     644              :     {
     645       118346 :     case MULT_EXPR:
     646       118346 :     case WIDEN_MULT_EXPR:
     647       118346 :     case WIDEN_MULT_PLUS_EXPR:
     648       118346 :     case WIDEN_MULT_MINUS_EXPR:
     649       118346 :     case DOT_PROD_EXPR:
     650       118346 :     case TRUNC_DIV_EXPR:
     651       118346 :     case CEIL_DIV_EXPR:
     652       118346 :     case FLOOR_DIV_EXPR:
     653       118346 :     case ROUND_DIV_EXPR:
     654       118346 :     case EXACT_DIV_EXPR:
     655       118346 :     case CEIL_MOD_EXPR:
     656       118346 :     case FLOOR_MOD_EXPR:
     657       118346 :     case ROUND_MOD_EXPR:
     658       118346 :     case TRUNC_MOD_EXPR:
     659       118346 :     case RDIV_EXPR:
     660              :       /* Division and multiplication are usually expensive.  */
     661       118346 :       return LIM_EXPENSIVE;
     662              : 
     663         4361 :     case LSHIFT_EXPR:
     664         4361 :     case RSHIFT_EXPR:
     665         4361 :     case WIDEN_LSHIFT_EXPR:
     666         4361 :     case LROTATE_EXPR:
     667         4361 :     case RROTATE_EXPR:
     668              :       /* Shifts and rotates are usually expensive.  */
     669         4361 :       return LIM_EXPENSIVE;
     670              : 
     671          468 :     case COND_EXPR:
     672          468 :     case VEC_COND_EXPR:
     673              :       /* Conditionals are expensive.  */
     674          468 :       return LIM_EXPENSIVE;
     675              : 
     676         1336 :     case CONSTRUCTOR:
     677              :       /* Make vector construction cost proportional to the number
     678              :          of elements.  */
     679         1336 :       return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
     680              : 
     681              :     case SSA_NAME:
     682              :     case PAREN_EXPR:
     683              :       /* Whether or not something is wrapped inside a PAREN_EXPR
     684              :          should not change move cost.  Nor should an intermediate
     685              :          unpropagated SSA name copy.  */
     686              :       return 0;
     687              : 
     688       525616 :     default:
     689              :       /* Comparisons are usually expensive.  */
     690       525616 :       if (TREE_CODE_CLASS (code) == tcc_comparison)
     691         7390 :         return LIM_EXPENSIVE;
     692              :       return 1;
     693              :     }
     694              : }
     695              : 
     696              : /* Finds the outermost loop between OUTER and LOOP in that the memory reference
     697              :    REF is independent.  If REF is not independent in LOOP, NULL is returned
     698              :    instead.  */
     699              : 
     700              : static class loop *
     701       738843 : outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
     702              : {
     703       738843 :   class loop *aloop;
     704              : 
     705       738843 :   if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
     706              :     return NULL;
     707              : 
     708        84243 :   for (aloop = outer;
     709       684820 :        aloop != loop;
     710        84243 :        aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
     711         9854 :     if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
     712        99961 :         && ref_indep_loop_p (aloop, ref, lim_raw))
     713              :       return aloop;
     714              : 
     715       586527 :   if (ref_indep_loop_p (loop, ref, lim_raw))
     716              :     return loop;
     717              :   else
     718              :     return NULL;
     719              : }
     720              : 
     721              : /* If there is a simple load or store to a memory reference in STMT, returns
     722              :    the location of the memory reference, and sets IS_STORE according to whether
     723              :    it is a store or load.  Otherwise, returns NULL.  */
     724              : 
     725              : static tree *
     726      6899793 : simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
     727              : {
     728      6899793 :   tree *lhs, *rhs;
     729              : 
     730              :   /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
     731      6899793 :   if (!gimple_assign_single_p (stmt))
     732              :     return NULL;
     733              : 
     734      5651924 :   lhs = gimple_assign_lhs_ptr (stmt);
     735      5651924 :   rhs = gimple_assign_rhs1_ptr (stmt);
     736              : 
     737      8830780 :   if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
     738              :     {
     739      3178856 :       *is_store = false;
     740      3178856 :       return rhs;
     741              :     }
     742      2473068 :   else if (gimple_vdef (stmt)
     743      2473068 :            && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
     744              :     {
     745      2032407 :       *is_store = true;
     746      2032407 :       return lhs;
     747              :     }
     748              :   else
     749       440661 :     return NULL;
     750              : }
     751              : 
     752              : /* From a controlling predicate in DOM determine the arguments from
     753              :    the PHI node PHI that are chosen if the predicate evaluates to
     754              :    true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
     755              :    they are non-NULL.  Returns true if the arguments can be determined,
     756              :    else return false.  */
     757              : 
     758              : static bool
     759        11469 : extract_true_false_args_from_phi (basic_block dom, gphi *phi,
     760              :                                   tree *true_arg_p, tree *false_arg_p)
     761              : {
     762        11469 :   edge te, fe;
     763        11469 :   if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
     764              :                                              &te, &fe))
     765              :     return false;
     766              : 
     767        10580 :   if (true_arg_p)
     768          910 :     *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
     769        10580 :   if (false_arg_p)
     770          910 :     *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
     771              : 
     772              :   return true;
     773              : }
     774              : 
     775              : /* Determine the outermost loop to that it is possible to hoist a statement
     776              :    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
     777              :    the outermost loop in that the value computed by STMT is invariant.
     778              :    If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
     779              :    we preserve the fact whether STMT is executed.  It also fills other related
     780              :    information to LIM_DATA (STMT).
     781              : 
     782              :    The function returns false if STMT cannot be hoisted outside of the loop it
     783              :    is defined in, and true otherwise.  */
     784              : 
     785              : static bool
     786     12183798 : determine_max_movement (gimple *stmt, bool must_preserve_exec)
     787              : {
     788     12183798 :   basic_block bb = gimple_bb (stmt);
     789     12183798 :   class loop *loop = bb->loop_father;
     790     12183798 :   class loop *level;
     791     12183798 :   struct lim_aux_data *lim_data = get_lim_data (stmt);
     792     12183798 :   tree val;
     793     12183798 :   ssa_op_iter iter;
     794              : 
     795     12183798 :   if (must_preserve_exec)
     796      1507129 :     level = ALWAYS_EXECUTED_IN (bb);
     797              :   else
     798     10676669 :     level = superloop_at_depth (loop, 1);
     799     12183798 :   lim_data->max_loop = get_coldest_out_loop (level, loop, bb);
     800     12183798 :   if (!lim_data->max_loop)
     801              :     return false;
     802              : 
     803     11231959 :   if (gphi *phi = dyn_cast <gphi *> (stmt))
     804              :     {
     805      1395112 :       use_operand_p use_p;
     806      1395112 :       unsigned min_cost = UINT_MAX;
     807      1395112 :       unsigned total_cost = 0;
     808      1395112 :       struct lim_aux_data *def_data;
     809              : 
     810              :       /* We will end up promoting dependencies to be unconditionally
     811              :          evaluated.  For this reason the PHI cost (and thus the
     812              :          cost we remove from the loop by doing the invariant motion)
     813              :          is that of the cheapest PHI argument dependency chain.  */
     814      1600051 :       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
     815              :         {
     816      1576390 :           val = USE_FROM_PTR (use_p);
     817              : 
     818      1576390 :           if (TREE_CODE (val) != SSA_NAME)
     819              :             {
     820              :               /* Assign const 1 to constants.  */
     821       127033 :               min_cost = MIN (min_cost, 1);
     822       127033 :               total_cost += 1;
     823       127033 :               continue;
     824              :             }
     825      1449357 :           if (!add_dependency (val, lim_data, loop, false))
     826              :             return false;
     827              : 
     828        77906 :           gimple *def_stmt = SSA_NAME_DEF_STMT (val);
     829        77906 :           if (gimple_bb (def_stmt)
     830        77906 :               && gimple_bb (def_stmt)->loop_father == loop)
     831              :             {
     832         1392 :               def_data = get_lim_data (def_stmt);
     833         1392 :               if (def_data)
     834              :                 {
     835         1392 :                   min_cost = MIN (min_cost, def_data->cost);
     836         1392 :                   total_cost += def_data->cost;
     837              :                 }
     838              :             }
     839              :         }
     840              : 
     841        23661 :       min_cost = MIN (min_cost, total_cost);
     842        23661 :       lim_data->cost += min_cost;
     843              : 
     844        23661 :       if (gimple_phi_num_args (phi) > 1)
     845              :         {
     846        23487 :           basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
     847        23487 :           gimple *cond;
     848        46974 :           if (gsi_end_p (gsi_last_bb (dom)))
     849              :             return false;
     850        15497 :           cond = gsi_stmt (gsi_last_bb (dom));
     851        15497 :           if (gimple_code (cond) != GIMPLE_COND)
     852              :             return false;
     853              :           /* Verify that this is an extended form of a diamond and
     854              :              the PHI arguments are completely controlled by the
     855              :              predicate in DOM.  */
     856        10559 :           if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
     857              :             return false;
     858              : 
     859              :         /* Check if one of the depedent statement is a vector compare whether
     860              :            the target supports it,  otherwise it's invalid to hoist it out of
     861              :            the gcond it belonged to.  */
     862         9670 :         if (VECTOR_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond))))
     863              :           {
     864            5 :             tree type = TREE_TYPE (gimple_cond_lhs (cond));
     865            5 :             auto code = gimple_cond_code (cond);
     866            5 :             if (!target_supports_op_p (type, code, optab_vector))
     867              :               return false;
     868              :           }
     869              : 
     870              :           /* Fold in dependencies and cost of the condition.  */
     871        11364 :           FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
     872              :             {
     873        10343 :               if (!add_dependency (val, lim_data, loop, false))
     874              :                 return false;
     875         1699 :               def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
     876         1699 :               if (def_data)
     877          916 :                 lim_data->cost += def_data->cost;
     878              :             }
     879              : 
     880              :           /* We want to avoid unconditionally executing very expensive
     881              :              operations.  As costs for our dependencies cannot be
     882              :              negative just claim we are not invariand for this case.
     883              :              We also are not sure whether the control-flow inside the
     884              :              loop will vanish.  */
     885         1021 :           if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
     886          132 :               && !(min_cost != 0
     887          132 :                    && total_cost / min_cost <= 2))
     888              :             return false;
     889              : 
     890              :           /* Assume that the control-flow in the loop will vanish.
     891              :              ???  We should verify this and not artificially increase
     892              :              the cost if that is not the case.  */
     893          910 :           lim_data->cost += stmt_cost (stmt);
     894              :         }
     895              : 
     896         1084 :       return true;
     897              :     }
     898              : 
     899              :   /* A stmt that receives abnormal edges cannot be hoisted.  */
     900      9836847 :   if (is_a <gcall *> (stmt)
     901      9836847 :       && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE))
     902              :     return false;
     903              : 
     904     11457078 :   FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
     905     10016506 :     if (!add_dependency (val, lim_data, loop, true))
     906              :       return false;
     907              : 
     908      2869329 :   if (gimple_vuse (stmt))
     909              :     {
     910       739853 :       im_mem_ref *ref
     911       739853 :         = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
     912       739853 :       if (ref
     913       739853 :           && MEM_ANALYZABLE (ref))
     914              :         {
     915       738843 :           lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
     916              :                                                      loop, ref);
     917       738843 :           if (!lim_data->max_loop)
     918              :             return false;
     919              :         }
     920         1010 :       else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
     921              :         return false;
     922              :     }
     923              : 
     924       765548 :   lim_data->cost += stmt_cost (stmt);
     925              : 
     926       765548 :   return true;
     927              : }
     928              : 
     929              : /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
     930              :    and that one of the operands of this statement is computed by STMT.
     931              :    Ensure that STMT (together with all the statements that define its
     932              :    operands) is hoisted at least out of the loop LEVEL.  */
     933              : 
     934              : static void
     935       687832 : set_level (gimple *stmt, class loop *orig_loop, class loop *level)
     936              : {
     937       687832 :   class loop *stmt_loop = gimple_bb (stmt)->loop_father;
     938       687832 :   struct lim_aux_data *lim_data;
     939       687832 :   gimple *dep_stmt;
     940       687832 :   unsigned i;
     941              : 
     942       687832 :   stmt_loop = find_common_loop (orig_loop, stmt_loop);
     943       687832 :   lim_data = get_lim_data (stmt);
     944       687832 :   if (lim_data != NULL && lim_data->tgt_loop != NULL)
     945       170536 :     stmt_loop = find_common_loop (stmt_loop,
     946              :                                   loop_outer (lim_data->tgt_loop));
     947       687832 :   if (flow_loop_nested_p (stmt_loop, level))
     948       687832 :     return;
     949              : 
     950       455979 :   gcc_assert (level == lim_data->max_loop
     951              :               || flow_loop_nested_p (lim_data->max_loop, level));
     952              : 
     953       455979 :   lim_data->tgt_loop = level;
     954       787697 :   FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
     955       331718 :     set_level (dep_stmt, orig_loop, level);
     956              : }
     957              : 
     958              : /* Determines an outermost loop from that we want to hoist the statement STMT.
     959              :    For now we chose the outermost possible loop.  TODO -- use profiling
     960              :    information to set it more sanely.  */
     961              : 
     962              : static void
     963       293628 : set_profitable_level (gimple *stmt)
     964              : {
     965       293628 :   set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
     966       293628 : }
     967              : 
     968              : /* Returns true if STMT is a call that has side effects.  */
     969              : 
     970              : static bool
     971     69703639 : nonpure_call_p (gimple *stmt)
     972              : {
     973            0 :   if (gimple_code (stmt) != GIMPLE_CALL)
     974              :     return false;
     975              : 
     976      2253655 :   return gimple_has_side_effects (stmt);
     977              : }
     978              : 
     979              : /* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
     980              : 
     981              : static gimple *
     982           35 : rewrite_reciprocal (gimple_stmt_iterator *bsi)
     983              : {
     984           35 :   gassign *stmt, *stmt1, *stmt2;
     985           35 :   tree name, lhs, type;
     986           35 :   tree real_one;
     987           35 :   gimple_stmt_iterator gsi;
     988              : 
     989           35 :   stmt = as_a <gassign *> (gsi_stmt (*bsi));
     990           35 :   lhs = gimple_assign_lhs (stmt);
     991           35 :   type = TREE_TYPE (lhs);
     992              : 
     993           35 :   real_one = build_one_cst (type);
     994              : 
     995           35 :   name = make_temp_ssa_name (type, NULL, "reciptmp");
     996           70 :   stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
     997              :                                gimple_assign_rhs2 (stmt));
     998           35 :   stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
     999              :                                gimple_assign_rhs1 (stmt));
    1000              : 
    1001              :   /* Replace division stmt with reciprocal and multiply stmts.
    1002              :      The multiply stmt is not invariant, so update iterator
    1003              :      and avoid rescanning.  */
    1004           35 :   gsi = *bsi;
    1005           35 :   gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
    1006           35 :   gsi_replace (&gsi, stmt2, true);
    1007              : 
    1008              :   /* Continue processing with invariant reciprocal statement.  */
    1009           35 :   return stmt1;
    1010              : }
    1011              : 
    1012              : /* Check if the pattern at *BSI is a bittest of the form
    1013              :    (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
    1014              : 
    1015              : static gimple *
    1016         9737 : rewrite_bittest (gimple_stmt_iterator *bsi)
    1017              : {
    1018         9737 :   gassign *stmt;
    1019         9737 :   gimple *stmt1;
    1020         9737 :   gassign *stmt2;
    1021         9737 :   gimple *use_stmt;
    1022         9737 :   gcond *cond_stmt;
    1023         9737 :   tree lhs, name, t, a, b;
    1024         9737 :   use_operand_p use;
    1025              : 
    1026         9737 :   stmt = as_a <gassign *> (gsi_stmt (*bsi));
    1027         9737 :   lhs = gimple_assign_lhs (stmt);
    1028              : 
    1029              :   /* Verify that the single use of lhs is a comparison against zero.  */
    1030         9737 :   if (TREE_CODE (lhs) != SSA_NAME
    1031         9737 :       || !single_imm_use (lhs, &use, &use_stmt))
    1032          455 :     return stmt;
    1033        14733 :   cond_stmt = dyn_cast <gcond *> (use_stmt);
    1034         5025 :   if (!cond_stmt)
    1035              :     return stmt;
    1036         5025 :   if (gimple_cond_lhs (cond_stmt) != lhs
    1037         4849 :       || (gimple_cond_code (cond_stmt) != NE_EXPR
    1038         2221 :           && gimple_cond_code (cond_stmt) != EQ_EXPR)
    1039         9874 :       || !integer_zerop (gimple_cond_rhs (cond_stmt)))
    1040          206 :     return stmt;
    1041              : 
    1042              :   /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
    1043         4819 :   stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    1044         4819 :   if (gimple_code (stmt1) != GIMPLE_ASSIGN)
    1045              :     return stmt;
    1046              : 
    1047              :   /* There is a conversion in between possibly inserted by fold.  */
    1048         4748 :   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
    1049              :     {
    1050          256 :       t = gimple_assign_rhs1 (stmt1);
    1051          256 :       if (TREE_CODE (t) != SSA_NAME
    1052          256 :           || !has_single_use (t))
    1053              :         return stmt;
    1054          180 :       stmt1 = SSA_NAME_DEF_STMT (t);
    1055          180 :       if (gimple_code (stmt1) != GIMPLE_ASSIGN)
    1056              :         return stmt;
    1057              :     }
    1058              : 
    1059              :   /* Verify that B is loop invariant but A is not.  Verify that with
    1060              :      all the stmt walking we are still in the same loop.  */
    1061         4630 :   if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
    1062        10980 :       || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
    1063              :     return stmt;
    1064              : 
    1065         3175 :   a = gimple_assign_rhs1 (stmt1);
    1066         3175 :   b = gimple_assign_rhs2 (stmt1);
    1067              : 
    1068         3175 :   if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
    1069         3204 :       && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
    1070              :     {
    1071           29 :       gimple_stmt_iterator rsi;
    1072              : 
    1073              :       /* 1 << B */
    1074           29 :       t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
    1075              :                        build_int_cst (TREE_TYPE (a), 1), b);
    1076           29 :       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
    1077           29 :       stmt1 = gimple_build_assign (name, t);
    1078              : 
    1079              :       /* A & (1 << B) */
    1080           29 :       t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
    1081           29 :       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
    1082           29 :       stmt2 = gimple_build_assign (name, t);
    1083              : 
    1084              :       /* Replace the SSA_NAME we compare against zero.  Adjust
    1085              :          the type of zero accordingly.  */
    1086           29 :       SET_USE (use, name);
    1087           29 :       gimple_cond_set_rhs (cond_stmt,
    1088           29 :                            build_int_cst_type (TREE_TYPE (name),
    1089              :                                                0));
    1090              : 
    1091              :       /* Don't use gsi_replace here, none of the new assignments sets
    1092              :          the variable originally set in stmt.  Move bsi to stmt1, and
    1093              :          then remove the original stmt, so that we get a chance to
    1094              :          retain debug info for it.  */
    1095           29 :       rsi = *bsi;
    1096           29 :       gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
    1097           29 :       gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
    1098           29 :       gimple *to_release = gsi_stmt (rsi);
    1099           29 :       gsi_remove (&rsi, true);
    1100           29 :       release_defs (to_release);
    1101              : 
    1102           29 :       return stmt1;
    1103              :     }
    1104              : 
    1105              :   return stmt;
    1106              : }
    1107              : 
    1108              : /* Determine the outermost loops in that statements in basic block BB are
    1109              :    invariant, and record them to the LIM_DATA associated with the
    1110              :    statements.  */
    1111              : 
    1112              : static void
    1113     14397423 : compute_invariantness (basic_block bb)
    1114              : {
    1115     14397423 :   enum move_pos pos;
    1116     14397423 :   gimple_stmt_iterator bsi;
    1117     14397423 :   gimple *stmt;
    1118     14397423 :   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
    1119     14397423 :   class loop *outermost = ALWAYS_EXECUTED_IN (bb);
    1120     14397423 :   struct lim_aux_data *lim_data;
    1121              : 
    1122     14397423 :   if (!loop_outer (bb->loop_father))
    1123      8369714 :     return;
    1124              : 
    1125      6027709 :   if (dump_file && (dump_flags & TDF_DETAILS))
    1126          435 :     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
    1127          435 :              bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
    1128              : 
    1129              :   /* Look at PHI nodes, but only if there is at most two.
    1130              :      ???  We could relax this further by post-processing the inserted
    1131              :      code and transforming adjacent cond-exprs with the same predicate
    1132              :      to control flow again.  */
    1133      6027709 :   bsi = gsi_start_phis (bb);
    1134      6027709 :   if (!gsi_end_p (bsi)
    1135      6027709 :       && ((gsi_next (&bsi), gsi_end_p (bsi))
    1136      1369283 :           || (gsi_next (&bsi), gsi_end_p (bsi))))
    1137      4302028 :     for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    1138              :       {
    1139      2592481 :         stmt = gsi_stmt (bsi);
    1140              : 
    1141      2592481 :         pos = movement_possibility (stmt);
    1142      2592481 :         if (pos == MOVE_IMPOSSIBLE)
    1143      1145158 :           continue;
    1144              : 
    1145      1447323 :         lim_data = get_lim_data (stmt);
    1146      1447323 :         if (! lim_data)
    1147      1447323 :           lim_data = init_lim_data (stmt);
    1148      1447323 :         lim_data->always_executed_in = outermost;
    1149              : 
    1150      1447323 :         if (!determine_max_movement (stmt, false))
    1151              :           {
    1152      1446239 :             lim_data->max_loop = NULL;
    1153      1446239 :             continue;
    1154              :           }
    1155              : 
    1156         1084 :         if (dump_file && (dump_flags & TDF_DETAILS))
    1157              :           {
    1158            2 :             print_gimple_stmt (dump_file, stmt, 2);
    1159            2 :             fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
    1160            2 :                      loop_depth (lim_data->max_loop),
    1161              :                      lim_data->cost);
    1162              :           }
    1163              : 
    1164         1084 :         if (lim_data->cost >= LIM_EXPENSIVE)
    1165          913 :           set_profitable_level (stmt);
    1166              :       }
    1167              : 
    1168     55228118 :   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    1169              :     {
    1170     43172700 :       stmt = gsi_stmt (bsi);
    1171              : 
    1172     43172700 :       pos = movement_possibility (stmt);
    1173     43172700 :       if (pos == MOVE_IMPOSSIBLE)
    1174              :         {
    1175     32439621 :           if (nonpure_call_p (stmt))
    1176              :             {
    1177              :               maybe_never = true;
    1178              :               outermost = NULL;
    1179              :             }
    1180              :           /* Make sure to note always_executed_in for stores to make
    1181              :              store-motion work.  */
    1182     30092839 :           else if (stmt_makes_single_store (stmt))
    1183              :             {
    1184      2461558 :               struct lim_aux_data *lim_data = get_lim_data (stmt);
    1185      2461558 :               if (! lim_data)
    1186            0 :                 lim_data = init_lim_data (stmt);
    1187      2461558 :               lim_data->always_executed_in = outermost;
    1188              :             }
    1189     31264236 :           continue;
    1190     31264236 :         }
    1191              : 
    1192     11908464 :       if (is_gimple_assign (stmt)
    1193     11908464 :           && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
    1194              :               == GIMPLE_BINARY_RHS))
    1195              :         {
    1196      5748814 :           tree op0 = gimple_assign_rhs1 (stmt);
    1197      5748814 :           tree op1 = gimple_assign_rhs2 (stmt);
    1198     11497628 :           class loop *ol1 = outermost_invariant_loop (op1,
    1199              :                                         loop_containing_stmt (stmt));
    1200              : 
    1201              :           /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
    1202              :              to be hoisted out of loop, saving expensive divide.  */
    1203      5748814 :           if (pos == MOVE_POSSIBLE
    1204      5178975 :               && gimple_assign_rhs_code (stmt) == RDIV_EXPR
    1205          940 :               && flag_unsafe_math_optimizations
    1206          380 :               && !flag_trapping_math
    1207          380 :               && ol1 != NULL
    1208      5748852 :               && outermost_invariant_loop (op0, ol1) == NULL)
    1209           35 :             stmt = rewrite_reciprocal (&bsi);
    1210              : 
    1211              :           /* If the shift count is invariant, convert (A >> B) & 1 to
    1212              :              A & (1 << B) allowing the bit mask to be hoisted out of the loop
    1213              :              saving an expensive shift.  */
    1214      5748814 :           if (pos == MOVE_POSSIBLE
    1215      5178975 :               && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
    1216       189522 :               && integer_onep (op1)
    1217        19139 :               && TREE_CODE (op0) == SSA_NAME
    1218      5767953 :               && has_single_use (op0))
    1219         9737 :             stmt = rewrite_bittest (&bsi);
    1220              :         }
    1221              : 
    1222     11908464 :       lim_data = get_lim_data (stmt);
    1223     11908464 :       if (! lim_data)
    1224      9093632 :         lim_data = init_lim_data (stmt);
    1225     11908464 :       lim_data->always_executed_in = outermost;
    1226              : 
    1227     11908464 :       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
    1228      1171989 :         continue;
    1229              : 
    1230     10736475 :       if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
    1231              :         {
    1232      9970927 :           lim_data->max_loop = NULL;
    1233      9970927 :           continue;
    1234              :         }
    1235              : 
    1236       765548 :       if (dump_file && (dump_flags & TDF_DETAILS))
    1237              :         {
    1238          280 :           print_gimple_stmt (dump_file, stmt, 2);
    1239          280 :           fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
    1240          280 :                    loop_depth (lim_data->max_loop),
    1241              :                    lim_data->cost);
    1242              :         }
    1243              : 
    1244       765548 :       if (lim_data->cost >= LIM_EXPENSIVE)
    1245       292715 :         set_profitable_level (stmt);
    1246              :       /* When we run before PRE and PRE is active hoist all expressions
    1247              :          to the always executed loop since PRE would do so anyway
    1248              :          and we can preserve range info while PRE cannot.  */
    1249       472833 :       else if (flag_tree_pre && !in_loop_pipeline
    1250       101100 :                && outermost)
    1251              :         {
    1252        59581 :           class loop *mloop = lim_data->max_loop;
    1253       178743 :           if (loop_depth (outermost) > loop_depth (mloop))
    1254              :             {
    1255         8481 :               mloop = outermost;
    1256         8481 :               if (dump_file && (dump_flags & TDF_DETAILS))
    1257            1 :                 fprintf (dump_file, "  constraining to loop depth %d\n\n\n",
    1258              :                          loop_depth (mloop));
    1259              :             }
    1260        59581 :           set_level (stmt, bb->loop_father, mloop);
    1261              :         }
    1262              :     }
    1263              : }
    1264              : 
    1265              : /* Hoist the statements in basic block BB out of the loops prescribed by
    1266              :    data stored in LIM_DATA structures associated with each statement.  Callback
    1267              :    for walk_dominator_tree.  */
    1268              : 
    1269              : unsigned int
    1270     14442346 : move_computations_worker (basic_block bb)
    1271              : {
    1272     14442346 :   class loop *level;
    1273     14442346 :   unsigned cost = 0;
    1274     14442346 :   struct lim_aux_data *lim_data;
    1275     14442346 :   unsigned int todo = 0;
    1276              : 
    1277     14442346 :   if (!loop_outer (bb->loop_father))
    1278              :     return todo;
    1279              : 
    1280     10559915 :   for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
    1281              :     {
    1282      4526034 :       gassign *new_stmt;
    1283      4526034 :       gphi *stmt = bsi.phi ();
    1284              : 
    1285      4526034 :       lim_data = get_lim_data (stmt);
    1286      4526034 :       if (lim_data == NULL)
    1287              :         {
    1288      3078711 :           gsi_next (&bsi);
    1289      3078711 :           continue;
    1290              :         }
    1291              : 
    1292      1447323 :       cost = lim_data->cost;
    1293      1447323 :       level = lim_data->tgt_loop;
    1294      1447323 :       clear_lim_data (stmt);
    1295              : 
    1296      1447323 :       if (!level)
    1297              :         {
    1298      1446408 :           gsi_next (&bsi);
    1299      1446408 :           continue;
    1300              :         }
    1301              : 
    1302          915 :       if (dump_file && (dump_flags & TDF_DETAILS))
    1303              :         {
    1304            2 :           fprintf (dump_file, "Moving PHI node\n");
    1305            2 :           print_gimple_stmt (dump_file, stmt, 0);
    1306            2 :           fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
    1307              :                    cost, level->num);
    1308              :         }
    1309              : 
    1310          915 :       if (gimple_phi_num_args (stmt) == 1)
    1311              :         {
    1312            5 :           tree arg = PHI_ARG_DEF (stmt, 0);
    1313            5 :           new_stmt = gimple_build_assign (gimple_phi_result (stmt),
    1314            5 :                                           TREE_CODE (arg), arg);
    1315              :         }
    1316              :       else
    1317              :         {
    1318          910 :           basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
    1319          910 :           gimple *cond = gsi_stmt (gsi_last_bb (dom));
    1320          910 :           tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
    1321              :           /* Get the PHI arguments corresponding to the true and false
    1322              :              edges of COND.  */
    1323          910 :           extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
    1324          910 :           gcc_assert (arg0 && arg1);
    1325              :           /* For `bool_val != 0`, reuse bool_val. */
    1326          910 :           if (gimple_cond_code (cond) == NE_EXPR
    1327          525 :               && integer_zerop (gimple_cond_rhs (cond))
    1328         1361 :               && types_compatible_p (TREE_TYPE (gimple_cond_lhs (cond)),
    1329              :                                      boolean_type_node))
    1330              :             {
    1331           53 :               t = gimple_cond_lhs (cond);
    1332              :             }
    1333              :           else
    1334              :             {
    1335          857 :               t = make_ssa_name (boolean_type_node);
    1336          857 :               new_stmt = gimple_build_assign (t, gimple_cond_code (cond),
    1337              :                                               gimple_cond_lhs (cond),
    1338              :                                               gimple_cond_rhs (cond));
    1339          857 :               gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
    1340              :             }
    1341          910 :           new_stmt = gimple_build_assign (gimple_phi_result (stmt),
    1342              :                                           COND_EXPR, t, arg0, arg1);
    1343          910 :           todo |= TODO_cleanup_cfg;
    1344              :         }
    1345          915 :       if (!ALWAYS_EXECUTED_IN (bb)
    1346          915 :           || (ALWAYS_EXECUTED_IN (bb) != level
    1347           93 :               && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
    1348          432 :         reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
    1349          915 :       gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
    1350          915 :       remove_phi_node (&bsi, false);
    1351              :     }
    1352              : 
    1353     55305347 :   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
    1354              :     {
    1355     43237585 :       edge e;
    1356              : 
    1357     43237585 :       gimple *stmt = gsi_stmt (bsi);
    1358              : 
    1359     43237585 :       lim_data = get_lim_data (stmt);
    1360     43237585 :       if (lim_data == NULL)
    1361              :         {
    1362     27202025 :           gsi_next (&bsi);
    1363     27202025 :           continue;
    1364              :         }
    1365              : 
    1366     16035560 :       cost = lim_data->cost;
    1367     16035560 :       level = lim_data->tgt_loop;
    1368     16035560 :       clear_lim_data (stmt);
    1369              : 
    1370     16035560 :       if (!level)
    1371              :         {
    1372     15541291 :           gsi_next (&bsi);
    1373     15541291 :           continue;
    1374              :         }
    1375              : 
    1376              :       /* We do not really want to move conditionals out of the loop; we just
    1377              :          placed it here to force its operands to be moved if necessary.  */
    1378       494269 :       if (gimple_code (stmt) == GIMPLE_COND)
    1379              :         {
    1380        11815 :           gsi_next (&bsi);
    1381        11815 :           continue;
    1382              :         }
    1383              : 
    1384       482454 :       if (dump_file && (dump_flags & TDF_DETAILS))
    1385              :         {
    1386          350 :           fprintf (dump_file, "Moving statement ");
    1387          350 :           print_gimple_stmt (dump_file, stmt, 0);
    1388          350 :           fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
    1389              :                    cost, level->num);
    1390              :         }
    1391              : 
    1392       482454 :       e = loop_preheader_edge (level);
    1393       964908 :       gcc_assert (!gimple_vdef (stmt));
    1394       964908 :       if (gimple_vuse (stmt))
    1395              :         {
    1396              :           /* The new VUSE is the one from the virtual PHI in the loop
    1397              :              header or the one already present.  */
    1398        82766 :           gphi_iterator gsi2;
    1399        82766 :           for (gsi2 = gsi_start_phis (e->dest);
    1400       193860 :                !gsi_end_p (gsi2); gsi_next (&gsi2))
    1401              :             {
    1402       176795 :               gphi *phi = gsi2.phi ();
    1403       353590 :               if (virtual_operand_p (gimple_phi_result (phi)))
    1404              :                 {
    1405        65701 :                   SET_USE (gimple_vuse_op (stmt),
    1406              :                            PHI_ARG_DEF_FROM_EDGE (phi, e));
    1407        65701 :                   break;
    1408              :                 }
    1409              :             }
    1410              :         }
    1411       482454 :       gsi_remove (&bsi, false);
    1412       482454 :       if (gimple_has_lhs (stmt)
    1413       482454 :           && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
    1414       440954 :           && (!ALWAYS_EXECUTED_IN (bb)
    1415       374929 :               || !(ALWAYS_EXECUTED_IN (bb) == level
    1416        98356 :                    || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
    1417       204695 :         reset_flow_sensitive_info (gimple_get_lhs (stmt));
    1418              :       /* In case this is a stmt that is not unconditionally executed
    1419              :          when the target loop header is executed and the stmt may
    1420              :          invoke undefined integer or pointer overflow rewrite it to
    1421              :          unsigned arithmetic.  */
    1422       482454 :       if (gimple_needing_rewrite_undefined (stmt)
    1423       482454 :           && (!ALWAYS_EXECUTED_IN (bb)
    1424        80452 :               || !(ALWAYS_EXECUTED_IN (bb) == level
    1425        18964 :                    || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
    1426        20293 :         gsi_insert_seq_on_edge (e, rewrite_to_defined_unconditional (stmt));
    1427              :       else
    1428       462161 :         gsi_insert_on_edge (e, stmt);
    1429              :     }
    1430              : 
    1431      6033881 :   return todo;
    1432              : }
    1433              : 
    1434              : /* Checks whether the statement defining variable *INDEX can be hoisted
    1435              :    out of the loop passed in DATA.  Callback for for_each_index.  */
    1436              : 
    1437              : static bool
    1438      1771903 : may_move_till (tree ref, tree *index, void *data)
    1439              : {
    1440      1771903 :   class loop *loop = (class loop *) data, *max_loop;
    1441              : 
    1442              :   /* If REF is an array reference, check also that the step and the lower
    1443              :      bound is invariant in LOOP.  */
    1444      1771903 :   if (TREE_CODE (ref) == ARRAY_REF)
    1445              :     {
    1446       479402 :       tree step = TREE_OPERAND (ref, 3);
    1447       479402 :       tree lbound = TREE_OPERAND (ref, 2);
    1448              : 
    1449       479402 :       max_loop = outermost_invariant_loop (step, loop);
    1450       479402 :       if (!max_loop)
    1451              :         return false;
    1452              : 
    1453       479402 :       max_loop = outermost_invariant_loop (lbound, loop);
    1454       479402 :       if (!max_loop)
    1455              :         return false;
    1456              :     }
    1457              : 
    1458      1771903 :   max_loop = outermost_invariant_loop (*index, loop);
    1459      1771903 :   if (!max_loop)
    1460              :     return false;
    1461              : 
    1462              :   return true;
    1463              : }
    1464              : 
    1465              : /* If OP is SSA NAME, force the statement that defines it to be
    1466              :    moved out of the LOOP.  ORIG_LOOP is the loop in that EXPR is used.  */
    1467              : 
    1468              : static void
    1469        37889 : force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
    1470              : {
    1471        37889 :   gimple *stmt;
    1472              : 
    1473        37889 :   if (!op
    1474        37889 :       || is_gimple_min_invariant (op))
    1475        33367 :     return;
    1476              : 
    1477         4522 :   gcc_assert (TREE_CODE (op) == SSA_NAME);
    1478              : 
    1479         4522 :   stmt = SSA_NAME_DEF_STMT (op);
    1480         4522 :   if (gimple_nop_p (stmt))
    1481              :     return;
    1482              : 
    1483         2905 :   set_level (stmt, orig_loop, loop);
    1484              : }
    1485              : 
    1486              : /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
    1487              :    the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
    1488              :    for_each_index.  */
    1489              : 
    1490              : struct fmt_data
    1491              : {
    1492              :   class loop *loop;
    1493              :   class loop *orig_loop;
    1494              : };
    1495              : 
    1496              : static bool
    1497        20033 : force_move_till (tree ref, tree *index, void *data)
    1498              : {
    1499        20033 :   struct fmt_data *fmt_data = (struct fmt_data *) data;
    1500              : 
    1501        20033 :   if (TREE_CODE (ref) == ARRAY_REF)
    1502              :     {
    1503         8928 :       tree step = TREE_OPERAND (ref, 3);
    1504         8928 :       tree lbound = TREE_OPERAND (ref, 2);
    1505              : 
    1506         8928 :       force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
    1507         8928 :       force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
    1508              :     }
    1509              : 
    1510        20033 :   force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
    1511              : 
    1512        20033 :   return true;
    1513              : }
    1514              : 
    1515              : /* A function to free the mem_ref object OBJ.  */
    1516              : 
    1517              : static void
    1518      5206857 : memref_free (class im_mem_ref *mem)
    1519              : {
    1520            0 :   mem->accesses_in_loop.release ();
    1521            0 : }
    1522              : 
    1523              : /* Allocates and returns a memory reference description for MEM whose hash
    1524              :    value is HASH and id is ID.  */
    1525              : 
    1526              : static im_mem_ref *
    1527      5206857 : mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
    1528              : {
    1529      5206857 :   im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
    1530      5206857 :   if (mem)
    1531      4281799 :     ref->mem = *mem;
    1532              :   else
    1533       925058 :     ao_ref_init (&ref->mem, error_mark_node);
    1534      5206857 :   ref->id = id;
    1535      5206857 :   ref->ref_canonical = false;
    1536      5206857 :   ref->ref_decomposed = false;
    1537      5206857 :   ref->hash = hash;
    1538      5206857 :   ref->stored = NULL;
    1539      5206857 :   ref->loaded = NULL;
    1540      5206857 :   bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
    1541      5206857 :   ref->accesses_in_loop.create (1);
    1542              : 
    1543      5206857 :   return ref;
    1544              : }
    1545              : 
    1546              : /* Records memory reference location *LOC in LOOP to the memory reference
    1547              :    description REF.  The reference occurs in statement STMT.  */
    1548              : 
    1549              : static void
    1550      5651924 : record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
    1551              : {
    1552      5651924 :   mem_ref_loc aref;
    1553      5651924 :   aref.stmt = stmt;
    1554      5651924 :   aref.ref = loc;
    1555            0 :   ref->accesses_in_loop.safe_push (aref);
    1556            0 : }
    1557              : 
    1558              : /* Set the LOOP bit in REF stored bitmap and allocate that if
    1559              :    necessary.  Return whether a bit was changed.  */
    1560              : 
    1561              : static bool
    1562      4372569 : set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
    1563              : {
    1564      4372569 :   if (!ref->stored)
    1565      2477674 :     ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
    1566      4372569 :   return bitmap_set_bit (ref->stored, loop->num);
    1567              : }
    1568              : 
    1569              : /* Marks reference REF as stored in LOOP.  */
    1570              : 
    1571              : static void
    1572      3638217 : mark_ref_stored (im_mem_ref *ref, class loop *loop)
    1573              : {
    1574      3638217 :   while (loop != current_loops->tree_root
    1575      7016456 :          && set_ref_stored_in_loop (ref, loop))
    1576      3378239 :     loop = loop_outer (loop);
    1577      3638217 : }
    1578              : 
    1579              : /* Set the LOOP bit in REF loaded bitmap and allocate that if
    1580              :    necessary.  Return whether a bit was changed.  */
    1581              : 
    1582              : static bool
    1583      6129006 : set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
    1584              : {
    1585      6129006 :   if (!ref->loaded)
    1586      3359697 :     ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
    1587      6129006 :   return bitmap_set_bit (ref->loaded, loop->num);
    1588              : }
    1589              : 
    1590              : /* Marks reference REF as loaded in LOOP.  */
    1591              : 
    1592              : static void
    1593      4867386 : mark_ref_loaded (im_mem_ref *ref, class loop *loop)
    1594              : {
    1595      4867386 :   while (loop != current_loops->tree_root
    1596      9711354 :          && set_ref_loaded_in_loop (ref, loop))
    1597      4843968 :     loop = loop_outer (loop);
    1598      4867386 : }
    1599              : 
    1600              : /* Gathers memory references in statement STMT in LOOP, storing the
    1601              :    information about them in the memory_accesses structure.  Marks
    1602              :    the vops accessed through unrecognized statements there as
    1603              :    well.  */
    1604              : 
    1605              : static void
    1606     43172636 : gather_mem_refs_stmt (class loop *loop, gimple *stmt)
    1607              : {
    1608     43172636 :   tree *mem = NULL;
    1609     43172636 :   hashval_t hash;
    1610     43172636 :   im_mem_ref **slot;
    1611     43172636 :   im_mem_ref *ref;
    1612     43172636 :   bool is_stored;
    1613     43172636 :   unsigned id;
    1614              : 
    1615     58842253 :   if (!gimple_vuse (stmt))
    1616              :     return;
    1617              : 
    1618      6899793 :   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
    1619      6899793 :   if (!mem && is_gimple_assign (stmt))
    1620              :     {
    1621              :       /* For aggregate copies record distinct references but use them
    1622              :          only for disambiguation purposes.  */
    1623       440661 :       id = memory_accesses.refs_list.length ();
    1624       440661 :       ref = mem_ref_alloc (NULL, 0, id);
    1625       440661 :       memory_accesses.refs_list.safe_push (ref);
    1626       440661 :       if (dump_file && (dump_flags & TDF_DETAILS))
    1627              :         {
    1628           24 :           fprintf (dump_file, "Unhandled memory reference %u: ", id);
    1629           24 :           print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    1630              :         }
    1631       440661 :       record_mem_ref_loc (ref, stmt, mem);
    1632       881322 :       is_stored = gimple_vdef (stmt);
    1633              :     }
    1634      6459132 :   else if (!mem)
    1635              :     {
    1636              :       /* We use the shared mem_ref for all unanalyzable refs.  */
    1637      1247869 :       id = UNANALYZABLE_MEM_ID;
    1638      1247869 :       ref = memory_accesses.refs_list[id];
    1639      1247869 :       if (dump_file && (dump_flags & TDF_DETAILS))
    1640              :         {
    1641            9 :           fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
    1642            9 :           print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    1643              :         }
    1644      2495738 :       is_stored = gimple_vdef (stmt);
    1645              :     }
    1646              :   else
    1647              :     {
    1648              :       /* We are looking for equal refs that might differ in structure
    1649              :          such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
    1650              :          make sure we can canonicalize the ref in the hashtable if
    1651              :          non-operand_equal_p refs are found.  For the lookup we mark
    1652              :          the case we want strict equality with aor.max_size == -1.  */
    1653      5211263 :       ao_ref aor;
    1654      5211263 :       ao_ref_init (&aor, *mem);
    1655      5211263 :       ao_ref_base (&aor);
    1656      5211263 :       ao_ref_alias_set (&aor);
    1657      5211263 :       HOST_WIDE_INT offset, size, max_size;
    1658      5211263 :       poly_int64 saved_maxsize = aor.max_size, mem_off;
    1659      5211263 :       tree mem_base;
    1660      5211263 :       bool ref_decomposed;
    1661      5211263 :       if (aor.max_size_known_p ()
    1662      5034573 :           && aor.offset.is_constant (&offset)
    1663      5034573 :           && aor.size.is_constant (&size)
    1664      5034573 :           && aor.max_size.is_constant (&max_size)
    1665      5034573 :           && size == max_size
    1666      4429873 :           && (size % BITS_PER_UNIT) == 0
    1667              :           /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
    1668              :              size.  Make sure this is consistent with the extraction.  */
    1669      4419567 :           && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
    1670      4419567 :           && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
    1671              :                        aor.size)
    1672      9630738 :           && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
    1673              :         {
    1674      4418430 :           ref_decomposed = true;
    1675      4418430 :           tree base = ao_ref_base (&aor);
    1676      4418430 :           poly_int64 moffset;
    1677      4418430 :           HOST_WIDE_INT mcoffset;
    1678      4418430 :           if (TREE_CODE (base) == MEM_REF
    1679      1997504 :               && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
    1680      4418430 :               && moffset.is_constant (&mcoffset))
    1681              :             {
    1682      1997504 :               hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
    1683      1997504 :               hash = iterative_hash_host_wide_int (mcoffset, hash);
    1684              :             }
    1685              :           else
    1686              :             {
    1687      2420926 :               hash = iterative_hash_expr (base, 0);
    1688      2420926 :               hash = iterative_hash_host_wide_int (offset, hash);
    1689              :             }
    1690      4418430 :           hash = iterative_hash_host_wide_int (size, hash);
    1691              :         }
    1692              :       else
    1693              :         {
    1694       792833 :           ref_decomposed = false;
    1695       792833 :           hash = iterative_hash_expr (aor.ref, 0);
    1696       792833 :           aor.max_size = -1;
    1697              :         }
    1698      5211263 :       slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
    1699      5211263 :       aor.max_size = saved_maxsize;
    1700      5211263 :       if (*slot)
    1701              :         {
    1702       929464 :           if (!(*slot)->ref_canonical
    1703       929464 :               && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
    1704              :             {
    1705              :               /* If we didn't yet canonicalize the hashtable ref (which
    1706              :                  we'll end up using for code insertion) and hit a second
    1707              :                  equal ref that is not structurally equivalent create
    1708              :                  a canonical ref which is a bare MEM_REF.  */
    1709        49312 :               if (TREE_CODE (*mem) == MEM_REF
    1710        49312 :                   || TREE_CODE (*mem) == TARGET_MEM_REF)
    1711              :                 {
    1712        14319 :                   (*slot)->mem.ref = *mem;
    1713        14319 :                   (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
    1714              :                 }
    1715              :               else
    1716              :                 {
    1717        34993 :                   tree ref_alias_type = reference_alias_ptr_type (*mem);
    1718        34993 :                   unsigned int ref_align = get_object_alignment (*mem);
    1719        34993 :                   tree ref_type = TREE_TYPE (*mem);
    1720        34993 :                   tree tmp = build1 (ADDR_EXPR, ptr_type_node,
    1721              :                                      unshare_expr (mem_base));
    1722        34993 :                   if (TYPE_ALIGN (ref_type) != ref_align)
    1723         8666 :                     ref_type = build_aligned_type (ref_type, ref_align);
    1724        34993 :                   tree new_ref
    1725        34993 :                     = fold_build2 (MEM_REF, ref_type, tmp,
    1726              :                                    build_int_cst (ref_alias_type, mem_off));
    1727        34993 :                   if ((*slot)->mem.volatile_p)
    1728           22 :                     TREE_THIS_VOLATILE (new_ref) = 1;
    1729        34993 :                   (*slot)->mem.ref = new_ref;
    1730              :                   /* Make sure the recorded base and offset are consistent
    1731              :                      with the newly built ref.  */
    1732        34993 :                   if (TREE_CODE (TREE_OPERAND (new_ref, 0)) == ADDR_EXPR)
    1733              :                     ;
    1734              :                   else
    1735              :                     {
    1736        19838 :                       (*slot)->mem.base = new_ref;
    1737        19838 :                       (*slot)->mem.offset = 0;
    1738              :                     }
    1739        34993 :                   gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
    1740              :                                        && is_gimple_mem_ref_addr
    1741              :                                             (TREE_OPERAND ((*slot)->mem.ref,
    1742              :                                                            0)));
    1743        34993 :                   (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
    1744              :                 }
    1745        49312 :               (*slot)->ref_canonical = true;
    1746              :             }
    1747       929464 :           ref = *slot;
    1748       929464 :           id = ref->id;
    1749              :         }
    1750              :       else
    1751              :         {
    1752      4281799 :           id = memory_accesses.refs_list.length ();
    1753      4281799 :           ref = mem_ref_alloc (&aor, hash, id);
    1754      4281799 :           ref->ref_decomposed = ref_decomposed;
    1755      4281799 :           memory_accesses.refs_list.safe_push (ref);
    1756      4281799 :           *slot = ref;
    1757              : 
    1758      4281799 :           if (dump_file && (dump_flags & TDF_DETAILS))
    1759              :             {
    1760          498 :               fprintf (dump_file, "Memory reference %u: ", id);
    1761          498 :               print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
    1762          498 :               fprintf (dump_file, "\n");
    1763              :             }
    1764              :         }
    1765              : 
    1766      5211263 :       record_mem_ref_loc (ref, stmt, mem);
    1767              :     }
    1768      6899793 :   if (is_stored)
    1769              :     {
    1770      3638217 :       bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
    1771      3638217 :       mark_ref_stored (ref, loop);
    1772              :     }
    1773              :   /* A not simple memory op is also a read when it is a write.  */
    1774      6899793 :   if (!is_stored || id == UNANALYZABLE_MEM_ID
    1775      2473068 :       || ref->mem.ref == error_mark_node)
    1776              :     {
    1777      4867386 :       bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
    1778      4867386 :       mark_ref_loaded (ref, loop);
    1779              :     }
    1780      6899793 :   init_lim_data (stmt)->ref = ref->id;
    1781      6899793 :   return;
    1782              : }
    1783              : 
    1784              : static unsigned *bb_loop_postorder;
    1785              : 
    1786              : /* qsort sort function to sort blocks after their loop fathers postorder.  */
    1787              : 
    1788              : static int
    1789    157004940 : sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
    1790              :                                 void *bb_loop_postorder_)
    1791              : {
    1792    157004940 :   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
    1793    157004940 :   basic_block bb1 = *(const basic_block *)bb1_;
    1794    157004940 :   basic_block bb2 = *(const basic_block *)bb2_;
    1795    157004940 :   class loop *loop1 = bb1->loop_father;
    1796    157004940 :   class loop *loop2 = bb2->loop_father;
    1797    157004940 :   if (loop1->num == loop2->num)
    1798     73287390 :     return bb1->index - bb2->index;
    1799     83717550 :   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
    1800              : }
    1801              : 
    1802              : /* qsort sort function to sort ref locs after their loop fathers postorder.  */
    1803              : 
    1804              : static int
    1805       929449 : sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
    1806              :                                  void *bb_loop_postorder_)
    1807              : {
    1808       929449 :   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
    1809       929449 :   const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
    1810       929449 :   const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
    1811       929449 :   class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
    1812       929449 :   class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
    1813       929449 :   if (loop1->num == loop2->num)
    1814              :     return 0;
    1815       143208 :   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
    1816              : }
    1817              : 
    1818              : /* Gathers memory references in loops.  */
    1819              : 
    1820              : static void
    1821       484397 : analyze_memory_references (bool store_motion)
    1822              : {
    1823       484397 :   gimple_stmt_iterator bsi;
    1824       484397 :   basic_block bb, *bbs;
    1825       484397 :   class loop *outer;
    1826       484397 :   unsigned i, n;
    1827              : 
    1828              :   /* Collect all basic-blocks in loops and sort them after their
    1829              :      loops postorder.  */
    1830       484397 :   i = 0;
    1831       484397 :   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
    1832     14881820 :   FOR_EACH_BB_FN (bb, cfun)
    1833     14397423 :     if (bb->loop_father != current_loops->tree_root)
    1834      6027709 :       bbs[i++] = bb;
    1835       484397 :   n = i;
    1836       484397 :   gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
    1837              :               bb_loop_postorder);
    1838              : 
    1839              :   /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
    1840              :      That results in better locality for all the bitmaps.  It also
    1841              :      automatically sorts the location list of gathered memory references
    1842              :      after their loop postorder number allowing to binary-search it.  */
    1843      6512106 :   for (i = 0; i < n; ++i)
    1844              :     {
    1845      6027709 :       basic_block bb = bbs[i];
    1846     55228054 :       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    1847     43172636 :         gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
    1848              :     }
    1849              : 
    1850              :   /* Verify the list of gathered memory references is sorted after their
    1851              :      loop postorder number.  */
    1852       484397 :   if (flag_checking)
    1853              :     {
    1854              :       im_mem_ref *ref;
    1855     10898039 :       FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    1856      6136276 :         for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
    1857       929449 :           gcc_assert (sort_locs_in_loop_postorder_cmp
    1858              :                         (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
    1859              :                          bb_loop_postorder) <= 0);
    1860              :     }
    1861              : 
    1862       484397 :   free (bbs);
    1863              : 
    1864       484397 :   if (!store_motion)
    1865       484397 :     return;
    1866              : 
    1867              :   /* Propagate the information about accessed memory references up
    1868              :      the loop hierarchy.  */
    1869      2770830 :   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    1870              :     {
    1871              :       /* Finalize the overall touched references (including subloops).  */
    1872      1317972 :       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
    1873      1317972 :                        &memory_accesses.refs_stored_in_loop[loop->num]);
    1874              : 
    1875              :       /* Propagate the information about accessed memory references up
    1876              :          the loop hierarchy.  */
    1877      1317972 :       outer = loop_outer (loop);
    1878      1317972 :       if (outer == current_loops->tree_root)
    1879      1017928 :         continue;
    1880              : 
    1881       300044 :       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
    1882       300044 :                        &memory_accesses.all_refs_stored_in_loop[loop->num]);
    1883       484286 :     }
    1884              : }
    1885              : 
    1886              : /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
    1887              :    tree_to_aff_combination_expand.  */
    1888              : 
    1889              : static bool
    1890       541424 : mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
    1891              :                       hash_map<tree, name_expansion *> **ttae_cache,
    1892              :                       bool tbaa_p)
    1893              : {
    1894       541424 :   gcc_checking_assert (mem1->mem.ref != error_mark_node
    1895              :                        && mem2->mem.ref != error_mark_node);
    1896              : 
    1897              :   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
    1898              :      object and their offset differ in such a way that the locations cannot
    1899              :      overlap, then they cannot alias.  */
    1900              :   poly_widest_int size1, size2;
    1901      1082848 :   aff_tree off1, off2;
    1902              : 
    1903              :   /* Perform basic offset and type-based disambiguation.  */
    1904       541424 :   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
    1905              :     return false;
    1906              : 
    1907              :   /* The expansion of addresses may be a bit expensive, thus we only do
    1908              :      the check at -O2 and higher optimization levels.  */
    1909        67174 :   if (optimize < 2)
    1910              :     return true;
    1911              : 
    1912        57572 :   get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
    1913        57572 :   get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
    1914        57572 :   aff_combination_expand (&off1, ttae_cache);
    1915        57572 :   aff_combination_expand (&off2, ttae_cache);
    1916        57572 :   aff_combination_scale (&off1, -1);
    1917        57572 :   aff_combination_add (&off2, &off1);
    1918              : 
    1919        57572 :   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
    1920              :     return false;
    1921              : 
    1922              :   return true;
    1923       541424 : }
    1924              : 
    1925              : /* Compare function for bsearch searching for reference locations
    1926              :    in a loop.  */
    1927              : 
    1928              : static int
    1929       259888 : find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
    1930              :                           void *bb_loop_postorder_)
    1931              : {
    1932       259888 :   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
    1933       259888 :   class loop *loop = (class loop *)const_cast<void *>(loop_);
    1934       259888 :   mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
    1935       259888 :   class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
    1936       259888 :   if (loop->num  == loc_loop->num
    1937       259888 :       || flow_loop_nested_p (loop, loc_loop))
    1938       199368 :     return 0;
    1939        60520 :   return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
    1940        60520 :           ? -1 : 1);
    1941              : }
    1942              : 
    1943              : /* Iterates over all locations of REF in LOOP and its subloops calling
    1944              :    fn.operator() with the location as argument.  When that operator
    1945              :    returns true the iteration is stopped and true is returned.
    1946              :    Otherwise false is returned.  */
    1947              : 
    1948              : template <typename FN>
    1949              : static bool
    1950       199368 : for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
    1951              : {
    1952              :   unsigned i;
    1953              :   mem_ref_loc *loc;
    1954              : 
    1955              :   /* Search for the cluster of locs in the accesses_in_loop vector
    1956              :      which is sorted after postorder index of the loop father.  */
    1957       199368 :   loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
    1958              :                                        bb_loop_postorder);
    1959       199368 :   if (!loc)
    1960            0 :     return false;
    1961              : 
    1962              :   /* We have found one location inside loop or its sub-loops.  Iterate
    1963              :      both forward and backward to cover the whole cluster.  */
    1964       199368 :   i = loc - ref->accesses_in_loop.address ();
    1965       273558 :   while (i > 0)
    1966              :     {
    1967       132750 :       --i;
    1968       132750 :       mem_ref_loc *l = &ref->accesses_in_loop[i];
    1969       132750 :       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
    1970              :         break;
    1971       109142 :       if (fn (l))
    1972              :         return true;
    1973              :     }
    1974       446718 :   for (i = loc - ref->accesses_in_loop.address ();
    1975       282302 :        i < ref->accesses_in_loop.length (); ++i)
    1976              :     {
    1977       211253 :       mem_ref_loc *l = &ref->accesses_in_loop[i];
    1978       211253 :       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
    1979              :         break;
    1980       191770 :       if (fn (l))
    1981              :         return true;
    1982              :     }
    1983              : 
    1984              :   return false;
    1985              : }
    1986              : 
    1987              : /* Rewrites location LOC by TMP_VAR.  */
    1988              : 
    1989              : class rewrite_mem_ref_loc
    1990              : {
    1991              : public:
    1992        34700 :   rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
    1993              :   bool operator () (mem_ref_loc *loc);
    1994              :   tree tmp_var;
    1995              : };
    1996              : 
    1997              : bool
    1998        55417 : rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
    1999              : {
    2000        55417 :   *loc->ref = tmp_var;
    2001        55417 :   update_stmt (loc->stmt);
    2002        55417 :   return false;
    2003              : }
    2004              : 
    2005              : /* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
    2006              : 
    2007              : static void
    2008        34700 : rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
    2009              : {
    2010            0 :   for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
    2011            0 : }
    2012              : 
    2013              : /* Stores the first reference location in LOCP.  */
    2014              : 
    2015              : class first_mem_ref_loc_1
    2016              : {
    2017              : public:
    2018        34700 :   first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
    2019              :   bool operator () (mem_ref_loc *loc);
    2020              :   mem_ref_loc **locp;
    2021              : };
    2022              : 
    2023              : bool
    2024        34700 : first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
    2025              : {
    2026        34700 :   *locp = loc;
    2027        34700 :   return true;
    2028              : }
    2029              : 
    2030              : /* Returns the first reference location to REF in LOOP.  */
    2031              : 
    2032              : static mem_ref_loc *
    2033        34700 : first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
    2034              : {
    2035        34700 :   mem_ref_loc *locp = NULL;
    2036            0 :   for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
    2037        34700 :   return locp;
    2038              : }
    2039              : 
    2040              : /* Helper function for execute_sm.  Emit code to store TMP_VAR into
    2041              :    MEM along edge EX.
    2042              : 
    2043              :    The store is only done if MEM has changed.  We do this so no
    2044              :    changes to MEM occur on code paths that did not originally store
    2045              :    into it.
    2046              : 
    2047              :    The common case for execute_sm will transform:
    2048              : 
    2049              :      for (...) {
    2050              :        if (foo)
    2051              :          stuff;
    2052              :        else
    2053              :          MEM = TMP_VAR;
    2054              :      }
    2055              : 
    2056              :    into:
    2057              : 
    2058              :      lsm = MEM;
    2059              :      for (...) {
    2060              :        if (foo)
    2061              :          stuff;
    2062              :        else
    2063              :          lsm = TMP_VAR;
    2064              :      }
    2065              :      MEM = lsm;
    2066              : 
    2067              :   This function will generate:
    2068              : 
    2069              :      lsm = MEM;
    2070              : 
    2071              :      lsm_flag = false;
    2072              :      ...
    2073              :      for (...) {
    2074              :        if (foo)
    2075              :          stuff;
    2076              :        else {
    2077              :          lsm = TMP_VAR;
    2078              :          lsm_flag = true;
    2079              :        }
    2080              :      }
    2081              :      if (lsm_flag)      <--
    2082              :        MEM = lsm;       <-- (X)
    2083              : 
    2084              :   In case MEM and TMP_VAR are NULL the function will return the then
    2085              :   block so the caller can insert (X) and other related stmts.
    2086              : */
    2087              : 
    2088              : static basic_block
    2089        14373 : execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
    2090              :                        edge preheader, hash_set <basic_block> *flag_bbs,
    2091              :                        edge &append_cond_position, edge &last_cond_fallthru)
    2092              : {
    2093        14373 :   basic_block new_bb, then_bb, old_dest;
    2094        14373 :   bool loop_has_only_one_exit;
    2095        14373 :   edge then_old_edge;
    2096        14373 :   gimple_stmt_iterator gsi;
    2097        14373 :   gimple *stmt;
    2098        14373 :   bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
    2099              : 
    2100        14373 :   profile_count count_sum = profile_count::zero ();
    2101        14373 :   int nbbs = 0, ncount = 0;
    2102        14373 :   profile_probability flag_probability = profile_probability::uninitialized ();
    2103              : 
    2104              :   /* Flag is set in FLAG_BBS. Determine probability that flag will be true
    2105              :      at loop exit.
    2106              : 
    2107              :      This code may look fancy, but it cannot update profile very realistically
    2108              :      because we do not know the probability that flag will be true at given
    2109              :      loop exit.
    2110              : 
    2111              :      We look for two interesting extremes
    2112              :        - when exit is dominated by block setting the flag, we know it will
    2113              :          always be true.  This is a common case.
    2114              :        - when all blocks setting the flag have very low frequency we know
    2115              :          it will likely be false.
    2116              :      In all other cases we default to 2/3 for flag being true.  */
    2117              : 
    2118        14373 :   for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
    2119        49905 :        it != flag_bbs->end (); ++it)
    2120              :     {
    2121        17766 :        if ((*it)->count.initialized_p ())
    2122        17753 :          count_sum += (*it)->count, ncount ++;
    2123        17766 :        if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
    2124         2389 :          flag_probability = profile_probability::always ();
    2125        17766 :        nbbs++;
    2126              :     }
    2127              : 
    2128        14373 :   profile_probability cap
    2129        14373 :           = profile_probability::guessed_always ().apply_scale (2, 3);
    2130              : 
    2131        14373 :   if (flag_probability.initialized_p ())
    2132              :     ;
    2133        12080 :   else if (ncount == nbbs
    2134        24369 :            && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
    2135              :     {
    2136          184 :       flag_probability = count_sum.probability_in (preheader->count ());
    2137          184 :       if (flag_probability > cap)
    2138           71 :         flag_probability = cap;
    2139              :     }
    2140              : 
    2141        14373 :   if (!flag_probability.initialized_p ())
    2142        11896 :     flag_probability = cap;
    2143              : 
    2144              :   /* ?? Insert store after previous store if applicable.  See note
    2145              :      below.  */
    2146        14373 :   if (append_cond_position)
    2147         5434 :     ex = append_cond_position;
    2148              : 
    2149        14373 :   loop_has_only_one_exit = single_pred_p (ex->dest);
    2150              : 
    2151        14373 :   if (loop_has_only_one_exit)
    2152         5948 :     ex = split_block_after_labels (ex->dest);
    2153              :   else
    2154              :     {
    2155         8425 :       for (gphi_iterator gpi = gsi_start_phis (ex->dest);
    2156        12450 :            !gsi_end_p (gpi); gsi_next (&gpi))
    2157              :         {
    2158         4842 :           gphi *phi = gpi.phi ();
    2159         9684 :           if (virtual_operand_p (gimple_phi_result (phi)))
    2160         4025 :             continue;
    2161              : 
    2162              :           /* When the destination has a non-virtual PHI node with multiple
    2163              :              predecessors make sure we preserve the PHI structure by
    2164              :              forcing a forwarder block so that hoisting of that PHI will
    2165              :              still work.  */
    2166          817 :           split_edge (ex);
    2167          817 :           break;
    2168              :         }
    2169              :     }
    2170              : 
    2171        14373 :   old_dest = ex->dest;
    2172        14373 :   new_bb = split_edge (ex);
    2173        14373 :   if (append_cond_position)
    2174         5434 :     new_bb->count += last_cond_fallthru->count ();
    2175        14373 :   then_bb = create_empty_bb (new_bb);
    2176        14373 :   then_bb->count = new_bb->count.apply_probability (flag_probability);
    2177        14373 :   if (irr)
    2178           49 :     then_bb->flags = BB_IRREDUCIBLE_LOOP;
    2179        14373 :   add_bb_to_loop (then_bb, new_bb->loop_father);
    2180              : 
    2181        14373 :   gsi = gsi_start_bb (new_bb);
    2182        14373 :   stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
    2183              :                             NULL_TREE, NULL_TREE);
    2184        14373 :   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    2185              : 
    2186              :   /* Insert actual store.  */
    2187        14373 :   if (mem)
    2188              :     {
    2189        11976 :       gsi = gsi_start_bb (then_bb);
    2190        11976 :       stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
    2191        11976 :       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    2192              :     }
    2193              : 
    2194        14373 :   edge e1 = single_succ_edge (new_bb);
    2195        28697 :   edge e2 = make_edge (new_bb, then_bb,
    2196              :                        EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
    2197        14373 :   e2->probability = flag_probability;
    2198              : 
    2199        14373 :   e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
    2200        14373 :   e1->flags &= ~EDGE_FALLTHRU;
    2201              : 
    2202        14373 :   e1->probability = flag_probability.invert ();
    2203              : 
    2204        28697 :   then_old_edge = make_single_succ_edge (then_bb, old_dest,
    2205              :                              EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
    2206              : 
    2207        14373 :   set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
    2208              : 
    2209        14373 :   if (append_cond_position)
    2210              :     {
    2211         5434 :       basic_block prevbb = last_cond_fallthru->src;
    2212         5434 :       redirect_edge_succ (last_cond_fallthru, new_bb);
    2213         5434 :       set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
    2214         5434 :       set_immediate_dominator (CDI_DOMINATORS, old_dest,
    2215              :                                recompute_dominator (CDI_DOMINATORS, old_dest));
    2216              :     }
    2217              : 
    2218              :   /* ?? Because stores may alias, they must happen in the exact
    2219              :      sequence they originally happened.  Save the position right after
    2220              :      the (_lsm) store we just created so we can continue appending after
    2221              :      it and maintain the original order.  */
    2222        14373 :   append_cond_position = then_old_edge;
    2223        14373 :   last_cond_fallthru = find_edge (new_bb, old_dest);
    2224              : 
    2225        14373 :   if (!loop_has_only_one_exit)
    2226         8425 :     for (gphi_iterator gpi = gsi_start_phis (old_dest);
    2227        12294 :          !gsi_end_p (gpi); gsi_next (&gpi))
    2228              :       {
    2229         3869 :         gphi *phi = gpi.phi ();
    2230         3869 :         unsigned i;
    2231              : 
    2232        22232 :         for (i = 0; i < gimple_phi_num_args (phi); i++)
    2233        18363 :           if (gimple_phi_arg_edge (phi, i)->src == new_bb)
    2234              :             {
    2235         3869 :               tree arg = gimple_phi_arg_def (phi, i);
    2236         3869 :               add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
    2237         3869 :               update_stmt (phi);
    2238              :             }
    2239              :       }
    2240              : 
    2241        14373 :   return then_bb;
    2242              : }
    2243              : 
    2244              : /* When REF is set on the location, set flag indicating the store.  */
    2245              : 
    2246              : class sm_set_flag_if_changed
    2247              : {
    2248              : public:
    2249         7612 :   sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
    2250         7612 :          : flag (flag_), bbs (bbs_) {}
    2251              :   bool operator () (mem_ref_loc *loc);
    2252              :   tree flag;
    2253              :   hash_set <basic_block> *bbs;
    2254              : };
    2255              : 
    2256              : bool
    2257        14538 : sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
    2258              : {
    2259              :   /* Only set the flag for writes.  */
    2260        14538 :   if (is_gimple_assign (loc->stmt)
    2261        14538 :       && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
    2262              :     {
    2263         8730 :       gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
    2264         8730 :       gimple *stmt = gimple_build_assign (flag, boolean_true_node);
    2265         8730 :       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    2266         8730 :       bbs->add (gimple_bb (stmt));
    2267              :     }
    2268        14538 :   return false;
    2269              : }
    2270              : 
    2271              : /* Helper function for execute_sm.  On every location where REF is
    2272              :    set, set an appropriate flag indicating the store.  */
    2273              : 
    2274              : static tree
    2275         7612 : execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
    2276              :                                 hash_set <basic_block> *bbs)
    2277              : {
    2278         7612 :   tree flag;
    2279         7612 :   char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
    2280         7612 :   flag = create_tmp_reg (boolean_type_node, str);
    2281         7612 :   for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
    2282         7612 :   return flag;
    2283              : }
    2284              : 
    2285        34700 : struct sm_aux
    2286              : {
    2287              :   tree tmp_var;
    2288              :   tree store_flag;
    2289              :   hash_set <basic_block> flag_bbs;
    2290              : };
    2291              : 
    2292              : /* Executes store motion of memory reference REF from LOOP.
    2293              :    Exits from the LOOP are stored in EXITS.  The initialization of the
    2294              :    temporary variable is put to the preheader of the loop, and assignments
    2295              :    to the reference from the temporary variable are emitted to exits.  */
    2296              : 
    2297              : static sm_aux *
    2298        34700 : execute_sm (class loop *loop, im_mem_ref *ref,
    2299              :             hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
    2300              :             bool use_other_flag_var)
    2301              : {
    2302        34700 :   gassign *load;
    2303        34700 :   struct fmt_data fmt_data;
    2304        34700 :   struct lim_aux_data *lim_data;
    2305        34700 :   bool multi_threaded_model_p = false;
    2306        34700 :   gimple_stmt_iterator gsi;
    2307        34700 :   sm_aux *aux = new sm_aux;
    2308              : 
    2309        34700 :   if (dump_file && (dump_flags & TDF_DETAILS))
    2310              :     {
    2311           82 :       fprintf (dump_file, "Executing store motion of ");
    2312           82 :       print_generic_expr (dump_file, ref->mem.ref);
    2313           82 :       fprintf (dump_file, " from loop %d\n", loop->num);
    2314              :     }
    2315              : 
    2316        34700 :   aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
    2317        34700 :                                  get_lsm_tmp_name (ref->mem.ref, ~0));
    2318              : 
    2319        34700 :   fmt_data.loop = loop;
    2320        34700 :   fmt_data.orig_loop = loop;
    2321        34700 :   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
    2322              : 
    2323        69400 :   bool always_stored = ref_always_accessed_p (loop, ref, true);
    2324        34700 :   if (maybe_mt
    2325        11677 :       && (bb_in_transaction (loop_preheader_edge (loop)->src)
    2326        11676 :           || (ref_can_have_store_data_races (ref->mem.ref) && ! always_stored)))
    2327              :     multi_threaded_model_p = true;
    2328              : 
    2329        34700 :   if (multi_threaded_model_p && !use_other_flag_var)
    2330         7612 :     aux->store_flag
    2331         7612 :       = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
    2332              :   else
    2333        27088 :     aux->store_flag = NULL_TREE;
    2334              : 
    2335              :   /* Remember variable setup.  */
    2336        34700 :   aux_map.put (ref, aux);
    2337              : 
    2338        34700 :   rewrite_mem_refs (loop, ref, aux->tmp_var);
    2339              : 
    2340              :   /* Emit the load code on a random exit edge or into the latch if
    2341              :      the loop does not exit, so that we are sure it will be processed
    2342              :      by move_computations after all dependencies.  */
    2343        34700 :   gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
    2344              : 
    2345              :   /* Avoid doing a load if there was no load of the ref in the loop.
    2346              :      Esp. when the ref is not always stored we cannot optimize it
    2347              :      away later.  But when it is not always stored we must use a conditional
    2348              :      store then.  */
    2349        34700 :   if ((!always_stored && !multi_threaded_model_p)
    2350        34700 :       || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
    2351        18154 :     load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
    2352              :   else
    2353              :     {
    2354              :       /* If not emitting a load mark the uninitialized state on the
    2355              :          loop entry as not to be warned for.  */
    2356        16546 :       tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
    2357        16546 :       suppress_warning (uninit, OPT_Wuninitialized);
    2358        16546 :       uninit = get_or_create_ssa_default_def (cfun, uninit);
    2359        16546 :       load = gimple_build_assign (aux->tmp_var, uninit);
    2360              :     }
    2361        34700 :   lim_data = init_lim_data (load);
    2362        34700 :   lim_data->max_loop = loop;
    2363        34700 :   lim_data->tgt_loop = loop;
    2364        34700 :   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
    2365              : 
    2366        34700 :   if (aux->store_flag)
    2367              :     {
    2368         7612 :       load = gimple_build_assign (aux->store_flag, boolean_false_node);
    2369         7612 :       lim_data = init_lim_data (load);
    2370         7612 :       lim_data->max_loop = loop;
    2371         7612 :       lim_data->tgt_loop = loop;
    2372         7612 :       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
    2373              :     }
    2374              : 
    2375        34700 :   return aux;
    2376              : }
    2377              : 
    2378              : /* sm_ord is used for ordinary stores we can retain order with respect
    2379              :        to other stores
    2380              :    sm_unord is used for conditional executed stores which need to be
    2381              :        able to execute in arbitrary order with respect to other stores
    2382              :    sm_other is used for stores we do not try to apply store motion to.  */
    2383              : enum sm_kind { sm_ord, sm_unord, sm_other };
    2384              : struct seq_entry
    2385              : {
    2386              :   seq_entry () = default;
    2387        51994 :   seq_entry (unsigned f, sm_kind k, tree fr = NULL)
    2388        51994 :     : first (f), second (k), from (fr) {}
    2389              :   unsigned first;
    2390              :   sm_kind second;
    2391              :   tree from;
    2392              : };
    2393              : 
    2394              : static void
    2395        32917 : execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
    2396              :                  hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
    2397              :                  edge &append_cond_position, edge &last_cond_fallthru,
    2398              :                  bitmap clobbers_to_prune)
    2399              : {
    2400              :   /* Sink the stores to exit from the loop.  */
    2401       115906 :   for (unsigned i = seq.length (); i > 0; --i)
    2402              :     {
    2403        50072 :       im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
    2404        50072 :       if (seq[i-1].second == sm_other)
    2405              :         {
    2406         5038 :           gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
    2407         5038 :           gassign *store;
    2408         5038 :           if (ref->mem.ref == error_mark_node)
    2409              :             {
    2410          177 :               tree lhs = gimple_assign_lhs (ref->accesses_in_loop[0].stmt);
    2411          177 :               if (dump_file && (dump_flags & TDF_DETAILS))
    2412              :                 {
    2413            3 :                   fprintf (dump_file, "Re-issueing dependent ");
    2414            3 :                   print_generic_expr (dump_file, unshare_expr (seq[i-1].from));
    2415            3 :                   fprintf (dump_file, " of ");
    2416            3 :                   print_generic_expr (dump_file, lhs);
    2417            3 :                   fprintf (dump_file, " from loop %d on exit %d -> %d\n",
    2418            3 :                            loop->num, ex->src->index, ex->dest->index);
    2419              :                 }
    2420          177 :               store = gimple_build_assign (unshare_expr (lhs),
    2421          177 :                                            unshare_expr (seq[i-1].from));
    2422          177 :               bitmap_set_bit (clobbers_to_prune, seq[i-1].first);
    2423              :             }
    2424              :           else
    2425              :             {
    2426         4861 :               if (dump_file && (dump_flags & TDF_DETAILS))
    2427              :                 {
    2428            0 :                   fprintf (dump_file, "Re-issueing dependent store of ");
    2429            0 :                   print_generic_expr (dump_file, ref->mem.ref);
    2430            0 :                   fprintf (dump_file, " from loop %d on exit %d -> %d\n",
    2431            0 :                            loop->num, ex->src->index, ex->dest->index);
    2432              :                 }
    2433         4861 :               store = gimple_build_assign (unshare_expr (ref->mem.ref),
    2434         4861 :                                            seq[i-1].from);
    2435              :             }
    2436         5038 :           gsi_insert_on_edge (ex, store);
    2437              :         }
    2438              :       else
    2439              :         {
    2440        45034 :           sm_aux *aux = *aux_map.get (ref);
    2441        45034 :           if (!aux->store_flag || kind == sm_ord)
    2442              :             {
    2443        33058 :               gassign *store;
    2444        33058 :               store = gimple_build_assign (unshare_expr (ref->mem.ref),
    2445              :                                            aux->tmp_var);
    2446        33058 :               gsi_insert_on_edge (ex, store);
    2447        33058 :             }
    2448              :           else
    2449        11976 :             execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
    2450              :                                    aux->store_flag,
    2451              :                                    loop_preheader_edge (loop), &aux->flag_bbs,
    2452              :                                    append_cond_position, last_cond_fallthru);
    2453              :         }
    2454              :     }
    2455        32917 : }
    2456              : 
    2457              : /* Push the SM candidate at index PTR in the sequence SEQ down until
    2458              :    we hit the next SM candidate.  Return true if that went OK and
    2459              :    false if we could not disambiguate agains another unrelated ref.
    2460              :    Update *AT to the index where the candidate now resides.  */
    2461              : 
    2462              : static bool
    2463        37484 : sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
    2464              : {
    2465        37484 :   *at = ptr;
    2466        38274 :   for (; ptr > 0; --ptr)
    2467              :     {
    2468        17712 :       seq_entry &new_cand = seq[ptr];
    2469        17712 :       seq_entry &against = seq[ptr-1];
    2470        17712 :       if (against.second == sm_ord
    2471         5869 :           || (against.second == sm_other && against.from != NULL_TREE))
    2472              :         /* Found the tail of the sequence.  */
    2473              :         break;
    2474              :       /* We may not ignore self-dependences here.  */
    2475         1091 :       if (new_cand.first == against.first
    2476              :           /* ???  We could actually handle clobbers here, but not easily
    2477              :              with LIMs dependence analysis.  */
    2478          869 :           || (memory_accesses.refs_list[new_cand.first]->mem.ref
    2479          869 :               == error_mark_node)
    2480          864 :           || (memory_accesses.refs_list[against.first]->mem.ref
    2481              :               == error_mark_node)
    2482         1931 :           || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
    2483          840 :                                   memory_accesses.refs_list[against.first],
    2484              :                                   false))
    2485              :         /* ???  Prune new_cand from the list of refs to apply SM to.  */
    2486          301 :         return false;
    2487          790 :       std::swap (new_cand, against);
    2488          790 :       *at = ptr - 1;
    2489              :     }
    2490              :   return true;
    2491              : }
    2492              : 
    2493              : /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
    2494              :    walking backwards from VDEF (or the end of BB if VDEF is NULL).  */
    2495              : 
    2496              : static int
    2497        35411 : sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
    2498              :                  vec<seq_entry> &seq, bitmap refs_not_in_seq,
    2499              :                  bitmap refs_not_supported, bool forked,
    2500              :                  bitmap fully_visited)
    2501              : {
    2502        35411 :   if (!vdef)
    2503        26314 :     for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
    2504       113527 :          gsi_prev (&gsi))
    2505              :       {
    2506       139582 :         vdef = gimple_vdef (gsi_stmt (gsi));
    2507        52369 :         if (vdef)
    2508              :           break;
    2509              :       }
    2510        26314 :   if (!vdef)
    2511              :     {
    2512         8601 :       gphi *vphi = get_virtual_phi (bb);
    2513         8601 :       if (vphi)
    2514         4271 :         vdef = gimple_phi_result (vphi);
    2515              :     }
    2516        35411 :   if (!vdef)
    2517              :     {
    2518         4330 :       if (single_pred_p (bb))
    2519              :         /* This handles the perfect nest case.  */
    2520         3538 :         return sm_seq_valid_bb (loop, single_pred (bb), vdef,
    2521              :                                 seq, refs_not_in_seq, refs_not_supported,
    2522         3538 :                                 forked, fully_visited);
    2523              :       return 0;
    2524              :     }
    2525        55172 :   do
    2526              :     {
    2527       110344 :       gimple *def = SSA_NAME_DEF_STMT (vdef);
    2528        55172 :       if (gimple_bb (def) != bb)
    2529              :         {
    2530              :           /* If we forked by processing a PHI do not allow our walk to
    2531              :              merge again until we handle that robustly.  */
    2532         3424 :           if (forked)
    2533              :             {
    2534              :               /* Mark refs_not_in_seq as unsupported.  */
    2535         1713 :               bitmap_ior_into (refs_not_supported, refs_not_in_seq);
    2536         1713 :               return 1;
    2537              :             }
    2538              :           /* Otherwise it doesn't really matter if we end up in different
    2539              :              BBs.  */
    2540              :           bb = gimple_bb (def);
    2541              :         }
    2542        53459 :       if (gphi *phi = dyn_cast <gphi *> (def))
    2543              :         {
    2544              :           /* Handle CFG merges.  Until we handle forks (gimple_bb (def) != bb)
    2545              :              this is still linear.
    2546              :              Eventually we want to cache intermediate results per BB
    2547              :              (but we can't easily cache for different exits?).  */
    2548              :           /* Stop at PHIs with possible backedges.  */
    2549        10439 :           if (bb == bb->loop_father->header
    2550         5354 :               || bb->flags & BB_IRREDUCIBLE_LOOP)
    2551              :             {
    2552              :               /* Mark refs_not_in_seq as unsupported.  */
    2553         5090 :               bitmap_ior_into (refs_not_supported, refs_not_in_seq);
    2554         5090 :               return 1;
    2555              :             }
    2556         5349 :           if (gimple_phi_num_args (phi) == 1)
    2557         1520 :             return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
    2558              :                                     gimple_phi_arg_def (phi, 0), seq,
    2559              :                                     refs_not_in_seq, refs_not_supported,
    2560         1520 :                                     false, fully_visited);
    2561         7658 :           if (bitmap_bit_p (fully_visited,
    2562         3829 :                             SSA_NAME_VERSION (gimple_phi_result (phi))))
    2563              :             return 1;
    2564         3725 :           auto_vec<seq_entry> first_edge_seq;
    2565         3725 :           auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
    2566         3725 :           int eret;
    2567         3725 :           bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
    2568         3725 :           eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
    2569              :                                   gimple_phi_arg_def (phi, 0),
    2570              :                                   first_edge_seq,
    2571              :                                   tem_refs_not_in_seq, refs_not_supported,
    2572              :                                   true, fully_visited);
    2573         3725 :           if (eret != 1)
    2574              :             return -1;
    2575              :           /* Simplify our lives by pruning the sequence of !sm_ord.  */
    2576         3535 :           while (!first_edge_seq.is_empty ()
    2577         8449 :                  && first_edge_seq.last ().second != sm_ord)
    2578         1189 :             first_edge_seq.pop ();
    2579         7577 :           for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
    2580              :             {
    2581         5825 :               tree vuse = gimple_phi_arg_def (phi, i);
    2582         5825 :               edge e = gimple_phi_arg_edge (phi, i);
    2583         5825 :               auto_vec<seq_entry> edge_seq;
    2584         5825 :               bitmap_and_compl (tem_refs_not_in_seq,
    2585              :                                 refs_not_in_seq, refs_not_supported);
    2586              :               /* If we've marked all refs we search for as unsupported
    2587              :                  we can stop processing and use the sequence as before
    2588              :                  the PHI.  */
    2589         5825 :               if (bitmap_empty_p (tem_refs_not_in_seq))
    2590              :                 return 1;
    2591         3852 :               eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
    2592              :                                       tem_refs_not_in_seq, refs_not_supported,
    2593              :                                       true, fully_visited);
    2594         3852 :               if (eret != 1)
    2595              :                 return -1;
    2596              :               /* Simplify our lives by pruning the sequence of !sm_ord.  */
    2597         3360 :               while (!edge_seq.is_empty ()
    2598         8425 :                      && edge_seq.last ().second != sm_ord)
    2599         1213 :                 edge_seq.pop ();
    2600         7704 :               unsigned min_len = MIN(first_edge_seq.length (),
    2601              :                                      edge_seq.length ());
    2602              :               /* Incrementally merge seqs into first_edge_seq.  */
    2603         3852 :               int first_uneq = -1;
    2604         3852 :               auto_vec<seq_entry, 2> extra_refs;
    2605         6050 :               for (unsigned int i = 0; i < min_len; ++i)
    2606              :                 {
    2607              :                   /* ???  We can more intelligently merge when we face different
    2608              :                      order by additional sinking operations in one sequence.
    2609              :                      For now we simply mark them as to be processed by the
    2610              :                      not order-preserving SM code.  */
    2611         2198 :                   if (first_edge_seq[i].first != edge_seq[i].first)
    2612              :                     {
    2613           35 :                       if (first_edge_seq[i].second == sm_ord)
    2614           22 :                         bitmap_set_bit (refs_not_supported,
    2615           22 :                                         first_edge_seq[i].first);
    2616           35 :                       if (edge_seq[i].second == sm_ord)
    2617           24 :                         bitmap_set_bit (refs_not_supported, edge_seq[i].first);
    2618           35 :                       first_edge_seq[i].second = sm_other;
    2619           35 :                       first_edge_seq[i].from = NULL_TREE;
    2620              :                       /* Record the dropped refs for later processing.  */
    2621           35 :                       if (first_uneq == -1)
    2622           33 :                         first_uneq = i;
    2623           70 :                       extra_refs.safe_push (seq_entry (edge_seq[i].first,
    2624           35 :                                                        sm_other, NULL_TREE));
    2625              :                     }
    2626              :                   /* sm_other prevails.  */
    2627         2163 :                   else if (first_edge_seq[i].second != edge_seq[i].second)
    2628              :                     {
    2629              :                       /* Make sure the ref is marked as not supported.  */
    2630            0 :                       bitmap_set_bit (refs_not_supported,
    2631            0 :                                       first_edge_seq[i].first);
    2632            0 :                       first_edge_seq[i].second = sm_other;
    2633            0 :                       first_edge_seq[i].from = NULL_TREE;
    2634              :                     }
    2635         2163 :                   else if (first_edge_seq[i].second == sm_other
    2636            7 :                            && first_edge_seq[i].from != NULL_TREE
    2637         2170 :                            && (edge_seq[i].from == NULL_TREE
    2638            7 :                                || !operand_equal_p (first_edge_seq[i].from,
    2639            7 :                                                     edge_seq[i].from, 0)))
    2640            7 :                     first_edge_seq[i].from = NULL_TREE;
    2641              :                 }
    2642              :               /* Any excess elements become sm_other since they are now
    2643              :                  coonditionally executed.  */
    2644        10590 :               if (first_edge_seq.length () > edge_seq.length ())
    2645              :                 {
    2646         6744 :                   for (unsigned i = edge_seq.length ();
    2647         8767 :                        i < first_edge_seq.length (); ++i)
    2648              :                     {
    2649         6744 :                       if (first_edge_seq[i].second == sm_ord)
    2650         3585 :                         bitmap_set_bit (refs_not_supported,
    2651         3585 :                                         first_edge_seq[i].first);
    2652         6744 :                       first_edge_seq[i].second = sm_other;
    2653              :                     }
    2654              :                 }
    2655         1829 :               else if (edge_seq.length () > first_edge_seq.length ())
    2656              :                 {
    2657            8 :                   if (first_uneq == -1)
    2658            0 :                     first_uneq = first_edge_seq.length ();
    2659            8 :                   for (unsigned i = first_edge_seq.length ();
    2660           26 :                        i < edge_seq.length (); ++i)
    2661              :                     {
    2662           18 :                       if (edge_seq[i].second == sm_ord)
    2663            8 :                         bitmap_set_bit (refs_not_supported, edge_seq[i].first);
    2664           36 :                       extra_refs.safe_push (seq_entry (edge_seq[i].first,
    2665           18 :                                                        sm_other, NULL_TREE));
    2666              :                     }
    2667              :                 }
    2668              :               /* Put unmerged refs at first_uneq to force dependence checking
    2669              :                  on them.  */
    2670         3852 :               if (first_uneq != -1)
    2671              :                 {
    2672              :                   /* Missing ordered_splice_at.  */
    2673           66 :                   if ((unsigned)first_uneq == first_edge_seq.length ())
    2674            0 :                     first_edge_seq.safe_splice (extra_refs);
    2675              :                   else
    2676              :                     {
    2677           33 :                       unsigned fes_length = first_edge_seq.length ();
    2678           33 :                       first_edge_seq.safe_grow (fes_length
    2679           33 :                                                 + extra_refs.length ());
    2680           33 :                       memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
    2681           33 :                                &first_edge_seq[first_uneq],
    2682           33 :                                (fes_length - first_uneq) * sizeof (seq_entry));
    2683           66 :                       memcpy (&first_edge_seq[first_uneq],
    2684           33 :                               extra_refs.address (),
    2685           33 :                               extra_refs.length () * sizeof (seq_entry));
    2686              :                     }
    2687              :                 }
    2688         5825 :             }
    2689              :           /* Use the sequence from the first edge and push SMs down.  */
    2690         4788 :           for (unsigned i = 0; i < first_edge_seq.length (); ++i)
    2691              :             {
    2692         3036 :               unsigned id = first_edge_seq[i].first;
    2693         3036 :               seq.safe_push (first_edge_seq[i]);
    2694         3036 :               unsigned new_idx;
    2695         3036 :               if ((first_edge_seq[i].second == sm_ord
    2696         2386 :                    || (first_edge_seq[i].second == sm_other
    2697         2386 :                        && first_edge_seq[i].from != NULL_TREE))
    2698         4064 :                   && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
    2699              :                 {
    2700          125 :                   if (first_edge_seq[i].second == sm_ord)
    2701            0 :                     bitmap_set_bit (refs_not_supported, id);
    2702              :                   /* Mark it sm_other.  */
    2703          125 :                   seq[new_idx].second = sm_other;
    2704          125 :                   seq[new_idx].from = NULL_TREE;
    2705              :                 }
    2706              :             }
    2707         1752 :           bitmap_set_bit (fully_visited,
    2708         1752 :                           SSA_NAME_VERSION (gimple_phi_result (phi)));
    2709         1752 :           return 1;
    2710         3725 :         }
    2711        43020 :       lim_aux_data *data = get_lim_data (def);
    2712        43020 :       im_mem_ref *ref = memory_accesses.refs_list[data->ref];
    2713        43020 :       if (data->ref == UNANALYZABLE_MEM_ID)
    2714              :         return -1;
    2715              :       /* Stop at memory references which we can't move.  */
    2716        43020 :       else if ((ref->mem.ref == error_mark_node
    2717              :                 /* We can move end-of-storage/object down.  */
    2718          427 :                 && !gimple_clobber_p (ref->accesses_in_loop[0].stmt,
    2719              :                                       CLOBBER_STORAGE_END)
    2720          297 :                 && !gimple_clobber_p (ref->accesses_in_loop[0].stmt,
    2721              :                                       CLOBBER_OBJECT_END))
    2722        43334 :                || TREE_THIS_VOLATILE (ref->mem.ref))
    2723              :         {
    2724              :           /* Mark refs_not_in_seq as unsupported.  */
    2725          145 :           bitmap_ior_into (refs_not_supported, refs_not_in_seq);
    2726          145 :           return 1;
    2727              :         }
    2728              :       /* One of the stores we want to apply SM to and we've not yet seen.  */
    2729        42875 :       else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
    2730              :         {
    2731        33425 :           seq.safe_push (seq_entry (data->ref, sm_ord));
    2732              : 
    2733              :           /* 1) push it down the queue until a SMed
    2734              :              and not ignored ref is reached, skipping all not SMed refs
    2735              :              and ignored refs via non-TBAA disambiguation.  */
    2736        33425 :           unsigned new_idx;
    2737        66850 :           if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
    2738              :               /* If that fails but we did not fork yet continue, we'll see
    2739              :                  to re-materialize all of the stores in the sequence then.
    2740              :                  Further stores will only be pushed up to this one.  */
    2741        33425 :               && forked)
    2742              :             {
    2743            0 :               bitmap_set_bit (refs_not_supported, data->ref);
    2744              :               /* Mark it sm_other.  */
    2745            0 :               seq[new_idx].second = sm_other;
    2746              :             }
    2747              : 
    2748              :           /* 2) check whether we've seen all refs we want to SM and if so
    2749              :              declare success for the active exit  */
    2750        33425 :           if (bitmap_empty_p (refs_not_in_seq))
    2751        18784 :             return 1;
    2752              :         }
    2753              :       else
    2754              :         /* Another store not part of the final sequence.  Simply push it.  */
    2755         9450 :         seq.safe_push (seq_entry (data->ref, sm_other,
    2756         9450 :                                   gimple_assign_rhs1 (def)));
    2757              : 
    2758        79263 :       vdef = gimple_vuse (def);
    2759              :     }
    2760              :   while (1);
    2761              : }
    2762              : 
    2763              : /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
    2764              :    edges of the LOOP.  */
    2765              : 
    2766              : static void
    2767        21217 : hoist_memory_references (class loop *loop, bitmap mem_refs,
    2768              :                          const vec<edge> &exits)
    2769              : {
    2770        21217 :   im_mem_ref *ref;
    2771        21217 :   unsigned  i;
    2772        21217 :   bitmap_iterator bi;
    2773              : 
    2774              :   /* There's a special case we can use ordered re-materialization for
    2775              :      conditionally excuted stores which is when all stores in the loop
    2776              :      happen in the same basic-block.  In that case we know we'll reach
    2777              :      all stores and thus can simply process that BB and emit a single
    2778              :      conditional block of ordered materializations.  See PR102436.  */
    2779        21217 :   basic_block single_store_bb = NULL;
    2780        31296 :   EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
    2781              :                             0, i, bi)
    2782              :     {
    2783        29103 :       bool fail = false;
    2784        29103 :       ref = memory_accesses.refs_list[i];
    2785       133500 :       for (auto loc : ref->accesses_in_loop)
    2786       143320 :         if (!gimple_vdef (loc.stmt))
    2787              :           ;
    2788        31914 :         else if (!single_store_bb)
    2789              :           {
    2790        21217 :             single_store_bb = gimple_bb (loc.stmt);
    2791        21217 :             bool conditional = false;
    2792        76234 :             for (edge e : exits)
    2793        22806 :               if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
    2794              :                 {
    2795              :                   /* Conditional as seen from e.  */
    2796              :                   conditional = true;
    2797              :                   break;
    2798              :                 }
    2799        21217 :             if (!conditional)
    2800              :               {
    2801              :                 fail = true;
    2802              :                 break;
    2803              :               }
    2804              :           }
    2805        10697 :         else if (single_store_bb != gimple_bb (loc.stmt))
    2806              :           {
    2807              :             fail = true;
    2808              :             break;
    2809              :           }
    2810        29103 :       if (fail)
    2811              :         {
    2812              :           single_store_bb = NULL;
    2813              :           break;
    2814              :         }
    2815              :     }
    2816        21217 :   if (single_store_bb)
    2817              :     {
    2818              :       /* Analyze the single block with stores.  */
    2819         2193 :       auto_bitmap fully_visited;
    2820         2193 :       auto_bitmap refs_not_supported;
    2821         2193 :       auto_bitmap refs_not_in_seq;
    2822         2193 :       auto_vec<seq_entry> seq;
    2823         2193 :       bitmap_copy (refs_not_in_seq, mem_refs);
    2824         2193 :       int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
    2825              :                                  seq, refs_not_in_seq, refs_not_supported,
    2826              :                                  false, fully_visited);
    2827         2193 :       if (res != 1)
    2828              :         {
    2829              :           /* Unhandled refs can still fail this.  */
    2830            0 :           bitmap_clear (mem_refs);
    2831            0 :           return;
    2832              :         }
    2833              : 
    2834              :       /* We cannot handle sm_other since we neither remember the
    2835              :          stored location nor the value at the point we execute them.  */
    2836         5305 :       for (unsigned i = 0; i < seq.length (); ++i)
    2837              :         {
    2838         3112 :           unsigned new_i;
    2839         3112 :           if (seq[i].second == sm_other
    2840         3112 :               && seq[i].from != NULL_TREE)
    2841          438 :             seq[i].from = NULL_TREE;
    2842         2674 :           else if ((seq[i].second == sm_ord
    2843            0 :                     || (seq[i].second == sm_other
    2844            0 :                         && seq[i].from != NULL_TREE))
    2845         2674 :                    && !sm_seq_push_down (seq, i, &new_i))
    2846              :             {
    2847           63 :               bitmap_set_bit (refs_not_supported, seq[new_i].first);
    2848           63 :               seq[new_i].second = sm_other;
    2849           63 :               seq[new_i].from = NULL_TREE;
    2850              :             }
    2851              :         }
    2852         2193 :       bitmap_and_compl_into (mem_refs, refs_not_supported);
    2853         2193 :       if (bitmap_empty_p (mem_refs))
    2854              :         return;
    2855              : 
    2856              :       /* Prune seq.  */
    2857         2538 :       while (seq.last ().second == sm_other
    2858         2538 :              && seq.last ().from == NULL_TREE)
    2859          396 :         seq.pop ();
    2860              : 
    2861         2142 :       hash_map<im_mem_ref *, sm_aux *> aux_map;
    2862              : 
    2863              :       /* Execute SM but delay the store materialization for ordered
    2864              :          sequences on exit.  Remember a created flag var and make
    2865              :          sure to re-use it.  */
    2866         2142 :       sm_aux *flag_var_aux = nullptr;
    2867         4753 :       EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
    2868              :         {
    2869         2611 :           ref = memory_accesses.refs_list[i];
    2870         2611 :           sm_aux *aux = execute_sm (loop, ref, aux_map, true,
    2871              :                                     flag_var_aux != nullptr);
    2872         2611 :           if (aux->store_flag)
    2873         1497 :             flag_var_aux = aux;
    2874              :         }
    2875              : 
    2876              :       /* Materialize ordered store sequences on exits.  */
    2877         2142 :       edge e;
    2878         2142 :       auto_bitmap clobbers_to_prune;
    2879         5865 :       FOR_EACH_VEC_ELT (exits, i, e)
    2880              :         {
    2881         3723 :           edge append_cond_position = NULL;
    2882         3723 :           edge last_cond_fallthru = NULL;
    2883         3723 :           edge insert_e = e;
    2884              :           /* Construct the single flag variable control flow and insert
    2885              :              the ordered seq of stores in the then block.  With
    2886              :              -fstore-data-races we can do the stores unconditionally.  */
    2887         3723 :           if (flag_var_aux)
    2888         2397 :             insert_e
    2889              :               = single_pred_edge
    2890         2397 :                   (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
    2891              :                                           flag_var_aux->store_flag,
    2892              :                                           loop_preheader_edge (loop),
    2893              :                                           &flag_var_aux->flag_bbs,
    2894              :                                           append_cond_position,
    2895              :                                           last_cond_fallthru));
    2896         3723 :           execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
    2897              :                            append_cond_position, last_cond_fallthru,
    2898              :                            clobbers_to_prune);
    2899         3723 :           gsi_commit_one_edge_insert (insert_e, NULL);
    2900              :         }
    2901              : 
    2902              :       /* Remove clobbers inside the loop we re-materialized on exits.  */
    2903         2142 :       EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
    2904              :         {
    2905            0 :           gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
    2906            0 :           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
    2907            0 :           unlink_stmt_vdef (stmt);
    2908            0 :           release_defs (stmt);
    2909            0 :           gimple_set_vdef (stmt, NULL_TREE);
    2910            0 :           gsi_remove (&gsi, true);
    2911              :         }
    2912              : 
    2913         4753 :       for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
    2914         7364 :            iter != aux_map.end (); ++iter)
    2915         5222 :         delete (*iter).second;
    2916              : 
    2917         2142 :       return;
    2918         4335 :     }
    2919              : 
    2920              :   /* To address PR57359 before actually applying store-motion check
    2921              :      the candidates found for validity with regards to reordering
    2922              :      relative to other stores which we until here disambiguated using
    2923              :      TBAA which isn't valid.
    2924              :      What matters is the order of the last stores to the mem_refs
    2925              :      with respect to the other stores of the loop at the point of the
    2926              :      loop exits.  */
    2927              : 
    2928              :   /* For each exit compute the store order, pruning from mem_refs
    2929              :      on the fly.  */
    2930              :   /* The complexity of this is at least
    2931              :      O(number of exits * number of SM refs) but more approaching
    2932              :      O(number of exits * number of SM refs * number of stores).  */
    2933              :   /* ???  Somehow do this in a single sweep over the loop body.  */
    2934        19024 :   auto_vec<std::pair<edge, vec<seq_entry> > > sms;
    2935        19024 :   auto_bitmap refs_not_supported (&lim_bitmap_obstack);
    2936        19024 :   edge e;
    2937        38815 :   FOR_EACH_VEC_ELT (exits, i, e)
    2938              :     {
    2939        22031 :       vec<seq_entry> seq;
    2940        22031 :       seq.create (4);
    2941        22031 :       auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
    2942        22031 :       bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
    2943        22031 :       if (bitmap_empty_p (refs_not_in_seq))
    2944              :         {
    2945         1448 :           seq.release ();
    2946         1448 :           break;
    2947              :         }
    2948        20583 :       auto_bitmap fully_visited;
    2949        20583 :       int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
    2950              :                                  seq, refs_not_in_seq,
    2951              :                                  refs_not_supported, false,
    2952              :                                  fully_visited);
    2953        20583 :       if (res != 1)
    2954              :         {
    2955          792 :           bitmap_copy (refs_not_supported, mem_refs);
    2956          792 :           seq.release ();
    2957          792 :           break;
    2958              :         }
    2959        19791 :       sms.safe_push (std::make_pair (e, seq));
    2960        22823 :     }
    2961              : 
    2962              :   /* Prune pruned mem_refs from earlier processed exits.  */
    2963        19024 :   bool changed = !bitmap_empty_p (refs_not_supported);
    2964        19024 :   while (changed)
    2965              :     {
    2966         6044 :       changed = false;
    2967         6044 :       std::pair<edge, vec<seq_entry> > *seq;
    2968        39778 :       FOR_EACH_VEC_ELT (sms, i, seq)
    2969              :         {
    2970              :           bool need_to_push = false;
    2971        13650 :           for (unsigned i = 0; i < seq->second.length (); ++i)
    2972              :             {
    2973         6295 :               sm_kind kind = seq->second[i].second;
    2974         6295 :               if (kind == sm_other && seq->second[i].from == NULL_TREE)
    2975              :                 break;
    2976         5209 :               unsigned id = seq->second[i].first;
    2977         5209 :               unsigned new_idx;
    2978         5209 :               if (kind == sm_ord
    2979         5209 :                   && bitmap_bit_p (refs_not_supported, id))
    2980              :                 {
    2981         2546 :                   seq->second[i].second = sm_other;
    2982         2546 :                   gcc_assert (seq->second[i].from == NULL_TREE);
    2983              :                   need_to_push = true;
    2984              :                 }
    2985         2663 :               else if (need_to_push
    2986         2663 :                        && !sm_seq_push_down (seq->second, i, &new_idx))
    2987              :                 {
    2988              :                   /* We need to push down both sm_ord and sm_other
    2989              :                      but for the latter we need to disqualify all
    2990              :                      following refs.  */
    2991          113 :                   if (kind == sm_ord)
    2992              :                     {
    2993            7 :                       if (bitmap_set_bit (refs_not_supported, id))
    2994            7 :                         changed = true;
    2995            7 :                       seq->second[new_idx].second = sm_other;
    2996              :                     }
    2997              :                   else
    2998              :                     {
    2999          411 :                       for (unsigned j = seq->second.length () - 1;
    3000          305 :                            j > new_idx; --j)
    3001          199 :                         if (seq->second[j].second == sm_ord
    3002          321 :                             && bitmap_set_bit (refs_not_supported,
    3003          122 :                                                seq->second[j].first))
    3004              :                           changed = true;
    3005          106 :                       seq->second.truncate (new_idx);
    3006          106 :                       break;
    3007              :                     }
    3008              :                 }
    3009              :             }
    3010              :         }
    3011              :     }
    3012        19024 :   std::pair<edge, vec<seq_entry> > *seq;
    3013        58606 :   FOR_EACH_VEC_ELT (sms, i, seq)
    3014              :     {
    3015              :       /* Prune sm_other from the end.  */
    3016        63364 :       while (!seq->second.is_empty ()
    3017        43573 :              && seq->second.last ().second == sm_other)
    3018         5174 :         seq->second.pop ();
    3019              :       /* Prune duplicates from the start.  */
    3020        19791 :       auto_bitmap seen (&lim_bitmap_obstack);
    3021        19791 :       unsigned j, k;
    3022        47219 :       for (j = k = 0; j < seq->second.length (); ++j)
    3023        27428 :         if (bitmap_set_bit (seen, seq->second[j].first))
    3024              :           {
    3025        27319 :             if (k != j)
    3026           77 :               seq->second[k] = seq->second[j];
    3027        27319 :             ++k;
    3028              :           }
    3029        19791 :       seq->second.truncate (k);
    3030              :       /* And verify.  */
    3031        19791 :       seq_entry *e;
    3032        86692 :       FOR_EACH_VEC_ELT (seq->second, j, e)
    3033        27319 :         gcc_assert (e->second == sm_ord
    3034              :                     || (e->second == sm_other && e->from != NULL_TREE));
    3035        19791 :     }
    3036              : 
    3037              :   /* Verify dependence for refs we cannot handle with the order preserving
    3038              :      code (refs_not_supported) or prune them from mem_refs.  */
    3039        19024 :   auto_vec<seq_entry> unord_refs;
    3040        34656 :   EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
    3041              :     {
    3042        15632 :       ref = memory_accesses.refs_list[i];
    3043        15632 :       if (!ref_indep_loop_p (loop, ref, sm_waw))
    3044         6566 :         bitmap_clear_bit (mem_refs, i);
    3045              :       /* We've now verified store order for ref with respect to all other
    3046              :          stores in the loop does not matter.  */
    3047              :       else
    3048         9066 :         unord_refs.safe_push (seq_entry (i, sm_unord));
    3049              :     }
    3050              : 
    3051        19024 :   hash_map<im_mem_ref *, sm_aux *> aux_map;
    3052              : 
    3053              :   /* Execute SM but delay the store materialization for ordered
    3054              :      sequences on exit.  */
    3055        51113 :   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
    3056              :     {
    3057        32089 :       ref = memory_accesses.refs_list[i];
    3058        32089 :       execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
    3059              :                   false);
    3060              :     }
    3061              : 
    3062              :   /* Materialize ordered store sequences on exits.  */
    3063        19024 :   auto_bitmap clobbers_to_prune;
    3064        42582 :   FOR_EACH_VEC_ELT (exits, i, e)
    3065              :     {
    3066        23558 :       edge append_cond_position = NULL;
    3067        23558 :       edge last_cond_fallthru = NULL;
    3068        23558 :       if (i < sms.length ())
    3069              :         {
    3070        19791 :           gcc_assert (sms[i].first == e);
    3071        19791 :           execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
    3072              :                            append_cond_position, last_cond_fallthru,
    3073              :                            clobbers_to_prune);
    3074        19791 :           sms[i].second.release ();
    3075              :         }
    3076        23558 :       if (!unord_refs.is_empty ())
    3077         9403 :         execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
    3078              :                          append_cond_position, last_cond_fallthru,
    3079              :                          clobbers_to_prune);
    3080              :       /* Commit edge inserts here to preserve the order of stores
    3081              :          when an exit exits multiple loops.  */
    3082        23558 :       gsi_commit_one_edge_insert (e, NULL);
    3083              :     }
    3084              : 
    3085              :   /* Remove clobbers inside the loop we re-materialized on exits.  */
    3086        19201 :   EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
    3087              :     {
    3088          177 :       gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
    3089          177 :       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
    3090          177 :       unlink_stmt_vdef (stmt);
    3091          177 :       release_defs (stmt);
    3092          177 :       gimple_set_vdef (stmt, NULL_TREE);
    3093          177 :       gsi_remove (&gsi, true);
    3094              :     }
    3095              : 
    3096        51113 :   for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
    3097       102226 :        iter != aux_map.end (); ++iter)
    3098        64178 :     delete (*iter).second;
    3099        19024 : }
    3100              : 
    3101              : class ref_always_accessed
    3102              : {
    3103              : public:
    3104        80082 :   ref_always_accessed (class loop *loop_, bool stored_p_)
    3105        80082 :       : loop (loop_), stored_p (stored_p_) {}
    3106              :   bool operator () (mem_ref_loc *loc);
    3107              :   class loop *loop;
    3108              :   bool stored_p;
    3109              : };
    3110              : 
    3111              : bool
    3112       153196 : ref_always_accessed::operator () (mem_ref_loc *loc)
    3113              : {
    3114       153196 :   class loop *must_exec;
    3115              : 
    3116       153196 :   struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
    3117       153196 :   if (!lim_data)
    3118              :     return false;
    3119              : 
    3120              :   /* If we require an always executed store make sure the statement
    3121              :      is a store.  */
    3122       153196 :   if (stored_p)
    3123              :     {
    3124       153196 :       tree lhs = gimple_get_lhs (loc->stmt);
    3125       153196 :       if (!lhs
    3126       153196 :           || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
    3127              :         return false;
    3128              :     }
    3129              : 
    3130        95771 :   must_exec = lim_data->always_executed_in;
    3131        95771 :   if (!must_exec)
    3132              :     return false;
    3133              : 
    3134        36092 :   if (must_exec == loop
    3135        36092 :       || flow_loop_nested_p (must_exec, loop))
    3136        32803 :     return true;
    3137              : 
    3138              :   return false;
    3139              : }
    3140              : 
    3141              : /* Returns true if REF is always accessed in LOOP.  If STORED_P is true
    3142              :    make sure REF is always stored to in LOOP.  */
    3143              : 
    3144              : static bool
    3145        80082 : ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
    3146              : {
    3147        34700 :   return for_all_locs_in_loop (loop, ref,
    3148        80082 :                                ref_always_accessed (loop, stored_p));
    3149              : }
    3150              : 
    3151              : /* Returns true if LOAD_REF and STORE_REF form a "self write" pattern
    3152              :    where the stored value comes from the loaded value via SSA.
    3153              :    Example: a[i] = a[0] is safe to hoist a[0] even when i==0.  */
    3154              : 
    3155              : static bool
    3156       318134 : is_self_write (im_mem_ref *load_ref, im_mem_ref *store_ref)
    3157              : {
    3158              :   /* Only handle the simple case with a single access per ref.
    3159              :      Bail out on multiple accesses to be conservative.  */
    3160       318134 :   if (load_ref->accesses_in_loop.length () != 1
    3161       407565 :       || store_ref->accesses_in_loop.length () != 1)
    3162              :     return false;
    3163              : 
    3164        89448 :   gimple *load_stmt = load_ref->accesses_in_loop[0].stmt;
    3165        89448 :   gimple *store_stmt = store_ref->accesses_in_loop[0].stmt;
    3166              : 
    3167        89448 :   if (!is_gimple_assign (load_stmt) || !is_gimple_assign (store_stmt))
    3168              :     return false;
    3169              : 
    3170        89448 :   tree loaded_val = gimple_assign_lhs (load_stmt);
    3171        89448 :   tree stored_val = gimple_assign_rhs1 (store_stmt);
    3172              : 
    3173        89448 :   if (TREE_CODE (loaded_val) != SSA_NAME || TREE_CODE (stored_val) != SSA_NAME)
    3174              :     return false;
    3175              : 
    3176              :   /* Self write: stored value is the loaded value.  */
    3177        63487 :   if (stored_val != loaded_val)
    3178              :     return false;
    3179              : 
    3180              : 
    3181              :   /* TODO: Try to factor this out with mem_ref_hasher::equal
    3182              :      into im_compare_access_position_and_size
    3183              :      or a similar helper to centralize this delicate handling
    3184              :      complete for MEM_REF offsets and base pointer equality.
    3185              : 
    3186              :      TODO: Also ensure max_size_known_p agrees or resort to
    3187              :      alignment considerations to rule out partial overlaps.
    3188              : 
    3189              :      See:
    3190              :      https://gcc.gnu.org/pipermail/gcc-patches/2025-December/704155.html
    3191              :      For more context on TODOs above.  */
    3192              : 
    3193              :   /* They may alias.  Verify exact same location.  */
    3194         1632 :   return (operand_equal_p (load_ref->mem.base, store_ref->mem.base, 0)
    3195           46 :           && known_eq (load_ref->mem.size, store_ref->mem.size)
    3196         1678 :           && known_eq (load_ref->mem.offset, store_ref->mem.offset));
    3197              : 
    3198              : }
    3199              : 
    3200              : /* Returns true if REF1 and REF2 are independent.  */
    3201              : 
    3202              : static bool
    3203       603622 : refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
    3204              : {
    3205       603622 :   if (ref1 == ref2)
    3206              :     return true;
    3207              : 
    3208       541424 :   if (dump_file && (dump_flags & TDF_DETAILS))
    3209         3040 :     fprintf (dump_file, "Querying dependency of refs %u and %u: ",
    3210         3040 :              ref1->id, ref2->id);
    3211              : 
    3212       541424 :   if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
    3213              :     {
    3214        64771 :       if (dump_file && (dump_flags & TDF_DETAILS))
    3215           99 :         fprintf (dump_file, "dependent.\n");
    3216        64771 :       return false;
    3217              :     }
    3218              :   else
    3219              :     {
    3220       476653 :       if (dump_file && (dump_flags & TDF_DETAILS))
    3221         2941 :         fprintf (dump_file, "independent.\n");
    3222       476653 :       return true;
    3223              :     }
    3224              : }
    3225              : 
    3226              : /* Returns true if REF is independent on all other accessess in LOOP.
    3227              :    KIND specifies the kind of dependence to consider.
    3228              :      lim_raw assumes REF is not stored in LOOP and disambiguates RAW
    3229              :              dependences so if true REF can be hoisted out of LOOP
    3230              :      sm_war disambiguates a store REF against all other loads to see
    3231              :             whether the store can be sunk across loads out of LOOP
    3232              :      sm_waw disambiguates a store REF against all other stores to see
    3233              :             whether the store can be sunk across stores out of LOOP.  */
    3234              : 
    3235              : static bool
    3236      2036576 : ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
    3237              : {
    3238      2036576 :   bool indep_p = true;
    3239      2036576 :   bitmap refs_to_check;
    3240              : 
    3241      2036576 :   if (kind == sm_war)
    3242       971381 :     refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
    3243              :   else
    3244      1065195 :     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
    3245              : 
    3246      2036576 :   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
    3247      2036576 :       || ref->mem.ref == error_mark_node)
    3248              :     indep_p = false;
    3249              :   else
    3250              :     {
    3251              :       /* tri-state, { unknown, independent, dependent }  */
    3252       641454 :       dep_state state = query_loop_dependence (loop, ref, kind);
    3253       641454 :       if (state != dep_unknown)
    3254            0 :         return state == dep_independent ? true : false;
    3255              : 
    3256       641454 :       class loop *inner = loop->inner;
    3257       791169 :       while (inner)
    3258              :         {
    3259       431996 :           if (!ref_indep_loop_p (inner, ref, kind))
    3260              :             {
    3261              :               indep_p = false;
    3262              :               break;
    3263              :             }
    3264       149715 :           inner = inner->next;
    3265              :         }
    3266              : 
    3267       641454 :       if (indep_p)
    3268              :         {
    3269       359173 :           unsigned i;
    3270       359173 :           bitmap_iterator bi;
    3271       919051 :           EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
    3272              :             {
    3273       628787 :               im_mem_ref *aref = memory_accesses.refs_list[i];
    3274       628787 :               if (aref->mem.ref == error_mark_node)
    3275              :                 {
    3276        25988 :                   gimple *stmt = aref->accesses_in_loop[0].stmt;
    3277        25988 :                   if ((kind == sm_war
    3278         9305 :                        && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
    3279              :                                                     kind != sm_waw))
    3280        34642 :                       || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
    3281              :                                                    kind != sm_waw))
    3282              :                     {
    3283              :                       indep_p = false;
    3284              :                       break;
    3285              :                     }
    3286              :                 }
    3287              :               /* For hoisting loads (lim_raw), allow "self write": the store
    3288              :                  writes back the loaded value.  Example: a[i] = a[0]
    3289              :                  is safe even when i==0 causes aliasing.  */
    3290       602799 :               else if (kind == lim_raw
    3291       318134 :                        && ref->loaded && aref->stored
    3292       920933 :                        && is_self_write (ref, aref))
    3293              :                 {
    3294           17 :                   if (dump_file && (dump_flags & TDF_DETAILS))
    3295            2 :                     fprintf (dump_file,
    3296              :                              "Dependency of refs %u and %u: "
    3297              :                              "independent (self write).\n",
    3298            2 :                              ref->id, aref->id);
    3299              :                 }
    3300              : 
    3301       602782 :               else if (!refs_independent_p (ref, aref, kind != sm_waw))
    3302              :                 {
    3303              :                   indep_p = false;
    3304              :                   break;
    3305              :                 }
    3306              :             }
    3307              :         }
    3308              :     }
    3309              : 
    3310      2036576 :   if (dump_file && (dump_flags & TDF_DETAILS))
    3311          755 :     fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
    3312              :              kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
    3313          367 :              ref->id, loop->num, indep_p ? "independent" : "dependent");
    3314              : 
    3315              :   /* Record the computed result in the cache.  */
    3316      2036576 :   record_loop_dependence (loop, ref, kind,
    3317              :                           indep_p ? dep_independent : dep_dependent);
    3318              : 
    3319      2036576 :   return indep_p;
    3320              : }
    3321              : 
    3322              : class ref_in_loop_hot_body
    3323              : {
    3324              : public:
    3325        42274 :   ref_in_loop_hot_body (class loop *loop_) : l (loop_) {}
    3326              :   bool operator () (mem_ref_loc *loc);
    3327              :   class loop *l;
    3328              : };
    3329              : 
    3330              : /* Check the coldest loop between loop L and innermost loop.  If there is one
    3331              :    cold loop between L and INNER_LOOP, store motion can be performed, otherwise
    3332              :    no cold loop means no store motion.  get_coldest_out_loop also handles cases
    3333              :    when l is inner_loop.  */
    3334              : bool
    3335        43061 : ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
    3336              : {
    3337        43061 :   basic_block curr_bb = gimple_bb (loc->stmt);
    3338        43061 :   class loop *inner_loop = curr_bb->loop_father;
    3339        43061 :   return get_coldest_out_loop (l, inner_loop, curr_bb);
    3340              : }
    3341              : 
    3342              : 
    3343              : /* Returns true if we can perform store motion of REF from LOOP.  */
    3344              : 
    3345              : static bool
    3346      2871222 : can_sm_ref_p (class loop *loop, im_mem_ref *ref)
    3347              : {
    3348      2871222 :   tree base;
    3349              : 
    3350              :   /* Can't hoist unanalyzable refs.  */
    3351      2871222 :   if (!MEM_ANALYZABLE (ref))
    3352              :     return false;
    3353              : 
    3354              :   /* Can't hoist/sink aggregate copies.  */
    3355      2514022 :   if (ref->mem.ref == error_mark_node)
    3356              :     return false;
    3357              : 
    3358              :   /* It should be movable.  */
    3359      2089212 :   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
    3360      2088986 :       || TREE_THIS_VOLATILE (ref->mem.ref)
    3361      4161377 :       || !for_each_index (&ref->mem.ref, may_move_till, loop))
    3362      1142478 :     return false;
    3363              : 
    3364              :   /* If it can throw fail, we do not properly update EH info.  */
    3365       946734 :   if (tree_could_throw_p (ref->mem.ref))
    3366              :     return false;
    3367              : 
    3368              :   /* If it can trap, it must be always executed in LOOP.
    3369              :      Readonly memory locations may trap when storing to them, but
    3370              :      tree_could_trap_p is a predicate for rvalues, so check that
    3371              :      explicitly.  */
    3372       923433 :   base = get_base_address (ref->mem.ref);
    3373       923433 :   if ((tree_could_trap_p (ref->mem.ref)
    3374       878257 :        || (DECL_P (base) && TREE_READONLY (base))
    3375       878059 :        || TREE_CODE (base) == STRING_CST)
    3376              :       /* ???  We can at least use false here, allowing loads?  We
    3377              :          are forcing conditional stores if the ref is not always
    3378              :          stored to later anyway.  So this would only guard
    3379              :          the load we need to emit.  Thus when the ref is not
    3380              :          loaded we can elide this completely?  */
    3381       968815 :       && !ref_always_accessed_p (loop, ref, true))
    3382              :     return false;
    3383              : 
    3384              :   /* Verify all loads of ref can be hoisted.  */
    3385       887909 :   if (ref->loaded
    3386       101217 :       && bitmap_bit_p (ref->loaded, loop->num)
    3387       983950 :       && !ref_indep_loop_p (loop, ref, lim_raw))
    3388              :     return false;
    3389              : 
    3390              :   /* Verify the candidate can be disambiguated against all loads,
    3391              :      that is, we can elide all in-loop stores.  Disambiguation
    3392              :      against stores is done later when we cannot guarantee preserving
    3393              :      the order of stores.  */
    3394       816273 :   if (!ref_indep_loop_p (loop, ref, sm_war))
    3395              :     return false;
    3396              : 
    3397              :   /* Verify whether the candidate is hot for LOOP.  Only do store motion if the
    3398              :     candidate's profile count is hot.  Statement in cold BB shouldn't be moved
    3399              :     out of it's loop_father.  */
    3400        42274 :   if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
    3401              :     return false;
    3402              : 
    3403              :   return true;
    3404              : }
    3405              : 
    3406              : /* Marks the references in LOOP for that store motion should be performed
    3407              :    in REFS_TO_SM.  SM_EXECUTED is the set of references for that store
    3408              :    motion was performed in one of the outer loops.  */
    3409              : 
    3410              : static void
    3411      1275061 : find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
    3412              : {
    3413      1275061 :   bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
    3414      1275061 :   unsigned i;
    3415      1275061 :   bitmap_iterator bi;
    3416      1275061 :   im_mem_ref *ref;
    3417              : 
    3418      4146283 :   EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
    3419              :     {
    3420      2871222 :       ref = memory_accesses.refs_list[i];
    3421      2871222 :       if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
    3422        41333 :         bitmap_set_bit (refs_to_sm, i);
    3423              :     }
    3424      1275061 : }
    3425              : 
    3426              : /* Checks whether LOOP (with exits stored in EXITS array) is suitable
    3427              :    for a store motion optimization (i.e. whether we can insert statement
    3428              :    on its exits).  */
    3429              : 
    3430              : static bool
    3431      1317972 : loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
    3432              :                       const vec<edge> &exits)
    3433              : {
    3434      1317972 :   unsigned i;
    3435      1317972 :   edge ex;
    3436              : 
    3437      3400817 :   FOR_EACH_VEC_ELT (exits, i, ex)
    3438      2125756 :     if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
    3439              :       return false;
    3440              : 
    3441              :   return true;
    3442              : }
    3443              : 
    3444              : /* Try to perform store motion for all memory references modified inside
    3445              :    LOOP.  SM_EXECUTED is the bitmap of the memory references for that
    3446              :    store motion was executed in one of the outer loops.  */
    3447              : 
    3448              : static void
    3449      1317972 : store_motion_loop (class loop *loop, bitmap sm_executed)
    3450              : {
    3451      1317972 :   auto_vec<edge> exits = get_loop_exit_edges (loop);
    3452      1317972 :   class loop *subloop;
    3453      1317972 :   bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
    3454              : 
    3455      1317972 :   if (loop_suitable_for_sm (loop, exits))
    3456              :     {
    3457      1275061 :       find_refs_for_sm (loop, sm_executed, sm_in_loop);
    3458      1275061 :       if (!bitmap_empty_p (sm_in_loop))
    3459        21217 :         hoist_memory_references (loop, sm_in_loop, exits);
    3460              :     }
    3461              : 
    3462      1317972 :   bitmap_ior_into (sm_executed, sm_in_loop);
    3463      1618016 :   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
    3464       300044 :     store_motion_loop (subloop, sm_executed);
    3465      1317972 :   bitmap_and_compl_into (sm_executed, sm_in_loop);
    3466      1317972 :   BITMAP_FREE (sm_in_loop);
    3467      1317972 : }
    3468              : 
    3469              : /* Try to perform store motion for all memory references modified inside
    3470              :    loops.  */
    3471              : 
    3472              : static void
    3473       484286 : do_store_motion (void)
    3474              : {
    3475       484286 :   class loop *loop;
    3476       484286 :   bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
    3477              : 
    3478      1502214 :   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    3479      1017928 :     store_motion_loop (loop, sm_executed);
    3480              : 
    3481       484286 :   BITMAP_FREE (sm_executed);
    3482       484286 : }
    3483              : 
    3484              : /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
    3485              :    for each such basic block bb records the outermost loop for that execution
    3486              :    of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
    3487              :    blocks that contain a nonpure call.  */
    3488              : 
    3489              : static void
    3490      1318497 : fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
    3491              : {
    3492      1318497 :   basic_block bb = NULL, last = NULL;
    3493      1318497 :   edge e;
    3494      1318497 :   class loop *inn_loop = loop;
    3495              : 
    3496      1618798 :   for (class loop *inner = loop->inner; inner; inner = inner->next)
    3497       300301 :     fill_always_executed_in_1 (inner, contains_call);
    3498              : 
    3499      1318497 :   auto_vec<basic_block, 64> worklist;
    3500      1318497 :   worklist.reserve_exact (loop->num_nodes);
    3501      1318497 :   worklist.quick_push (loop->header);
    3502      2927413 :   do
    3503              :     {
    3504      2927413 :       edge_iterator ei;
    3505      2927413 :       bb = worklist.pop ();
    3506              : 
    3507      2927413 :       if (!flow_bb_inside_loop_p (inn_loop, bb))
    3508              :         {
    3509              :           /* When we are leaving a possibly infinite inner loop
    3510              :              we have to stop processing.  */
    3511       172672 :           if (!finite_loop_p (inn_loop))
    3512              :             break;
    3513              :           /* If the loop was finite we can continue with processing
    3514              :              the loop we exited to.  */
    3515       164412 :           inn_loop = bb->loop_father;
    3516              :         }
    3517              : 
    3518      2919153 :       if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    3519      1712121 :         last = bb;
    3520              : 
    3521      2919153 :       if (bitmap_bit_p (contains_call, bb->index))
    3522              :         break;
    3523              : 
    3524              :       /* If LOOP exits from this BB stop processing.  */
    3525      5627842 :       FOR_EACH_EDGE (e, ei, bb->succs)
    3526      4009156 :         if (!flow_bb_inside_loop_p (loop, e->dest))
    3527              :           break;
    3528      2584632 :       if (e)
    3529              :         break;
    3530              : 
    3531              :       /* A loop might be infinite (TODO use simple loop analysis
    3532              :          to disprove this if possible).  */
    3533      1618686 :       if (bb->flags & BB_IRREDUCIBLE_LOOP)
    3534              :         break;
    3535              : 
    3536      1618582 :       if (bb->loop_father->header == bb)
    3537              :         /* Record that we enter into a subloop since it might not
    3538              :            be finite.  */
    3539              :         /* ???  Entering into a not always executed subloop makes
    3540              :            fill_always_executed_in quadratic in loop depth since
    3541              :            we walk those loops N times.  This is not a problem
    3542              :            in practice though, see PR102253 for a worst-case testcase.  */
    3543       610724 :         inn_loop = bb->loop_father;
    3544              : 
    3545              :       /* Walk the body of LOOP sorted by dominance relation.  Additionally,
    3546              :          if a basic block S dominates the latch, then only blocks dominated
    3547              :          by S are after it.
    3548              :          This is get_loop_body_in_dom_order using a worklist algorithm and
    3549              :          stopping once we are no longer interested in visiting further
    3550              :          blocks.  */
    3551      1618582 :       unsigned old_len = worklist.length ();
    3552      1618582 :       unsigned postpone = 0;
    3553      1618582 :       for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
    3554      3497564 :            son;
    3555      1878982 :            son = next_dom_son (CDI_DOMINATORS, son))
    3556              :         {
    3557      1878982 :           if (!flow_bb_inside_loop_p (loop, son))
    3558        20444 :             continue;
    3559      1858538 :           if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
    3560       535447 :             postpone = worklist.length ();
    3561      1858538 :           worklist.quick_push (son);
    3562              :         }
    3563      1618582 :       if (postpone)
    3564              :         /* Postponing the block that dominates the latch means
    3565              :            processing it last and thus putting it earliest in the
    3566              :            worklist.  */
    3567       157294 :         std::swap (worklist[old_len], worklist[postpone]);
    3568              :     }
    3569      3237164 :   while (!worklist.is_empty ());
    3570              : 
    3571      2105745 :   while (1)
    3572              :     {
    3573      1712121 :       if (last->loop_father == loop
    3574      1712121 :           || (ALWAYS_EXECUTED_IN (last)
    3575       156109 :               && loop_outer (ALWAYS_EXECUTED_IN (last)) == loop))
    3576              :         {
    3577      1711450 :           if (dump_enabled_p ())
    3578         1915 :             dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
    3579              :                          last->index, loop->num);
    3580      1711450 :           SET_ALWAYS_EXECUTED_IN (last, loop);
    3581              :         }
    3582      1712121 :       if (last == loop->header)
    3583              :         break;
    3584       393624 :       last = get_immediate_dominator (CDI_DOMINATORS, last);
    3585              :     }
    3586      1318497 : }
    3587              : 
    3588              : /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
    3589              :    for each such basic block bb records the outermost loop for that execution
    3590              :    of its header implies execution of bb.  */
    3591              : 
    3592              : static void
    3593       484397 : fill_always_executed_in (void)
    3594              : {
    3595       484397 :   basic_block bb;
    3596       484397 :   class loop *loop;
    3597              : 
    3598       484397 :   auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
    3599       484397 :   bitmap_clear (contains_call);
    3600     14881820 :   FOR_EACH_BB_FN (bb, cfun)
    3601              :     {
    3602     14397423 :       if (loop_depth (bb->loop_father) == 0)
    3603              :         {
    3604              :           /* Outside of loops we can skip scanning all stmts.  */
    3605      8369714 :           bitmap_set_bit (contains_call, bb->index);
    3606      8369714 :           continue;
    3607              :         }
    3608      6027709 :       gimple_stmt_iterator gsi;
    3609     49540800 :       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    3610              :         {
    3611     39517673 :           if (nonpure_call_p (gsi_stmt (gsi)))
    3612              :             break;
    3613              :         }
    3614              : 
    3615      6027709 :       if (!gsi_end_p (gsi))
    3616       954021 :         bitmap_set_bit (contains_call, bb->index);
    3617              :     }
    3618              : 
    3619      1502593 :   for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
    3620      1018196 :     fill_always_executed_in_1 (loop, contains_call);
    3621       484397 : }
    3622              : 
    3623              : /* Find the coldest loop preheader for LOOP, also find the nearest hotter loop
    3624              :    to LOOP.  Then recursively iterate each inner loop.  */
    3625              : 
    3626              : void
    3627      1318497 : fill_coldest_and_hotter_out_loop (class loop *coldest_loop,
    3628              :                                   class loop *hotter_loop, class loop *loop)
    3629              : {
    3630      1318497 :   if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
    3631              :                                      coldest_loop))
    3632        28253 :     coldest_loop = loop;
    3633              : 
    3634      1318497 :   coldest_outermost_loop[loop->num] = coldest_loop;
    3635              : 
    3636      1318497 :   hotter_than_inner_loop[loop->num] = NULL;
    3637      1318497 :   class loop *outer_loop = loop_outer (loop);
    3638      1318497 :   if (hotter_loop
    3639      1318497 :       && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
    3640              :                                         hotter_loop))
    3641         2145 :     hotter_than_inner_loop[loop->num] = hotter_loop;
    3642              : 
    3643      1318497 :   if (outer_loop && outer_loop != current_loops->tree_root
    3644      1618798 :       && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
    3645              :                                         outer_loop))
    3646        34478 :     hotter_than_inner_loop[loop->num] = outer_loop;
    3647              : 
    3648      1318497 :   if (dump_enabled_p ())
    3649              :     {
    3650         1482 :       dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ",
    3651              :                    loop->num, coldest_loop->num);
    3652         1482 :       if (hotter_than_inner_loop[loop->num])
    3653          245 :         dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n",
    3654          245 :                      hotter_than_inner_loop[loop->num]->num);
    3655              :       else
    3656         1237 :         dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n");
    3657              :     }
    3658              : 
    3659      1318497 :   class loop *inner_loop;
    3660      1618798 :   for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next)
    3661       600602 :     fill_coldest_and_hotter_out_loop (coldest_loop,
    3662       300301 :                                       hotter_than_inner_loop[loop->num],
    3663              :                                       inner_loop);
    3664      1318497 : }
    3665              : 
    3666              : /* Compute the global information needed by the loop invariant motion pass.  */
    3667              : 
    3668              : static void
    3669       484397 : tree_ssa_lim_initialize (bool store_motion)
    3670              : {
    3671       484397 :   unsigned i;
    3672              : 
    3673       484397 :   bitmap_obstack_initialize (&lim_bitmap_obstack);
    3674       484397 :   gcc_obstack_init (&mem_ref_obstack);
    3675       484397 :   lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
    3676              : 
    3677       484397 :   if (flag_tm)
    3678          104 :     compute_transaction_bits ();
    3679              : 
    3680       484397 :   memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
    3681       484397 :   memory_accesses.refs_list.create (100);
    3682              :   /* Allocate a special, unanalyzable mem-ref with ID zero.  */
    3683       484397 :   memory_accesses.refs_list.quick_push
    3684       484397 :     (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
    3685              : 
    3686       968794 :   memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
    3687       968794 :   memory_accesses.refs_loaded_in_loop.quick_grow_cleared (number_of_loops (cfun));
    3688       968794 :   memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
    3689       968794 :   memory_accesses.refs_stored_in_loop.quick_grow_cleared (number_of_loops (cfun));
    3690       484397 :   if (store_motion)
    3691              :     {
    3692       968572 :       memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
    3693       484286 :       memory_accesses.all_refs_stored_in_loop.quick_grow_cleared
    3694       968572 :                                                       (number_of_loops (cfun));
    3695              :     }
    3696              : 
    3697      6035482 :   for (i = 0; i < number_of_loops (cfun); i++)
    3698              :     {
    3699      2533344 :       bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
    3700              :                          &lim_bitmap_obstack);
    3701      2533344 :       bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
    3702              :                          &lim_bitmap_obstack);
    3703      2533344 :       if (store_motion)
    3704      2532243 :         bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
    3705              :                            &lim_bitmap_obstack);
    3706              :     }
    3707              : 
    3708       484397 :   memory_accesses.ttae_cache = NULL;
    3709              : 
    3710              :   /* Initialize bb_loop_postorder with a mapping from loop->num to
    3711              :      its postorder index.  */
    3712       484397 :   i = 0;
    3713       968794 :   bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
    3714      2771688 :   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    3715      1318497 :     bb_loop_postorder[loop->num] = i++;
    3716       484397 : }
    3717              : 
    3718              : /* Cleans up after the invariant motion pass.  */
    3719              : 
    3720              : static void
    3721       484397 : tree_ssa_lim_finalize (void)
    3722              : {
    3723       484397 :   basic_block bb;
    3724       484397 :   unsigned i;
    3725       484397 :   im_mem_ref *ref;
    3726              : 
    3727     14926743 :   FOR_EACH_BB_FN (bb, cfun)
    3728     14442346 :     SET_ALWAYS_EXECUTED_IN (bb, NULL);
    3729              : 
    3730       484397 :   bitmap_obstack_release (&lim_bitmap_obstack);
    3731       968794 :   delete lim_aux_data_map;
    3732              : 
    3733       484397 :   delete memory_accesses.refs;
    3734       484397 :   memory_accesses.refs = NULL;
    3735              : 
    3736      5691254 :   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    3737      5206857 :     memref_free (ref);
    3738       484397 :   memory_accesses.refs_list.release ();
    3739       484397 :   obstack_free (&mem_ref_obstack, NULL);
    3740              : 
    3741       484397 :   memory_accesses.refs_loaded_in_loop.release ();
    3742       484397 :   memory_accesses.refs_stored_in_loop.release ();
    3743       484397 :   memory_accesses.all_refs_stored_in_loop.release ();
    3744              : 
    3745       484397 :   if (memory_accesses.ttae_cache)
    3746         4504 :     free_affine_expand_cache (&memory_accesses.ttae_cache);
    3747              : 
    3748       484397 :   free (bb_loop_postorder);
    3749              : 
    3750       484397 :   coldest_outermost_loop.release ();
    3751       484397 :   hotter_than_inner_loop.release ();
    3752       484397 : }
    3753              : 
    3754              : /* Moves invariants from loops.  Only "expensive" invariants are moved out --
    3755              :    i.e. those that are likely to be win regardless of the register pressure.
    3756              :    Only perform store motion if STORE_MOTION is true.  */
    3757              : 
    3758              : unsigned int
    3759       484397 : loop_invariant_motion_in_fun (function *fun, bool store_motion)
    3760              : {
    3761       484397 :   unsigned int todo = 0;
    3762              : 
    3763       484397 :   tree_ssa_lim_initialize (store_motion);
    3764              : 
    3765       484397 :   mark_ssa_maybe_undefs ();
    3766              : 
    3767              :   /* Gathers information about memory accesses in the loops.  */
    3768       484397 :   analyze_memory_references (store_motion);
    3769              : 
    3770              :   /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
    3771       484397 :   fill_always_executed_in ();
    3772              : 
    3773              :   /* Pre-compute coldest outermost loop and nearest hotter loop of each loop.
    3774              :    */
    3775       484397 :   class loop *loop;
    3776       968794 :   coldest_outermost_loop.create (number_of_loops (cfun));
    3777       968794 :   coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
    3778       968794 :   hotter_than_inner_loop.create (number_of_loops (cfun));
    3779       968794 :   hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
    3780      1502593 :   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    3781      1018196 :     fill_coldest_and_hotter_out_loop (loop, NULL, loop);
    3782              : 
    3783       484397 :   int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
    3784       484397 :   int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
    3785              : 
    3786              :   /* For each statement determine the outermost loop in that it is
    3787              :      invariant and cost for computing the invariant.  */
    3788     14881820 :   for (int i = 0; i < n; ++i)
    3789     14397423 :     compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
    3790              : 
    3791              :   /* Execute store motion.  Force the necessary invariants to be moved
    3792              :      out of the loops as well.  */
    3793       484397 :   if (store_motion)
    3794       484286 :     do_store_motion ();
    3795              : 
    3796       484397 :   free (rpo);
    3797       484397 :   rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
    3798       484397 :   n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
    3799              : 
    3800              :   /* Move the expressions that are expensive enough.  */
    3801     14926743 :   for (int i = 0; i < n; ++i)
    3802     14442346 :     todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
    3803              : 
    3804       484397 :   free (rpo);
    3805              : 
    3806       484397 :   gsi_commit_edge_inserts ();
    3807       484397 :   if (need_ssa_update_p (fun))
    3808        14768 :     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    3809              : 
    3810       484397 :   tree_ssa_lim_finalize ();
    3811              : 
    3812       484397 :   return todo;
    3813              : }
    3814              : 
    3815              : /* Loop invariant motion pass.  */
    3816              : 
    3817              : namespace {
    3818              : 
    3819              : const pass_data pass_data_lim =
    3820              : {
    3821              :   GIMPLE_PASS, /* type */
    3822              :   "lim", /* name */
    3823              :   OPTGROUP_LOOP, /* optinfo_flags */
    3824              :   TV_LIM, /* tv_id */
    3825              :   PROP_cfg, /* properties_required */
    3826              :   0, /* properties_provided */
    3827              :   0, /* properties_destroyed */
    3828              :   0, /* todo_flags_start */
    3829              :   0, /* todo_flags_finish */
    3830              : };
    3831              : 
    3832              : class pass_lim : public gimple_opt_pass
    3833              : {
    3834              : public:
    3835      1142888 :   pass_lim (gcc::context *ctxt)
    3836      2285776 :     : gimple_opt_pass (pass_data_lim, ctxt)
    3837              :   {}
    3838              : 
    3839              :   /* opt_pass methods: */
    3840       857166 :   opt_pass * clone () final override { return new pass_lim (m_ctxt); }
    3841      1284588 :   bool gate (function *) final override { return flag_tree_loop_im != 0; }
    3842              :   unsigned int execute (function *) final override;
    3843              : 
    3844              : }; // class pass_lim
    3845              : 
    3846              : unsigned int
    3847      1284262 : pass_lim::execute (function *fun)
    3848              : {
    3849      1284262 :   in_loop_pipeline = scev_initialized_p ();
    3850      1284262 :   if (!in_loop_pipeline)
    3851      1042328 :     loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
    3852              : 
    3853      2568524 :   if (number_of_loops (fun) <= 1)
    3854              :     return 0;
    3855       484302 :   unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
    3856              : 
    3857       484302 :   if (!in_loop_pipeline)
    3858       242368 :     loop_optimizer_finalize ();
    3859              :   else
    3860       241934 :     scev_reset ();
    3861              :   return todo;
    3862              : }
    3863              : 
    3864              : } // anon namespace
    3865              : 
    3866              : gimple_opt_pass *
    3867       285722 : make_pass_lim (gcc::context *ctxt)
    3868              : {
    3869       285722 :   return new pass_lim (ctxt);
    3870              : }
    3871              : 
    3872              : 
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.