LCOV - code coverage report
Current view: top level - gcc - avoid-store-forwarding.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 83.8 % 320 268
Test Date: 2026-02-28 14:20:25 Functions: 100.0 % 8 8
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Avoid store forwarding optimization pass.
       2              :    Copyright (C) 2024-2026 Free Software Foundation, Inc.
       3              :    Contributed by VRULL GmbH.
       4              : 
       5              :    This file is part of GCC.
       6              : 
       7              :    GCC is free software; you can redistribute it and/or modify it
       8              :    under the terms of the GNU General Public License as published by
       9              :    the Free Software Foundation; either version 3, or (at your option)
      10              :    any later version.
      11              : 
      12              :    GCC is distributed in the hope that it will be useful, but
      13              :    WITHOUT ANY WARRANTY; without even the implied warranty of
      14              :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15              :    General Public License for more details.
      16              : 
      17              :    You should have received a copy of the GNU General Public License
      18              :    along with GCC; see the file COPYING3.  If not see
      19              :    <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "target.h"
      26              : #include "rtl.h"
      27              : #include "avoid-store-forwarding.h"
      28              : #include "alias.h"
      29              : #include "rtlanal.h"
      30              : #include "cfgrtl.h"
      31              : #include "tree-pass.h"
      32              : #include "predict.h"
      33              : #include "insn-config.h"
      34              : #include "expmed.h"
      35              : #include "recog.h"
      36              : #include "regset.h"
      37              : #include "df.h"
      38              : #include "expr.h"
      39              : #include "memmodel.h"
      40              : #include "emit-rtl.h"
      41              : #include "vec.h"
      42              : 
      43              : /* This pass tries to detect and avoid cases of store forwarding.
      44              :    On many processors there is a large penalty when smaller stores are
      45              :    forwarded to larger loads.  The idea used to avoid the stall is to move
      46              :    the store after the load and in addition emit a bit insert sequence so
      47              :    the load register has the correct value.  For example the following:
      48              : 
      49              :      strb    w2, [x1, 1]
      50              :      ldr     x0, [x1]
      51              : 
      52              :    Will be transformed to:
      53              : 
      54              :      ldr     x0, [x1]
      55              :      strb    w2, [x1, 1]
      56              :      bfi     x0, x2, 8, 8
      57              : */
      58              : 
      59              : namespace {
      60              : 
      61              : const pass_data pass_data_avoid_store_forwarding = /* Descriptor handed to the rtl_opt_pass base by the pass constructor.  */
      62              : {
      63              :   RTL_PASS, /* type.  */
      64              :   "avoid_store_forwarding", /* name.  */
      65              :   OPTGROUP_NONE, /* optinfo_flags.  */
      66              :   TV_AVOID_STORE_FORWARDING, /* tv_id.  */
      67              :   0, /* properties_required (no bits set).  */
      68              :   0, /* properties_provided (no bits set).  */
      69              :   0, /* properties_destroyed (no bits set).  */
      70              :   0, /* todo_flags_start (no bits set).  */
      71              :   TODO_df_finish /* todo_flags_finish.  */
      72              : };
      73              : 
      74              : class pass_rtl_avoid_store_forwarding : public rtl_opt_pass /* Pass object; execute is only declared here.  */
      75              : {
      76              : public:
      77       285722 :   pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
      78       571444 :     : rtl_opt_pass (pass_data_avoid_store_forwarding, ctxt)
      79              :   {}
      80              : 
      81              :   /* opt_pass methods.  The gate lets the pass run only when flag_avoid_store_forwarding is nonzero and optimize is at least 1.  */
      82      1471370 :   virtual bool gate (function *) final override
      83              :     {
      84      1471370 :       return flag_avoid_store_forwarding && optimize >= 1;
      85              :     }
      86              : 
      87              :   virtual unsigned int execute (function *) final override;
      88              : }; // class pass_rtl_avoid_store_forwarding
      89              : 
      90              : /* Handler for finding and avoiding store forwardings.  It is driven one basic block at a time and accumulates its statistics in the two counters below.  */
      91              : 
      92              : class store_forwarding_analyzer
      93              : {
      94              : public:
      95              :   unsigned int stats_sf_detected = 0; /* Incremented once per process_store_forwarding call.  */
      96              :   unsigned int stats_sf_avoided = 0; /* Incremented only when a transformation is committed.  */
      97              : 
      98              :   bool is_store_forwarding (rtx store_mem, rtx load_mem,
      99              :                             HOST_WIDE_INT *off_val);
     100              :   bool process_store_forwarding (vec<store_fwd_info> &, rtx_insn *load_insn,
     101              :                                  rtx load_mem);
     102              :   void avoid_store_forwarding (basic_block);
     103              :   void update_stats (function *);
     104              : };
     105              : 
     106              : /* Return a bit insertion sequence that would make DEST have the correct value
     107              :    if the store represented by STORE_INFO were to be moved after DEST.  The value inserted is STORE_INFO->mov_reg.  The sequence is built between start_sequence and end_sequence and is not emitted into the function here.  Return NULL if any insn of the sequence mentions memory or is not recognized.  */
     108              : 
     109              : static rtx_insn *
     110            6 : generate_bit_insert_sequence (store_fwd_info *store_info, rtx dest)
     111              : {
     112              :   /* Memory size should be a constant at this stage.  */
     113            6 :   unsigned HOST_WIDE_INT store_size
     114            6 :     = MEM_SIZE (store_info->store_mem).to_constant ();
     115              : 
     116            6 :   start_sequence ();
     117              : 
     118            6 :   unsigned HOST_WIDE_INT bitsize = store_size * BITS_PER_UNIT; /* Width of the stored value in bits.  */
     119            6 :   unsigned HOST_WIDE_INT start = store_info->offset * BITS_PER_UNIT; /* Bit position computed from the store's byte offset.  */
     120              : 
     121            6 :   rtx mov_reg = store_info->mov_reg;
     122            6 :   store_bit_field (dest, bitsize, start, 0, 0, GET_MODE (mov_reg), mov_reg,
     123              :                    false, false);
     124              : 
     125            6 :   rtx_insn *insns = get_insns ();
     126            6 :   unshare_all_rtl_in_chain (insns);
     127            6 :   end_sequence ();
     128              : 
     129           25 :   for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn)) /* Validate every generated insn.  */
     130           19 :     if (contains_mem_rtx_p (PATTERN (insn))
     131           19 :         || recog_memoized (insn) < 0)
     132            0 :       return NULL; /* One bad insn rejects the whole sequence.  */
     133              : 
     134              :   return insns;
     135              : }
     136              : 
     137              : /* Return true iff a store to STORE_MEM would write to a sub-region of bytes
     138              :    from what LOAD_MEM would read.  If true also store the relative byte offset
     139              :    of the store within the load to OFF_VAL.  OFF_VAL may be null.  When it is not null it is written on every call, so its value is only meaningful when true is returned.  */
     140              : 
     141           68 : bool store_forwarding_analyzer::
     142              : is_store_forwarding (rtx store_mem, rtx load_mem, HOST_WIDE_INT *off_val)
     143              : {
     144           68 :   poly_int64 load_offset, store_offset;
     145           68 :   rtx load_base = strip_offset (XEXP (load_mem, 0), &load_offset);
     146           68 :   rtx store_base = strip_offset (XEXP (store_mem, 0), &store_offset);
     147           68 :   poly_int64 off_diff = store_offset - load_offset; /* Store offset relative to the load offset.  */
     148              : 
     149           68 :   HOST_WIDE_INT off_val_tmp = 0;
     150           68 :   bool is_off_diff_constant = off_diff.is_constant (&off_val_tmp);
     151           68 :   if (off_val)
     152           63 :     *off_val = off_val_tmp;
     153              : 
     154           68 :   return (MEM_SIZE (load_mem).is_constant () /* All four conditions must hold: constant load size, equal base addresses, store range inside load range, constant offset difference.  */
     155           68 :           && rtx_equal_p (load_base, store_base)
     156           43 :           && known_subrange_p (store_offset, MEM_SIZE (store_mem),
     157           43 :                                load_offset, MEM_SIZE (load_mem))
     158           68 :           && is_off_diff_constant);
     159              : }
     160              : 
     161              : /* Given a list of small stores that are forwarded to LOAD_INSN, try to
     162              :    rearrange them so that a store-forwarding penalty doesn't occur.
     163              :    The stores must be given in reverse program order, starting from the
     164              :    one closer to LOAD_INSN.  LOAD_MEM is the memory operand of the load.  Return true if the transformation was applied.  Return false if it was abandoned; in that case no insn has been emitted into or deleted from the function, although STORES may already have been modified.  */
     165              : 
     166            8 : bool store_forwarding_analyzer::
     167              : process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
     168              :                           rtx load_mem)
     169              : {
     170            8 :   machine_mode load_mem_mode = GET_MODE (load_mem);
     171              :   /* Memory sizes should be constants at this stage.  */
     172            8 :   HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
     173              : 
     174              :   /* If the stores cover all the bytes of the load, then we can eliminate
     175              :      the load entirely and use the computed value instead.
     176              :      We can also eliminate stores on addresses that are overwritten
     177              :      by later stores.  */
     178              : 
     179            8 :   auto_sbitmap forwarded_bytes (load_size);
     180            8 :   unsigned int i;
     181            8 :   store_fwd_info* it;
     182            8 :   auto_vec<store_fwd_info> redundant_stores;
     183            8 :   auto_vec<int> store_ind_to_remove;
     184           18 :   FOR_EACH_VEC_ELT (stores, i, it)
     185              :     {
     186           10 :       HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
     187           10 :       if (bitmap_all_bits_in_range_p (forwarded_bytes, it->offset,
     188           10 :                                       it->offset + store_size - 1))
     189              :         {
     190            0 :           redundant_stores.safe_push (*it);
     191            0 :           store_ind_to_remove.safe_push (i);
     192            0 :           continue;
     193              :         }
     194           10 :       bitmap_set_range (forwarded_bytes, it->offset, store_size);
     195              :     }
     196              : 
     197            8 :   bitmap_not (forwarded_bytes, forwarded_bytes); /* Set bits now mark the bytes NOT covered by any store.  */
     198            8 :   bool load_elim = bitmap_empty_p (forwarded_bytes);
     199              : 
     200            8 :   stats_sf_detected++;
     201              : 
     202            8 :   if (dump_file)
     203              :     {
     204            0 :       fprintf (dump_file, "Store forwarding detected:\n");
     205              : 
     206            0 :       FOR_EACH_VEC_ELT (stores, i, it)
     207              :         {
     208            0 :           fprintf (dump_file, "From: ");
     209            0 :           print_rtl_single (dump_file, it->store_insn);
     210              :         }
     211              : 
     212            0 :       fprintf (dump_file, "To: ");
     213            0 :       print_rtl_single (dump_file, load_insn);
     214              : 
     215            0 :       if (load_elim)
     216            0 :         fprintf (dump_file, "(Load elimination candidate)\n");
     217              :     }
     218              : 
     219              :   /* Remove redundant stores from the vector.  Although this is quadratic,
     220              :      there doesn't seem to be much point optimizing it.  The number of
     221              :      redundant stores is expected to be low and the length of the list is
     222              :      limited by a --param.  The dependence checking that we did earlier is
     223              :      also quadratic in the size of this list.  */
     224            8 :   store_ind_to_remove.reverse (); /* Remove from the highest index down so the remaining indices stay valid.  */
     225            8 :   for (int i : store_ind_to_remove)
     226            0 :     stores.ordered_remove (i);
     227              : 
     228            8 :   rtx load = single_set (load_insn);
     229            8 :   rtx dest;
     230              : 
     231            8 :   if (load_elim)
     232            4 :     dest = gen_reg_rtx (load_mem_mode);
     233              :   else
     234            4 :     dest = SET_DEST (load);
     235              : 
     236            8 :   int move_to_front = -1;
     237            8 :   int total_cost = 0;
     238            8 :   int base_offset_index = -1;
     239              : 
     240              :   /* Find the last store that has the same offset as the load, in the case that
     241              :      we're eliminating the load.  We will try to use it as a base register
     242              :      to avoid bit inserts (see second loop below).  We want the last one, as
     243              :      it will be wider and we don't want to overwrite the base register if
     244              :      there are many of them.  */
     245            4 :   if (load_elim)
     246              :     {
     247            8 :       FOR_EACH_VEC_ELT_REVERSE (stores, i, it)
     248              :         {
     249            4 :           const bool has_base_offset
     250            4 :             = known_eq (poly_uint64 (it->offset),
     251              :                         subreg_size_lowpart_offset (MEM_SIZE (it->store_mem),
     252              :                                                     load_size));
     253            4 :           if (has_base_offset)
     254              :             {
     255            4 :               base_offset_index = i;
     256            4 :               break;
     257              :             }
     258              :         }
     259              :     }
     260              : 
     261              :   /* Check if we can emit bit insert instructions for all forwarded stores.  */
     262           18 :   FOR_EACH_VEC_ELT (stores, i, it)
     263              :     {
     264           10 :       it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem));
     265           10 :       rtx_insn *insns = NULL;
     266              : 
     267              :       /* Check if this is a store with base offset, if we're eliminating the
     268              :          load, and use it as the base register to avoid a bit insert if
     269              :          possible.  Load elimination is implied by base_offset_index != -1.  */
     270           10 :       if (i == (unsigned) base_offset_index)
     271              :         {
     272            4 :           start_sequence ();
     273              : 
     274            8 :           rtx base_reg = lowpart_subreg (GET_MODE (dest), it->mov_reg,
     275            4 :                                          GET_MODE (it->mov_reg));
     276              : 
     277            4 :           if (base_reg)
     278              :             {
     279            4 :               rtx_insn *move0 = emit_move_insn (dest, base_reg);
     280            4 :               if (recog_memoized (move0) >= 0)
     281              :                 {
     282            4 :                   insns = get_insns ();
     283            4 :                   move_to_front = (int) i;
     284              :                 }
     285              :             }
     286              : 
     287            4 :           end_sequence ();
     288              :         }
     289              : 
     290            4 :       if (!insns)
     291            6 :         insns = generate_bit_insert_sequence (&(*it), dest);
     292              : 
     293            6 :       if (!insns)
     294              :         {
     295            0 :           if (dump_file)
     296              :             {
     297            0 :               fprintf (dump_file, "Failed due to: ");
     298            0 :               print_rtl_single (dump_file, it->store_insn);
     299              :             }
     300            0 :           return false;
     301              :         }
     302              : 
     303           10 :       total_cost += seq_cost (insns, true);
     304           10 :       it->bits_insert_insns = insns;
     305              : 
     306           10 :       rtx store_set = single_set (it->store_insn);
     307              : 
     308              :       /* Create a register move at the store's original position to save the
     309              :          stored value.  */
     310           10 :       start_sequence ();
     311           10 :       rtx_insn *insn1
     312           10 :         = emit_insn (gen_rtx_SET (it->mov_reg, SET_SRC (store_set)));
     313           10 :       end_sequence ();
     314              : 
     315           10 :       if (recog_memoized (insn1) < 0)
     316              :         {
     317            0 :           if (dump_file)
     318              :             {
     319            0 :               fprintf (dump_file, "Failed due to unrecognizable insn: ");
     320            0 :               print_rtl_single (dump_file, insn1);
     321              :             }
     322            0 :           return false;
     323              :         }
     324              : 
     325           10 :       it->save_store_value_insn = insn1;
     326              : 
     327              :       /* Create a new store after the load with the saved original value.
     328              :          This avoids the forwarding stall.  */
     329           10 :       start_sequence ();
     330           10 :       rtx_insn *insn2
     331           10 :         = emit_insn (gen_rtx_SET (SET_DEST (store_set), it->mov_reg));
     332           10 :       end_sequence ();
     333              : 
     334           10 :       if (recog_memoized (insn2) < 0)
     335              :         {
     336            0 :           if (dump_file)
     337              :             {
     338            0 :               fprintf (dump_file, "Failed due to unrecognizable insn: ");
     339            0 :               print_rtl_single (dump_file, insn2);
     340              :             }
     341            0 :           return false;
     342              :         }
     343              : 
     344           10 :       it->store_saved_value_insn = insn2;
     345              :     }
     346              : 
     347            8 :   if (load_elim)
     348            4 :     total_cost -= insn_cost (load_insn, true); /* The load is deleted in this case, so credit its cost.  */
     349              : 
     350              :   /* Let the target decide if transforming this store forwarding instance is
     351              :      profitable.  */
     352            8 :   if (!targetm.avoid_store_forwarding_p (stores, load_mem, total_cost,
     353              :                                          load_elim))
     354              :     {
     355            1 :       if (dump_file)
     356            0 :         fprintf (dump_file, "Not transformed due to target decision.\n");
     357              : 
     358            1 :       return false;
     359              :     }
     360              : 
     361              :   /* If we have a move instead of bit insert, it needs to be emitted first in
     362              :      the resulting sequence.  */
     363            7 :   if (move_to_front != -1)
     364              :     {
     365            4 :       store_fwd_info copy = stores[move_to_front];
     366            4 :       stores.safe_push (copy); /* Moved to the end of STORES: every sequence below is emitted directly after LOAD_INSN, so the one emitted last ends up first.  */
     367            4 :       stores.ordered_remove (move_to_front);
     368              :     }
     369              : 
     370            7 :   if (load_elim)
     371              :     {
     372            4 :       machine_mode outer_mode = GET_MODE (SET_DEST (load));
     373            4 :       rtx load_move;
     374            4 :       rtx load_value = dest;
     375            4 :       if (outer_mode != load_mem_mode)
     376              :         {
     377            0 :           load_value = simplify_gen_unary (GET_CODE (SET_SRC (load)),
     378              :                                            outer_mode, dest, load_mem_mode);
     379              :         }
     380            4 :       load_move = gen_rtx_SET (SET_DEST (load), load_value);
     381              : 
     382            4 :       start_sequence ();
     383            4 :       rtx_insn *insn = emit_insn (load_move);
     384            4 :       rtx_insn *seq = end_sequence ();
     385              : 
     386            4 :       if (recog_memoized (insn) < 0)
     387              :         return false;
     388              : 
     389            4 :       emit_insn_after (seq, load_insn);
     390              :     }
     391              : 
     392            7 :   if (dump_file)
     393              :     {
     394            0 :       fprintf (dump_file, "Store forwarding avoided with bit inserts:\n");
     395              : 
     396            0 :       FOR_EACH_VEC_ELT (stores, i, it)
     397              :         {
     398            0 :           if (stores.length () > 1)
     399              :             {
     400            0 :               fprintf (dump_file, "For: ");
     401            0 :               print_rtl_single (dump_file, it->store_insn);
     402              :             }
     403              : 
     404            0 :           fprintf (dump_file, "With sequence:\n");
     405              : 
     406            0 :           for (rtx_insn *insn = it->bits_insert_insns; insn;
     407            0 :                insn = NEXT_INSN (insn))
     408              :             {
     409            0 :               fprintf (dump_file, "  ");
     410            0 :               print_rtl_single (dump_file, insn);
     411              :             }
     412              :         }
     413              : 
     414            0 :       if (redundant_stores.length () > 0)
     415              :         {
     416            0 :           fprintf (dump_file, "\nRedundant stores that have been removed:\n");
     417            0 :           FOR_EACH_VEC_ELT (redundant_stores, i, it)
     418              :             {
     419            0 :               fprintf (dump_file, "  ");
     420            0 :               print_rtl_single (dump_file, it->store_insn);
     421              :             }
     422              :         }
     423              :     }
     424              : 
     425            7 :   stats_sf_avoided++;
     426              : 
     427              :   /* Done, emit all the generated instructions and delete the stores.
     428              :      Note that STORES are in reverse program order.  */
     429              : 
     430           16 :   FOR_EACH_VEC_ELT (stores, i, it)
     431              :     {
     432            9 :       emit_insn_after (it->bits_insert_insns, load_insn);
     433            9 :       emit_insn_after (it->store_saved_value_insn, load_insn);
     434              :     }
     435              : 
     436           16 :   FOR_EACH_VEC_ELT (stores, i, it)
     437              :     {
     438            9 :       emit_insn_before (it->save_store_value_insn, it->store_insn);
     439            9 :       delete_insn (it->store_insn);
     440              :     }
     441              : 
     442              :   /* Delete redundant stores.  */
     443            7 :   FOR_EACH_VEC_ELT (redundant_stores, i, it)
     444            0 :     delete_insn (it->store_insn);
     445              : 
     446            7 :   df_insn_rescan (load_insn);
     447              : 
     448            7 :   if (load_elim)
     449            4 :     delete_insn (load_insn);
     450              : 
     451              :   return true;
     452            8 : }
     453              : 
     454              : /* Try to modify BB so that expensive store forwarding cases are avoided.  */
     455              : 
     456              : void
     457           64 : store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
     458              : {
     459           64 :   if (!optimize_bb_for_speed_p (bb))
     460           25 :     return;
     461              : 
     462           52 :   auto_vec<store_fwd_info, 8> store_exprs;
     463           52 :   auto_vec<rtx> store_exprs_del;
     464           52 :   rtx_insn *insn;
     465           52 :   unsigned int insn_cnt = 0;
     466              : 
     467              :   /* We are iterating over the basic block's instructions detecting store
     468              :      instructions.  Upon reaching a load instruction, we check if any of the
     469              :      previously detected stores could result in store forwarding.  In that
     470              :      case, we try to reorder the load and store instructions.
     471              :      We skip this transformation when we encounter complex memory operations,
     472              :      instructions that might throw an exception, instruction dependencies,
     473              :      etc.  This is done by clearing the vector of detected stores, while
     474              :      keeping the removed stores in another vector.  By doing so, we can check
     475              :      if any of the removed stores operated on the load's address range, when
     476              :      reaching a subsequent store that operates on the same address range,
     477              :      as this would lead to incorrect values on the register that keeps the
     478              :      loaded value.  */
     479          560 :   FOR_BB_INSNS (bb, insn)
     480              :     {
     481          521 :       if (!NONDEBUG_INSN_P (insn))
     482          151 :         continue;
     483              : 
     484          410 :       vec_rtx_properties properties;
     485          410 :       properties.add_insn (insn, false);
     486              : 
     487          410 :       rtx set = single_set (insn);
     488              : 
     489          410 :       if (!set || insn_could_throw_p (insn))
     490              :         {
     491              :           unsigned int i;
     492              :           store_fwd_info *it;
     493           71 :           FOR_EACH_VEC_ELT (store_exprs, i, it)
     494           31 :             store_exprs_del.safe_push (it->store_mem);
     495           40 :           store_exprs.truncate (0);
     496           40 :           continue;
     497           40 :         }
     498              : 
     499              :       /* The inner mem RTX if INSN is a load, NULL_RTX otherwise.  */
     500          370 :       rtx load_mem = SET_SRC (set);
     501              : 
     502          370 :       if (GET_CODE (load_mem) == ZERO_EXTEND
     503          370 :           || GET_CODE (load_mem) == SIGN_EXTEND)
     504           12 :         load_mem = XEXP (load_mem, 0);
     505              : 
     506          370 :       if (!MEM_P (load_mem))
     507          321 :         load_mem = NULL_RTX;
     508              : 
     509              :       /* The mem RTX if INSN is a store, NULL_RTX otherwise.  */
     510          370 :       rtx store_mem = MEM_P (SET_DEST (set)) ? SET_DEST (set) : NULL_RTX;
     511              : 
     512              :       /* We cannot analyze memory RTXs that have unknown size.  */
     513          194 :       if ((store_mem && (!MEM_SIZE_KNOWN_P (store_mem)
     514              :                          || !MEM_SIZE (store_mem).is_constant ()))
     515          419 :           || (load_mem && (!MEM_SIZE_KNOWN_P (load_mem)
     516              :                            || !MEM_SIZE (load_mem).is_constant ())))
     517              :         {
     518              :           unsigned int i;
     519              :           store_fwd_info *it;
     520            0 :           FOR_EACH_VEC_ELT (store_exprs, i, it)
     521            0 :             store_exprs_del.safe_push (it->store_mem);
     522            0 :           store_exprs.truncate (0);
     523            0 :           continue;
     524            0 :         }
     525              : 
     526          370 :       bool is_simple = !properties.has_asm
     527          370 :                        && !properties.has_side_effects ();
     528          370 :       bool is_simple_store = is_simple
     529          370 :                              && store_mem
     530          370 :                              && !contains_mem_rtx_p (SET_SRC (set));
     531          370 :       bool is_simple_load = is_simple
     532          370 :                             && load_mem
     533          370 :                             && !contains_mem_rtx_p (SET_DEST (set));
     534              : 
     535          370 :       int removed_count = 0;
     536              : 
     537          370 :       if (is_simple_store)
     538              :         {
     539              :           /* Record store forwarding candidate.  */
     540          162 :           store_fwd_info info;
     541          162 :           info.store_insn = insn;
     542          162 :           info.store_mem = store_mem;
     543          162 :           info.insn_cnt = insn_cnt;
     544          162 :           info.remove = false;
     545          162 :           info.forwarded = false;
     546          162 :           store_exprs.safe_push (info);
     547              :         }
     548              : 
     549          370 :       bool reads_mem = false;
     550          370 :       bool writes_mem = false;
     551         1291 :       for (auto ref : properties.refs ())
     552          921 :         if (ref.is_mem ())
     553              :           {
     554          225 :             reads_mem |= ref.is_read ();
     555          225 :             writes_mem |= ref.is_write ();
     556              :           }
     557          696 :         else if (ref.is_write ())
     558              :           {
     559              :             /* Drop store forwarding candidates when the address register is
     560              :                overwritten.  */
     561          215 :             bool remove_rest = false;
     562          215 :             unsigned int i;
     563          215 :             store_fwd_info *it;
     564         1683 :             FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
     565              :               {
     566          332 :                 if (remove_rest
     567          664 :                     || reg_overlap_mentioned_p (regno_reg_rtx[ref.regno],
     568          332 :                                                 it->store_mem))
     569              :                   {
     570            0 :                     it->remove = true;
     571            0 :                     removed_count++;
     572            0 :                     remove_rest = true;
     573            0 :                     store_exprs_del.safe_push (it->store_mem);
     574              :                   }
     575              :               }
     576              :           }
     577              : 
     578          370 :       if (is_simple_load)
     579              :         {
     580              :           /* Process load for possible store forwarding cases.
     581              :              Possible newly created/moved stores, resulted from a successful
     582              :              forwarding, will be processed in subsequent iterations.  */
     583           49 :           auto_vec<store_fwd_info> forwardings;
     584           49 :           bool partial_forwarding = false;
     585           49 :           bool remove_rest = false;
     586              : 
     587           49 :           bool vector_load = VECTOR_MODE_P (GET_MODE (load_mem));
     588              : 
     589           49 :           unsigned int i;
     590           49 :           store_fwd_info *it;
     591          171 :           FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
     592              :             {
     593           73 :               rtx store_mem = it->store_mem;
     594           73 :               HOST_WIDE_INT off_val;
     595              : 
     596           73 :               bool vector_store = VECTOR_MODE_P (GET_MODE (store_mem));
     597              : 
     598           73 :               if (remove_rest)
     599              :                 {
     600            9 :                   it->remove = true;
     601            9 :                   removed_count++;
     602              :                 }
     603           64 :               else if (vector_load ^ vector_store)
     604              :                 {
     605              :                   /* Vector stores followed by a non-vector load or the
     606              :                      opposite, cause store_bit_field to generate non-canonical
     607              :                      expressions, like (subreg:V4SI (reg:DI ...) 0)).
     608              :                      Cases like that should be handled using vec_duplicate,
     609              :                      so we reject the transformation in those cases.  */
     610            1 :                   it->remove = true;
     611            1 :                   removed_count++;
     612            1 :                   remove_rest = true;
     613            1 :                   forwardings.truncate (0);
     614              :                 }
     615           63 :               else if (is_store_forwarding (store_mem, load_mem, &off_val))
     616              :                 {
     617              :                   unsigned int j;
     618              :                   rtx *del_it;
     619              :                   bool same_range_as_removed = false;
     620              : 
     621              :                   /* Check if another store in the load's address range has
     622              :                      been deleted due to a constraint violation.  In this case
     623              :                      we can't forward any other stores that operate in this
     624              :                      range, as it would lead to partial update of the register
     625              :                      that holds the loaded value.  */
     626           17 :                   FOR_EACH_VEC_ELT (store_exprs_del, j, del_it)
     627              :                     {
     628            5 :                       rtx del_store_mem = *del_it;
     629            5 :                       same_range_as_removed
     630            5 :                         = is_store_forwarding (del_store_mem, load_mem, NULL);
     631            5 :                       if (same_range_as_removed)
     632              :                         break;
     633              :                     }
     634              : 
     635              :                   /* Check if moving this store after the load is legal.  */
     636           13 :                   bool write_dep = false;
     637           13 :                   if (!same_range_as_removed)
     638              :                     {
     639           12 :                       unsigned int j = store_exprs.length () - 1;
     640           21 :                       for (; j != i; j--)
     641              :                         {
     642            9 :                           if (!store_exprs[j].forwarded
     643           16 :                               && output_dependence (store_mem,
     644            7 :                                                     store_exprs[j].store_mem))
     645              :                             {
     646              :                               write_dep = true;
     647              :                               break;
     648              :                             }
     649              :                         }
     650              :                     }
     651              : 
     652           12 :                   if (!same_range_as_removed && !write_dep)
     653              :                     {
     654           12 :                       it->forwarded = true;
     655           12 :                       it->offset = off_val;
     656           12 :                       forwardings.safe_push (*it);
     657              :                     }
     658              :                   else
     659              :                     partial_forwarding = true;
     660              : 
     661           13 :                   it->remove = true;
     662           13 :                   removed_count++;
     663              :                 }
     664           50 :               else if (true_dependence (store_mem, GET_MODE (store_mem),
     665              :                                         load_mem))
     666              :                 {
     667              :                   /* We cannot keep a store forwarding candidate if it possibly
     668              :                      interferes with this load.  */
     669            2 :                   it->remove = true;
     670            2 :                   removed_count++;
     671            2 :                   remove_rest = true;
     672            2 :                   forwardings.truncate (0);
     673              :                 }
     674              :             }
     675              : 
     676           67 :           if (!forwardings.is_empty () && !partial_forwarding)
     677            8 :             process_store_forwarding (forwardings, insn, load_mem);
     678           49 :         }
     679              : 
     680              :         /* Abort in case that we encounter a memory read/write that is not a
     681              :            simple store/load, as we can't make safe assumptions about the
     682              :            side-effects of this.  */
     683          370 :         if ((writes_mem && !is_simple_store)
     684          367 :              || (reads_mem && !is_simple_load))
     685           13 :           return;
     686              : 
     687          357 :         if (removed_count)
     688              :         {
     689           12 :           unsigned int i, j;
     690           12 :           store_fwd_info *it;
     691           47 :           VEC_ORDERED_REMOVE_IF (store_exprs, i, j, it, it->remove);
     692              :         }
     693              : 
     694              :         /* Don't consider store forwarding if the RTL instruction distance is
     695              :            more than PARAM_STORE_FORWARDING_MAX_DISTANCE and the cost checks
     696              :            are not disabled.  */
     697          357 :         const bool unlimited_cost = (param_store_forwarding_max_distance == 0);
     698          227 :         if (!unlimited_cost && !store_exprs.is_empty ()
     699          357 :             && (store_exprs[0].insn_cnt
     700          227 :                 + param_store_forwarding_max_distance <= insn_cnt))
     701           64 :           store_exprs.ordered_remove (0);
     702              : 
     703          357 :         insn_cnt++;
     704          410 :     }
     705           52 : }
     706              : 
     707              : /* Update pass statistics: hand the stats_sf_detected and stats_sf_avoided counters for FN to statistics_counter_event, one labelled event each.  */
     708              : 
     709              : void
     710           22 : store_forwarding_analyzer::update_stats (function *fn)
     711              : {
     712           22 :   statistics_counter_event (fn, "Cases of store forwarding detected: ",
     713           22 :                             stats_sf_detected);
     714           22 :   statistics_counter_event (fn, "Cases of store forwarding avoided: ",
     715           22 :                             stats_sf_avoided);
     716           22 : }
     717              : /* Pass entry point.  Runs one store_forwarding_analyzer over every basic block of FN, then calls its update_stats.  Always returns 0.  */
     718              : unsigned int
     719           22 : pass_rtl_avoid_store_forwarding::execute (function *fn)
     720              : {
     721           22 :   df_set_flags (DF_DEFER_INSN_RESCAN);  /* NOTE(review): presumably defers DF rescans of insns changed by the pass; confirm.  */
     722              :   /* Alias analysis is set up here and torn down after the block walk.  NOTE(review): presumably required by the dependence queries the analyzer makes; confirm.  */
     723           22 :   init_alias_analysis ();
     724              : 
     725           22 :   store_forwarding_analyzer analyzer;
     726              :   /* The same analyzer instance is used for all blocks; update_stats is called once after the loop.  */
     727           22 :   basic_block bb;
     728           86 :   FOR_EACH_BB_FN (bb, fn)
     729           64 :     analyzer.avoid_store_forwarding (bb);
     730              : 
     731           22 :   end_alias_analysis ();
     732              : 
     733           22 :   analyzer.update_stats (fn);
     734              : 
     735           22 :   return 0;
     736              : }
     737              : 
     738              : } // anon namespace.
     739              : /* Factory for the pass: returns a pass_rtl_avoid_store_forwarding object newly allocated with `new' and constructed from CTXT.  */
     740              : rtl_opt_pass *
     741       285722 : make_pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
     742              : {
     743       285722 :   return new pass_rtl_avoid_store_forwarding (ctxt);
     744              : }
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.