LCOV - code coverage report
Current view: top level - gcc - avoid-store-forwarding.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 86.3 % 306 264
Test Date: 2026-03-28 14:25:54 Functions: 100.0 % 8 8
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Avoid store forwarding optimization pass.
       2              :    Copyright (C) 2024-2026 Free Software Foundation, Inc.
       3              :    Contributed by VRULL GmbH.
       4              : 
       5              :    This file is part of GCC.
       6              : 
       7              :    GCC is free software; you can redistribute it and/or modify it
       8              :    under the terms of the GNU General Public License as published by
       9              :    the Free Software Foundation; either version 3, or (at your option)
      10              :    any later version.
      11              : 
      12              :    GCC is distributed in the hope that it will be useful, but
      13              :    WITHOUT ANY WARRANTY; without even the implied warranty of
      14              :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15              :    General Public License for more details.
      16              : 
      17              :    You should have received a copy of the GNU General Public License
      18              :    along with GCC; see the file COPYING3.  If not see
      19              :    <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "target.h"
      26              : #include "rtl.h"
      27              : #include "avoid-store-forwarding.h"
      28              : #include "alias.h"
      29              : #include "rtlanal.h"
      30              : #include "cfgrtl.h"
      31              : #include "tree-pass.h"
      32              : #include "predict.h"
      33              : #include "insn-config.h"
      34              : #include "expmed.h"
      35              : #include "recog.h"
      36              : #include "regset.h"
      37              : #include "df.h"
      38              : #include "expr.h"
      39              : #include "memmodel.h"
      40              : #include "emit-rtl.h"
      41              : #include "vec.h"
      42              : 
      43              : /* This pass tries to detect and avoid cases of store forwarding.
      44              :    On many processors there is a large penalty when smaller stores are
      45              :    forwarded to larger loads.  The idea used to avoid the stall is to move
      46              :    the store after the load and in addition emit a bit insert sequence so
      47              :    the load register has the correct value.  For example the following:
      48              : 
      49              :      strb    w2, [x1, 1]
      50              :      ldr     x0, [x1]
      51              : 
      52              :    Will be transformed to:
      53              : 
      54              :      ldr     x0, [x1]
      55              :      strb    w2, [x1, 1]
      56              :      bfi     x0, x2, 8, 8
      57              : */
      58              : 
      59              : namespace {
      60              : 
      61              : const pass_data pass_data_avoid_store_forwarding =
      62              : {
      63              :   RTL_PASS, /* type.  */
      64              :   "avoid_store_forwarding", /* name.  */
      65              :   OPTGROUP_NONE, /* optinfo_flags.  */
      66              :   TV_AVOID_STORE_FORWARDING, /* tv_id.  */
      67              :   0, /* properties_required.  */
      68              :   0, /* properties_provided.  */
      69              :   0, /* properties_destroyed.  */
      70              :   0, /* todo_flags_start.  */
      71              :   TODO_df_finish /* todo_flags_finish.  */
      72              : };
      73              : 
      74              : class pass_rtl_avoid_store_forwarding : public rtl_opt_pass
      75              : {
      76              : public:
      77       287872 :   pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
      78       575744 :     : rtl_opt_pass (pass_data_avoid_store_forwarding, ctxt)
      79              :   {}
      80              : 
      81              :   /* opt_pass methods: */
      82      1480955 :   virtual bool gate (function *) final override
      83              :     {
      84      1480955 :       return flag_avoid_store_forwarding && optimize >= 1;
      85              :     }
      86              : 
      87              :   virtual unsigned int execute (function *) final override;
      88              : }; // class pass_rtl_avoid_store_forwarding
      89              : 
      90              : /* Handler for finding and avoiding store forwardings.  */
      91              : 
      92              : class store_forwarding_analyzer
      93              : {
      94              : public:
      95              :   unsigned int stats_sf_detected = 0;
      96              :   unsigned int stats_sf_avoided = 0;
      97              : 
      98              :   bool is_store_forwarding (rtx store_mem, rtx load_mem,
      99              :                             HOST_WIDE_INT *off_val);
     100              :   bool process_store_forwarding (vec<store_fwd_info> &, rtx_insn *load_insn,
     101              :                                  rtx load_mem);
     102              :   void avoid_store_forwarding (basic_block);
     103              :   void update_stats (function *);
     104              : };
     105              : 
     106              : /* Return a bit insertion sequence that would make DEST have the correct value
     107              :    if the store represented by STORE_INFO were to be moved after DEST.  */
     108              : 
     109              : static rtx_insn *
     110            5 : generate_bit_insert_sequence (store_fwd_info *store_info, rtx dest)
     111              : {
     112              :   /* Memory size should be a constant at this stage.  */
     113            5 :   unsigned HOST_WIDE_INT store_size
     114            5 :     = MEM_SIZE (store_info->store_mem).to_constant ();
     115              : 
     116            5 :   start_sequence ();
     117              : 
     118            5 :   unsigned HOST_WIDE_INT bitsize = store_size * BITS_PER_UNIT;
     119            5 :   unsigned HOST_WIDE_INT start = store_info->offset * BITS_PER_UNIT;
     120              : 
     121            5 :   rtx mov_reg = store_info->mov_reg;
     122            5 :   store_bit_field (dest, bitsize, start, 0, 0, GET_MODE (mov_reg), mov_reg,
     123              :                    false, false);
     124              : 
     125            5 :   rtx_insn *insns = get_insns ();
     126            5 :   unshare_all_rtl_in_chain (insns);
     127            5 :   end_sequence ();
     128              : 
     129           23 :   for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
     130           18 :     if (contains_mem_rtx_p (PATTERN (insn))
     131           18 :         || recog_memoized (insn) < 0)
     132            0 :       return NULL;
     133              : 
     134              :   return insns;
     135              : }
     136              : 
     137              : /* Return true iff a store to STORE_MEM would write to a sub-region of bytes
     138              :    from what LOAD_MEM would read.  If true also store the relative byte offset
     139              :    of the store within the load to OFF_VAL.  */
     140              : 
     141           70 : bool store_forwarding_analyzer::
     142              : is_store_forwarding (rtx store_mem, rtx load_mem, HOST_WIDE_INT *off_val)
     143              : {
     144           70 :   poly_int64 load_offset, store_offset;
     145           70 :   rtx load_base = strip_offset (XEXP (load_mem, 0), &load_offset);
     146           70 :   rtx store_base = strip_offset (XEXP (store_mem, 0), &store_offset);
     147           70 :   poly_int64 off_diff = store_offset - load_offset;
     148              : 
     149           70 :   HOST_WIDE_INT off_val_tmp = 0;
     150           70 :   bool is_off_diff_constant = off_diff.is_constant (&off_val_tmp);
     151           70 :   if (off_val)
     152           65 :     *off_val = off_val_tmp;
     153              : 
     154           70 :   return (MEM_SIZE (load_mem).is_constant ()
     155           70 :           && rtx_equal_p (load_base, store_base)
     156           43 :           && known_subrange_p (store_offset, MEM_SIZE (store_mem),
     157           43 :                                load_offset, MEM_SIZE (load_mem))
     158           70 :           && is_off_diff_constant);
     159              : }
     160              : 
     161              : /* Given a list of small stores that are forwarded to LOAD_INSN, try to
     162              :    rearrange them so that a store-forwarding penalty doesn't occur.
     163              :    The stores must be given in reverse program order, starting from the
     164              :    one closest to LOAD_INSN.  */
     165              : 
     166            8 : bool store_forwarding_analyzer::
     167              : process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
     168              :                           rtx load_mem)
     169              : {
     170            8 :   machine_mode load_mem_mode = GET_MODE (load_mem);
     171              :   /* Memory sizes should be constants at this stage.  */
     172            8 :   HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
     173              : 
     174              :   /* If the stores cover all the bytes of the load without overlap then we can
     175              :      eliminate the load entirely and use the computed value instead.
     176              :      Bail out when partially overlapping stores are detected, as the pass
     177              :      cannot correctly handle "last writer wins" semantics for the
     178              :      overlapping byte ranges (see PR124476).  */
     179              : 
     180            8 :   auto_sbitmap forwarded_bytes (load_size);
     181            8 :   bitmap_clear (forwarded_bytes);
     182              : 
     183            8 :   unsigned int i;
     184            8 :   store_fwd_info* it;
     185           17 :   FOR_EACH_VEC_ELT (stores, i, it)
     186              :     {
     187           10 :       HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
     188           10 :       if (bitmap_any_bit_in_range_p (forwarded_bytes, it->offset,
     189           10 :                                  it->offset + store_size - 1))
     190              :         return false;
     191            9 :       bitmap_set_range (forwarded_bytes, it->offset, store_size);
     192              :     }
     193              : 
     194            7 :   bitmap_not (forwarded_bytes, forwarded_bytes);
     195            7 :   bool load_elim = bitmap_empty_p (forwarded_bytes);
     196              : 
     197            7 :   stats_sf_detected++;
     198              : 
     199            7 :   if (dump_file)
     200              :     {
     201            0 :       fprintf (dump_file, "Store forwarding detected:\n");
     202              : 
     203            0 :       FOR_EACH_VEC_ELT (stores, i, it)
     204              :         {
     205            0 :           fprintf (dump_file, "From: ");
     206            0 :           print_rtl_single (dump_file, it->store_insn);
     207              :         }
     208              : 
     209            0 :       fprintf (dump_file, "To: ");
     210            0 :       print_rtl_single (dump_file, load_insn);
     211              : 
     212            0 :       if (load_elim)
     213            0 :         fprintf (dump_file, "(Load elimination candidate)\n");
     214              :     }
     215              : 
     216            7 :   rtx load = single_set (load_insn);
     217            7 :   rtx dest;
     218              : 
     219            7 :   if (load_elim)
     220            3 :     dest = gen_reg_rtx (load_mem_mode);
     221              :   else
     222            4 :     dest = SET_DEST (load);
     223              : 
     224            7 :   int move_to_front = -1;
     225            7 :   int total_cost = 0;
     226            7 :   int base_offset_index = -1;
     227              : 
     228              :   /* Find the last store that has the same offset as the load, in the case
     229              :      that we're eliminating the load.  We will try to use it as a base
     230              :      register to avoid bit inserts (see second loop below).  We want the last
     231              :      one, as it will be wider and we don't want to overwrite the base register
     232              :      if there are many of them.  */
     233            4 :   if (load_elim)
     234              :     {
     235            6 :       FOR_EACH_VEC_ELT_REVERSE (stores, i, it)
     236              :         {
     237            3 :           const bool has_base_offset
     238            3 :             = known_eq (poly_uint64 (it->offset),
     239              :                         subreg_size_lowpart_offset (MEM_SIZE (it->store_mem),
     240              :                                                     load_size));
     241            3 :           if (has_base_offset)
     242              :             {
     243            3 :               base_offset_index = i;
     244            3 :               break;
     245              :             }
     246              :         }
     247              :     }
     248              : 
     249              :   /* Check if we can emit bit insert instructions for all forwarded stores.  */
     250           15 :   FOR_EACH_VEC_ELT (stores, i, it)
     251              :     {
     252            8 :       it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem));
     253            8 :       rtx_insn *insns = NULL;
     254              : 
     255              :       /* Check if this is a store with base offset, if we're eliminating the
     256              :          load, and use it as the base register to avoid a bit insert if
     257              :          possible.  Load elimination is implied by base_offset_index != -1.  */
     258            8 :       if (i == (unsigned) base_offset_index)
     259              :         {
     260            3 :           start_sequence ();
     261              : 
     262            6 :           rtx base_reg = lowpart_subreg (GET_MODE (dest), it->mov_reg,
     263            3 :                                          GET_MODE (it->mov_reg));
     264              : 
     265            3 :           if (base_reg)
     266              :             {
     267            3 :               rtx_insn *move0 = emit_move_insn (dest, base_reg);
     268            3 :               if (recog_memoized (move0) >= 0)
     269              :                 {
     270            3 :                   insns = get_insns ();
     271            3 :                   move_to_front = (int) i;
     272              :                 }
     273              :             }
     274              : 
     275            3 :           end_sequence ();
     276              :         }
     277              : 
     278            3 :       if (!insns)
     279            5 :         insns = generate_bit_insert_sequence (&(*it), dest);
     280              : 
     281            5 :       if (!insns)
     282              :         {
     283            0 :           if (dump_file)
     284              :             {
     285            0 :               fprintf (dump_file, "Failed due to: ");
     286            0 :               print_rtl_single (dump_file, it->store_insn);
     287              :             }
     288            0 :           return false;
     289              :         }
     290              : 
     291            8 :       total_cost += seq_cost (insns, true);
     292            8 :       it->bits_insert_insns = insns;
     293              : 
     294            8 :       rtx store_set = single_set (it->store_insn);
     295              : 
     296              :       /* Create a register move at the store's original position to save the
     297              :          stored value.  */
     298            8 :       start_sequence ();
     299            8 :       rtx_insn *insn1
     300            8 :         = emit_insn (gen_rtx_SET (it->mov_reg, SET_SRC (store_set)));
     301            8 :       end_sequence ();
     302              : 
     303            8 :       if (recog_memoized (insn1) < 0)
     304              :         {
     305            0 :           if (dump_file)
     306              :             {
     307            0 :               fprintf (dump_file, "Failed due to unrecognizable insn: ");
     308            0 :               print_rtl_single (dump_file, insn1);
     309              :             }
     310            0 :           return false;
     311              :         }
     312              : 
     313            8 :       it->save_store_value_insn = insn1;
     314              : 
     315              :       /* Create a new store after the load with the saved original value.
     316              :          This avoids the forwarding stall.  */
     317            8 :       start_sequence ();
     318            8 :       rtx_insn *insn2
     319            8 :         = emit_insn (gen_rtx_SET (SET_DEST (store_set), it->mov_reg));
     320            8 :       end_sequence ();
     321              : 
     322            8 :       if (recog_memoized (insn2) < 0)
     323              :         {
     324            0 :           if (dump_file)
     325              :             {
     326            0 :               fprintf (dump_file, "Failed due to unrecognizable insn: ");
     327            0 :               print_rtl_single (dump_file, insn2);
     328              :             }
     329            0 :           return false;
     330              :         }
     331              : 
     332            8 :       it->store_saved_value_insn = insn2;
     333              :     }
     334              : 
     335            7 :   if (load_elim)
     336            3 :     total_cost -= insn_cost (load_insn, true);
     337              : 
     338              :   /* Let the target decide if transforming this store forwarding instance is
     339              :      profitable.  */
     340            7 :   if (!targetm.avoid_store_forwarding_p (stores, load_mem, total_cost,
     341              :                                          load_elim))
     342              :     {
     343            1 :       if (dump_file)
     344            0 :         fprintf (dump_file, "Not transformed due to target decision.\n");
     345              : 
     346            1 :       return false;
     347              :     }
     348              : 
     349              :   /* If we have a move instead of bit insert, it needs to be emitted first in
     350              :      the resulting sequence.  */
     351            6 :   if (move_to_front != -1)
     352              :     {
     353            3 :       store_fwd_info copy = stores[move_to_front];
     354            3 :       stores.safe_push (copy);
     355            3 :       stores.ordered_remove (move_to_front);
     356              :     }
     357              : 
     358            6 :   if (load_elim)
     359              :     {
     360            3 :       machine_mode outer_mode = GET_MODE (SET_DEST (load));
     361            3 :       rtx load_move;
     362            3 :       rtx load_value = dest;
     363            3 :       if (outer_mode != load_mem_mode)
     364              :         {
     365            0 :           load_value = simplify_gen_unary (GET_CODE (SET_SRC (load)),
     366              :                                            outer_mode, dest, load_mem_mode);
     367              :         }
     368            3 :       load_move = gen_rtx_SET (SET_DEST (load), load_value);
     369              : 
     370            3 :       start_sequence ();
     371            3 :       rtx_insn *insn = emit_insn (load_move);
     372            3 :       rtx_insn *seq = end_sequence ();
     373              : 
     374            3 :       if (recog_memoized (insn) < 0)
     375              :         return false;
     376              : 
     377            3 :       emit_insn_after (seq, load_insn);
     378              :     }
     379              : 
     380            6 :   if (dump_file)
     381              :     {
     382            0 :       fprintf (dump_file, "Store forwarding avoided with bit inserts:\n");
     383              : 
     384            0 :       FOR_EACH_VEC_ELT (stores, i, it)
     385              :         {
     386            0 :           if (stores.length () > 1)
     387              :             {
     388            0 :               fprintf (dump_file, "For: ");
     389            0 :               print_rtl_single (dump_file, it->store_insn);
     390              :             }
     391              : 
     392            0 :           fprintf (dump_file, "With sequence:\n");
     393              : 
     394            0 :           for (rtx_insn *insn = it->bits_insert_insns; insn;
     395            0 :                insn = NEXT_INSN (insn))
     396              :             {
     397            0 :               fprintf (dump_file, "  ");
     398            0 :               print_rtl_single (dump_file, insn);
     399              :             }
     400              :         }
     401              : 
     402              :     }
     403              : 
     404            6 :   stats_sf_avoided++;
     405              : 
     406              :   /* Done, emit all the generated instructions and delete the stores.
     407              :      Note that STORES are in reverse program order.  */
     408              : 
     409           13 :   FOR_EACH_VEC_ELT (stores, i, it)
     410              :     {
     411            7 :       emit_insn_after (it->bits_insert_insns, load_insn);
     412            7 :       emit_insn_after (it->store_saved_value_insn, load_insn);
     413              :     }
     414              : 
     415           13 :   FOR_EACH_VEC_ELT (stores, i, it)
     416              :     {
     417            7 :       emit_insn_before (it->save_store_value_insn, it->store_insn);
     418            7 :       delete_insn (it->store_insn);
     419              :     }
     420              : 
     421            6 :   df_insn_rescan (load_insn);
     422              : 
     423            6 :   if (load_elim)
     424            3 :     delete_insn (load_insn);
     425              : 
     426              :   return true;
     427            8 : }
     428              : 
     429              : /* Try to modify BB so that expensive store forwarding cases are avoided.  */
     430              : 
     431              : void
     432           68 : store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
     433              : {
     434           68 :   if (!optimize_bb_for_speed_p (bb))
     435           28 :     return;
     436              : 
     437           55 :   auto_vec<store_fwd_info, 8> store_exprs;
     438           55 :   auto_vec<rtx> store_exprs_del;
     439           55 :   rtx_insn *insn;
     440           55 :   unsigned int insn_cnt = 0;
     441              : 
     442              :   /* We are iterating over the basic block's instructions detecting store
     443              :      instructions.  Upon reaching a load instruction, we check if any of the
     444              :      previously detected stores could result in store forwarding.  In that
     445              :      case, we try to reorder the load and store instructions.
     446              :      We skip this transformation when we encounter complex memory operations,
     447              :      instructions that might throw an exception, instruction dependencies,
     448              :      etc.  This is done by clearing the vector of detected stores, while
     449              :      keeping the removed stores in another vector.  By doing so, we can check
     450              :      if any of the removed stores operated on the load's address range, when
     451              :      reaching a subsequent store that operates on the same address range,
     452              :      as this would lead to incorrect values on the register that keeps the
     453              :      loaded value.  */
     454          583 :   FOR_BB_INSNS (bb, insn)
     455              :     {
     456          543 :       if (!NONDEBUG_INSN_P (insn))
     457          161 :         continue;
     458              : 
     459          424 :       vec_rtx_properties properties;
     460          424 :       properties.add_insn (insn, false);
     461              : 
     462          424 :       rtx set = single_set (insn);
     463              : 
     464          424 :       if (!set || insn_could_throw_p (insn))
     465              :         {
     466              :           unsigned int i;
     467              :           store_fwd_info *it;
     468           71 :           FOR_EACH_VEC_ELT (store_exprs, i, it)
     469           29 :             store_exprs_del.safe_push (it->store_mem);
     470           42 :           store_exprs.truncate (0);
     471           42 :           continue;
     472           42 :         }
     473              : 
     474              :       /* The inner mem RTX if INSN is a load, NULL_RTX otherwise.  */
     475          382 :       rtx load_mem = SET_SRC (set);
     476              : 
     477          382 :       if (GET_CODE (load_mem) == ZERO_EXTEND
     478          382 :           || GET_CODE (load_mem) == SIGN_EXTEND)
     479           14 :         load_mem = XEXP (load_mem, 0);
     480              : 
     481          382 :       if (!MEM_P (load_mem))
     482          332 :         load_mem = NULL_RTX;
     483              : 
     484              :       /* The mem RTX if INSN is a store, NULL_RTX otherwise.  */
     485          382 :       rtx store_mem = MEM_P (SET_DEST (set)) ? SET_DEST (set) : NULL_RTX;
     486              : 
     487              :       /* We cannot analyze memory RTXs that have unknown size.  */
     488          195 :       if ((store_mem && (!MEM_SIZE_KNOWN_P (store_mem)
     489              :                          || !MEM_SIZE (store_mem).is_constant ()))
     490          432 :           || (load_mem && (!MEM_SIZE_KNOWN_P (load_mem)
     491              :                            || !MEM_SIZE (load_mem).is_constant ())))
     492              :         {
     493              :           unsigned int i;
     494              :           store_fwd_info *it;
     495            0 :           FOR_EACH_VEC_ELT (store_exprs, i, it)
     496            0 :             store_exprs_del.safe_push (it->store_mem);
     497            0 :           store_exprs.truncate (0);
     498            0 :           continue;
     499            0 :         }
     500              : 
     501          382 :       bool is_simple = !properties.has_asm
     502          382 :                        && !properties.has_side_effects ();
     503          382 :       bool is_simple_store = is_simple
     504          382 :                              && store_mem
     505          382 :                              && !contains_mem_rtx_p (SET_SRC (set));
     506          382 :       bool is_simple_load = is_simple
     507          382 :                             && load_mem
     508          382 :                             && !contains_mem_rtx_p (SET_DEST (set));
     509              : 
     510          382 :       int removed_count = 0;
     511              : 
     512          382 :       if (is_simple_store)
     513              :         {
     514              :           /* Record store forwarding candidate.  */
     515          163 :           store_fwd_info info;
     516          163 :           info.store_insn = insn;
     517          163 :           info.store_mem = store_mem;
     518          163 :           info.insn_cnt = insn_cnt;
     519          163 :           info.remove = false;
     520          163 :           info.forwarded = false;
     521          163 :           store_exprs.safe_push (info);
     522              :         }
     523              : 
     524          382 :       bool reads_mem = false;
     525          382 :       bool writes_mem = false;
     526         1332 :       for (auto ref : properties.refs ())
     527          950 :         if (ref.is_mem ())
     528              :           {
     529          229 :             reads_mem |= ref.is_read ();
     530          229 :             writes_mem |= ref.is_write ();
     531              :           }
     532          721 :         else if (ref.is_write ())
     533              :           {
     534              :             /* Drop store forwarding candidates when the address register is
     535              :                overwritten.  */
     536          229 :             bool remove_rest = false;
     537          229 :             unsigned int i;
     538          229 :             store_fwd_info *it;
     539         1744 :             FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
     540              :               {
     541          336 :                 if (remove_rest
     542          672 :                     || reg_overlap_mentioned_p (regno_reg_rtx[ref.regno],
     543          336 :                                                 it->store_mem))
     544              :                   {
     545            0 :                     it->remove = true;
     546            0 :                     removed_count++;
     547            0 :                     remove_rest = true;
     548            0 :                     store_exprs_del.safe_push (it->store_mem);
     549              :                   }
     550              :               }
     551              :           }
     552              : 
     553          382 :       if (is_simple_load)
     554              :         {
     555              :           /* Process load for possible store forwarding cases.
     556              :              Any newly created/moved stores resulting from a successful
     557              :              forwarding will be processed in subsequent iterations.  */
     558           50 :           auto_vec<store_fwd_info> forwardings;
     559           50 :           bool partial_forwarding = false;
     560           50 :           bool remove_rest = false;
     561              : 
     562           50 :           bool vector_load = VECTOR_MODE_P (GET_MODE (load_mem));
     563              : 
     564           50 :           unsigned int i;
     565           50 :           store_fwd_info *it;
     566          175 :           FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
     567              :             {
     568           75 :               rtx store_mem = it->store_mem;
     569           75 :               HOST_WIDE_INT off_val;
     570              : 
     571           75 :               bool vector_store = VECTOR_MODE_P (GET_MODE (store_mem));
     572              : 
     573           75 :               if (remove_rest)
     574              :                 {
     575            9 :                   it->remove = true;
     576            9 :                   removed_count++;
     577              :                 }
     578           66 :               else if (vector_load ^ vector_store)
     579              :                 {
     580              :                   /* Vector stores followed by a non-vector load or the
     581              :                      opposite, cause store_bit_field to generate non-canonical
     582              :                      expressions, like (subreg:V4SI (reg:DI ...) 0)).
     583              :                      Cases like that should be handled using vec_duplicate,
     584              :                      so we reject the transformation in those cases.  */
     585            1 :                   it->remove = true;
     586            1 :                   removed_count++;
     587            1 :                   remove_rest = true;
     588            1 :                   forwardings.truncate (0);
     589              :                 }
     590           65 :               else if (is_store_forwarding (store_mem, load_mem, &off_val))
     591              :                 {
     592              :                   unsigned int j;
     593              :                   rtx *del_it;
     594              :                   bool same_range_as_removed = false;
     595              : 
     596              :                   /* Check if another store in the load's address range has
     597              :                      been deleted due to a constraint violation.  In this case
     598              :                      we can't forward any other stores that operate in this
     599              :                      range, as it would lead to partial update of the register
     600              :                      that holds the loaded value.  */
     601           17 :                   FOR_EACH_VEC_ELT (store_exprs_del, j, del_it)
     602              :                     {
     603            5 :                       rtx del_store_mem = *del_it;
     604            5 :                       same_range_as_removed
     605            5 :                         = is_store_forwarding (del_store_mem, load_mem, NULL);
     606            5 :                       if (same_range_as_removed)
     607              :                         break;
     608              :                     }
     609              : 
     610              :                   /* Check if moving this store after the load is legal.  */
     611           13 :                   bool write_dep = false;
     612           13 :                   if (!same_range_as_removed)
     613              :                     {
     614           12 :                       unsigned int j = store_exprs.length () - 1;
     615           21 :                       for (; j != i; j--)
     616              :                         {
     617            9 :                           if (!store_exprs[j].forwarded
     618           16 :                               && output_dependence (store_mem,
     619            7 :                                                     store_exprs[j].store_mem))
     620              :                             {
     621              :                               write_dep = true;
     622              :                               break;
     623              :                             }
     624              :                         }
     625              :                     }
     626              : 
     627           12 :                   if (!same_range_as_removed && !write_dep)
     628              :                     {
     629           12 :                       it->forwarded = true;
     630           12 :                       it->offset = off_val;
     631           12 :                       forwardings.safe_push (*it);
     632              :                     }
     633              :                   else
     634              :                     partial_forwarding = true;
     635              : 
     636           13 :                   it->remove = true;
     637           13 :                   removed_count++;
     638              :                 }
     639           52 :               else if (true_dependence (store_mem, GET_MODE (store_mem),
     640              :                                         load_mem))
     641              :                 {
     642              :                   /* We cannot keep a store forwarding candidate if it possibly
     643              :                      interferes with this load.  */
     644            2 :                   it->remove = true;
     645            2 :                   removed_count++;
     646            2 :                   remove_rest = true;
     647            2 :                   forwardings.truncate (0);
     648              :                 }
     649              :             }
     650              : 
     651           68 :           if (!forwardings.is_empty () && !partial_forwarding)
     652            8 :             process_store_forwarding (forwardings, insn, load_mem);
     653           50 :         }
     654              : 
     655              :         /* Abort in case that we encounter a memory read/write that is not a
     656              :            simple store/load, as we can't make safe assumptions about the
     657              :            side-effects of this.  */
     658          382 :         if ((writes_mem && !is_simple_store)
     659          379 :              || (reads_mem && !is_simple_load))
     660           15 :           return;
     661              : 
     662          367 :         if (removed_count)
     663              :         {
     664           12 :           unsigned int i, j;
     665           12 :           store_fwd_info *it;
     666           47 :           VEC_ORDERED_REMOVE_IF (store_exprs, i, j, it, it->remove);
     667              :         }
     668              : 
     669              :         /* Don't consider store forwarding if the RTL instruction distance is
     670              :            more than PARAM_STORE_FORWARDING_MAX_DISTANCE and the cost checks
     671              :            are not disabled.  */
     672          367 :         const bool unlimited_cost = (param_store_forwarding_max_distance == 0);
     673          226 :         if (!unlimited_cost && !store_exprs.is_empty ()
     674          367 :             && (store_exprs[0].insn_cnt
     675          226 :                 + param_store_forwarding_max_distance <= insn_cnt))
     676           64 :           store_exprs.ordered_remove (0);
     677              : 
     678          367 :         insn_cnt++;
     679          424 :     }
     680           55 : }
     681              : 
     682              : /* Update pass statistics.  Record, for function FN, two counter
                           events through statistics_counter_event: the number of store
                           forwarding cases detected (stats_sf_detected) and the number
                           avoided (stats_sf_avoided).  */
     683              : 
     684              : void
     685           24 : store_forwarding_analyzer::update_stats (function *fn)
     686              : {
     687           24 :   statistics_counter_event (fn, "Cases of store forwarding detected: ",
     688           24 :                             stats_sf_detected);
     689           24 :   statistics_counter_event (fn, "Cases of store forwarding avoided: ",
     690           24 :                             stats_sf_avoided);
     691           24 : }
     692              : 
                        /* Run the avoid-store-forwarding pass on function FN.  A single
                           store_forwarding_analyzer is applied to every basic block of FN
                           in turn, and its statistics are reported once all blocks have
                           been visited.  Always returns 0.  */

     693              : unsigned int
     694           24 : pass_rtl_avoid_store_forwarding::execute (function *fn)
     695              : {
                          /* Going by the flag name, this asks the dataflow framework to
                             defer insn rescanning -- confirm against the df
                             documentation.  */
     696           24 :   df_set_flags (DF_DEFER_INSN_RESCAN);
     697              : 
                          /* Alias analysis is set up before the walk and torn down after
                             it.  The dependence queries used in this file
                             (true_dependence, output_dependence) presumably rely on it --
                             confirm against alias.h.  */
     698           24 :   init_alias_analysis ();
     699              : 
                          /* One analyzer is shared by all blocks, so the update_stats call
                             below presumably reports totals for the whole function.  */
     700           24 :   store_forwarding_analyzer analyzer;
     701              : 
     702           24 :   basic_block bb;
     703           92 :   FOR_EACH_BB_FN (bb, fn)
     704           68 :     analyzer.avoid_store_forwarding (bb);
     705              : 
     706           24 :   end_alias_analysis ();
     707              : 
     708           24 :   analyzer.update_stats (fn);
     709              : 
     710           24 :   return 0;
     711              : }
     712              : 
     713              : } // anon namespace.
     714              : 
                        /* Allocate and return a new pass_rtl_avoid_store_forwarding object
                           constructed with context CTXT.  This factory is defined after
                           the anonymous namespace is closed, so unlike the pass class it
                           has external linkage.  */

     715              : rtl_opt_pass *
     716       287872 : make_pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
     717              : {
     718       287872 :   return new pass_rtl_avoid_store_forwarding (ctxt);
     719              : }
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.