LCOV - code coverage report
Current view: top level - gcc - avoid-store-forwarding.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 88.9 % 324 288
Test Date: 2026-05-30 15:37:04 Functions: 100.0 % 10 10
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Avoid store forwarding optimization pass.
       2              :    Copyright (C) 2024-2026 Free Software Foundation, Inc.
       3              :    Contributed by VRULL GmbH.
       4              : 
       5              :    This file is part of GCC.
       6              : 
       7              :    GCC is free software; you can redistribute it and/or modify it
       8              :    under the terms of the GNU General Public License as published by
       9              :    the Free Software Foundation; either version 3, or (at your option)
      10              :    any later version.
      11              : 
      12              :    GCC is distributed in the hope that it will be useful, but
      13              :    WITHOUT ANY WARRANTY; without even the implied warranty of
      14              :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15              :    General Public License for more details.
      16              : 
      17              :    You should have received a copy of the GNU General Public License
      18              :    along with GCC; see the file COPYING3.  If not see
      19              :    <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "target.h"
      26              : #include "rtl.h"
      27              : #include "avoid-store-forwarding.h"
      28              : #include "alias.h"
      29              : #include "rtlanal.h"
      30              : #include "cfgrtl.h"
      31              : #include "tree-pass.h"
      32              : #include "predict.h"
      33              : #include "insn-config.h"
      34              : #include "expmed.h"
      35              : #include "recog.h"
      36              : #include "regset.h"
      37              : #include "regs.h"
      38              : #include "df.h"
      39              : #include "expr.h"
      40              : #include "memmodel.h"
      41              : #include "emit-rtl.h"
      42              : #include "vec.h"
      43              : 
      44              : /* This pass tries to detect and avoid cases of store forwarding.
      45              :    On many processors there is a large penalty when smaller stores are
      46              :    forwarded to larger loads.  The idea used to avoid the stall is to move
      47              :    the store after the load and in addition emit a bit insert sequence so
      48              :    the load register has the correct value.  For example the following:
      49              : 
      50              :      strb    w2, [x1, 1]
      51              :      ldr     x0, [x1]
      52              : 
      53              :    Will be transformed to:
      54              : 
      55              :      ldr     x0, [x1]
      56              :      strb    w2, [x1, 1]
      57              :      bfi     x0, x2, 8, 8
      58              : */
      59              : 
      60              : namespace {
      61              : 
      62              : const pass_data pass_data_avoid_store_forwarding = /* Passed to the rtl_opt_pass constructor.  */
      63              : {
      64              :   RTL_PASS, /* type.  */
      65              :   "avoid_store_forwarding", /* name.  */
      66              :   OPTGROUP_NONE, /* optinfo_flags.  */
      67              :   TV_AVOID_STORE_FORWARDING, /* tv_id.  */
      68              :   0, /* properties_required.  */
      69              :   0, /* properties_provided.  */
      70              :   0, /* properties_destroyed.  */
      71              :   0, /* todo_flags_start.  */
      72              :   TODO_df_finish /* todo_flags_finish.  */
      73              : }; // pass_data_avoid_store_forwarding
      74              : 
      75              : class pass_rtl_avoid_store_forwarding : public rtl_opt_pass
      76              : {
      77              : public:
      78       288767 :   pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
      79       577534 :     : rtl_opt_pass (pass_data_avoid_store_forwarding, ctxt)
      80              :   {}
      81              : 
      82              :   /* opt_pass methods.  gate enables the pass only when
                      :      flag_avoid_store_forwarding is set and optimize is at least 1;
                      :      execute is only declared here.  */
      83      1481491 :   virtual bool gate (function *) final override
      84              :     {
      85      1481491 :       return flag_avoid_store_forwarding && optimize >= 1;
      86              :     }
      87              : 
      88              :   virtual unsigned int execute (function *) final override;
      89              : }; // class pass_rtl_avoid_store_forwarding
      90              : 
      91              : /* Handler for finding and avoiding store forwardings.  Keeps counters of
                      :    detected and avoided cases, plus a cache of the hard registers live
                      :    after each insn of the basic block being processed.  */
      92              : 
      93           48 : class store_forwarding_analyzer
      94              : {
      95              : public:
      96              :   unsigned int stats_sf_detected = 0; /* Candidates that passed the overlap check.  */
      97              :   unsigned int stats_sf_avoided = 0; /* Candidates that were actually transformed.  */
      98              : 
      99              :   bool is_store_forwarding (rtx store_mem, rtx load_mem,
     100              :                             HOST_WIDE_INT *off_val);
     101              :   bool process_store_forwarding (vec<store_fwd_info> &, rtx_insn *load_insn,
     102              :                                  rtx load_mem);
     103              :   void avoid_store_forwarding (basic_block);
     104              :   void update_stats (function *);
     105              : 
     106              : private:
     107              :   /* Per-insn live-out hard-register sets for the current BB.  Populated
     108              :      lazily, the first time a candidate's bit-insert sequence clobbers a
     109              :      hard register (so targets whose bit insert clobbers nothing, e.g.
     110              :      aarch64 bfi, pay nothing).  Cleared by avoid_store_forwarding once
                      :      the block has passed its optimize-for-speed check.  */
     111              :   hash_map<rtx_insn *, HARD_REG_SET> m_bb_live_after;
     112              : 
     113              :   void compute_bb_live_after (basic_block bb);
     114              : };
     115              : 
     116              : /* Return a bit insertion sequence that would make DEST have the correct value
     117              :    if the store represented by STORE_INFO were to be moved after DEST.
                      :    The value inserted is STORE_INFO->mov_reg, placed at byte offset
                      :    STORE_INFO->offset and as wide as the store.  Return NULL if any insn
                      :    of the generated sequence contains a MEM or is not recognized.  */
     118              : 
     119              : static rtx_insn *
     120           39 : generate_bit_insert_sequence (store_fwd_info *store_info, rtx dest)
     121              : {
     122              :   /* Memory size should be a constant at this stage.  */
     123           39 :   unsigned HOST_WIDE_INT store_size
     124           39 :     = MEM_SIZE (store_info->store_mem).to_constant ();
     125              : 
     126           39 :   start_sequence ();
     127              : 
     128           39 :   unsigned HOST_WIDE_INT bitsize = store_size * BITS_PER_UNIT;
     129           39 :   unsigned HOST_WIDE_INT start = store_info->offset * BITS_PER_UNIT;
     130              : 
     131           39 :   rtx mov_reg = store_info->mov_reg;
     132           39 :   store_bit_field (dest, bitsize, start, 0, 0, GET_MODE (mov_reg), mov_reg,
     133              :                    false, false);
     134              : 
     135           39 :   rtx_insn *insns = get_insns ();
     136           39 :   unshare_all_rtl_in_chain (insns);
     137           39 :   end_sequence ();
     138              : 
     139          255 :   for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
     140          216 :     if (contains_mem_rtx_p (PATTERN (insn))
     141          216 :         || recog_memoized (insn) < 0)
     142            0 :       return NULL; /* Sequence touches memory or is unrecognizable.  */
     143              : 
     144              :   return insns;
     145              : }
     146              : 
     147              : /* note_stores callback: record hard regs clobbered (not set) by an insn,
     148              :    to capture side-effect clobbers (e.g. flags) without the intended dest.
                      :    When PAT is a CLOBBER and X is a hard register, X (in its mode) is
                      :    added to the HARD_REG_SET that DATA points to; anything else is
                      :    ignored.  */
     149              : 
     150              : static void
     151          316 : record_hard_reg_clobbers (rtx x, const_rtx pat, void *data)
     152              : {
     153          316 :   if (GET_CODE (pat) == CLOBBER && REG_P (x) && HARD_REGISTER_P (x))
     154           90 :     add_to_hard_reg_set ((HARD_REG_SET *) data, GET_MODE (x), REGNO (x));
     155          316 : }
     156              : 
     157              : /* Populate m_bb_live_after with the hard registers live immediately
     158              :    after each real insn in BB.  The block is walked in reverse, starting
                      :    from the state set up by df_simulate_initialize_backwards; the entry
                      :    for an insn is recorded before that insn is simulated, so it describes
                      :    liveness just after the insn.  Non-insns get no entry.  */
     159              : 
     160              : void
     161            5 : store_forwarding_analyzer::compute_bb_live_after (basic_block bb)
     162              : {
     163            5 :   auto_bitmap live;
     164            5 :   df_simulate_initialize_backwards (bb, live);
     165            5 :   rtx_insn *scan;
     166          271 :   FOR_BB_INSNS_REVERSE (bb, scan)
     167          266 :     if (INSN_P (scan))
     168              :       {
     169              :         HARD_REG_SET hrs;
     170          247 :         REG_SET_TO_HARD_REG_SET (hrs, live);
     171          247 :         m_bb_live_after.put (scan, hrs); /* Snapshot taken before SCAN is simulated.  */
     172          247 :         df_simulate_one_insn_backwards (bb, scan, live);
     173              :       }
     174            5 : }
     175              : 
     176              : /* Return true iff a store to STORE_MEM would write to a sub-region of bytes
     177              :    from what LOAD_MEM would read.  If true, the relative byte offset of the
     178              :    store within the load is available in *OFF_VAL.  For a true result both
                      :    addresses must have the same base after strip_offset, LOAD_MEM must
                      :    have a constant size and the offset difference must be a compile-time
                      :    constant.  Note that *OFF_VAL is written whenever OFF_VAL is non-null,
                      :    even when the result is false, so callers should only rely on it after
                      :    a true return.  */
     179              : 
     180          359 : bool store_forwarding_analyzer::
     181              : is_store_forwarding (rtx store_mem, rtx load_mem, HOST_WIDE_INT *off_val)
     182              : {
     183          359 :   poly_int64 load_offset, store_offset;
     184          359 :   rtx load_base = strip_offset (XEXP (load_mem, 0), &load_offset);
     185          359 :   rtx store_base = strip_offset (XEXP (store_mem, 0), &store_offset);
     186          359 :   poly_int64 off_diff = store_offset - load_offset;
     187              : 
     188          359 :   HOST_WIDE_INT off_val_tmp = 0;
     189          359 :   bool is_off_diff_constant = off_diff.is_constant (&off_val_tmp);
     190          359 :   if (off_val) /* OFF_VAL is optional.  */
     191          359 :     *off_val = off_val_tmp;
     192              : 
     193          359 :   return (MEM_SIZE (load_mem).is_constant ()
     194          359 :           && rtx_equal_p (load_base, store_base)
     195          277 :           && known_subrange_p (store_offset, MEM_SIZE (store_mem),
     196          277 :                                load_offset, MEM_SIZE (load_mem))
     197          359 :           && is_off_diff_constant);
     198              : }
     199              : 
     200              : /* Given a list of small stores that are forwarded to LOAD_INSN, try to
     201              :    rearrange them so that a store-forwarding penalty doesn't occur.
     202              :    The stores must be given in reverse program order, starting from the
     203              :    one closer to LOAD_INSN.  Return true if the transformation was applied
                      :    and false if it was rejected.  On success the original store insns are
                      :    deleted, and so is LOAD_INSN when the stores cover every byte of the
                      :    load.  The per-store fields of STORES (mov_reg and the generated insn
                      :    sequences) are filled in along the way, and STORES may be reordered.  */
     204              : 
     205           20 : bool store_forwarding_analyzer::
     206              : process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
     207              :                           rtx load_mem)
     208              : {
     209           20 :   machine_mode load_mem_mode = GET_MODE (load_mem);
     210              :   /* Memory sizes should be constants at this stage.  */
     211           20 :   HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
     212              : 
     213              :   /* If the stores cover all the bytes of the load without overlap then we can
     214              :      eliminate the load entirely and use the computed value instead.
     215              :      Bail out when partially overlapping stores are detected, as the pass
     216              :      cannot correctly handle "last writer wins" semantics for the
     217              :      overlapping byte ranges (see PR124476).  */
     218              : 
     219           20 :   auto_sbitmap forwarded_bytes (load_size);
     220           20 :   bitmap_clear (forwarded_bytes);
     221              : 
     222           20 :   unsigned int i;
     223           20 :   store_fwd_info* it;
     224           93 :   FOR_EACH_VEC_ELT (stores, i, it)
     225              :     {
     226           77 :       HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
     227           77 :       if (bitmap_any_bit_in_range_p (forwarded_bytes, it->offset,
     228           77 :                                  it->offset + store_size - 1))
     229              :         return false; /* Overlapping stores: give up.  */
     230           73 :       bitmap_set_range (forwarded_bytes, it->offset, store_size);
     231              :     }
     232              : 
     233           16 :   bitmap_not (forwarded_bytes, forwarded_bytes);
     234           16 :   bool load_elim = bitmap_empty_p (forwarded_bytes);
     235              : 
     236           16 :   stats_sf_detected++;
     237              : 
     238           16 :   if (dump_file)
     239              :     {
     240            0 :       fprintf (dump_file, "Store forwarding detected:\n");
     241              : 
     242            0 :       FOR_EACH_VEC_ELT (stores, i, it)
     243              :         {
     244            0 :           fprintf (dump_file, "From: ");
     245            0 :           print_rtl_single (dump_file, it->store_insn);
     246              :         }
     247              : 
     248            0 :       fprintf (dump_file, "To: ");
     249            0 :       print_rtl_single (dump_file, load_insn);
     250              : 
     251            0 :       if (load_elim)
     252            0 :         fprintf (dump_file, "(Load elimination candidate)\n");
     253              :     }
     254              : 
     255           16 :   rtx load = single_set (load_insn);
     256           16 :   rtx dest;
     257              : 
     258           16 :   if (load_elim)
     259           10 :     dest = gen_reg_rtx (load_mem_mode);
     260              :   else
     261            6 :     dest = SET_DEST (load);
     262              : 
     263           16 :   int move_to_front = -1;
     264           16 :   int total_cost = 0;
     265           16 :   int base_offset_index = -1;
     266              : 
     267              :   /* Find the last store that has the same offset as the load, in the case
     268              :      that we're eliminating the load.  We will try to use it as a base register
     269              :      to avoid bit inserts (see second loop below).  We want the last one, as
     270              :      it will be wider and we don't want to overwrite the base register if
     271              :      there are many of them.  */
     272            6 :   if (load_elim)
     273              :     {
     274           20 :       FOR_EACH_VEC_ELT_REVERSE (stores, i, it)
     275              :         {
     276           10 :           const bool has_base_offset
     277           10 :             = known_eq (poly_uint64 (it->offset),
     278              :                         subreg_size_lowpart_offset (MEM_SIZE (it->store_mem),
     279              :                                                     load_size));
     280           10 :           if (has_base_offset)
     281              :             {
     282           10 :               base_offset_index = i;
     283           10 :               break;
     284              :             }
     285              :         }
     286              :     }
     287              : 
     288              :   /* Check if we can emit bit insert instructions for all forwarded stores.  */
     289          113 :   FOR_EACH_VEC_ELT (stores, i, it)
     290              :     {
     291           49 :       it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem));
     292           49 :       rtx_insn *insns = NULL;
     293              : 
     294              :       /* Check if this is a store with base offset, if we're eliminating the
     295              :          load, and use it as the base register to avoid a bit insert if
     296              :          possible.  Load elimination is implied by base_offset_index != -1.  */
     297           49 :       if (i == (unsigned) base_offset_index)
     298              :         {
     299           10 :           start_sequence ();
     300              : 
     301           20 :           rtx base_reg = lowpart_subreg (GET_MODE (dest), it->mov_reg,
     302           10 :                                          GET_MODE (it->mov_reg));
     303              : 
     304           10 :           if (base_reg)
     305              :             {
     306           10 :               rtx_insn *move0 = emit_move_insn (dest, base_reg);
     307           10 :               if (recog_memoized (move0) >= 0)
     308              :                 {
     309           10 :                   insns = get_insns ();
     310           10 :                   move_to_front = (int) i;
     311              :                 }
     312              :             }
     313              : 
     314           10 :           end_sequence ();
     315              :         }
     316              : 
     317           10 :       if (!insns)
     318           39 :         insns = generate_bit_insert_sequence (&(*it), dest);
     319              : 
     320           39 :       if (!insns)
     321              :         {
     322            0 :           if (dump_file)
     323              :             {
     324            0 :               fprintf (dump_file, "Failed due to: ");
     325            0 :               print_rtl_single (dump_file, it->store_insn);
     326              :             }
     327            0 :           return false;
     328              :         }
     329              : 
     330           49 :       total_cost += seq_cost (insns, true);
     331           49 :       it->bits_insert_insns = insns;
     332              : 
     333           49 :       rtx store_set = single_set (it->store_insn);
     334              : 
     335              :       /* Create a register move at the store's original position to save the
     336              :          stored value.  */
     337           49 :       start_sequence ();
     338           49 :       rtx_insn *insn1
     339           49 :         = emit_insn (gen_rtx_SET (it->mov_reg, SET_SRC (store_set)));
     340           49 :       end_sequence ();
     341              : 
     342           49 :       if (recog_memoized (insn1) < 0)
     343              :         {
     344            0 :           if (dump_file)
     345              :             {
     346            0 :               fprintf (dump_file, "Failed due to unrecognizable insn: ");
     347            0 :               print_rtl_single (dump_file, insn1);
     348              :             }
     349            0 :           return false;
     350              :         }
     351              : 
     352           49 :       it->save_store_value_insn = insn1;
     353              : 
     354              :       /* Create a new store after the load with the saved original value.
     355              :          This avoids the forwarding stall.  */
     356           49 :       start_sequence ();
     357           49 :       rtx_insn *insn2
     358           49 :         = emit_insn (gen_rtx_SET (SET_DEST (store_set), it->mov_reg));
     359           49 :       end_sequence ();
     360              : 
     361           49 :       if (recog_memoized (insn2) < 0)
     362              :         {
     363            0 :           if (dump_file)
     364              :             {
     365            0 :               fprintf (dump_file, "Failed due to unrecognizable insn: ");
     366            0 :               print_rtl_single (dump_file, insn2);
     367              :             }
     368            0 :           return false;
     369              :         }
     370              : 
     371           49 :       it->store_saved_value_insn = insn2;
     372              :     }
     373              : 
     374              :   /* Reject if the bit-insert sequences clobber a hard register live at
     375              :      the insertion point (e.g. shift/and/or on x86 clobber flags, which
     376              :      would break carry chains).  Done before the target cost query so
     377              :      we skip cost work on candidates we would reject anyway.  */
     378              :   HARD_REG_SET clobbered_regs;
     379           65 :   CLEAR_HARD_REG_SET (clobbered_regs);
     380          113 :   FOR_EACH_VEC_ELT (stores, i, it)
     381          275 :     for (rtx_insn *ins = it->bits_insert_insns; ins; ins = NEXT_INSN (ins))
     382          226 :       note_stores (ins, record_hard_reg_clobbers, &clobbered_regs);
     383              : 
     384           16 :   if (!hard_reg_set_empty_p (clobbered_regs))
     385              :     {
     386           12 :       if (m_bb_live_after.is_empty ())
     387            5 :         compute_bb_live_after (BLOCK_FOR_INSN (load_insn));
     388              : 
     389           12 :       const HARD_REG_SET *live_at_insert = m_bb_live_after.get (load_insn);
     390           12 :       if (live_at_insert
     391           24 :           && hard_reg_set_intersect_p (clobbered_regs, *live_at_insert))
     392              :         {
     393            5 :           if (dump_file)
     394            0 :             fprintf (dump_file,
     395              :                      "Not transformed: bit-insert clobbers live hard reg.\n");
     396            5 :           return false;
     397              :         }
     398              :     }
     399              : 
     400           11 :   if (load_elim)
     401            6 :     total_cost -= insn_cost (load_insn, true);
     402              : 
     403              :   /* Let the target decide if transforming this store forwarding instance is
     404              :      profitable.  */
     405           11 :   if (!targetm.avoid_store_forwarding_p (stores, load_mem, total_cost,
     406              :                                          load_elim))
     407              :     {
     408            1 :       if (dump_file)
     409            0 :         fprintf (dump_file, "Not transformed due to target decision.\n");
     410              : 
     411            1 :       return false;
     412              :     }
     413              : 
     414              :   /* If we have a move instead of bit insert, it needs to be emitted first in
     415              :      the resulting sequence.  */
     416           10 :   if (move_to_front != -1)
     417              :     {
     418            6 :       store_fwd_info copy = stores[move_to_front];
     419            6 :       stores.safe_push (copy);
     420            6 :       stores.ordered_remove (move_to_front);
     421              :     }
     422              : 
     423           10 :   if (load_elim)
     424              :     {
     425            6 :       machine_mode outer_mode = GET_MODE (SET_DEST (load));
     426            6 :       rtx load_move;
     427            6 :       rtx load_value = dest;
     428            6 :       if (outer_mode != load_mem_mode)
     429              :         {
     430            0 :           load_value = simplify_gen_unary (GET_CODE (SET_SRC (load)),
     431              :                                            outer_mode, dest, load_mem_mode);
     432              :         }
     433            6 :       load_move = gen_rtx_SET (SET_DEST (load), load_value);
     434              : 
     435            6 :       start_sequence ();
     436            6 :       rtx_insn *insn = emit_insn (load_move);
     437            6 :       rtx_insn *seq = end_sequence ();
     438              : 
     439            6 :       if (recog_memoized (insn) < 0)
     440              :         return false; /* The replacement move is not recognizable.  */
     441              : 
     442            6 :       emit_insn_after (seq, load_insn);
     443              :     }
     444              : 
     445           10 :   if (dump_file)
     446              :     {
     447            0 :       fprintf (dump_file, "Store forwarding avoided with bit inserts:\n");
     448              : 
     449            0 :       FOR_EACH_VEC_ELT (stores, i, it)
     450              :         {
     451            0 :           if (stores.length () > 1)
     452              :             {
     453            0 :               fprintf (dump_file, "For: ");
     454            0 :               print_rtl_single (dump_file, it->store_insn);
     455              :             }
     456              : 
     457            0 :           fprintf (dump_file, "With sequence:\n");
     458              : 
     459            0 :           for (rtx_insn *insn = it->bits_insert_insns; insn;
     460            0 :                insn = NEXT_INSN (insn))
     461              :             {
     462            0 :               fprintf (dump_file, "  ");
     463            0 :               print_rtl_single (dump_file, insn);
     464              :             }
     465              :         }
     466              : 
     467              :     }
     468              : 
     469           10 :   stats_sf_avoided++;
     470              : 
     471              :   /* Done, emit all the generated instructions and delete the stores.
     472              :      Note that STORES are in reverse program order.  */
     473              : 
     474           48 :   FOR_EACH_VEC_ELT (stores, i, it)
     475              :     {
     476           38 :       emit_insn_after (it->bits_insert_insns, load_insn);
     477           38 :       emit_insn_after (it->store_saved_value_insn, load_insn);
     478              :     }
     479              : 
     480           48 :   FOR_EACH_VEC_ELT (stores, i, it)
     481              :     {
     482           38 :       emit_insn_before (it->save_store_value_insn, it->store_insn);
     483           38 :       delete_insn (it->store_insn);
     484              :     }
     485              : 
     486           10 :   df_insn_rescan (load_insn);
     487              : 
     488           10 :   if (load_elim)
     489              :     {
     490              :       /* Prevent a dangling rtx_insn * key after delete_insn.  */
     491            6 :       m_bb_live_after.remove (load_insn);
     492            6 :       delete_insn (load_insn);
     493              :     }
     494              : 
     495              :   return true;
     496           20 : }
     497              : 
     498              : /* Try to modify BB so that expensive store forwarding cases are avoided.  */
     499              : 
     500              : void
     501           68 : store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
     502              : {
     503           68 :   if (!optimize_bb_for_speed_p (bb))
     504           13 :     return;
     505              : 
     506           55 :   m_bb_live_after.empty ();
     507              : 
     508           55 :   auto_vec<store_fwd_info, 8> store_exprs;
     509           55 :   rtx_insn *insn;
     510           55 :   unsigned int insn_cnt = 0;
     511              : 
     512              :   /* Iterate over the basic block's instructions detecting store instructions.
     513              :      Upon reaching a load instruction, check if any of the previously detected
     514              :      stores could result in store forwarding.  In that case, try to reorder
     515              :      the load and store instructions.  When we encounter instructions that
     516              :      might throw an exception, instruction dependencies, etc., clear the
     517              :      vector of detected stores and continue.
     518              : 
     519              :      Invariant: dropping a candidate from store_exprs (via it->remove or
     520              :      truncate) only removes it from the forwarding list; the store insn
     521              :      stays in the IR so later loads read its effect from memory.  Only
     522              :      process_store_forwarding may delete the original store.  */
     523         1072 :   FOR_BB_INSNS (bb, insn)
     524              :     {
     525         1017 :       if (!NONDEBUG_INSN_P (insn))
     526          225 :         continue;
     527              : 
     528          897 :       vec_rtx_properties properties;
     529          897 :       properties.add_insn (insn, false);
     530              : 
     531          897 :       rtx set = single_set (insn);
     532              : 
     533          897 :       if (!set || insn_could_throw_p (insn))
     534              :         {
     535           58 :           store_exprs.truncate (0);
     536           58 :           continue;
     537              :         }
     538              : 
     539              :       /* The inner mem RTX if INSN is a load, NULL_RTX otherwise.  */
     540          839 :       rtx load_mem = SET_SRC (set);
     541              : 
     542          839 :       if (GET_CODE (load_mem) == ZERO_EXTEND
     543          839 :           || GET_CODE (load_mem) == SIGN_EXTEND)
     544           40 :         load_mem = XEXP (load_mem, 0);
     545              : 
     546          839 :       if (!MEM_P (load_mem))
     547          758 :         load_mem = NULL_RTX;
     548              : 
     549              :       /* The mem RTX if INSN is a store, NULL_RTX otherwise.  */
     550          839 :       rtx store_mem = MEM_P (SET_DEST (set)) ? SET_DEST (set) : NULL_RTX;
     551              : 
     552              :       /* We cannot analyze memory RTXs that have unknown size.  */
     553          354 :       if ((store_mem && (!MEM_SIZE_KNOWN_P (store_mem)
     554              :                          || !MEM_SIZE (store_mem).is_constant ()))
     555          920 :           || (load_mem && (!MEM_SIZE_KNOWN_P (load_mem)
     556              :                            || !MEM_SIZE (load_mem).is_constant ())))
     557              :         {
     558            0 :           store_exprs.truncate (0);
     559            0 :           continue;
     560              :         }
     561              : 
     562          839 :       bool is_simple = !properties.has_asm
     563          839 :                        && !properties.has_side_effects ();
     564          839 :       bool is_simple_store = is_simple
     565          839 :                              && store_mem
     566          839 :                              && !contains_mem_rtx_p (SET_SRC (set));
     567          839 :       bool is_simple_load = is_simple
     568          839 :                             && load_mem
     569          839 :                             && !contains_mem_rtx_p (SET_DEST (set));
     570              : 
     571          839 :       int removed_count = 0;
     572              : 
     573          839 :       if (is_simple_store)
     574              :         {
     575              :           /* Record store forwarding candidate.  */
     576          302 :           store_fwd_info info;
     577          302 :           info.store_insn = insn;
     578          302 :           info.store_mem = store_mem;
     579          302 :           info.insn_cnt = insn_cnt;
     580          302 :           info.remove = false;
     581          302 :           info.forwarded = false;
     582          302 :           store_exprs.safe_push (info);
     583              :         }
     584              : 
     585          839 :       bool reads_mem = false;
     586          839 :       bool writes_mem = false;
     587         2970 :       for (auto ref : properties.refs ())
     588         2131 :         if (ref.is_mem ())
     589              :           {
     590          440 :             reads_mem |= ref.is_read ();
     591          440 :             writes_mem |= ref.is_write ();
     592              :           }
     593         1691 :         else if (ref.is_write ())
     594              :           {
     595              :             /* Drop store forwarding candidates when the address register is
     596              :                overwritten.  */
     597          632 :             bool remove_rest = false;
     598          632 :             unsigned int i;
     599          632 :             store_fwd_info *it;
     600        11800 :             FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
     601              :               {
     602         8405 :                 if (remove_rest
     603        16793 :                     || reg_overlap_mentioned_p (regno_reg_rtx[ref.regno],
     604         8388 :                                                 it->store_mem))
     605              :                   {
     606           20 :                     it->remove = true;
     607           20 :                     removed_count++;
     608           20 :                     remove_rest = true;
     609              :                   }
     610              :               }
     611              :           }
     612              : 
     613          839 :       if (is_simple_load)
     614              :         {
     615              :           /* Process load for possible store forwarding cases.
     616              :              Possible newly created/moved stores, resulted from a successful
     617              :              forwarding, will be processed in subsequent iterations.  */
     618           81 :           auto_vec<store_fwd_info> forwardings;
     619           81 :           bool partial_forwarding = false;
     620           81 :           bool remove_rest = false;
     621              : 
     622           81 :           bool vector_load = VECTOR_MODE_P (GET_MODE (load_mem));
     623              : 
     624           81 :           unsigned int i;
     625           81 :           store_fwd_info *it;
     626          531 :           FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
     627              :             {
     628          369 :               rtx store_mem = it->store_mem;
     629          369 :               HOST_WIDE_INT off_val;
     630              : 
     631          369 :               bool vector_store = VECTOR_MODE_P (GET_MODE (store_mem));
     632              : 
     633          369 :               if (remove_rest)
     634              :                 {
     635            9 :                   it->remove = true;
     636            9 :                   removed_count++;
     637              :                 }
     638          360 :               else if (vector_load ^ vector_store)
     639              :                 {
     640              :                   /* Vector stores followed by a non-vector load or the
     641              :                      opposite, cause store_bit_field to generate non-canonical
     642              :                      expressions, like (subreg:V4SI (reg:DI ...) 0)).
     643              :                      Cases like that should be handled using vec_duplicate,
     644              :                      so we reject the transformation in those cases.  */
     645            1 :                   it->remove = true;
     646            1 :                   removed_count++;
     647            1 :                   remove_rest = true;
     648            1 :                   forwardings.truncate (0);
     649              :                 }
     650          359 :               else if (is_store_forwarding (store_mem, load_mem, &off_val))
     651              :                 {
     652              :                   /* Check if moving this store after the load is legal.  */
     653           93 :                   bool write_dep = false;
     654           93 :                   unsigned int j = store_exprs.length () - 1;
     655         1928 :                   for (; j != i; j--)
     656              :                     {
     657         1835 :                       if (!store_exprs[j].forwarded
     658         3291 :                           && output_dependence (store_mem,
     659         1456 :                                                 store_exprs[j].store_mem))
     660              :                         {
     661              :                           write_dep = true;
     662              :                           break;
     663              :                         }
     664              :                     }
     665              : 
     666           93 :                   if (!write_dep)
     667              :                     {
     668           93 :                       it->forwarded = true;
     669           93 :                       it->offset = off_val;
     670           93 :                       forwardings.safe_push (*it);
     671              :                     }
     672              :                   else
     673              :                     partial_forwarding = true;
     674              : 
     675           93 :                   it->remove = true;
     676           93 :                   removed_count++;
     677              :                 }
     678          266 :               else if (true_dependence (store_mem, GET_MODE (store_mem),
     679              :                                         load_mem))
     680              :                 {
     681              :                   /* We cannot keep a store forwarding candidate if it possibly
     682              :                      interferes with this load.  */
     683            2 :                   it->remove = true;
     684            2 :                   removed_count++;
     685            2 :                   remove_rest = true;
     686            2 :                   forwardings.truncate (0);
     687              :                 }
     688              :             }
     689              : 
     690          123 :           if (!forwardings.is_empty () && !partial_forwarding)
     691           20 :             process_store_forwarding (forwardings, insn, load_mem);
     692           81 :         }
     693              : 
     694              :       /* If we encounter a memory read/write that is not a simple
     695              :          store/load, flush all pending store candidates and continue.
     696              :          We can't make safe assumptions about the side-effects, but
     697              :          store-forwarding opportunities later in the BB should still
     698              :          be analyzed.  */
     699          839 :       if ((writes_mem && !is_simple_store)
     700          806 :           || (reads_mem && !is_simple_load))
     701              :         {
     702           47 :           store_exprs.truncate (0);
     703           47 :           continue;
     704              :         }
     705              : 
     706          792 :       if (removed_count)
     707              :         {
     708           23 :           unsigned int i, j;
     709           23 :           store_fwd_info *it;
     710          312 :           VEC_ORDERED_REMOVE_IF (store_exprs, i, j, it, it->remove);
     711              :         }
     712              : 
     713              :       /* Don't consider store forwarding if the RTL instruction distance is
     714              :          more than PARAM_STORE_FORWARDING_MAX_DISTANCE and the cost checks
     715              :          are not disabled.  */
     716          792 :       const bool unlimited_cost = (param_store_forwarding_max_distance == 0);
     717          287 :       if (!unlimited_cost && !store_exprs.is_empty ()
     718          792 :           && (store_exprs[0].insn_cnt
     719          287 :               + param_store_forwarding_max_distance <= insn_cnt))
     720           62 :         store_exprs.ordered_remove (0);
     721              : 
     722          792 :       insn_cnt++;
     723          897 :     }
     724           55 : }
     725              : 
     726              : /* Update pass statistics.  Report the counters stats_sf_detected and
                      :    stats_sf_avoided for function FN through statistics_counter_event,
                      :    each under its own descriptive label.  */
     727              : 
     728              : void
     729           24 : store_forwarding_analyzer::update_stats (function *fn)
     730              : {
     731           24 :   statistics_counter_event (fn, "Cases of store forwarding detected: ",
     732           24 :                             stats_sf_detected);
     733           24 :   statistics_counter_event (fn, "Cases of store forwarding avoided: ",
     734           24 :                             stats_sf_avoided);
     735           24 : }
     736              : /* Run the pass on function FN.  Sets DF_DEFER_INSN_RESCAN, brackets the
                      :    work with init_alias_analysis/end_alias_analysis, runs a single
                      :    store_forwarding_analyzer over every basic block of FN and finally
                      :    lets the analyzer update the statistics.  Always returns 0.  */
     737              : unsigned int
     738           24 : pass_rtl_avoid_store_forwarding::execute (function *fn)
     739              : {
     740           24 :   df_set_flags (DF_DEFER_INSN_RESCAN);
     741              : /* NOTE(review): alias analysis is presumably needed because the analyzer
                      :    issues memory dependence queries -- confirm.  */
     742           24 :   init_alias_analysis ();
     743              : /* One analyzer instance is reused for every basic block; update_stats
                      :    is called on it once, after the walk.  */
     744           24 :   store_forwarding_analyzer analyzer;
     745              : 
     746           24 :   basic_block bb;
     747           92 :   FOR_EACH_BB_FN (bb, fn)
     748           68 :     analyzer.avoid_store_forwarding (bb);
     749              : 
     750           24 :   end_alias_analysis ();
     751              : 
     752           24 :   analyzer.update_stats (fn);
     753              : 
     754           24 :   return 0;
     755           24 : }
     756              : 
     757              : } // anon namespace.
     758              : /* Return a pass_rtl_avoid_store_forwarding object allocated with new
                      :    and constructed with context CTXT.  */
     759              : rtl_opt_pass *
     760       288767 : make_pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
     761              : {
     762       288767 :   return new pass_rtl_avoid_store_forwarding (ctxt);
     763              : }
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.