Line data Source code
1 : /* Avoid store forwarding optimization pass.
2 : Copyright (C) 2024-2026 Free Software Foundation, Inc.
3 : Contributed by VRULL GmbH.
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it
8 : under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3, or (at your option)
10 : any later version.
11 :
12 : GCC is distributed in the hope that it will be useful, but
13 : WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "config.h"
22 : #include "system.h"
23 : #include "coretypes.h"
24 : #include "backend.h"
25 : #include "target.h"
26 : #include "rtl.h"
27 : #include "avoid-store-forwarding.h"
28 : #include "alias.h"
29 : #include "rtlanal.h"
30 : #include "cfgrtl.h"
31 : #include "tree-pass.h"
32 : #include "predict.h"
33 : #include "insn-config.h"
34 : #include "expmed.h"
35 : #include "recog.h"
36 : #include "regset.h"
37 : #include "regs.h"
38 : #include "df.h"
39 : #include "expr.h"
40 : #include "memmodel.h"
41 : #include "emit-rtl.h"
42 : #include "vec.h"
43 :
44 : /* This pass tries to detect and avoid cases of store forwarding.
45 : On many processors there is a large penalty when smaller stores are
46 : forwarded to larger loads. The idea used to avoid the stall is to move
47 : the store after the load and in addition emit a bit insert sequence so
48 : the load register has the correct value. For example the following:
49 :
50 : strb w2, [x1, 1]
51 : ldr x0, [x1]
52 :
53 : Will be transformed to:
54 :
55 : ldr x0, [x1]
56 : strb w2, [x1, 1]
57 : bfi x0, x2, 8, 8
58 : */
59 :
60 : namespace {
61 :
62 : const pass_data pass_data_avoid_store_forwarding =
63 : {
64 : RTL_PASS, /* type. */
65 : "avoid_store_forwarding", /* name. */
66 : OPTGROUP_NONE, /* optinfo_flags. */
67 : TV_AVOID_STORE_FORWARDING, /* tv_id. */
68 : 0, /* properties_required. */
69 : 0, /* properties_provided. */
70 : 0, /* properties_destroyed. */
71 : 0, /* todo_flags_start. */
72 : TODO_df_finish /* todo_flags_finish. */
73 : };
74 :
75 : class pass_rtl_avoid_store_forwarding : public rtl_opt_pass
76 : {
77 : public:
78 288767 : pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
79 577534 : : rtl_opt_pass (pass_data_avoid_store_forwarding, ctxt)
80 : {}
81 :
82 : /* opt_pass methods: */
83 1481491 : virtual bool gate (function *) final override
84 : {
85 1481491 : return flag_avoid_store_forwarding && optimize >= 1;
86 : }
87 :
88 : virtual unsigned int execute (function *) final override;
89 : }; // class pass_rtl_avoid_store_forwarding
90 :
91 : /* Handler for finding and avoiding store forwardings. */
92 :
93 48 : class store_forwarding_analyzer
94 : {
95 : public:
96 : unsigned int stats_sf_detected = 0;
97 : unsigned int stats_sf_avoided = 0;
98 :
99 : bool is_store_forwarding (rtx store_mem, rtx load_mem,
100 : HOST_WIDE_INT *off_val);
101 : bool process_store_forwarding (vec<store_fwd_info> &, rtx_insn *load_insn,
102 : rtx load_mem);
103 : void avoid_store_forwarding (basic_block);
104 : void update_stats (function *);
105 :
106 : private:
107 : /* Per-insn live-out hard-register sets for the current BB. Populated
108 : lazily on the first candidate with bit-insert side-effect clobbers
109 : (so aarch64 bfi pays nothing). Cleared on each avoid_store_forwarding
110 : entry. */
111 : hash_map<rtx_insn *, HARD_REG_SET> m_bb_live_after;
112 :
113 : void compute_bb_live_after (basic_block bb);
114 : };
115 :
116 : /* Return a bit insertion sequence that would make DEST have the correct value
117 : if the store represented by STORE_INFO were to be moved after DEST. */
118 :
119 : static rtx_insn *
120 39 : generate_bit_insert_sequence (store_fwd_info *store_info, rtx dest)
121 : {
122 : /* Memory size should be a constant at this stage. */
123 39 : unsigned HOST_WIDE_INT store_size
124 39 : = MEM_SIZE (store_info->store_mem).to_constant ();
125 :
126 39 : start_sequence ();
127 :
128 39 : unsigned HOST_WIDE_INT bitsize = store_size * BITS_PER_UNIT;
129 39 : unsigned HOST_WIDE_INT start = store_info->offset * BITS_PER_UNIT;
130 :
131 39 : rtx mov_reg = store_info->mov_reg;
132 39 : store_bit_field (dest, bitsize, start, 0, 0, GET_MODE (mov_reg), mov_reg,
133 : false, false);
134 :
135 39 : rtx_insn *insns = get_insns ();
136 39 : unshare_all_rtl_in_chain (insns);
137 39 : end_sequence ();
138 :
139 255 : for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
140 216 : if (contains_mem_rtx_p (PATTERN (insn))
141 216 : || recog_memoized (insn) < 0)
142 0 : return NULL;
143 :
144 : return insns;
145 : }
146 :
147 : /* note_stores callback: record hard regs clobbered (not set) by an insn,
148 : to capture side-effect clobbers (e.g. flags) without the intended dest. */
149 :
150 : static void
151 316 : record_hard_reg_clobbers (rtx x, const_rtx pat, void *data)
152 : {
153 316 : if (GET_CODE (pat) == CLOBBER && REG_P (x) && HARD_REGISTER_P (x))
154 90 : add_to_hard_reg_set ((HARD_REG_SET *) data, GET_MODE (x), REGNO (x));
155 316 : }
156 :
157 : /* Populate m_bb_live_after with the hard registers live immediately
158 : after each real insn in BB. */
159 :
160 : void
161 5 : store_forwarding_analyzer::compute_bb_live_after (basic_block bb)
162 : {
163 5 : auto_bitmap live;
164 5 : df_simulate_initialize_backwards (bb, live);
165 5 : rtx_insn *scan;
166 271 : FOR_BB_INSNS_REVERSE (bb, scan)
167 266 : if (INSN_P (scan))
168 : {
169 : HARD_REG_SET hrs;
170 247 : REG_SET_TO_HARD_REG_SET (hrs, live);
171 247 : m_bb_live_after.put (scan, hrs);
172 247 : df_simulate_one_insn_backwards (bb, scan, live);
173 : }
174 5 : }
175 :
176 : /* Return true iff a store to STORE_MEM would write to a sub-region of bytes
177 : from what LOAD_MEM would read. If true also store the relative byte offset
178 : of the store within the load to OFF_VAL. */
179 :
180 359 : bool store_forwarding_analyzer::
181 : is_store_forwarding (rtx store_mem, rtx load_mem, HOST_WIDE_INT *off_val)
182 : {
183 359 : poly_int64 load_offset, store_offset;
184 359 : rtx load_base = strip_offset (XEXP (load_mem, 0), &load_offset);
185 359 : rtx store_base = strip_offset (XEXP (store_mem, 0), &store_offset);
186 359 : poly_int64 off_diff = store_offset - load_offset;
187 :
188 359 : HOST_WIDE_INT off_val_tmp = 0;
189 359 : bool is_off_diff_constant = off_diff.is_constant (&off_val_tmp);
190 359 : if (off_val)
191 359 : *off_val = off_val_tmp;
192 :
193 359 : return (MEM_SIZE (load_mem).is_constant ()
194 359 : && rtx_equal_p (load_base, store_base)
195 277 : && known_subrange_p (store_offset, MEM_SIZE (store_mem),
196 277 : load_offset, MEM_SIZE (load_mem))
197 359 : && is_off_diff_constant);
198 : }
199 :
200 : /* Given a list of small stores that are forwarded to LOAD_INSN, try to
201 : rearrange them so that a store-forwarding penalty doesn't occur.
202 : The stores must be given in reverse program order, starting from the
203 : one closer to LOAD_INSN. */
/* LOAD_MEM is the MEM read by LOAD_INSN.  Returns true iff the insn stream
   was rewritten; in that case every store in STORES has been deleted and
   re-emitted after the load and, when the stores cover the whole load, the
   load itself has been deleted.  Returns false, leaving the insn stream
   untouched, when the stores overlap each other, when a needed insn cannot
   be generated or recognized, when a bit-insert sequence clobbers a live
   hard register, or when the target hook rejects the transformation.
   STORES is modified (fields filled in, possibly reordered) either way.  */
204 :
205 20 : bool store_forwarding_analyzer::
206 : process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
207 : rtx load_mem)
208 : {
209 20 : machine_mode load_mem_mode = GET_MODE (load_mem);
210 : /* Memory sizes should be constants at this stage. */
211 20 : HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
212 :
213 : /* If the stores cover all the bytes of the load without overlap then we can
214 : eliminate the load entirely and use the computed value instead.
215 : Bail out when partially overlapping stores are detected, as the pass
216 : cannot correctly handle "last writer wins" semantics for the
217 : overlapping byte ranges (see PR124476). */
218 :
/* One bit per byte of the load; a set bit means some store in STORES
   writes that byte.  */
219 20 : auto_sbitmap forwarded_bytes (load_size);
220 20 : bitmap_clear (forwarded_bytes);
221 :
222 20 : unsigned int i;
223 20 : store_fwd_info* it;
224 93 : FOR_EACH_VEC_ELT (stores, i, it)
225 : {
226 77 : HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
227 77 : if (bitmap_any_bit_in_range_p (forwarded_bytes, it->offset,
228 77 : it->offset + store_size - 1))
229 : return false;
230 73 : bitmap_set_range (forwarded_bytes, it->offset, store_size);
231 : }
232 :
/* After the inversion the set bits are the bytes NOT written by any store,
   so an empty bitmap means the stores cover the whole load.  */
233 16 : bitmap_not (forwarded_bytes, forwarded_bytes);
234 16 : bool load_elim = bitmap_empty_p (forwarded_bytes);
235 :
/* Counted as detected even if one of the checks below rejects the
   transformation.  */
236 16 : stats_sf_detected++;
237 :
238 16 : if (dump_file)
239 : {
240 0 : fprintf (dump_file, "Store forwarding detected:\n");
241 :
242 0 : FOR_EACH_VEC_ELT (stores, i, it)
243 : {
244 0 : fprintf (dump_file, "From: ");
245 0 : print_rtl_single (dump_file, it->store_insn);
246 : }
247 :
248 0 : fprintf (dump_file, "To: ");
249 0 : print_rtl_single (dump_file, load_insn);
250 :
251 0 : if (load_elim)
252 0 : fprintf (dump_file, "(Load elimination candidate)\n");
253 : }
254 :
255 16 : rtx load = single_set (load_insn);
256 16 : rtx dest;
257 :
/* When the load goes away the value is assembled in a fresh pseudo of the
   memory's mode; otherwise the bit inserts patch the load's own
   destination in place.  */
258 16 : if (load_elim)
259 10 : dest = gen_reg_rtx (load_mem_mode);
260 : else
261 6 : dest = SET_DEST (load);
262 :
/* MOVE_TO_FRONT: index in STORES of the store whose value is placed with a
   plain move instead of a bit insert, or -1.  BASE_OFFSET_INDEX: index of
   the store selected as candidate for that move, or -1.  */
263 16 : int move_to_front = -1;
264 16 : int total_cost = 0;
265 16 : int base_offset_index = -1;
266 :
267 : /* Find the last store that has the same offset the load, in the case that
268 : we're eliminating the load. We will try to use it as a base register
269 : to avoid bit inserts (see second loop below). We want the last one, as
270 : it will be wider and we don't want to overwrite the base register if
271 : there are many of them. */
272 6 : if (load_elim)
273 : {
274 20 : FOR_EACH_VEC_ELT_REVERSE (stores, i, it)
275 : {
276 10 : const bool has_base_offset
277 10 : = known_eq (poly_uint64 (it->offset),
278 : subreg_size_lowpart_offset (MEM_SIZE (it->store_mem),
279 : load_size));
280 10 : if (has_base_offset)
281 : {
282 10 : base_offset_index = i;
283 10 : break;
284 : }
285 : }
286 : }
287 :
288 : /* Check if we can emit bit insert instructions for all forwarded stores. */
/* Everything built in this loop lives in detached sequences; nothing is
   added to the function's insn stream until all checks have passed.  */
289 113 : FOR_EACH_VEC_ELT (stores, i, it)
290 : {
291 49 : it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem));
292 49 : rtx_insn *insns = NULL;
293 :
294 : /* Check if this is a store with base offset, if we're eliminating the
295 : load, and use it as the base register to avoid a bit insert if
296 : possible. Load elimination is implied by base_offset_index != -1. */
297 49 : if (i == (unsigned) base_offset_index)
298 : {
299 10 : start_sequence ();
300 :
301 20 : rtx base_reg = lowpart_subreg (GET_MODE (dest), it->mov_reg,
302 10 : GET_MODE (it->mov_reg));
303 :
304 10 : if (base_reg)
305 : {
306 10 : rtx_insn *move0 = emit_move_insn (dest, base_reg);
307 10 : if (recog_memoized (move0) >= 0)
308 : {
309 10 : insns = get_insns ();
310 10 : move_to_front = (int) i;
311 : }
312 : }
313 :
314 10 : end_sequence ();
315 : }
316 :
/* Fall back to a bit insert when the move was not tried or did not
   work.  */
317 10 : if (!insns)
318 39 : insns = generate_bit_insert_sequence (&(*it), dest);
319 :
320 39 : if (!insns)
321 : {
322 0 : if (dump_file)
323 : {
324 0 : fprintf (dump_file, "Failed due to: ");
325 0 : print_rtl_single (dump_file, it->store_insn);
326 : }
327 0 : return false;
328 : }
329 :
330 49 : total_cost += seq_cost (insns, true);
331 49 : it->bits_insert_insns = insns;
332 :
333 49 : rtx store_set = single_set (it->store_insn);
334 :
335 : /* Create a register move at the store's original position to save the
336 : stored value. */
337 49 : start_sequence ();
338 49 : rtx_insn *insn1
339 49 : = emit_insn (gen_rtx_SET (it->mov_reg, SET_SRC (store_set)));
340 49 : end_sequence ();
341 :
342 49 : if (recog_memoized (insn1) < 0)
343 : {
344 0 : if (dump_file)
345 : {
346 0 : fprintf (dump_file, "Failed due to unrecognizable insn: ");
347 0 : print_rtl_single (dump_file, insn1);
348 : }
349 0 : return false;
350 : }
351 :
352 49 : it->save_store_value_insn = insn1;
353 :
354 : /* Create a new store after the load with the saved original value.
355 : This avoids the forwarding stall. */
356 49 : start_sequence ();
357 49 : rtx_insn *insn2
358 49 : = emit_insn (gen_rtx_SET (SET_DEST (store_set), it->mov_reg));
359 49 : end_sequence ();
360 :
361 49 : if (recog_memoized (insn2) < 0)
362 : {
363 0 : if (dump_file)
364 : {
365 0 : fprintf (dump_file, "Failed due to unrecognizable insn: ");
366 0 : print_rtl_single (dump_file, insn2);
367 : }
368 0 : return false;
369 : }
370 :
371 49 : it->store_saved_value_insn = insn2;
372 : }
373 :
374 : /* Reject if the bit-insert sequences clobber a hard register live at
375 : the insertion point (e.g. shift/and/or on x86 clobber flags, which
376 : would break carry chains). Done before the target cost query so
377 : we skip cost work on candidates we would reject anyway. */
378 : HARD_REG_SET clobbered_regs;
379 65 : CLEAR_HARD_REG_SET (clobbered_regs);
380 113 : FOR_EACH_VEC_ELT (stores, i, it)
381 275 : for (rtx_insn *ins = it->bits_insert_insns; ins; ins = NEXT_INSN (ins))
382 226 : note_stores (ins, record_hard_reg_clobbers, &clobbered_regs);
383 :
384 16 : if (!hard_reg_set_empty_p (clobbered_regs))
385 : {
/* The per-insn liveness is computed at most once per basic block, and only
   when some candidate actually has clobbers.  */
386 12 : if (m_bb_live_after.is_empty ())
387 5 : compute_bb_live_after (BLOCK_FOR_INSN (load_insn));
388 :
/* The new insns go right after LOAD_INSN, so what matters is the set of
   hard registers live after it.  NOTE(review): a missing map entry is
   treated as "nothing live" -- confirm that cannot happen here.  */
389 12 : const HARD_REG_SET *live_at_insert = m_bb_live_after.get (load_insn);
390 12 : if (live_at_insert
391 24 : && hard_reg_set_intersect_p (clobbered_regs, *live_at_insert))
392 : {
393 5 : if (dump_file)
394 0 : fprintf (dump_file,
395 : "Not transformed: bit-insert clobbers live hard reg.\n");
396 5 : return false;
397 : }
398 : }
399 :
/* An eliminated load is credited against the cost of the new insns.  */
400 11 : if (load_elim)
401 6 : total_cost -= insn_cost (load_insn, true);
402 :
403 : /* Let the target decide if transforming this store forwarding instance is
404 : profitable. */
405 11 : if (!targetm.avoid_store_forwarding_p (stores, load_mem, total_cost,
406 : load_elim))
407 : {
408 1 : if (dump_file)
409 0 : fprintf (dump_file, "Not transformed due to target decision.\n");
410 :
411 1 : return false;
412 : }
413 :
414 : /* If we have a move instead of bit insert, it needs to be emitted first in
415 : the resulting sequence. */
416 10 : if (move_to_front != -1)
417 : {
/* The emission loop below places each element directly after LOAD_INSN, so
   the LAST element of STORES ends up first in the insn stream; hence the
   entry is moved to the end of the vector.  */
418 6 : store_fwd_info copy = stores[move_to_front];
419 6 : stores.safe_push (copy);
420 6 : stores.ordered_remove (move_to_front);
421 : }
422 :
423 10 : if (load_elim)
424 : {
425 6 : machine_mode outer_mode = GET_MODE (SET_DEST (load));
426 6 : rtx load_move;
427 6 : rtx load_value = dest;
/* If the original load extended the memory value, apply the same
   operation (the code of the load's source) to the assembled value.  */
428 6 : if (outer_mode != load_mem_mode)
429 : {
430 0 : load_value = simplify_gen_unary (GET_CODE (SET_SRC (load)),
431 : outer_mode, dest, load_mem_mode);
432 : }
433 6 : load_move = gen_rtx_SET (SET_DEST (load), load_value);
434 :
435 6 : start_sequence ();
436 6 : rtx_insn *insn = emit_insn (load_move);
437 6 : rtx_insn *seq = end_sequence ();
438 :
/* Still safe to give up: no insn has been added to the insn stream yet.  */
439 6 : if (recog_memoized (insn) < 0)
440 : return false;
441 :
/* Emitted first, so the bit inserts emitted later (also directly after
   LOAD_INSN) end up in front of this move.  */
442 6 : emit_insn_after (seq, load_insn);
443 : }
444 :
445 10 : if (dump_file)
446 : {
447 0 : fprintf (dump_file, "Store forwarding avoided with bit inserts:\n");
448 :
449 0 : FOR_EACH_VEC_ELT (stores, i, it)
450 : {
451 0 : if (stores.length () > 1)
452 : {
453 0 : fprintf (dump_file, "For: ");
454 0 : print_rtl_single (dump_file, it->store_insn);
455 : }
456 :
457 0 : fprintf (dump_file, "With sequence:\n");
458 :
459 0 : for (rtx_insn *insn = it->bits_insert_insns; insn;
460 0 : insn = NEXT_INSN (insn))
461 : {
462 0 : fprintf (dump_file, " ");
463 0 : print_rtl_single (dump_file, insn);
464 : }
465 : }
466 :
467 : }
468 :
469 10 : stats_sf_avoided++;
470 :
471 : /* Done, emit all the generated instructions and delete the stores.
472 : Note that STORES are in reverse program order. */
473 :
/* For each store the re-emitted store is placed directly after LOAD_INSN
   and therefore in front of that store's bit-insert sequence.  */
474 48 : FOR_EACH_VEC_ELT (stores, i, it)
475 : {
476 38 : emit_insn_after (it->bits_insert_insns, load_insn);
477 38 : emit_insn_after (it->store_saved_value_insn, load_insn);
478 : }
479 :
/* Replace each original store by the move that saves its value.  */
480 48 : FOR_EACH_VEC_ELT (stores, i, it)
481 : {
482 38 : emit_insn_before (it->save_store_value_insn, it->store_insn);
483 38 : delete_insn (it->store_insn);
484 : }
485 :
486 10 : df_insn_rescan (load_insn);
487 :
488 10 : if (load_elim)
489 : {
490 : /* Prevent a dangling rtx_insn * key after delete_insn. */
491 6 : m_bb_live_after.remove (load_insn);
492 6 : delete_insn (load_insn);
493 : }
494 :
495 : return true;
496 20 : }
497 :
498 : /* Try to modify BB so that expensive store forwarding cases are avoided. */
/* Does nothing for blocks that are not optimized for speed.  Otherwise
   scans BB once in program order, keeping a list of candidate stores, and
   calls process_store_forwarding for every simple load that some of the
   candidates forward to.  */
499 :
500 : void
501 68 : store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
502 : {
503 68 : if (!optimize_bb_for_speed_p (bb))
504 13 : return;
505 :
/* Drop the liveness information cached for the previously processed
   block.  */
506 55 : m_bb_live_after.empty ();
507 :
/* STORE_EXPRS holds the live candidates in program order (oldest at
   index 0).  INSN_CNT numbers the insns that reach the end of the loop
   body and is used for the distance limit at the bottom.  */
508 55 : auto_vec<store_fwd_info, 8> store_exprs;
509 55 : rtx_insn *insn;
510 55 : unsigned int insn_cnt = 0;
511 :
512 : /* Iterate over the basic block's instructions detecting store instructions.
513 : Upon reaching a load instruction, check if any of the previously detected
514 : stores could result in store forwarding. In that case, try to reorder
515 : the load and store instructions. When we encounter instructions that
516 : might throw an exception, instruction dependencies, etc., clear the
517 : vector of detected stores and continue.
518 :
519 : Invariant: dropping a candidate from store_exprs (via it->remove or
520 : truncate) only removes it from the forwarding list; the store insn
521 : stays in the IR so later loads read its effect from memory. Only
522 : process_store_forwarding may delete the original store. */
523 1072 : FOR_BB_INSNS (bb, insn)
524 : {
525 1017 : if (!NONDEBUG_INSN_P (insn))
526 225 : continue;
527 :
528 897 : vec_rtx_properties properties;
529 897 : properties.add_insn (insn, false);
530 :
531 897 : rtx set = single_set (insn);
532 :
/* Anything that is not a single SET, or that may throw, flushes all
   candidates.  */
533 897 : if (!set || insn_could_throw_p (insn))
534 : {
535 58 : store_exprs.truncate (0);
536 58 : continue;
537 : }
538 :
539 : /* The inner mem RTX if INSN is a load, NULL_RTX otherwise. */
540 839 : rtx load_mem = SET_SRC (set);
541 :
/* Look through a zero or sign extension wrapped around the source.  */
542 839 : if (GET_CODE (load_mem) == ZERO_EXTEND
543 839 : || GET_CODE (load_mem) == SIGN_EXTEND)
544 40 : load_mem = XEXP (load_mem, 0);
545 :
546 839 : if (!MEM_P (load_mem))
547 758 : load_mem = NULL_RTX;
548 :
549 : /* The mem RTX if INSN is a store, NULL_RTX otherwise. */
550 839 : rtx store_mem = MEM_P (SET_DEST (set)) ? SET_DEST (set) : NULL_RTX;
551 :
552 : /* We cannot analyze memory RTXs that have unknown size. */
553 354 : if ((store_mem && (!MEM_SIZE_KNOWN_P (store_mem)
554 : || !MEM_SIZE (store_mem).is_constant ()))
555 920 : || (load_mem && (!MEM_SIZE_KNOWN_P (load_mem)
556 : || !MEM_SIZE (load_mem).is_constant ())))
557 : {
558 0 : store_exprs.truncate (0);
559 0 : continue;
560 : }
561 :
/* "Simple" means no asm and no side effects.  A simple store additionally
   has no MEM in its source, a simple load no MEM in its destination.  */
562 839 : bool is_simple = !properties.has_asm
563 839 : && !properties.has_side_effects ();
564 839 : bool is_simple_store = is_simple
565 839 : && store_mem
566 839 : && !contains_mem_rtx_p (SET_SRC (set));
567 839 : bool is_simple_load = is_simple
568 839 : && load_mem
569 839 : && !contains_mem_rtx_p (SET_DEST (set));
570 :
/* Number of candidates flagged with ->remove while handling this insn;
   they are compacted out of STORE_EXPRS further down.  */
571 839 : int removed_count = 0;
572 :
573 839 : if (is_simple_store)
574 : {
575 : /* Record store forwarding candidate. */
576 302 : store_fwd_info info;
577 302 : info.store_insn = insn;
578 302 : info.store_mem = store_mem;
579 302 : info.insn_cnt = insn_cnt;
580 302 : info.remove = false;
581 302 : info.forwarded = false;
582 302 : store_exprs.safe_push (info);
583 : }
584 :
/* Classify the insn's references: note memory reads/writes, and for every
   register write invalidate the candidates whose address uses it.  */
585 839 : bool reads_mem = false;
586 839 : bool writes_mem = false;
587 2970 : for (auto ref : properties.refs ())
588 2131 : if (ref.is_mem ())
589 : {
590 440 : reads_mem |= ref.is_read ();
591 440 : writes_mem |= ref.is_write ();
592 : }
593 1691 : else if (ref.is_write ())
594 : {
595 : /* Drop store forwarding candidates when the address register is
596 : overwritten. */
/* Walks from the newest candidate to the oldest; once one candidate is
   hit, it and all older ones are flagged.  */
597 632 : bool remove_rest = false;
598 632 : unsigned int i;
599 632 : store_fwd_info *it;
600 11800 : FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
601 : {
602 8405 : if (remove_rest
603 16793 : || reg_overlap_mentioned_p (regno_reg_rtx[ref.regno],
604 8388 : it->store_mem))
605 : {
606 20 : it->remove = true;
607 20 : removed_count++;
608 20 : remove_rest = true;
609 : }
610 : }
611 : }
612 :
613 839 : if (is_simple_load)
614 : {
615 : /* Process load for possible store forwarding cases.
616 : Possible newly created/moved stores, resulted from a successful
617 : forwarding, will be processed in subsequent iterations. */
/* FORWARDINGS collects, newest first, the candidates that forward to this
   load.  PARTIAL_FORWARDING is set when a forwarding store cannot be moved
   because of a write dependence; the load is then left alone.  */
618 81 : auto_vec<store_fwd_info> forwardings;
619 81 : bool partial_forwarding = false;
620 81 : bool remove_rest = false;
621 :
622 81 : bool vector_load = VECTOR_MODE_P (GET_MODE (load_mem));
623 :
624 81 : unsigned int i;
625 81 : store_fwd_info *it;
626 531 : FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
627 : {
/* Note: this STORE_MEM shadows the one declared for INSN above.  */
628 369 : rtx store_mem = it->store_mem;
629 369 : HOST_WIDE_INT off_val;
630 :
631 369 : bool vector_store = VECTOR_MODE_P (GET_MODE (store_mem));
632 :
633 369 : if (remove_rest)
634 : {
635 9 : it->remove = true;
636 9 : removed_count++;
637 : }
638 360 : else if (vector_load ^ vector_store)
639 : {
640 : /* Vector stores followed by a non-vector load or the
641 : opposite, cause store_bit_field to generate non-canonical
642 : expressions, like (subreg:V4SI (reg:DI ...) 0)).
643 : Cases like that should be handled using vec_duplicate,
644 : so we reject the transformation in those cases. */
645 1 : it->remove = true;
646 1 : removed_count++;
647 1 : remove_rest = true;
648 1 : forwardings.truncate (0);
649 : }
650 359 : else if (is_store_forwarding (store_mem, load_mem, &off_val))
651 : {
652 : /* Check if moving this store after the load is legal. */
/* J runs over the candidates newer than this one (indices above I).  A
   newer store that is not itself being forwarded and that has an output
   dependence with this store blocks the move.  */
653 93 : bool write_dep = false;
654 93 : unsigned int j = store_exprs.length () - 1;
655 1928 : for (; j != i; j--)
656 : {
657 1835 : if (!store_exprs[j].forwarded
658 3291 : && output_dependence (store_mem,
659 1456 : store_exprs[j].store_mem))
660 : {
661 : write_dep = true;
662 : break;
663 : }
664 : }
665 :
666 93 : if (!write_dep)
667 : {
668 93 : it->forwarded = true;
669 93 : it->offset = off_val;
670 93 : forwardings.safe_push (*it);
671 : }
672 : else
673 : partial_forwarding = true;
674 :
/* Whether or not it can be moved, the store stops being a candidate.  */
675 93 : it->remove = true;
676 93 : removed_count++;
677 : }
678 266 : else if (true_dependence (store_mem, GET_MODE (store_mem),
679 : load_mem))
680 : {
681 : /* We cannot keep a store forwarding candidate if it possibly
682 : interferes with this load. */
683 2 : it->remove = true;
684 2 : removed_count++;
685 2 : remove_rest = true;
686 2 : forwardings.truncate (0);
687 : }
688 : }
689 :
/* The return value is not used here; success is only recorded in the
   analyzer's statistics counters.  */
690 123 : if (!forwardings.is_empty () && !partial_forwarding)
691 20 : process_store_forwarding (forwardings, insn, load_mem);
692 81 : }
693 :
694 : /* If we encounter a memory read/write that is not a simple
695 : store/load, flush all pending store candidates and continue.
696 : We can't make safe assumptions about the side-effects, but
697 : store-forwarding opportunities later in the BB should still
698 : be analyzed. */
699 839 : if ((writes_mem && !is_simple_store)
700 806 : || (reads_mem && !is_simple_load))
701 : {
702 47 : store_exprs.truncate (0);
703 47 : continue;
704 : }
705 :
/* Compact out every candidate flagged above, keeping the order.  */
706 792 : if (removed_count)
707 : {
708 23 : unsigned int i, j;
709 23 : store_fwd_info *it;
710 312 : VEC_ORDERED_REMOVE_IF (store_exprs, i, j, it, it->remove);
711 : }
712 :
713 : /* Don't consider store forwarding if the RTL instruction distance is
714 : more than PARAM_STORE_FORWARDING_MAX_DISTANCE and the cost checks
715 : are not disabled. */
/* A parameter value of 0 disables the limit.  Only the oldest candidate
   (index 0) is examined, so at most one is dropped per insn.  */
716 792 : const bool unlimited_cost = (param_store_forwarding_max_distance == 0);
717 287 : if (!unlimited_cost && !store_exprs.is_empty ()
718 792 : && (store_exprs[0].insn_cnt
719 287 : + param_store_forwarding_max_distance <= insn_cnt))
720 62 : store_exprs.ordered_remove (0);
721 :
722 792 : insn_cnt++;
723 897 : }
724 55 : }
725 :
726 : /* Update pass statistics. */
727 :
728 : void
729 24 : store_forwarding_analyzer::update_stats (function *fn)
730 : {
731 24 : statistics_counter_event (fn, "Cases of store forwarding detected: ",
732 24 : stats_sf_detected);
733 24 : statistics_counter_event (fn, "Cases of store forwarding avoided: ",
734 24 : stats_sf_avoided);
735 24 : }
736 :
737 : unsigned int
738 24 : pass_rtl_avoid_store_forwarding::execute (function *fn)
739 : {
740 24 : df_set_flags (DF_DEFER_INSN_RESCAN);
741 :
742 24 : init_alias_analysis ();
743 :
744 24 : store_forwarding_analyzer analyzer;
745 :
746 24 : basic_block bb;
747 92 : FOR_EACH_BB_FN (bb, fn)
748 68 : analyzer.avoid_store_forwarding (bb);
749 :
750 24 : end_alias_analysis ();
751 :
752 24 : analyzer.update_stats (fn);
753 :
754 24 : return 0;
755 24 : }
756 :
757 : } // anon namespace.
758 :
759 : rtl_opt_pass *
760 288767 : make_pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
761 : {
762 288767 : return new pass_rtl_avoid_store_forwarding (ctxt);
763 : }
|