Line data Source code
1 : /* Avoid store forwarding optimization pass.
2 : Copyright (C) 2024-2026 Free Software Foundation, Inc.
3 : Contributed by VRULL GmbH.
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it
8 : under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3, or (at your option)
10 : any later version.
11 :
12 : GCC is distributed in the hope that it will be useful, but
13 : WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "config.h"
22 : #include "system.h"
23 : #include "coretypes.h"
24 : #include "backend.h"
25 : #include "target.h"
26 : #include "rtl.h"
27 : #include "avoid-store-forwarding.h"
28 : #include "alias.h"
29 : #include "rtlanal.h"
30 : #include "cfgrtl.h"
31 : #include "tree-pass.h"
32 : #include "predict.h"
33 : #include "insn-config.h"
34 : #include "expmed.h"
35 : #include "recog.h"
36 : #include "regset.h"
37 : #include "regs.h"
38 : #include "df.h"
39 : #include "expr.h"
40 : #include "memmodel.h"
41 : #include "emit-rtl.h"
42 : #include "vec.h"
43 :
44 : /* This pass tries to detect and avoid cases of store forwarding.
45 : On many processors there is a large penalty when smaller stores are
46 : forwarded to larger loads. The idea used to avoid the stall is to move
47 : the store after the load and in addition emit a bit insert sequence so
48 : the load register has the correct value. For example the following:
49 :
50 : strb w2, [x1, 1]
51 : ldr x0, [x1]
52 :
53 : Will be transformed to:
54 :
55 : ldr x0, [x1]
56 : strb w2, [x1, 1]
57 : bfi x0, x2, 8, 8
58 : */
59 :
60 : namespace {
61 :
62 : const pass_data pass_data_avoid_store_forwarding =
63 : {
64 : RTL_PASS, /* type. */
65 : "avoid_store_forwarding", /* name. */
66 : OPTGROUP_NONE, /* optinfo_flags. */
67 : TV_AVOID_STORE_FORWARDING, /* tv_id. */
68 : 0, /* properties_required. */
69 : 0, /* properties_provided. */
70 : 0, /* properties_destroyed. */
71 : 0, /* todo_flags_start. */
72 : TODO_df_finish /* todo_flags_finish. */
73 : };
74 :
75 : class pass_rtl_avoid_store_forwarding : public rtl_opt_pass
76 : {
77 : public:
78 298828 : pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
79 597656 : : rtl_opt_pass (pass_data_avoid_store_forwarding, ctxt)
80 : {}
81 :
82 : /* opt_pass methods: */
83 1488378 : virtual bool gate (function *) final override
84 : {
85 1488378 : return flag_avoid_store_forwarding && optimize >= 1;
86 : }
87 :
88 : virtual unsigned int execute (function *) final override;
89 : }; // class pass_rtl_avoid_store_forwarding
90 :
91 : /* Handler for finding and avoiding store forwardings. */
92 :
93 48 : class store_forwarding_analyzer
94 : {
95 : public:
96 : unsigned int stats_sf_detected = 0;
97 : unsigned int stats_sf_avoided = 0;
98 :
99 : bool is_store_forwarding (rtx store_mem, rtx load_mem,
100 : HOST_WIDE_INT *off_val);
101 : bool process_store_forwarding (vec<store_fwd_info> &, rtx_insn *load_insn,
102 : rtx load_mem);
103 : void avoid_store_forwarding (basic_block);
104 : void update_stats (function *);
105 :
106 : private:
107 : /* Per-insn live-out hard-register sets for the current BB. Populated
108 : lazily on the first candidate with bit-insert side-effect clobbers
109 : (so aarch64 bfi pays nothing). Cleared on each avoid_store_forwarding
110 : entry. */
111 : hash_map<rtx_insn *, HARD_REG_SET> m_bb_live_after;
112 :
113 : void compute_bb_live_after (basic_block bb);
114 : };
115 :
116 : /* Return a bit insertion sequence that would make DEST have the correct value
117 : if the store represented by STORE_INFO were to be moved after DEST. */
118 :
119 : static rtx_insn *
120 39 : generate_bit_insert_sequence (store_fwd_info *store_info, rtx dest)
121 : {
122 : /* Memory size should be a constant at this stage. */
123 39 : unsigned HOST_WIDE_INT store_size
124 39 : = MEM_SIZE (store_info->store_mem).to_constant ();
125 :
126 39 : start_sequence ();
127 :
128 39 : unsigned HOST_WIDE_INT bitsize = store_size * BITS_PER_UNIT;
129 39 : unsigned HOST_WIDE_INT start = store_info->offset * BITS_PER_UNIT;
130 :
131 39 : rtx mov_reg = store_info->mov_reg;
132 39 : store_bit_field (dest, bitsize, start, 0, 0, GET_MODE (mov_reg), mov_reg,
133 : false, false);
134 :
135 39 : rtx_insn *insns = get_insns ();
136 39 : unshare_all_rtl_in_chain (insns);
137 39 : end_sequence ();
138 :
139 255 : for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
140 216 : if (contains_mem_rtx_p (PATTERN (insn))
141 216 : || recog_memoized (insn) < 0)
142 0 : return NULL;
143 :
144 : return insns;
145 : }
146 :
147 : /* note_stores callback: record hard regs clobbered (not set) by an insn,
148 : to capture side-effect clobbers (e.g. flags) without the intended dest. */
149 :
150 : static void
151 316 : record_hard_reg_clobbers (rtx x, const_rtx pat, void *data)
152 : {
153 316 : if (GET_CODE (pat) == CLOBBER && REG_P (x) && HARD_REGISTER_P (x))
154 90 : add_to_hard_reg_set ((HARD_REG_SET *) data, GET_MODE (x), REGNO (x));
155 316 : }
156 :
157 : /* Populate m_bb_live_after with the hard registers live immediately
158 : after each real insn in BB. */
159 :
160 : void
161 5 : store_forwarding_analyzer::compute_bb_live_after (basic_block bb)
162 : {
163 5 : auto_bitmap live;
164 5 : df_simulate_initialize_backwards (bb, live);
165 5 : rtx_insn *scan;
166 271 : FOR_BB_INSNS_REVERSE (bb, scan)
167 266 : if (INSN_P (scan))
168 : {
169 : HARD_REG_SET hrs;
170 247 : REG_SET_TO_HARD_REG_SET (hrs, live);
171 247 : m_bb_live_after.put (scan, hrs);
172 247 : df_simulate_one_insn_backwards (bb, scan, live);
173 : }
174 5 : }
175 :
176 : /* Return true iff a store to STORE_MEM would write to a sub-region of bytes
177 : from what LOAD_MEM would read. If true also store the relative byte offset
178 : of the store within the load to OFF_VAL. */
179 :
180 359 : bool store_forwarding_analyzer::
181 : is_store_forwarding (rtx store_mem, rtx load_mem, HOST_WIDE_INT *off_val)
182 : {
183 359 : poly_int64 load_offset, store_offset;
184 359 : rtx load_base = strip_offset (XEXP (load_mem, 0), &load_offset);
185 359 : rtx store_base = strip_offset (XEXP (store_mem, 0), &store_offset);
186 359 : poly_int64 off_diff = store_offset - load_offset;
187 :
188 359 : HOST_WIDE_INT off_val_tmp = 0;
189 359 : bool is_off_diff_constant = off_diff.is_constant (&off_val_tmp);
190 359 : if (off_val)
191 359 : *off_val = off_val_tmp;
192 :
193 359 : return (MEM_SIZE (load_mem).is_constant ()
194 359 : && rtx_equal_p (load_base, store_base)
195 277 : && known_subrange_p (store_offset, MEM_SIZE (store_mem),
196 277 : load_offset, MEM_SIZE (load_mem))
197 359 : && is_off_diff_constant);
198 : }
199 :
200 : /* Given a list of small stores that are forwarded to LOAD_INSN, try to
201 : rearrange them so that a store-forwarding penalty doesn't occur.
202 : The stores must be given in reverse program order, starting from the
203 : one closer to LOAD_INSN. */
204 :
/* STORES holds one store_fwd_info per forwarded store; the caller has
   already filled in store_insn, store_mem and offset.  LOAD_INSN is the
   load and LOAD_MEM its memory operand.

   Returns true iff the insn stream was rewritten.  Every `return false'
   below happens before anything is emitted into or deleted from the insn
   stream; rejected attempts only leave behind unused pseudos and the
   scratch fields written into the elements of STORES.  */
205 20 : bool store_forwarding_analyzer::
206 : process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
207 : rtx load_mem)
208 : {
209 20 : machine_mode load_mem_mode = GET_MODE (load_mem);
210 : /* Memory sizes should be constants at this stage. */
211 20 : HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
212 :
213 : /* If the stores cover all the bytes of the load without overlap then we can
214 : eliminate the load entirely and use the computed value instead.
215 : Bail out when partially overlapping stores are detected, as the pass
216 : cannot correctly handle "last writer wins" semantics for the
217 : overlapping byte ranges (see PR124476). */
218 :
/* One bit per byte of the load; a set bit means that byte is written by
   one of the stores seen so far.  */
219 20 : auto_sbitmap forwarded_bytes (load_size);
220 20 : bitmap_clear (forwarded_bytes);
221 :
222 20 : unsigned int i;
223 20 : store_fwd_info* it;
224 93 : FOR_EACH_VEC_ELT (stores, i, it)
225 : {
226 77 : HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
/* Any byte already claimed by another store means overlap: give up.  */
227 77 : if (bitmap_any_bit_in_range_p (forwarded_bytes, it->offset,
228 77 : it->offset + store_size - 1))
229 : return false;
230 73 : bitmap_set_range (forwarded_bytes, it->offset, store_size);
231 : }
232 :
/* Invert the map so that set bits mark bytes NOT written by any store.
   If none remain, the stores fully cover the load.  */
233 16 : bitmap_not (forwarded_bytes, forwarded_bytes);
234 16 : bool load_elim = bitmap_empty_p (forwarded_bytes);
235 :
/* Counted as detected even if the transformation is rejected below.  */
236 16 : stats_sf_detected++;
237 :
238 16 : if (dump_file)
239 : {
240 0 : fprintf (dump_file, "Store forwarding detected:\n");
241 :
242 0 : FOR_EACH_VEC_ELT (stores, i, it)
243 : {
244 0 : fprintf (dump_file, "From: ");
245 0 : print_rtl_single (dump_file, it->store_insn);
246 : }
247 :
248 0 : fprintf (dump_file, "To: ");
249 0 : print_rtl_single (dump_file, load_insn);
250 :
251 0 : if (load_elim)
252 0 : fprintf (dump_file, "(Load elimination candidate)\n");
253 : }
254 :
255 16 : rtx load = single_set (load_insn);
256 16 : rtx dest;
257 :
/* When the load goes away the value is assembled in a fresh pseudo of the
   memory's mode; otherwise the load's own destination is patched.  */
258 16 : if (load_elim)
259 10 : dest = gen_reg_rtx (load_mem_mode);
260 : else
261 6 : dest = SET_DEST (load);
262 :
/* move_to_front: index in STORES of the store whose value is installed in
   DEST with a plain move instead of a bit insert, or -1 if there is none.
   total_cost: running sum of seq_cost over the generated sequences.
   base_offset_index: index of the store chosen by the search below, or
   -1.  */
263 16 : int move_to_front = -1;
264 16 : int total_cost = 0;
265 16 : int base_offset_index = -1;
266 :
267 : /* Find the last store that has the same offset as the load, in the case that
268 : we're eliminating the load. We will try to use it as a base register
269 : to avoid bit inserts (see second loop below). We want the last one, as
270 : it will be wider and we don't want to overwrite the base register if
271 : there are many of them. */
272 6 : if (load_elim)
273 : {
274 20 : FOR_EACH_VEC_ELT_REVERSE (stores, i, it)
275 : {
/* The candidate must sit at the offset that a lowpart subreg of the
   store's size would have within a value of the load's size.  */
276 10 : const bool has_base_offset
277 10 : = known_eq (poly_uint64 (it->offset),
278 : subreg_size_lowpart_offset (MEM_SIZE (it->store_mem),
279 : load_size));
280 10 : if (has_base_offset)
281 : {
282 10 : base_offset_index = i;
283 10 : break;
284 : }
285 : }
286 : }
287 :
288 : /* Check if we can emit bit insert instructions for all forwarded stores. */
/* For every store this loop prepares, in detached sequences, three pieces:
   bits_insert_insns (patches DEST), save_store_value_insn (copies the
   stored value into mov_reg) and store_saved_value_insn (stores mov_reg
   to the original address).  Nothing is emitted into the function yet.  */
289 113 : FOR_EACH_VEC_ELT (stores, i, it)
290 : {
/* MOV_REG will carry the stored value from the store's original position
   to the new store placed after the load.  */
291 49 : it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem));
292 49 : rtx_insn *insns = NULL;
293 :
294 : /* Check if this is a store with base offset, if we're eliminating the
295 : load, and use it as the base register to avoid a bit insert if
296 : possible. Load elimination is implied by base_offset_index != -1. */
/* If base_offset_index is -1 the cast yields UINT_MAX, which no index I
   is expected to equal.  */
297 49 : if (i == (unsigned) base_offset_index)
298 : {
299 10 : start_sequence ();
300 :
301 20 : rtx base_reg = lowpart_subreg (GET_MODE (dest), it->mov_reg,
302 10 : GET_MODE (it->mov_reg));
303 :
304 10 : if (base_reg)
305 : {
306 10 : rtx_insn *move0 = emit_move_insn (dest, base_reg);
307 10 : if (recog_memoized (move0) >= 0)
308 : {
309 10 : insns = get_insns ();
310 10 : move_to_front = (int) i;
311 : }
312 : }
313 :
314 10 : end_sequence ();
315 : }
316 :
/* No usable plain move: fall back to a bit-insert sequence.  */
317 10 : if (!insns)
318 39 : insns = generate_bit_insert_sequence (&(*it), dest);
319 :
320 39 : if (!insns)
321 : {
322 0 : if (dump_file)
323 : {
324 0 : fprintf (dump_file, "Failed due to: ");
325 0 : print_rtl_single (dump_file, it->store_insn);
326 : }
327 0 : return false;
328 : }
329 :
330 49 : total_cost += seq_cost (insns, true);
331 49 : it->bits_insert_insns = insns;
332 :
333 49 : rtx store_set = single_set (it->store_insn);
334 :
335 : /* Create a register move at the store's original position to save the
336 : stored value. */
337 49 : start_sequence ();
338 49 : rtx_insn *insn1
339 49 : = emit_insn (gen_rtx_SET (it->mov_reg, SET_SRC (store_set)));
340 49 : end_sequence ();
341 :
342 49 : if (recog_memoized (insn1) < 0)
343 : {
344 0 : if (dump_file)
345 : {
346 0 : fprintf (dump_file, "Failed due to unrecognizable insn: ");
347 0 : print_rtl_single (dump_file, insn1);
348 : }
349 0 : return false;
350 : }
351 :
352 49 : it->save_store_value_insn = insn1;
353 :
354 : /* Create a new store after the load with the saved original value.
355 : This avoids the forwarding stall. */
356 49 : start_sequence ();
357 49 : rtx_insn *insn2
358 49 : = emit_insn (gen_rtx_SET (SET_DEST (store_set), it->mov_reg));
359 49 : end_sequence ();
360 :
361 49 : if (recog_memoized (insn2) < 0)
362 : {
363 0 : if (dump_file)
364 : {
365 0 : fprintf (dump_file, "Failed due to unrecognizable insn: ");
366 0 : print_rtl_single (dump_file, insn2);
367 : }
368 0 : return false;
369 : }
370 :
371 49 : it->store_saved_value_insn = insn2;
372 : }
373 :
374 : /* Reject if the bit-insert sequences clobber a hard register live at
375 : the insertion point (e.g. shift/and/or on x86 clobber flags, which
376 : would break carry chains). Done before the target cost query so
377 : we skip cost work on candidates we would reject anyway. */
378 : HARD_REG_SET clobbered_regs;
379 65 : CLEAR_HARD_REG_SET (clobbered_regs);
/* Union of the hard registers CLOBBERed by any insn of any prepared
   bits_insert_insns sequence.  */
380 113 : FOR_EACH_VEC_ELT (stores, i, it)
381 275 : for (rtx_insn *ins = it->bits_insert_insns; ins; ins = NEXT_INSN (ins))
382 226 : note_stores (ins, record_hard_reg_clobbers, &clobbered_regs);
383 :
384 16 : if (!hard_reg_set_empty_p (clobbered_regs))
385 : {
/* The liveness map is built at most once per block, on first need.  */
386 12 : if (m_bb_live_after.is_empty ())
387 5 : compute_bb_live_after (BLOCK_FOR_INSN (load_insn));
388 :
/* The sequences are inserted right after LOAD_INSN, so the registers live
   after LOAD_INSN are the ones that must not be clobbered.  If the map has
   no entry for LOAD_INSN the check is skipped and the candidate is not
   rejected here.  */
389 12 : const HARD_REG_SET *live_at_insert = m_bb_live_after.get (load_insn);
390 12 : if (live_at_insert
391 24 : && hard_reg_set_intersect_p (clobbered_regs, *live_at_insert))
392 : {
393 5 : if (dump_file)
394 0 : fprintf (dump_file,
395 : "Not transformed: bit-insert clobbers live hard reg.\n");
396 5 : return false;
397 : }
398 : }
399 :
/* An eliminated load is credited against the cost of the new insns.  */
400 11 : if (load_elim)
401 6 : total_cost -= insn_cost (load_insn, true);
402 :
403 : /* Let the target decide if transforming this store forwarding instance is
404 : profitable. */
405 11 : if (!targetm.avoid_store_forwarding_p (stores, load_mem, total_cost,
406 : load_elim))
407 : {
408 1 : if (dump_file)
409 0 : fprintf (dump_file, "Not transformed due to target decision.\n");
410 :
411 1 : return false;
412 : }
413 :
414 : /* If we have a move instead of bit insert, it needs to be emitted first in
415 : the resulting sequence. */
/* The entry is moved to the END of STORES.  The emission loop below
   inserts each piece directly after LOAD_INSN, so what is emitted last
   ends up first in program order.  */
416 10 : if (move_to_front != -1)
417 : {
418 6 : store_fwd_info copy = stores[move_to_front];
419 6 : stores.safe_push (copy);
420 6 : stores.ordered_remove (move_to_front);
421 : }
422 :
423 10 : machine_mode outer_mode = GET_MODE (SET_DEST (load));
424 10 : if (load_elim || outer_mode != load_mem_mode)
425 : {
426 : /* If the load is being eliminated, emit a move (with extension if
427 : needed) from the temp register to the original load destination.
428 : Otherwise, if the load has SIGN_EXTEND or ZERO_EXTEND wrapping
429 : the MEM, the bit insert sequence may have modified bits that
430 : affect the extension (e.g. the sign bit), so re-apply it. */
431 6 : rtx move_src;
432 6 : if (outer_mode != load_mem_mode)
433 : {
434 0 : rtx ext_op = dest;
435 0 : if (!load_elim)
436 : {
/* DEST is the load's own (wider) destination here; extend from its low
   part in the memory's mode.  */
437 0 : ext_op = lowpart_subreg (load_mem_mode, dest, outer_mode);
438 0 : if (!ext_op)
439 : return false;
440 : }
/* Re-use the extension code (SIGN_EXTEND or ZERO_EXTEND) of the original
   load's source.  */
441 0 : move_src = simplify_gen_unary (GET_CODE (SET_SRC (load)),
442 : outer_mode, ext_op, load_mem_mode);
443 : }
444 : else
445 : move_src = dest;
446 :
447 6 : rtx move = gen_rtx_SET (SET_DEST (load), move_src);
448 :
449 6 : start_sequence ();
450 6 : rtx_insn *insn = emit_insn (move);
451 6 : rtx_insn *seq = end_sequence ();
452 :
453 6 : if (recog_memoized (insn) < 0)
454 : return false;
455 :
/* First real modification of the insn stream.  This is emitted before the
   per-store pieces, so they all land between LOAD_INSN and this move.  */
456 6 : emit_insn_after (seq, load_insn);
457 : }
458 :
459 10 : if (dump_file)
460 : {
461 0 : fprintf (dump_file, "Store forwarding avoided with bit inserts:\n");
462 :
463 0 : FOR_EACH_VEC_ELT (stores, i, it)
464 : {
465 0 : if (stores.length () > 1)
466 : {
467 0 : fprintf (dump_file, "For: ");
468 0 : print_rtl_single (dump_file, it->store_insn);
469 : }
470 :
471 0 : fprintf (dump_file, "With sequence:\n");
472 :
473 0 : for (rtx_insn *insn = it->bits_insert_insns; insn;
474 0 : insn = NEXT_INSN (insn))
475 : {
476 0 : fprintf (dump_file, " ");
477 0 : print_rtl_single (dump_file, insn);
478 : }
479 : }
480 :
481 : }
482 :
483 10 : stats_sf_avoided++;
484 :
485 : /* Done, emit all the generated instructions and delete the stores.
486 : Note that STORES are in reverse program order. */
487 :
/* Per store the result reads: LOAD_INSN, new store, bit insert.  Since
   every insertion is made directly after LOAD_INSN, stores handled later
   in this loop precede those handled earlier.  */
488 48 : FOR_EACH_VEC_ELT (stores, i, it)
489 : {
490 38 : emit_insn_after (it->bits_insert_insns, load_insn);
491 38 : emit_insn_after (it->store_saved_value_insn, load_insn);
492 : }
493 :
/* At each store's original position, save the stored value in MOV_REG and
   drop the original store.  */
494 48 : FOR_EACH_VEC_ELT (stores, i, it)
495 : {
496 38 : emit_insn_before (it->save_store_value_insn, it->store_insn);
497 38 : delete_insn (it->store_insn);
498 : }
499 :
500 10 : df_insn_rescan (load_insn);
501 :
502 10 : if (load_elim)
503 : {
504 : /* Prevent a dangling rtx_insn * key after delete_insn. */
505 6 : m_bb_live_after.remove (load_insn);
506 6 : delete_insn (load_insn);
507 : }
508 :
509 : return true;
510 20 : }
511 :
512 : /* Try to modify BB so that expensive store forwarding cases are avoided. */
513 :
/* The block is scanned once in program order.  Candidate stores are kept
   in STORE_EXPRS, oldest first.  Each load is matched against them and,
   when every matching store can legally be moved past the load, the group
   is handed to process_store_forwarding.  Blocks that are not optimized
   for speed are left untouched.  */
514 : void
515 68 : store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
516 : {
517 68 : if (!optimize_bb_for_speed_p (bb))
518 13 : return;
519 :
/* Forget liveness information cached for a previously processed block.  */
520 55 : m_bb_live_after.empty ();
521 :
522 55 : auto_vec<store_fwd_info, 8> store_exprs;
523 55 : rtx_insn *insn;
/* Counts the insns that reach the bottom of the loop body; it is used for
   the distance cut-off at the end of the loop.  */
524 55 : unsigned int insn_cnt = 0;
525 :
526 : /* Iterate over the basic block's instructions detecting store instructions.
527 : Upon reaching a load instruction, check if any of the previously detected
528 : stores could result in store forwarding. In that case, try to reorder
529 : the load and store instructions. When we encounter instructions that
530 : might throw an exception, instruction dependencies, etc., clear the
531 : vector of detected stores and continue.
532 :
533 : Invariant: dropping a candidate from store_exprs (via it->remove or
534 : truncate) only removes it from the forwarding list; the store insn
535 : stays in the IR so later loads read its effect from memory. Only
536 : process_store_forwarding may delete the original store. */
537 1072 : FOR_BB_INSNS (bb, insn)
538 : {
539 1017 : if (!NONDEBUG_INSN_P (insn))
540 225 : continue;
541 :
/* Gather the register and memory references made by INSN.  */
542 897 : vec_rtx_properties properties;
543 897 : properties.add_insn (insn, false);
544 :
545 897 : rtx set = single_set (insn);
546 :
/* Anything that is not a single SET, or that may throw, acts as a barrier:
   all pending candidates are forgotten.  */
547 897 : if (!set || insn_could_throw_p (insn))
548 : {
549 58 : store_exprs.truncate (0);
550 58 : continue;
551 : }
552 :
553 : /* The inner mem RTX if INSN is a load, NULL_RTX otherwise. */
554 839 : rtx load_mem = SET_SRC (set);
555 :
/* Look through one level of extension around the memory operand.  */
556 839 : if (GET_CODE (load_mem) == ZERO_EXTEND
557 839 : || GET_CODE (load_mem) == SIGN_EXTEND)
558 40 : load_mem = XEXP (load_mem, 0);
559 :
560 839 : if (!MEM_P (load_mem))
561 758 : load_mem = NULL_RTX;
562 :
563 : /* The mem RTX if INSN is a store, NULL_RTX otherwise. */
564 839 : rtx store_mem = MEM_P (SET_DEST (set)) ? SET_DEST (set) : NULL_RTX;
565 :
566 : /* We cannot analyze memory RTXs that have unknown size. */
567 354 : if ((store_mem && (!MEM_SIZE_KNOWN_P (store_mem)
568 : || !MEM_SIZE (store_mem).is_constant ()))
569 920 : || (load_mem && (!MEM_SIZE_KNOWN_P (load_mem)
570 : || !MEM_SIZE (load_mem).is_constant ())))
571 : {
572 0 : store_exprs.truncate (0);
573 0 : continue;
574 : }
575 :
/* "Simple" means no asm and no side effects.  A simple store additionally
   has no memory in its source, and a simple load has no memory in its
   destination.  */
576 839 : bool is_simple = !properties.has_asm
577 839 : && !properties.has_side_effects ();
578 839 : bool is_simple_store = is_simple
579 839 : && store_mem
580 839 : && !contains_mem_rtx_p (SET_SRC (set));
581 839 : bool is_simple_load = is_simple
582 839 : && load_mem
583 839 : && !contains_mem_rtx_p (SET_DEST (set));
584 :
/* Number of entries flagged with `remove' while handling this insn.  It is
   only tested for being non-zero, when the vector is compacted below.  */
585 839 : int removed_count = 0;
586 :
587 839 : if (is_simple_store)
588 : {
589 : /* Record store forwarding candidate. */
590 302 : store_fwd_info info;
591 302 : info.store_insn = insn;
592 302 : info.store_mem = store_mem;
593 302 : info.insn_cnt = insn_cnt;
594 302 : info.remove = false;
595 302 : info.forwarded = false;
596 302 : store_exprs.safe_push (info);
597 : }
598 :
/* Classify the references of INSN: note whether it reads or writes memory
   and, for each register it writes, invalidate the candidates whose
   address depends on that register.  */
599 839 : bool reads_mem = false;
600 839 : bool writes_mem = false;
601 2970 : for (auto ref : properties.refs ())
602 2131 : if (ref.is_mem ())
603 : {
604 440 : reads_mem |= ref.is_read ();
605 440 : writes_mem |= ref.is_write ();
606 : }
607 1691 : else if (ref.is_write ())
608 : {
609 : /* Drop store forwarding candidates when the address register is
610 : overwritten. */
/* The walk goes from the newest candidate to the oldest.  Once one is
   hit, REMOVE_REST makes every older candidate go as well.  */
611 632 : bool remove_rest = false;
612 632 : unsigned int i;
613 632 : store_fwd_info *it;
614 11800 : FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
615 : {
616 8405 : if (remove_rest
617 16793 : || reg_overlap_mentioned_p (regno_reg_rtx[ref.regno],
618 8388 : it->store_mem))
619 : {
620 20 : it->remove = true;
621 20 : removed_count++;
622 20 : remove_rest = true;
623 : }
624 : }
625 : }
626 :
627 839 : if (is_simple_load)
628 : {
629 : /* Process load for possible store forwarding cases.
630 : Possible newly created/moved stores, resulted from a successful
631 : forwarding, will be processed in subsequent iterations. */
/* FORWARDINGS collects, newest store first, the candidates that forward
   to this load and may legally move past it.  PARTIAL_FORWARDING is set
   when a forwarding store cannot be moved; in that case nothing is
   transformed for this load.  */
632 81 : auto_vec<store_fwd_info> forwardings;
633 81 : bool partial_forwarding = false;
634 81 : bool remove_rest = false;
635 :
636 81 : bool vector_load = VECTOR_MODE_P (GET_MODE (load_mem));
637 :
638 81 : unsigned int i;
639 81 : store_fwd_info *it;
640 531 : FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it)
641 : {
642 369 : rtx store_mem = it->store_mem;
643 369 : HOST_WIDE_INT off_val;
644 :
645 369 : bool vector_store = VECTOR_MODE_P (GET_MODE (store_mem));
646 :
/* A blocking candidate was met earlier in this walk: this older one is
   dropped without further analysis.  */
647 369 : if (remove_rest)
648 : {
649 9 : it->remove = true;
650 9 : removed_count++;
651 : }
652 360 : else if (vector_load ^ vector_store)
653 : {
654 : /* Vector stores followed by a non-vector load or the
655 : opposite, cause store_bit_field to generate non-canonical
656 : expressions, like (subreg:V4SI (reg:DI ...) 0)).
657 : Cases like that should be handled using vec_duplicate,
658 : so we reject the transformation in those cases. */
659 1 : it->remove = true;
660 1 : removed_count++;
661 1 : remove_rest = true;
662 1 : forwardings.truncate (0);
663 : }
664 359 : else if (is_store_forwarding (store_mem, load_mem, &off_val))
665 : {
666 : /* Check if moving this store after the load is legal. */
/* Compare against every candidate recorded after this one (indices above
   I) that is not marked as forwarded.  An output dependence with any of
   them means this store cannot be moved past it.  */
667 93 : bool write_dep = false;
668 93 : unsigned int j = store_exprs.length () - 1;
669 1928 : for (; j != i; j--)
670 : {
671 1835 : if (!store_exprs[j].forwarded
672 3291 : && output_dependence (store_mem,
673 1456 : store_exprs[j].store_mem))
674 : {
675 : write_dep = true;
676 : break;
677 : }
678 : }
679 :
680 93 : if (!write_dep)
681 : {
682 93 : it->forwarded = true;
683 93 : it->offset = off_val;
684 93 : forwardings.safe_push (*it);
685 : }
686 : else
687 : partial_forwarding = true;
688 :
/* Whether it was queued or not, a store that forwards to this load stops
   being a candidate for later loads.  */
689 93 : it->remove = true;
690 93 : removed_count++;
691 : }
692 266 : else if (true_dependence (store_mem, GET_MODE (store_mem),
693 : load_mem))
694 : {
695 : /* We cannot keep a store forwarding candidate if it possibly
696 : interferes with this load. */
697 2 : it->remove = true;
698 2 : removed_count++;
699 2 : remove_rest = true;
700 2 : forwardings.truncate (0);
701 : }
702 : }
703 :
/* The return value is deliberately ignored: the candidates involved are
   already flagged for removal either way.  */
704 123 : if (!forwardings.is_empty () && !partial_forwarding)
705 20 : process_store_forwarding (forwardings, insn, load_mem);
706 81 : }
707 :
708 : /* If we encounter a memory read/write that is not a simple
709 : store/load, flush all pending store candidates and continue.
710 : We can't make safe assumptions about the side-effects, but
711 : store-forwarding opportunities later in the BB should still
712 : be analyzed. */
713 839 : if ((writes_mem && !is_simple_store)
714 806 : || (reads_mem && !is_simple_load))
715 : {
716 47 : store_exprs.truncate (0);
717 47 : continue;
718 : }
719 :
/* Compact the vector, dropping every entry flagged above while keeping
   the order of the remaining ones.  */
720 792 : if (removed_count)
721 : {
722 23 : unsigned int i, j;
723 23 : store_fwd_info *it;
724 312 : VEC_ORDERED_REMOVE_IF (store_exprs, i, j, it, it->remove);
725 : }
726 :
727 : /* Don't consider store forwarding if the RTL instruction distance is
728 : more than PARAM_STORE_FORWARDING_MAX_DISTANCE and the cost checks
729 : are not disabled. */
/* Only the oldest candidate (index 0) is checked, and at most one entry
   is retired per insn.  */
730 792 : const bool unlimited_cost = (param_store_forwarding_max_distance == 0);
731 287 : if (!unlimited_cost && !store_exprs.is_empty ()
732 792 : && (store_exprs[0].insn_cnt
733 287 : + param_store_forwarding_max_distance <= insn_cnt))
734 62 : store_exprs.ordered_remove (0);
735 :
/* The `continue' paths above skip this increment.  */
736 792 : insn_cnt++;
737 897 : }
738 55 : }
739 :
740 : /* Update pass statistics. */
741 :
742 : void
743 24 : store_forwarding_analyzer::update_stats (function *fn)
744 : {
745 24 : statistics_counter_event (fn, "Cases of store forwarding detected: ",
746 24 : stats_sf_detected);
747 24 : statistics_counter_event (fn, "Cases of store forwarding avoided: ",
748 24 : stats_sf_avoided);
749 24 : }
750 :
751 : unsigned int
752 24 : pass_rtl_avoid_store_forwarding::execute (function *fn)
753 : {
754 24 : df_set_flags (DF_DEFER_INSN_RESCAN);
755 :
756 24 : init_alias_analysis ();
757 :
758 24 : store_forwarding_analyzer analyzer;
759 :
760 24 : basic_block bb;
761 92 : FOR_EACH_BB_FN (bb, fn)
762 68 : analyzer.avoid_store_forwarding (bb);
763 :
764 24 : end_alias_analysis ();
765 :
766 24 : analyzer.update_stats (fn);
767 :
768 24 : return 0;
769 24 : }
770 :
771 : } // anon namespace.
772 :
773 : rtl_opt_pass *
774 298828 : make_pass_rtl_avoid_store_forwarding (gcc::context *ctxt)
775 : {
776 298828 : return new pass_rtl_avoid_store_forwarding (ctxt);
777 : }
|