Line data Source code
1 : /* Late RTL pass to fold memory offsets.
2 : Copyright (C) 2023-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3, or (at your option)
9 : any later version.
10 :
11 : GCC is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "tm.h"
24 : #include "rtl.h"
25 : #include "tree.h"
26 : #include "expr.h"
27 : #include "backend.h"
28 : #include "regs.h"
29 : #include "target.h"
30 : #include "memmodel.h"
31 : #include "emit-rtl.h"
32 : #include "insn-config.h"
33 : #include "recog.h"
34 : #include "predict.h"
35 : #include "df.h"
36 : #include "tree-pass.h"
37 : #include "cfgrtl.h"
38 : #include "diagnostic-core.h"
39 :
40 : /* This pass tries to optimize memory offset calculations by moving constants
41 : from add instructions to the memory instructions (loads / stores).
42 : For example it can transform code like this:
43 :
44 : add t4, sp, 16
45 : add t2, a6, t4
46 : shl t3, t2, 1
47 : ld a2, 0(t3)
48 : add a2, 1
49 : sd a2, 8(t2)
50 :
51 : into the following (one instruction less):
52 :
53 : add t2, a6, sp
54 : shl t3, t2, 1
55 : ld a2, 32(t3)
56 : add a2, 1
57 : sd a2, 24(t2)
58 :
59 : Although the previous passes try to emit efficient offset calculations
60 : this pass is still beneficial because:
61 :
62 : - The mechanisms that optimize memory offsets usually work with specific
63 : patterns or have limitations. This pass is designed to fold offsets
64 : through complex calculations that affect multiple memory operations
65 : and have partially overlapping calculations.
66 :
67 : - There are cases where add instructions are introduced in late rtl passes
68 : and the rest of the pipeline cannot eliminate them. Arrays and structs
69 : allocated on the stack can result in unwanted add instructions that
70 : cannot be eliminated easily.
71 :
72 : This pass works on a basic block level and consists of 4 phases:
73 :
74 : - Phase 1 (Analysis): Find "foldable" instructions.
75 : Foldable instructions are those that we know how to propagate
76 : a constant addition through (add, shift, move, ...) and only have other
77 : foldable instructions for uses. In that phase a DFS traversal on the
78 : definition tree is performed and foldable instructions are marked on
79 : a bitmap. The add immediate instructions that are reachable in this
80 : DFS are candidates for folding since all the intermediate calculations
81 : affected by them are also foldable.
82 :
83 : - Phase 2 (Validity): Traverse and calculate the offsets that would result
84 : from folding the add immediate instructions. Check whether the
85 : calculated offsets result in a valid instruction for the target.
86 :
87 : - Phase 3 (Commit offsets): Traverse again. It is now known which folds
88 : are valid so at this point change the offsets in the memory instructions.
89 :
90 : - Phase 4 (Commit instruction deletions): Scan all instructions and delete
91 : or simplify (reduce to move) all add immediate instructions that were
92 : folded.
93 :
94 : This pass should run before hard register propagation because it creates
95 : register moves that we expect to be eliminated. */
96 :
97 : namespace {
98 :
99 : const pass_data pass_data_fold_mem =
100 : {
101 : RTL_PASS, /* type */
102 : "fold_mem_offsets", /* name */
103 : OPTGROUP_NONE, /* optinfo_flags */
104 : TV_FOLD_MEM_OFFSETS, /* tv_id */
105 : 0, /* properties_required */
106 : 0, /* properties_provided */
107 : 0, /* properties_destroyed */
108 : 0, /* todo_flags_start */
109 : TODO_df_finish, /* todo_flags_finish */
110 : };
111 :
112 : class pass_fold_mem_offsets : public rtl_opt_pass
113 : {
114 : public:
115 287872 : pass_fold_mem_offsets (gcc::context *ctxt)
116 575744 : : rtl_opt_pass (pass_data_fold_mem, ctxt)
117 : {}
118 :
119 : /* opt_pass methods: */
120 1480955 : virtual bool gate (function *)
121 : {
122 1480955 : return flag_fold_mem_offsets && optimize >= 2;
123 : }
124 :
125 : virtual unsigned int execute (function *);
126 : }; // class pass_fold_mem_offsets
127 :
128 : /* Class that holds in FOLD_INSNS the instructions that if folded the offset
129 : of a memory instruction would increase by ADDED_OFFSET. */
130 12989676 : class fold_mem_info {
131 : public:
132 : auto_bitmap fold_insns;
133 : HOST_WIDE_INT added_offset;
134 : };
135 :
136 : typedef hash_map<rtx_insn *, fold_mem_info *> fold_info_map;
137 :
138 : /* Tracks which instructions can be reached through instructions that can
139 : propagate offsets for folding. */
140 : static bitmap_head can_fold_insns;
141 :
142 : /* Marks instructions that are currently eligible for folding. */
143 : static bitmap_head candidate_fold_insns;
144 :
145 : /* Tracks instructions that cannot be folded because it turned out that
146 : folding will result in creating an invalid memory instruction.
147 : An instruction can be in both CANDIDATE_FOLD_INSNS and CANNOT_FOLD_INSNS
148 : at the same time, in which case it is not legal to fold. */
149 : static bitmap_head cannot_fold_insns;
150 :
151 : /* The number of instructions that were simplified or eliminated. */
152 : static int stats_fold_count;
153 :
154 : /* Get the single reaching definition of an instruction inside a BB.
155 : The definition is desired for REG used in INSN.
156 : Return the definition insn or NULL if there's no definition with
157 : the desired criteria. */
158 : static rtx_insn *
159 26097244 : get_single_def_in_bb (rtx_insn *insn, rtx reg)
160 : {
161 26097244 : df_ref use;
162 26097244 : struct df_link *ref_chain, *ref_link;
163 :
164 31247977 : FOR_EACH_INSN_USE (use, insn)
165 : {
166 31247977 : if (GET_CODE (DF_REF_REG (use)) == SUBREG)
167 : return NULL;
168 31247977 : if (REGNO (DF_REF_REG (use)) == REGNO (reg))
169 : break;
170 : }
171 :
172 26097244 : if (!use)
173 : return NULL;
174 :
175 26097244 : ref_chain = DF_REF_CHAIN (use);
176 :
177 26097244 : if (!ref_chain)
178 : return NULL;
179 :
180 57724253 : for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
181 : {
182 : /* Problem getting some definition for this instruction. */
183 32656701 : if (ref_link->ref == NULL)
184 : return NULL;
185 32656701 : if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
186 : return NULL;
187 31627213 : if (global_regs[REGNO (reg)]
188 31627213 : && !set_of (reg, DF_REF_INSN (ref_link->ref)))
189 : return NULL;
190 : }
191 :
192 25067552 : if (ref_chain->next)
193 : return NULL;
194 :
195 22099985 : rtx_insn *def = DF_REF_INSN (ref_chain->ref);
196 :
197 22099985 : if (BLOCK_FOR_INSN (def) != BLOCK_FOR_INSN (insn))
198 : return NULL;
199 :
200 8423308 : if (DF_INSN_LUID (def) > DF_INSN_LUID (insn))
201 : return NULL;
202 :
203 : return def;
204 : }
205 :
206 : /* Get all uses of REG which is set in INSN. Return the use list or NULL if a
207 : use is missing / irregular. If SUCCESS is not NULL then set it to false if
208 : there are missing / irregular uses and true otherwise. */
209 : static df_link *
210 300998 : get_uses (rtx_insn *insn, rtx reg, bool *success)
211 : {
212 300998 : df_ref def;
213 :
214 300998 : if (success)
215 300998 : *success = false;
216 :
217 300998 : FOR_EACH_INSN_DEF (def, insn)
218 300998 : if (REGNO (DF_REF_REG (def)) == REGNO (reg))
219 : break;
220 :
221 300998 : if (!def)
222 : return NULL;
223 :
224 300998 : df_link *ref_chain = DF_REF_CHAIN (def);
225 300998 : int insn_luid = DF_INSN_LUID (insn);
226 300998 : basic_block insn_bb = BLOCK_FOR_INSN (insn);
227 :
228 1539107 : for (df_link *ref_link = ref_chain; ref_link; ref_link = ref_link->next)
229 : {
230 : /* Problem getting a use for this instruction. */
231 1445834 : if (ref_link->ref == NULL)
232 : return NULL;
233 1445834 : if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
234 : return NULL;
235 :
236 1440833 : rtx_insn *use = DF_REF_INSN (ref_link->ref);
237 1440833 : if (DEBUG_INSN_P (use))
238 166052 : continue;
239 :
240 : /* We do not handle REG_EQUIV/REG_EQ notes for now. */
241 1274781 : if (DF_REF_FLAGS (ref_link->ref) & DF_REF_IN_NOTE)
242 : return NULL;
243 1189935 : if (BLOCK_FOR_INSN (use) != insn_bb)
244 : return NULL;
245 : /* Punt if use appears before def in the basic block. See PR111601. */
246 1072081 : if (DF_INSN_LUID (use) < insn_luid)
247 : return NULL;
248 : }
249 :
250 93273 : if (success)
251 93273 : *success = true;
252 :
253 : return ref_chain;
254 : }
255 :
256 : static HOST_WIDE_INT
257 : fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns);
258 :
259 : /* Helper function for fold_offsets.
260 :
261 : If DO_RECURSION is false and ANALYZE is true this function returns true iff
262 : it understands the structure of INSN and knows how to propagate constants
263 : through it. In this case OFFSET_OUT and FOLDABLE_INSNS are unused.
264 :
265 : If DO_RECURSION is true then it also calls fold_offsets for each recognized
266 : part of INSN with the appropriate arguments.
267 :
268 : If DO_RECURSION is true and ANALYZE is false then offset that would result
269 : from folding is computed and is returned through the pointer OFFSET_OUT.
270 : The instructions that can be folded are recorded in FOLDABLE_INSNS. */
271 : static bool
272 1813008 : fold_offsets_1 (rtx_insn *insn, bool analyze, bool do_recursion,
273 : HOST_WIDE_INT *offset_out, bitmap foldable_insns)
274 : {
275 : /* Doesn't make sense if both DO_RECURSION and ANALYZE are false. */
276 1813008 : gcc_checking_assert (do_recursion || analyze);
277 1813008 : gcc_checking_assert (GET_CODE (PATTERN (insn)) == SET);
278 :
279 1813008 : rtx src = SET_SRC (PATTERN (insn));
280 1813008 : HOST_WIDE_INT offset = 0;
281 :
282 1813008 : switch (GET_CODE (src))
283 : {
284 118119 : case PLUS:
285 118119 : {
286 : /* Propagate through add. */
287 118119 : rtx arg1 = XEXP (src, 0);
288 118119 : rtx arg2 = XEXP (src, 1);
289 :
290 118119 : if (REG_P (arg1))
291 : {
292 41297 : if (do_recursion)
293 7322 : offset += fold_offsets (insn, arg1, analyze, foldable_insns);
294 : }
295 76822 : else if (GET_CODE (arg1) == ASHIFT
296 46961 : && REG_P (XEXP (arg1, 0))
297 46961 : && CONST_INT_P (XEXP (arg1, 1)))
298 : {
299 : /* Handle R1 = (R2 << C) + ... */
300 46961 : if (do_recursion)
301 : {
302 16875 : HOST_WIDE_INT scale
303 16875 : = (HOST_WIDE_INT_1U << INTVAL (XEXP (arg1, 1)));
304 16875 : offset += scale * fold_offsets (insn, XEXP (arg1, 0), analyze,
305 : foldable_insns);
306 : }
307 : }
308 29861 : else if (GET_CODE (arg1) == PLUS
309 29807 : && REG_P (XEXP (arg1, 0))
310 27915 : && REG_P (XEXP (arg1, 1)))
311 : {
312 : /* Handle R1 = (R2 + R3) + ... */
313 27915 : if (do_recursion)
314 : {
315 5265 : offset += fold_offsets (insn, XEXP (arg1, 0), analyze,
316 : foldable_insns);
317 5265 : offset += fold_offsets (insn, XEXP (arg1, 1), analyze,
318 : foldable_insns);
319 : }
320 : }
321 1946 : else if (GET_CODE (arg1) == PLUS
322 1892 : && GET_CODE (XEXP (arg1, 0)) == ASHIFT
323 1880 : && REG_P (XEXP (XEXP (arg1, 0), 0))
324 1880 : && CONST_INT_P (XEXP (XEXP (arg1, 0), 1))
325 1880 : && REG_P (XEXP (arg1, 1)))
326 : {
327 : /* Handle R1 = ((R2 << C) + R3) + ... */
328 1880 : if (do_recursion)
329 : {
330 758 : HOST_WIDE_INT scale
331 758 : = (HOST_WIDE_INT_1U << INTVAL (XEXP (XEXP (arg1, 0), 1)));
332 758 : offset += scale * fold_offsets (insn, XEXP (XEXP (arg1, 0), 0),
333 : analyze, foldable_insns);
334 758 : offset += fold_offsets (insn, XEXP (arg1, 1), analyze,
335 : foldable_insns);
336 : }
337 : }
338 : else
339 : return false;
340 :
341 118053 : if (REG_P (arg2))
342 : {
343 82233 : if (do_recursion)
344 24152 : offset += fold_offsets (insn, arg2, analyze, foldable_insns);
345 : }
346 35820 : else if (CONST_INT_P (arg2))
347 : {
348 33204 : if (REG_P (arg1))
349 : {
350 4084 : offset += INTVAL (arg2);
351 : /* This is a R1 = R2 + C instruction, candidate for folding. */
352 4084 : if (!analyze)
353 28 : bitmap_set_bit (foldable_insns, INSN_UID (insn));
354 : }
355 : }
356 : else
357 : return false;
358 :
359 : /* Pattern recognized for folding. */
360 : break;
361 : }
362 0 : case MINUS:
363 0 : {
364 : /* Propagate through minus. */
365 0 : rtx arg1 = XEXP (src, 0);
366 0 : rtx arg2 = XEXP (src, 1);
367 :
368 0 : if (REG_P (arg1))
369 : {
370 0 : if (do_recursion)
371 0 : offset += fold_offsets (insn, arg1, analyze, foldable_insns);
372 : }
373 : else
374 : return false;
375 :
376 0 : if (REG_P (arg2))
377 : {
378 0 : if (do_recursion)
379 0 : offset -= fold_offsets (insn, arg2, analyze, foldable_insns);
380 : }
381 0 : else if (CONST_INT_P (arg2))
382 : {
383 0 : if (REG_P (arg1))
384 : {
385 0 : offset -= INTVAL (arg2);
386 : /* This is a R1 = R2 - C instruction, candidate for folding. */
387 0 : if (!analyze)
388 0 : bitmap_set_bit (foldable_insns, INSN_UID (insn));
389 : }
390 : }
391 : else
392 : return false;
393 :
394 : /* Pattern recognized for folding. */
395 : break;
396 : }
397 0 : case NEG:
398 0 : {
399 : /* Propagate through negation. */
400 0 : rtx arg1 = XEXP (src, 0);
401 0 : if (REG_P (arg1))
402 : {
403 0 : if (do_recursion)
404 0 : offset = -fold_offsets (insn, arg1, analyze, foldable_insns);
405 : }
406 : else
407 : return false;
408 :
409 : /* Pattern recognized for folding. */
410 : break;
411 : }
412 349 : case MULT:
413 349 : {
414 : /* Propagate through multiply by constant. */
415 349 : rtx arg1 = XEXP (src, 0);
416 349 : rtx arg2 = XEXP (src, 1);
417 :
418 349 : if (REG_P (arg1) && CONST_INT_P (arg2))
419 : {
420 349 : if (do_recursion)
421 : {
422 9 : HOST_WIDE_INT scale = INTVAL (arg2);
423 9 : offset = scale * fold_offsets (insn, arg1, analyze,
424 : foldable_insns);
425 : }
426 : }
427 : else
428 : return false;
429 :
430 : /* Pattern recognized for folding. */
431 : break;
432 : }
433 0 : case ASHIFT:
434 0 : {
435 : /* Propagate through shift left by constant. */
436 0 : rtx arg1 = XEXP (src, 0);
437 0 : rtx arg2 = XEXP (src, 1);
438 :
439 0 : if (REG_P (arg1) && CONST_INT_P (arg2))
440 : {
441 0 : if (do_recursion)
442 : {
443 0 : HOST_WIDE_INT scale = (HOST_WIDE_INT_1U << INTVAL (arg2));
444 0 : offset = scale * fold_offsets (insn, arg1, analyze,
445 : foldable_insns);
446 : }
447 : }
448 : else
449 : return false;
450 :
451 : /* Pattern recognized for folding. */
452 : break;
453 : }
454 272914 : case REG:
455 272914 : {
456 : /* Propagate through register move. */
457 272914 : if (do_recursion)
458 57488 : offset = fold_offsets (insn, src, analyze, foldable_insns);
459 :
460 : /* Pattern recognized for folding. */
461 : break;
462 : }
463 36 : case CONST_INT:
464 36 : {
465 36 : offset = INTVAL (src);
466 : /* R1 = C is candidate for folding. */
467 36 : if (!analyze)
468 14 : bitmap_set_bit (foldable_insns, INSN_UID (insn));
469 :
470 : /* Pattern recognized for folding. */
471 : break;
472 : }
473 : default:
474 : /* Cannot recognize. */
475 : return false;
476 : }
477 :
478 388736 : if (do_recursion && !analyze)
479 55872 : *offset_out = offset;
480 :
481 : return true;
482 : }
483 :
484 : /* Function that computes the offset that would have to be added to all uses
485 : of REG if the instructions marked in FOLDABLE_INSNS were to be eliminated.
486 :
487 : If ANALYZE is true then mark in CAN_FOLD_INSNS which instructions
488 : transitively only affect other instructions found in CAN_FOLD_INSNS.
489 : If ANALYZE is false then compute the offset required for folding. */
490 : static HOST_WIDE_INT
491 26097244 : fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns)
492 : {
493 26097244 : rtx_insn *def = get_single_def_in_bb (insn, reg);
494 :
495 26097244 : if (!def || RTX_FRAME_RELATED_P (def) || GET_CODE (PATTERN (def)) != SET)
496 : return 0;
497 :
498 3441295 : rtx dest = SET_DEST (PATTERN (def));
499 :
500 3441295 : if (!REG_P (dest))
501 : return 0;
502 :
503 : /* We can only affect the values of GPR registers. */
504 2226687 : unsigned int dest_regno = REGNO (dest);
505 2226687 : if (fixed_regs[dest_regno]
506 2226687 : || !TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], dest_regno))
507 : return 0;
508 :
509 2225380 : if (analyze)
510 : {
511 : /* Check if we know how to handle DEF. */
512 1106306 : if (!fold_offsets_1 (def, true, false, NULL, NULL))
513 1074440 : return 0;
514 :
515 : /* We only fold through instructions that are transitively used as
516 : memory addresses and do not have other uses. Use the same logic
517 : from offset calculation to visit instructions that can propagate
518 : offsets and keep track of them in CAN_FOLD_INSNS. */
519 300998 : bool success;
520 300998 : struct df_link *uses = get_uses (def, dest, &success), *ref_link;
521 :
522 300998 : if (!success)
523 : return 0;
524 :
525 164117 : for (ref_link = uses; ref_link; ref_link = ref_link->next)
526 : {
527 132251 : rtx_insn *use = DF_REF_INSN (ref_link->ref);
528 :
529 132251 : if (DEBUG_INSN_P (use))
530 10736 : continue;
531 :
532 : /* Punt if the use is anything more complicated than a set
533 : (clobber, use, etc). */
534 121515 : if (!NONJUMP_INSN_P (use) || GET_CODE (PATTERN (use)) != SET)
535 : return 0;
536 :
537 : /* This use affects instructions outside of CAN_FOLD_INSNS. */
538 116303 : if (!bitmap_bit_p (&can_fold_insns, INSN_UID (use)))
539 : return 0;
540 :
541 61890 : rtx use_set = PATTERN (use);
542 :
543 : /* Special case: A foldable memory store is not foldable if it
544 : mentions DEST outside of the address calculation. */
545 61890 : if (use_set && MEM_P (SET_DEST (use_set))
546 92448 : && reg_mentioned_p (dest, SET_SRC (use_set)))
547 : return 0;
548 : }
549 :
550 31866 : bitmap_set_bit (&can_fold_insns, INSN_UID (def));
551 :
552 31866 : if (dump_file && (dump_flags & TDF_DETAILS))
553 : {
554 0 : fprintf (dump_file, "Instruction marked for propagation: ");
555 0 : print_rtl_single (dump_file, def);
556 : }
557 : }
558 : else
559 : {
560 : /* We cannot propagate through this instruction. */
561 1119074 : if (!bitmap_bit_p (&can_fold_insns, INSN_UID (def)))
562 : return 0;
563 : }
564 :
565 706702 : HOST_WIDE_INT offset = 0;
566 706702 : bool recognized = fold_offsets_1 (def, analyze, true, &offset,
567 : foldable_insns);
568 :
569 706702 : if (!recognized)
570 : return 0;
571 :
572 87738 : return offset;
573 : }
574 :
575 : /* Test if INSN is a memory load / store that can have an offset folded to it.
576 : Return true iff INSN is such an instruction and return through MEM_OUT,
577 : REG_OUT and OFFSET_OUT the RTX that has a MEM code, the register that is
578 : used as a base address and the offset accordingly.
579 : All of the out pointers may be NULL in which case they will be ignored. */
580 : bool
581 253977072 : get_fold_mem_root (rtx_insn *insn, rtx *mem_out, rtx *reg_out,
582 : HOST_WIDE_INT *offset_out)
583 : {
584 253977072 : rtx set = single_set (insn);
585 253977072 : rtx mem = NULL_RTX;
586 :
587 253977072 : if (set != NULL_RTX)
588 : {
589 119329846 : rtx src = SET_SRC (set);
590 119329846 : rtx dest = SET_DEST (set);
591 :
592 : /* Don't fold when we have unspec / volatile. */
593 119329846 : if (GET_CODE (src) == UNSPEC
594 119329846 : || GET_CODE (src) == UNSPEC_VOLATILE
595 118638872 : || GET_CODE (dest) == UNSPEC
596 118638872 : || GET_CODE (dest) == UNSPEC_VOLATILE)
597 : return false;
598 :
599 118638872 : if (MEM_P (src))
600 : mem = src;
601 88998474 : else if (MEM_P (dest))
602 : mem = dest;
603 56995750 : else if ((GET_CODE (src) == SIGN_EXTEND
604 56995750 : || GET_CODE (src) == ZERO_EXTEND)
605 1282484 : && MEM_P (XEXP (src, 0)))
606 : mem = XEXP (src, 0);
607 : }
608 :
609 : if (mem == NULL_RTX)
610 : return false;
611 :
612 62185372 : rtx mem_addr = XEXP (mem, 0);
613 62185372 : rtx reg;
614 62185372 : HOST_WIDE_INT offset;
615 :
616 62185372 : if (REG_P (mem_addr))
617 : {
618 : reg = mem_addr;
619 : offset = 0;
620 : }
621 53439336 : else if (GET_CODE (mem_addr) == PLUS
622 44986600 : && REG_P (XEXP (mem_addr, 0))
623 43812896 : && CONST_INT_P (XEXP (mem_addr, 1)))
624 : {
625 43212668 : reg = XEXP (mem_addr, 0);
626 43212668 : offset = INTVAL (XEXP (mem_addr, 1));
627 : }
628 : else
629 : return false;
630 :
631 51958704 : if (mem_out)
632 38969028 : *mem_out = mem;
633 51958704 : if (reg_out)
634 51958704 : *reg_out = reg;
635 51958704 : if (offset_out)
636 38969028 : *offset_out = offset;
637 :
638 : return true;
639 : }
640 :
641 : /* If INSN is a root memory instruction then do a DFS traversal on its
642 : definitions and find folding candidates. */
643 : static void
644 113998860 : do_analysis (rtx_insn *insn)
645 : {
646 113998860 : rtx reg;
647 113998860 : if (!get_fold_mem_root (insn, NULL, ®, NULL))
648 101009184 : return;
649 :
650 12989676 : if (dump_file && (dump_flags & TDF_DETAILS))
651 : {
652 0 : fprintf (dump_file, "Starting analysis from root: ");
653 0 : print_rtl_single (dump_file, insn);
654 : }
655 :
656 : /* Mark this memory instruction as foldable before the DFS so that its
657 : address definitions can see it in can_fold_insns during analysis.
658 : This is required because fold_offsets checks that all uses of a
659 : definition are in can_fold_insns before marking the definition. */
660 12989676 : bitmap_set_bit (&can_fold_insns, INSN_UID (insn));
661 12989676 : fold_offsets (insn, reg, true, NULL);
662 : }
663 :
664 : static void
665 113998860 : do_fold_info_calculation (rtx_insn *insn, fold_info_map *fold_info)
666 : {
667 113998860 : rtx mem, reg;
668 113998860 : HOST_WIDE_INT cur_offset;
669 113998860 : if (!get_fold_mem_root (insn, &mem, ®, &cur_offset))
670 101009184 : return;
671 :
672 12989676 : fold_mem_info *info = new fold_mem_info;
673 12989676 : info->added_offset = fold_offsets (insn, reg, false, info->fold_insns);
674 :
675 12989676 : fold_info->put (insn, info);
676 : }
677 :
678 : /* If INSN is a root memory instruction then compute a potentially new offset
679 : for it and test if the resulting instruction is valid. */
680 : static void
681 12989676 : do_check_validity (rtx_insn *insn, fold_mem_info *info)
682 : {
683 12989676 : rtx mem, reg;
684 12989676 : HOST_WIDE_INT cur_offset;
685 12989676 : if (!get_fold_mem_root (insn, &mem, ®, &cur_offset))
686 0 : return;
687 :
688 12989676 : HOST_WIDE_INT new_offset = cur_offset + info->added_offset;
689 :
690 : /* Test if it is valid to change MEM's address offset to NEW_OFFSET. */
691 12989676 : int icode = INSN_CODE (insn);
692 12989676 : INSN_CODE (insn) = -1;
693 12989676 : rtx mem_addr = XEXP (mem, 0);
694 12989676 : machine_mode addr_mode = GET_MODE (mem_addr);
695 12989676 : machine_mode mem_mode = GET_MODE (mem);
696 12989676 : if (new_offset != 0)
697 10803192 : XEXP (mem, 0) = gen_rtx_PLUS (addr_mode, reg,
698 : gen_int_mode (new_offset, addr_mode));
699 : else
700 2186484 : XEXP (mem, 0) = reg;
701 :
702 12989676 : bool illegal = insn_invalid_p (insn, false)
703 25979339 : || !memory_address_addr_space_p (mem_mode, XEXP (mem, 0),
704 13234622 : MEM_ADDR_SPACE (mem));
705 :
706 : /* Restore the instruction. */
707 12989676 : XEXP (mem, 0) = mem_addr;
708 12989676 : INSN_CODE (insn) = icode;
709 :
710 12989676 : if (illegal)
711 13 : bitmap_ior_into (&cannot_fold_insns, info->fold_insns);
712 : else
713 12989663 : bitmap_ior_into (&candidate_fold_insns, info->fold_insns);
714 : }
715 :
716 : static bool
717 10328818 : compute_validity_closure (fold_info_map *fold_info)
718 : {
719 : /* Let's say we have an arbitrary chain of foldable instructions xN = xN + C
720 : and memory operations rN that use xN as shown below. If folding x1 in r1
721 : turns out to be invalid for whatever reason then it's also invalid to fold
722 : any of the other xN into any rN. That means that we need the transitive
723 : closure of validity to determine whether we can fold a xN instruction.
724 :
725 : +--------------+ +-------------------+ +-------------------+
726 : | r1 = mem[x1] | | r2 = mem[x1 + x2] | | r3 = mem[x2 + x3] | ...
727 : +--------------+ +-------------------+ +-------------------+
728 : ^ ^ ^ ^ ^
729 : | / | / | ...
730 : | / | / |
731 : +-------------+ / +-------------+ / +-------------+
732 : | x1 = x1 + 1 |-----+ | x2 = x2 + 1 |-----+ | x3 = x3 + 1 |--- ...
733 : +-------------+ +-------------+ +-------------+
734 : ^ ^ ^
735 : | | |
736 : ... ... ...
737 : */
738 :
739 : /* In general three iterations should be enough for most cases, but allow up
740 : to five when -fexpensive-optimizations is used. */
741 10328818 : int max_iters = 3 + 2 * flag_expensive_optimizations;
742 10328818 : for (int pass = 0; pass < max_iters; pass++)
743 : {
744 10328818 : bool made_changes = false;
745 23318494 : for (fold_info_map::iterator iter = fold_info->begin ();
746 36308170 : iter != fold_info->end (); ++iter)
747 : {
748 12989676 : fold_mem_info *info = (*iter).second;
749 12989676 : if (bitmap_intersect_p (&cannot_fold_insns, info->fold_insns))
750 13 : made_changes |= bitmap_ior_into (&cannot_fold_insns,
751 13 : info->fold_insns);
752 : }
753 :
754 10328818 : if (!made_changes)
755 : return true;
756 : }
757 :
758 : return false;
759 : }
760 :
761 : /* If INSN is a root memory instruction that was affected by any folding
762 : then update its offset as necessary. */
763 : static void
764 12989676 : do_commit_offset (rtx_insn *insn, fold_mem_info *info)
765 : {
766 12989676 : rtx mem, reg;
767 12989676 : HOST_WIDE_INT cur_offset;
768 12989676 : if (!get_fold_mem_root (insn, &mem, ®, &cur_offset))
769 12989647 : return;
770 :
771 12989676 : HOST_WIDE_INT new_offset = cur_offset + info->added_offset;
772 :
773 12989676 : if (new_offset == cur_offset)
774 : return;
775 :
776 42 : gcc_assert (!bitmap_empty_p (info->fold_insns));
777 :
778 42 : if (bitmap_intersect_p (&cannot_fold_insns, info->fold_insns))
779 : return;
780 :
781 29 : if (dump_file)
782 : {
783 0 : fprintf (dump_file, "Memory offset changed from "
784 : HOST_WIDE_INT_PRINT_DEC " to " HOST_WIDE_INT_PRINT_DEC
785 : " for instruction:\n", cur_offset, new_offset);
786 0 : print_rtl_single (dump_file, insn);
787 : }
788 :
789 29 : machine_mode mode = GET_MODE (XEXP (mem, 0));
790 29 : if (new_offset != 0)
791 29 : XEXP (mem, 0) = gen_rtx_PLUS (mode, reg, gen_int_mode (new_offset, mode));
792 : else
793 0 : XEXP (mem, 0) = reg;
794 29 : INSN_CODE (insn) = recog (PATTERN (insn), insn, 0);
795 29 : df_insn_rescan (insn);
796 : }
797 :
798 : /* If INSN is a move / add instruction that was folded then replace its
799 : constant part with zero. */
800 : static void
801 113998878 : do_commit_insn (rtx_insn *insn)
802 : {
803 113998878 : if (bitmap_bit_p (&candidate_fold_insns, INSN_UID (insn))
804 113998878 : && !bitmap_bit_p (&cannot_fold_insns, INSN_UID (insn)))
805 : {
806 18 : if (dump_file)
807 : {
808 0 : fprintf (dump_file, "Instruction folded:");
809 0 : print_rtl_single (dump_file, insn);
810 : }
811 :
812 18 : stats_fold_count++;
813 :
814 18 : rtx set = single_set (insn);
815 18 : rtx dest = SET_DEST (set);
816 18 : rtx src = SET_SRC (set);
817 :
818 : /* Emit a move and let subsequent passes eliminate it if possible. */
819 18 : if (GET_CODE (src) == CONST_INT)
820 : {
821 : /* INSN is R1 = C.
822 : Replace it with R1 = 0 because C was folded. */
823 1 : rtx mov_rtx
824 1 : = gen_move_insn (dest, gen_int_mode (0, GET_MODE (dest)));
825 1 : df_insn_rescan (emit_insn_after (mov_rtx, insn));
826 : }
827 : else
828 : {
829 : /* INSN is R1 = R2 + C.
830 : Replace it with R1 = R2 because C was folded. */
831 17 : rtx arg1 = XEXP (src, 0);
832 :
833 : /* If the DEST == ARG1 then the move is a no-op. */
834 17 : if (REGNO (dest) != REGNO (arg1))
835 : {
836 17 : gcc_checking_assert (GET_MODE (dest) == GET_MODE (arg1));
837 17 : rtx mov_rtx = gen_move_insn (dest, arg1);
838 17 : df_insn_rescan (emit_insn_after (mov_rtx, insn));
839 : }
840 : }
841 :
842 : /* Delete the original move / add instruction. */
843 18 : delete_insn (insn);
844 : }
845 113998878 : }
846 :
847 : unsigned int
848 960352 : pass_fold_mem_offsets::execute (function *fn)
849 : {
850 : /* Computing UD/DU chains for flow graphs which have a high connectivity
851 : will take a long time and is unlikely to be particularly useful.
852 :
853 : In normal circumstances a cfg should have about twice as many
854 : edges as blocks. But we do not want to punish small functions
855 : which have a couple switch statements. Rather than simply
856 : threshold the number of blocks, uses something with a more
857 : graceful degradation. */
858 960352 : if (n_edges_for_fn (fn) > 20000 + n_basic_blocks_for_fn (fn) * 4)
859 : {
860 0 : warning (OPT_Wdisabled_optimization,
861 : "fold-mem-offsets: %d basic blocks and %d edges/basic block",
862 : n_basic_blocks_for_fn (cfun),
863 0 : n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun));
864 0 : return 0;
865 : }
866 :
867 960352 : df_set_flags (DF_EQ_NOTES + DF_RD_PRUNE_DEAD_DEFS + DF_DEFER_INSN_RESCAN);
868 960352 : df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
869 960352 : df_analyze ();
870 :
871 960352 : bitmap_initialize (&can_fold_insns, NULL);
872 960352 : bitmap_initialize (&candidate_fold_insns, NULL);
873 960352 : bitmap_initialize (&cannot_fold_insns, NULL);
874 :
875 960352 : stats_fold_count = 0;
876 :
877 960352 : basic_block bb;
878 960352 : rtx_insn *insn;
879 13680341 : FOR_ALL_BB_FN (bb, fn)
880 : {
881 : /* There is a conflict between this pass and RISCV's shorten-memrefs
882 : pass. For now disable folding if optimizing for size because
883 : otherwise this cancels the effects of shorten-memrefs. */
884 12719989 : if (optimize_bb_for_size_p (bb))
885 2391171 : continue;
886 :
887 10328818 : fold_info_map fold_info;
888 :
889 10328818 : bitmap_clear (&can_fold_insns);
890 10328818 : bitmap_clear (&candidate_fold_insns);
891 10328818 : bitmap_clear (&cannot_fold_insns);
892 :
893 124327678 : FOR_BB_INSNS (bb, insn)
894 113998860 : do_analysis (insn);
895 :
896 124327678 : FOR_BB_INSNS (bb, insn)
897 113998860 : do_fold_info_calculation (insn, &fold_info);
898 :
899 124327678 : FOR_BB_INSNS (bb, insn)
900 113998860 : if (fold_mem_info **info = fold_info.get (insn))
901 12989676 : do_check_validity (insn, *info);
902 :
903 10328818 : if (compute_validity_closure (&fold_info))
904 : {
905 124327678 : FOR_BB_INSNS (bb, insn)
906 113998860 : if (fold_mem_info **info = fold_info.get (insn))
907 12989676 : do_commit_offset (insn, *info);
908 :
909 124327696 : FOR_BB_INSNS (bb, insn)
910 113998878 : do_commit_insn (insn);
911 : }
912 :
913 23318494 : for (fold_info_map::iterator iter = fold_info.begin ();
914 46636988 : iter != fold_info.end (); ++iter)
915 25979352 : delete (*iter).second;
916 10328818 : }
917 :
918 960352 : statistics_counter_event (cfun, "Number of folded instructions",
919 : stats_fold_count);
920 :
921 960352 : bitmap_release (&can_fold_insns);
922 960352 : bitmap_release (&candidate_fold_insns);
923 960352 : bitmap_release (&cannot_fold_insns);
924 :
925 960352 : return 0;
926 : }
927 :
928 : } // anon namespace
929 :
930 : rtl_opt_pass *
931 287872 : make_pass_fold_mem_offsets (gcc::context *ctxt)
932 : {
933 287872 : return new pass_fold_mem_offsets (ctxt);
934 : }
|