Line data Source code
1 : /* Late RTL pass to fold memory offsets.
2 : Copyright (C) 2023-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3, or (at your option)
9 : any later version.
10 :
11 : GCC is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "tm.h"
24 : #include "rtl.h"
25 : #include "tree.h"
26 : #include "expr.h"
27 : #include "backend.h"
28 : #include "regs.h"
29 : #include "target.h"
30 : #include "memmodel.h"
31 : #include "emit-rtl.h"
32 : #include "insn-config.h"
33 : #include "recog.h"
34 : #include "predict.h"
35 : #include "df.h"
36 : #include "tree-pass.h"
37 : #include "cfgrtl.h"
38 : #include "diagnostic-core.h"
39 :
40 : /* This pass tries to optimize memory offset calculations by moving constants
41 : from add instructions to the memory instructions (loads / stores).
42 : For example it can transform code like this:
43 :
44 : add t4, sp, 16
45 : add t2, a6, t4
46 : shl t3, t2, 1
47 : ld a2, 0(t3)
48 : add a2, 1
49 : sd a2, 8(t2)
50 :
51 : into the following (one instruction less):
52 :
53 : add t2, a6, sp
54 : shl t3, t2, 1
55 : ld a2, 32(t3)
56 : add a2, 1
57 : sd a2, 24(t2)
58 :
59 : Although the previous passes try to emit efficient offset calculations
60 : this pass is still beneficial because:
61 :
62 : - The mechanisms that optimize memory offsets usually work with specific
63 : patterns or have limitations. This pass is designed to fold offsets
64 : through complex calculations that affect multiple memory operations
65 : and have partially overlapping calculations.
66 :
67 : - There are cases where add instructions are introduced in late rtl passes
68 : and the rest of the pipeline cannot eliminate them. Arrays and structs
69 : allocated on the stack can result in unwanted add instructions that
70 : cannot be eliminated easily.
71 :
72 : This pass works on a basic block level and consists of 4 phases:
73 :
74 : - Phase 1 (Analysis): Find "foldable" instructions.
75 : Foldable instructions are those that we know how to propagate
76 : a constant addition through (add, shift, move, ...) and only have other
77 : foldable instructions for uses. In that phase a DFS traversal on the
78 : definition tree is performed and foldable instructions are marked on
79 : a bitmap. The add immediate instructions that are reachable in this
80 : DFS are candidates for folding since all the intermediate calculations
81 : affected by them are also foldable.
82 :
83 : - Phase 2 (Validity): Traverse and calculate the offsets that would result
84 : from folding the add immediate instructions. Check whether the
85 : calculated offsets result in a valid instruction for the target.
86 :
87 : - Phase 3 (Commit offsets): Traverse again. It is now known which folds
88 : are valid so at this point change the offsets in the memory instructions.
89 :
90 : - Phase 4 (Commit instruction deletions): Scan all instructions and delete
91 : or simplify (reduce to move) all add immediate instructions that were
92 : folded.
93 :
94 : This pass should run before hard register propagation because it creates
95 : register moves that we expect to be eliminated. */
96 :
97 : namespace {
98 :
99 : const pass_data pass_data_fold_mem =
100 : {
101 : RTL_PASS, /* type */
102 : "fold_mem_offsets", /* name */
103 : OPTGROUP_NONE, /* optinfo_flags */
104 : TV_FOLD_MEM_OFFSETS, /* tv_id */
105 : 0, /* properties_required */
106 : 0, /* properties_provided */
107 : 0, /* properties_destroyed */
108 : 0, /* todo_flags_start */
109 : TODO_df_finish, /* todo_flags_finish */
110 : };
111 :
112 : class pass_fold_mem_offsets : public rtl_opt_pass
113 : {
114 : public:
115 285722 : pass_fold_mem_offsets (gcc::context *ctxt)
116 571444 : : rtl_opt_pass (pass_data_fold_mem, ctxt)
117 : {}
118 :
119 : /* opt_pass methods: */
120 1471370 : virtual bool gate (function *)
121 : {
122 1471370 : return flag_fold_mem_offsets && optimize >= 2;
123 : }
124 :
125 : virtual unsigned int execute (function *);
126 : }; // class pass_fold_mem_offsets
127 :
128 : /* Class that holds in FOLD_INSNS the instructions that if folded the offset
129 : of a memory instruction would increase by ADDED_OFFSET. */
130 13011067 : class fold_mem_info {
131 : public:
132 : auto_bitmap fold_insns;
133 : HOST_WIDE_INT added_offset;
134 : };
135 :
136 : typedef hash_map<rtx_insn *, fold_mem_info *> fold_info_map;
137 :
138 : /* Tracks which instructions can be reached through instructions that can
139 : propagate offsets for folding. */
140 : static bitmap_head can_fold_insns;
141 :
142 : /* Marks instructions that are currently eligible for folding. */
143 : static bitmap_head candidate_fold_insns;
144 :
145 : /* Tracks instructions that cannot be folded because it turned out that
146 : folding will result in creating an invalid memory instruction.
147 : An instruction can be in both CANDIDATE_FOLD_INSNS and CANNOT_FOLD_INSNS
148 : at the same time, in which case it is not legal to fold. */
149 : static bitmap_head cannot_fold_insns;
150 :
151 : /* The number of instructions that were simplified or eliminated. */
152 : static int stats_fold_count;
153 :
154 : /* Get the single reaching definition of an instruction inside a BB.
155 : The definition is desired for REG used in INSN.
156 : Return the definition insn or NULL if there's no definition with
157 : the desired criteria. */
158 : static rtx_insn *
159 26140543 : get_single_def_in_bb (rtx_insn *insn, rtx reg)
160 : {
161 26140543 : df_ref use;
162 26140543 : struct df_link *ref_chain, *ref_link;
163 :
164 31302213 : FOR_EACH_INSN_USE (use, insn)
165 : {
166 31302213 : if (GET_CODE (DF_REF_REG (use)) == SUBREG)
167 : return NULL;
168 31302213 : if (REGNO (DF_REF_REG (use)) == REGNO (reg))
169 : break;
170 : }
171 :
172 26140543 : if (!use)
173 : return NULL;
174 :
175 26140543 : ref_chain = DF_REF_CHAIN (use);
176 :
177 26140543 : if (!ref_chain)
178 : return NULL;
179 :
180 57840815 : for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
181 : {
182 : /* Problem getting some definition for this instruction. */
183 32729997 : if (ref_link->ref == NULL)
184 : return NULL;
185 32729997 : if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
186 : return NULL;
187 31700476 : if (global_regs[REGNO (reg)]
188 31700476 : && !set_of (reg, DF_REF_INSN (ref_link->ref)))
189 : return NULL;
190 : }
191 :
192 25110818 : if (ref_chain->next)
193 : return NULL;
194 :
195 22135751 : rtx_insn *def = DF_REF_INSN (ref_chain->ref);
196 :
197 22135751 : if (BLOCK_FOR_INSN (def) != BLOCK_FOR_INSN (insn))
198 : return NULL;
199 :
200 8424078 : if (DF_INSN_LUID (def) > DF_INSN_LUID (insn))
201 : return NULL;
202 :
203 : return def;
204 : }
205 :
206 : /* Get all uses of REG which is set in INSN. Return the use list or NULL if a
207 : use is missing / irregular. If SUCCESS is not NULL then set it to false if
208 : there are missing / irregular uses and true otherwise. */
209 : static df_link *
210 302018 : get_uses (rtx_insn *insn, rtx reg, bool *success)
211 : {
212 302018 : df_ref def;
213 :
214 302018 : if (success)
215 302018 : *success = false;
216 :
217 302018 : FOR_EACH_INSN_DEF (def, insn)
218 302018 : if (REGNO (DF_REF_REG (def)) == REGNO (reg))
219 : break;
220 :
221 302018 : if (!def)
222 : return NULL;
223 :
224 302018 : df_link *ref_chain = DF_REF_CHAIN (def);
225 302018 : int insn_luid = DF_INSN_LUID (insn);
226 302018 : basic_block insn_bb = BLOCK_FOR_INSN (insn);
227 :
228 1540155 : for (df_link *ref_link = ref_chain; ref_link; ref_link = ref_link->next)
229 : {
230 : /* Problem getting a use for this instruction. */
231 1446631 : if (ref_link->ref == NULL)
232 : return NULL;
233 1446631 : if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
234 : return NULL;
235 :
236 1441573 : rtx_insn *use = DF_REF_INSN (ref_link->ref);
237 1441573 : if (DEBUG_INSN_P (use))
238 166877 : continue;
239 :
240 : /* We do not handle REG_EQUIV/REG_EQ notes for now. */
241 1274696 : if (DF_REF_FLAGS (ref_link->ref) & DF_REF_IN_NOTE)
242 : return NULL;
243 1189371 : if (BLOCK_FOR_INSN (use) != insn_bb)
244 : return NULL;
245 : /* Punt if use appears before def in the basic block. See PR111601. */
246 1071284 : if (DF_INSN_LUID (use) < insn_luid)
247 : return NULL;
248 : }
249 :
250 93524 : if (success)
251 93524 : *success = true;
252 :
253 : return ref_chain;
254 : }
255 :
256 : static HOST_WIDE_INT
257 : fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns);
258 :
259 : /* Helper function for fold_offsets.
260 :
261 : If DO_RECURSION is false and ANALYZE is true this function returns true iff
262 : it understands the structure of INSN and knows how to propagate constants
263 : through it. In this case OFFSET_OUT and FOLDABLE_INSNS are unused.
264 :
265 : If DO_RECURSION is true then it also calls fold_offsets for each recognized
266 : part of INSN with the appropriate arguments.
267 :
268 : If DO_RECURSION is true and ANALYZE is false then offset that would result
269 : from folding is computed and is returned through the pointer OFFSET_OUT.
270 : The instructions that can be folded are recorded in FOLDABLE_INSNS. */
271 : static bool
272 1820306 : fold_offsets_1 (rtx_insn *insn, bool analyze, bool do_recursion,
273 : HOST_WIDE_INT *offset_out, bitmap foldable_insns)
274 : {
275 : /* Doesn't make sense if both DO_RECURSION and ANALYZE are false. */
276 1820306 : gcc_checking_assert (do_recursion || analyze);
277 1820306 : gcc_checking_assert (GET_CODE (PATTERN (insn)) == SET);
278 :
279 1820306 : rtx src = SET_SRC (PATTERN (insn));
280 1820306 : HOST_WIDE_INT offset = 0;
281 :
282 1820306 : switch (GET_CODE (src))
283 : {
284 118716 : case PLUS:
285 118716 : {
286 : /* Propagate through add. */
287 118716 : rtx arg1 = XEXP (src, 0);
288 118716 : rtx arg2 = XEXP (src, 1);
289 :
290 118716 : if (REG_P (arg1))
291 : {
292 41885 : if (do_recursion)
293 7483 : offset += fold_offsets (insn, arg1, analyze, foldable_insns);
294 : }
295 76831 : else if (GET_CODE (arg1) == ASHIFT
296 46835 : && REG_P (XEXP (arg1, 0))
297 46835 : && CONST_INT_P (XEXP (arg1, 1)))
298 : {
299 : /* Handle R1 = (R2 << C) + ... */
300 46835 : if (do_recursion)
301 : {
302 16863 : HOST_WIDE_INT scale
303 16863 : = (HOST_WIDE_INT_1U << INTVAL (XEXP (arg1, 1)));
304 16863 : offset += scale * fold_offsets (insn, XEXP (arg1, 0), analyze,
305 : foldable_insns);
306 : }
307 : }
308 29996 : else if (GET_CODE (arg1) == PLUS
309 29942 : && REG_P (XEXP (arg1, 0))
310 28042 : && REG_P (XEXP (arg1, 1)))
311 : {
312 : /* Handle R1 = (R2 + R3) + ... */
313 28042 : if (do_recursion)
314 : {
315 5224 : offset += fold_offsets (insn, XEXP (arg1, 0), analyze,
316 : foldable_insns);
317 5224 : offset += fold_offsets (insn, XEXP (arg1, 1), analyze,
318 : foldable_insns);
319 : }
320 : }
321 1954 : else if (GET_CODE (arg1) == PLUS
322 1900 : && GET_CODE (XEXP (arg1, 0)) == ASHIFT
323 1888 : && REG_P (XEXP (XEXP (arg1, 0), 0))
324 1888 : && CONST_INT_P (XEXP (XEXP (arg1, 0), 1))
325 1888 : && REG_P (XEXP (arg1, 1)))
326 : {
327 : /* Handle R1 = ((R2 << C) + R3) + ... */
328 1888 : if (do_recursion)
329 : {
330 764 : HOST_WIDE_INT scale
331 764 : = (HOST_WIDE_INT_1U << INTVAL (XEXP (XEXP (arg1, 0), 1)));
332 764 : offset += scale * fold_offsets (insn, XEXP (XEXP (arg1, 0), 0),
333 : analyze, foldable_insns);
334 764 : offset += fold_offsets (insn, XEXP (arg1, 1), analyze,
335 : foldable_insns);
336 : }
337 : }
338 : else
339 : return false;
340 :
341 118650 : if (REG_P (arg2))
342 : {
343 82480 : if (do_recursion)
344 24298 : offset += fold_offsets (insn, arg2, analyze, foldable_insns);
345 : }
346 36170 : else if (CONST_INT_P (arg2))
347 : {
348 33554 : if (REG_P (arg1))
349 : {
350 4299 : offset += INTVAL (arg2);
351 : /* This is a R1 = R2 + C instruction, candidate for folding. */
352 4299 : if (!analyze)
353 30 : bitmap_set_bit (foldable_insns, INSN_UID (insn));
354 : }
355 : }
356 : else
357 : return false;
358 :
359 : /* Pattern recognized for folding. */
360 : break;
361 : }
362 0 : case MINUS:
363 0 : {
364 : /* Propagate through minus. */
365 0 : rtx arg1 = XEXP (src, 0);
366 0 : rtx arg2 = XEXP (src, 1);
367 :
368 0 : if (REG_P (arg1))
369 : {
370 0 : if (do_recursion)
371 0 : offset += fold_offsets (insn, arg1, analyze, foldable_insns);
372 : }
373 : else
374 : return false;
375 :
376 0 : if (REG_P (arg2))
377 : {
378 0 : if (do_recursion)
379 0 : offset -= fold_offsets (insn, arg2, analyze, foldable_insns);
380 : }
381 0 : else if (CONST_INT_P (arg2))
382 : {
383 0 : if (REG_P (arg1))
384 : {
385 0 : offset -= INTVAL (arg2);
386 : /* This is a R1 = R2 - C instruction, candidate for folding. */
387 0 : if (!analyze)
388 0 : bitmap_set_bit (foldable_insns, INSN_UID (insn));
389 : }
390 : }
391 : else
392 : return false;
393 :
394 : /* Pattern recognized for folding. */
395 : break;
396 : }
397 0 : case NEG:
398 0 : {
399 : /* Propagate through negation. */
400 0 : rtx arg1 = XEXP (src, 0);
401 0 : if (REG_P (arg1))
402 : {
403 0 : if (do_recursion)
404 0 : offset = -fold_offsets (insn, arg1, analyze, foldable_insns);
405 : }
406 : else
407 : return false;
408 :
409 : /* Pattern recognized for folding. */
410 : break;
411 : }
412 351 : case MULT:
413 351 : {
414 : /* Propagate through multiply by constant. */
415 351 : rtx arg1 = XEXP (src, 0);
416 351 : rtx arg2 = XEXP (src, 1);
417 :
418 351 : if (REG_P (arg1) && CONST_INT_P (arg2))
419 : {
420 351 : if (do_recursion)
421 : {
422 9 : HOST_WIDE_INT scale = INTVAL (arg2);
423 9 : offset = scale * fold_offsets (insn, arg1, analyze,
424 : foldable_insns);
425 : }
426 : }
427 : else
428 : return false;
429 :
430 : /* Pattern recognized for folding. */
431 : break;
432 : }
433 0 : case ASHIFT:
434 0 : {
435 : /* Propagate through shift left by constant. */
436 0 : rtx arg1 = XEXP (src, 0);
437 0 : rtx arg2 = XEXP (src, 1);
438 :
439 0 : if (REG_P (arg1) && CONST_INT_P (arg2))
440 : {
441 0 : if (do_recursion)
442 : {
443 0 : HOST_WIDE_INT scale = (HOST_WIDE_INT_1U << INTVAL (arg2));
444 0 : offset = scale * fold_offsets (insn, arg1, analyze,
445 : foldable_insns);
446 : }
447 : }
448 : else
449 : return false;
450 :
451 : /* Pattern recognized for folding. */
452 : break;
453 : }
454 273741 : case REG:
455 273741 : {
456 : /* Propagate through register move. */
457 273741 : if (do_recursion)
458 57780 : offset = fold_offsets (insn, src, analyze, foldable_insns);
459 :
460 : /* Pattern recognized for folding. */
461 : break;
462 : }
463 36 : case CONST_INT:
464 36 : {
465 36 : offset = INTVAL (src);
466 : /* R1 = C is candidate for folding. */
467 36 : if (!analyze)
468 14 : bitmap_set_bit (foldable_insns, INSN_UID (insn));
469 :
470 : /* Pattern recognized for folding. */
471 : break;
472 : }
473 : default:
474 : /* Cannot recognize. */
475 : return false;
476 : }
477 :
478 390162 : if (do_recursion && !analyze)
479 56133 : *offset_out = offset;
480 :
481 : return true;
482 : }
483 :
484 : /* Function that computes the offset that would have to be added to all uses
485 : of REG if the instructions marked in FOLDABLE_INSNS were to be eliminated.
486 :
487 : If ANALYZE is true then mark in CAN_FOLD_INSNS which instructions
488 : transitively only affect other instructions found in CAN_FOLD_INSNS.
489 : If ANALYZE is false then compute the offset required for folding. */
490 : static HOST_WIDE_INT
491 26140543 : fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns)
492 : {
493 26140543 : rtx_insn *def = get_single_def_in_bb (insn, reg);
494 :
495 26140543 : if (!def || RTX_FRAME_RELATED_P (def) || GET_CODE (PATTERN (def)) != SET)
496 : return 0;
497 :
498 3449752 : rtx dest = SET_DEST (PATTERN (def));
499 :
500 3449752 : if (!REG_P (dest))
501 : return 0;
502 :
503 : /* We can only affect the values of GPR registers. */
504 2235220 : unsigned int dest_regno = REGNO (dest);
505 2235220 : if (fixed_regs[dest_regno]
506 2235220 : || !TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], dest_regno))
507 : return 0;
508 :
509 2233928 : if (analyze)
510 : {
511 : /* Check if we know how to handle DEF. */
512 1110525 : if (!fold_offsets_1 (def, true, false, NULL, NULL))
513 1078514 : return 0;
514 :
515 : /* We only fold through instructions that are transitively used as
516 : memory addresses and do not have other uses. Use the same logic
517 : from offset calculation to visit instructions that can propagate
518 : offsets and keep track of them in CAN_FOLD_INSNS. */
519 302018 : bool success;
520 302018 : struct df_link *uses = get_uses (def, dest, &success), *ref_link;
521 :
522 302018 : if (!success)
523 : return 0;
524 :
525 164465 : for (ref_link = uses; ref_link; ref_link = ref_link->next)
526 : {
527 132454 : rtx_insn *use = DF_REF_INSN (ref_link->ref);
528 :
529 132454 : if (DEBUG_INSN_P (use))
530 10664 : continue;
531 :
532 : /* Punt if the use is anything more complicated than a set
533 : (clobber, use, etc). */
534 121790 : if (!NONJUMP_INSN_P (use) || GET_CODE (PATTERN (use)) != SET)
535 : return 0;
536 :
537 : /* This use affects instructions outside of CAN_FOLD_INSNS. */
538 116437 : if (!bitmap_bit_p (&can_fold_insns, INSN_UID (use)))
539 : return 0;
540 :
541 61981 : rtx use_set = PATTERN (use);
542 :
543 : /* Special case: A foldable memory store is not foldable if it
544 : mentions DEST outside of the address calculation. */
545 61981 : if (use_set && MEM_P (SET_DEST (use_set))
546 92847 : && reg_mentioned_p (dest, SET_SRC (use_set)))
547 : return 0;
548 : }
549 :
550 32011 : bitmap_set_bit (&can_fold_insns, INSN_UID (def));
551 :
552 32011 : if (dump_file && (dump_flags & TDF_DETAILS))
553 : {
554 0 : fprintf (dump_file, "Instruction marked for propagation: ");
555 0 : print_rtl_single (dump_file, def);
556 : }
557 : }
558 : else
559 : {
560 : /* We cannot propagate through this instruction. */
561 1123403 : if (!bitmap_bit_p (&can_fold_insns, INSN_UID (def)))
562 : return 0;
563 : }
564 :
565 709781 : HOST_WIDE_INT offset = 0;
566 709781 : bool recognized = fold_offsets_1 (def, analyze, true, &offset,
567 : foldable_insns);
568 :
569 709781 : if (!recognized)
570 : return 0;
571 :
572 88144 : return offset;
573 : }
574 :
575 : /* Test if INSN is a memory load / store that can have an offset folded to it.
576 : Return true iff INSN is such an instruction and return through MEM_OUT,
577 : REG_OUT and OFFSET_OUT the RTX that has a MEM code, the register that is
578 : used as a base address and the offset accordingly.
579 : All of the out pointers may be NULL in which case they will be ignored. */
580 : bool
581 255068584 : get_fold_mem_root (rtx_insn *insn, rtx *mem_out, rtx *reg_out,
582 : HOST_WIDE_INT *offset_out)
583 : {
584 255068584 : rtx set = single_set (insn);
585 255068584 : rtx mem = NULL_RTX;
586 :
587 255068584 : if (set != NULL_RTX)
588 : {
589 119408452 : rtx src = SET_SRC (set);
590 119408452 : rtx dest = SET_DEST (set);
591 :
592 : /* Don't fold when we have unspec / volatile. */
593 119408452 : if (GET_CODE (src) == UNSPEC
594 119408452 : || GET_CODE (src) == UNSPEC_VOLATILE
595 118720076 : || GET_CODE (dest) == UNSPEC
596 118720076 : || GET_CODE (dest) == UNSPEC_VOLATILE)
597 : return false;
598 :
599 118720076 : if (MEM_P (src))
600 : mem = src;
601 88994916 : else if (MEM_P (dest))
602 : mem = dest;
603 56998336 : else if ((GET_CODE (src) == SIGN_EXTEND
604 56998336 : || GET_CODE (src) == ZERO_EXTEND)
605 1276432 : && MEM_P (XEXP (src, 0)))
606 : mem = XEXP (src, 0);
607 : }
608 :
609 : if (mem == NULL_RTX)
610 : return false;
611 :
612 62262686 : rtx mem_addr = XEXP (mem, 0);
613 62262686 : rtx reg;
614 62262686 : HOST_WIDE_INT offset;
615 :
616 62262686 : if (REG_P (mem_addr))
617 : {
618 : reg = mem_addr;
619 : offset = 0;
620 : }
621 53486126 : else if (GET_CODE (mem_addr) == PLUS
622 45042946 : && REG_P (XEXP (mem_addr, 0))
623 43867424 : && CONST_INT_P (XEXP (mem_addr, 1)))
624 : {
625 43267708 : reg = XEXP (mem_addr, 0);
626 43267708 : offset = INTVAL (XEXP (mem_addr, 1));
627 : }
628 : else
629 : return false;
630 :
631 52044268 : if (mem_out)
632 39033201 : *mem_out = mem;
633 52044268 : if (reg_out)
634 52044268 : *reg_out = reg;
635 52044268 : if (offset_out)
636 39033201 : *offset_out = offset;
637 :
638 : return true;
639 : }
640 :
641 : /* If INSN is a root memory instruction then do a DFS traversal on its
642 : definitions and find folding candidates. */
643 : static void
644 114523225 : do_analysis (rtx_insn *insn)
645 : {
646 114523225 : rtx reg;
647 114523225 : if (!get_fold_mem_root (insn, NULL, ®, NULL))
648 101512158 : return;
649 :
650 13011067 : if (dump_file && (dump_flags & TDF_DETAILS))
651 : {
652 0 : fprintf (dump_file, "Starting analysis from root: ");
653 0 : print_rtl_single (dump_file, insn);
654 : }
655 :
656 : /* Analyse folding opportunities for this memory instruction. */
657 13011067 : bitmap_set_bit (&can_fold_insns, INSN_UID (insn));
658 13011067 : fold_offsets (insn, reg, true, NULL);
659 : }
660 :
661 : static void
662 114523225 : do_fold_info_calculation (rtx_insn *insn, fold_info_map *fold_info)
663 : {
664 114523225 : rtx mem, reg;
665 114523225 : HOST_WIDE_INT cur_offset;
666 114523225 : if (!get_fold_mem_root (insn, &mem, ®, &cur_offset))
667 101512158 : return;
668 :
669 13011067 : fold_mem_info *info = new fold_mem_info;
670 13011067 : info->added_offset = fold_offsets (insn, reg, false, info->fold_insns);
671 :
672 13011067 : fold_info->put (insn, info);
673 : }
674 :
675 : /* If INSN is a root memory instruction then compute a potentially new offset
676 : for it and test if the resulting instruction is valid. */
677 : static void
678 13011067 : do_check_validity (rtx_insn *insn, fold_mem_info *info)
679 : {
680 13011067 : rtx mem, reg;
681 13011067 : HOST_WIDE_INT cur_offset;
682 13011067 : if (!get_fold_mem_root (insn, &mem, ®, &cur_offset))
683 0 : return;
684 :
685 13011067 : HOST_WIDE_INT new_offset = cur_offset + info->added_offset;
686 :
687 : /* Test if it is valid to change MEM's address offset to NEW_OFFSET. */
688 13011067 : int icode = INSN_CODE (insn);
689 13011067 : INSN_CODE (insn) = -1;
690 13011067 : rtx mem_addr = XEXP (mem, 0);
691 13011067 : machine_mode mode = GET_MODE (mem_addr);
692 13011067 : if (new_offset != 0)
693 10816954 : XEXP (mem, 0) = gen_rtx_PLUS (mode, reg, gen_int_mode (new_offset, mode));
694 : else
695 2194113 : XEXP (mem, 0) = reg;
696 :
697 13011067 : bool illegal = insn_invalid_p (insn, false)
698 26022121 : || !memory_address_addr_space_p (mode, XEXP (mem, 0),
699 13257016 : MEM_ADDR_SPACE (mem));
700 :
701 : /* Restore the instruction. */
702 13011067 : XEXP (mem, 0) = mem_addr;
703 13011067 : INSN_CODE (insn) = icode;
704 :
705 13011067 : if (illegal)
706 13 : bitmap_ior_into (&cannot_fold_insns, info->fold_insns);
707 : else
708 13011054 : bitmap_ior_into (&candidate_fold_insns, info->fold_insns);
709 : }
710 :
711 : static bool
712 10366025 : compute_validity_closure (fold_info_map *fold_info)
713 : {
714 : /* Let's say we have an arbitrary chain of foldable instructions xN = xN + C
715 : and memory operations rN that use xN as shown below. If folding x1 in r1
716 : turns out to be invalid for whatever reason then it's also invalid to fold
717 : any of the other xN into any rN. That means that we need the transitive
718 : closure of validity to determine whether we can fold a xN instruction.
719 :
720 : +--------------+ +-------------------+ +-------------------+
721 : | r1 = mem[x1] | | r2 = mem[x1 + x2] | | r3 = mem[x2 + x3] | ...
722 : +--------------+ +-------------------+ +-------------------+
723 : ^ ^ ^ ^ ^
724 : | / | / | ...
725 : | / | / |
726 : +-------------+ / +-------------+ / +-------------+
727 : | x1 = x1 + 1 |-----+ | x2 = x2 + 1 |-----+ | x3 = x3 + 1 |--- ...
728 : +-------------+ +-------------+ +-------------+
729 : ^ ^ ^
730 : | | |
731 : ... ... ...
732 : */
733 :
734 : /* In general three iterations should be enough for most cases, but allow up
735 : to five when -fexpensive-optimizations is used. */
736 10366025 : int max_iters = 3 + 2 * flag_expensive_optimizations;
737 10366025 : for (int pass = 0; pass < max_iters; pass++)
738 : {
739 10366025 : bool made_changes = false;
740 23377092 : for (fold_info_map::iterator iter = fold_info->begin ();
741 36388159 : iter != fold_info->end (); ++iter)
742 : {
743 13011067 : fold_mem_info *info = (*iter).second;
744 13011067 : if (bitmap_intersect_p (&cannot_fold_insns, info->fold_insns))
745 13 : made_changes |= bitmap_ior_into (&cannot_fold_insns,
746 13 : info->fold_insns);
747 : }
748 :
749 10366025 : if (!made_changes)
750 : return true;
751 : }
752 :
753 : return false;
754 : }
755 :
756 : /* If INSN is a root memory instruction that was affected by any folding
757 : then update its offset as necessary. */
758 : static void
759 13011067 : do_commit_offset (rtx_insn *insn, fold_mem_info *info)
760 : {
761 13011067 : rtx mem, reg;
762 13011067 : HOST_WIDE_INT cur_offset;
763 13011067 : if (!get_fold_mem_root (insn, &mem, ®, &cur_offset))
764 13011036 : return;
765 :
766 13011067 : HOST_WIDE_INT new_offset = cur_offset + info->added_offset;
767 :
768 13011067 : if (new_offset == cur_offset)
769 : return;
770 :
771 44 : gcc_assert (!bitmap_empty_p (info->fold_insns));
772 :
773 44 : if (bitmap_intersect_p (&cannot_fold_insns, info->fold_insns))
774 : return;
775 :
776 31 : if (dump_file)
777 : {
778 0 : fprintf (dump_file, "Memory offset changed from "
779 : HOST_WIDE_INT_PRINT_DEC " to " HOST_WIDE_INT_PRINT_DEC
780 : " for instruction:\n", cur_offset, new_offset);
781 0 : print_rtl_single (dump_file, insn);
782 : }
783 :
784 31 : machine_mode mode = GET_MODE (XEXP (mem, 0));
785 31 : if (new_offset != 0)
786 31 : XEXP (mem, 0) = gen_rtx_PLUS (mode, reg, gen_int_mode (new_offset, mode));
787 : else
788 0 : XEXP (mem, 0) = reg;
789 31 : INSN_CODE (insn) = recog (PATTERN (insn), insn, 0);
790 31 : df_insn_rescan (insn);
791 : }
792 :
793 : /* If INSN is a move / add instruction that was folded then replace its
794 : constant part with zero. */
795 : static void
796 114523244 : do_commit_insn (rtx_insn *insn)
797 : {
798 114523244 : if (bitmap_bit_p (&candidate_fold_insns, INSN_UID (insn))
799 114523244 : && !bitmap_bit_p (&cannot_fold_insns, INSN_UID (insn)))
800 : {
801 19 : if (dump_file)
802 : {
803 0 : fprintf (dump_file, "Instruction folded:");
804 0 : print_rtl_single (dump_file, insn);
805 : }
806 :
807 19 : stats_fold_count++;
808 :
809 19 : rtx set = single_set (insn);
810 19 : rtx dest = SET_DEST (set);
811 19 : rtx src = SET_SRC (set);
812 :
813 : /* Emit a move and let subsequent passes eliminate it if possible. */
814 19 : if (GET_CODE (src) == CONST_INT)
815 : {
816 : /* INSN is R1 = C.
817 : Replace it with R1 = 0 because C was folded. */
818 1 : rtx mov_rtx
819 1 : = gen_move_insn (dest, gen_int_mode (0, GET_MODE (dest)));
820 1 : df_insn_rescan (emit_insn_after (mov_rtx, insn));
821 : }
822 : else
823 : {
824 : /* INSN is R1 = R2 + C.
825 : Replace it with R1 = R2 because C was folded. */
826 18 : rtx arg1 = XEXP (src, 0);
827 :
828 : /* If the DEST == ARG1 then the move is a no-op. */
829 18 : if (REGNO (dest) != REGNO (arg1))
830 : {
831 18 : gcc_checking_assert (GET_MODE (dest) == GET_MODE (arg1));
832 18 : rtx mov_rtx = gen_move_insn (dest, arg1);
833 18 : df_insn_rescan (emit_insn_after (mov_rtx, insn));
834 : }
835 : }
836 :
837 : /* Delete the original move / add instruction. */
838 19 : delete_insn (insn);
839 : }
840 114523244 : }
841 :
842 : unsigned int
843 963981 : pass_fold_mem_offsets::execute (function *fn)
844 : {
845 : /* Computing UD/DU chains for flow graphs which have a high connectivity
846 : will take a long time and is unlikely to be particularly useful.
847 :
848 : In normal circumstances a cfg should have about twice as many
849 : edges as blocks. But we do not want to punish small functions
850 : which have a couple switch statements. Rather than simply
851 : threshold the number of blocks, uses something with a more
852 : graceful degradation. */
853 963981 : if (n_edges_for_fn (fn) > 20000 + n_basic_blocks_for_fn (fn) * 4)
854 : {
855 0 : warning (OPT_Wdisabled_optimization,
856 : "fold-mem-offsets: %d basic blocks and %d edges/basic block",
857 : n_basic_blocks_for_fn (cfun),
858 0 : n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun));
859 0 : return 0;
860 : }
861 :
862 963981 : df_set_flags (DF_EQ_NOTES + DF_RD_PRUNE_DEAD_DEFS + DF_DEFER_INSN_RESCAN);
863 963981 : df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
864 963981 : df_analyze ();
865 :
866 963981 : bitmap_initialize (&can_fold_insns, NULL);
867 963981 : bitmap_initialize (&candidate_fold_insns, NULL);
868 963981 : bitmap_initialize (&cannot_fold_insns, NULL);
869 :
870 963981 : stats_fold_count = 0;
871 :
872 963981 : basic_block bb;
873 963981 : rtx_insn *insn;
874 13723676 : FOR_ALL_BB_FN (bb, fn)
875 : {
876 : /* There is a conflict between this pass and RISCV's shorten-memrefs
877 : pass. For now disable folding if optimizing for size because
878 : otherwise this cancels the effects of shorten-memrefs. */
879 12759695 : if (optimize_bb_for_size_p (bb))
880 2393670 : continue;
881 :
882 10366025 : fold_info_map fold_info;
883 :
884 10366025 : bitmap_clear (&can_fold_insns);
885 10366025 : bitmap_clear (&candidate_fold_insns);
886 10366025 : bitmap_clear (&cannot_fold_insns);
887 :
888 124889250 : FOR_BB_INSNS (bb, insn)
889 114523225 : do_analysis (insn);
890 :
891 124889250 : FOR_BB_INSNS (bb, insn)
892 114523225 : do_fold_info_calculation (insn, &fold_info);
893 :
894 124889250 : FOR_BB_INSNS (bb, insn)
895 114523225 : if (fold_mem_info **info = fold_info.get (insn))
896 13011067 : do_check_validity (insn, *info);
897 :
898 10366025 : if (compute_validity_closure (&fold_info))
899 : {
900 124889250 : FOR_BB_INSNS (bb, insn)
901 114523225 : if (fold_mem_info **info = fold_info.get (insn))
902 13011067 : do_commit_offset (insn, *info);
903 :
904 124889269 : FOR_BB_INSNS (bb, insn)
905 114523244 : do_commit_insn (insn);
906 : }
907 :
908 23377092 : for (fold_info_map::iterator iter = fold_info.begin ();
909 46754184 : iter != fold_info.end (); ++iter)
910 26022134 : delete (*iter).second;
911 10366025 : }
912 :
913 963981 : statistics_counter_event (cfun, "Number of folded instructions",
914 : stats_fold_count);
915 :
916 963981 : bitmap_release (&can_fold_insns);
917 963981 : bitmap_release (&candidate_fold_insns);
918 963981 : bitmap_release (&cannot_fold_insns);
919 :
920 963981 : return 0;
921 : }
922 :
923 : } // anon namespace
924 :
925 : rtl_opt_pass *
926 285722 : make_pass_fold_mem_offsets (gcc::context *ctxt)
927 : {
928 285722 : return new pass_fold_mem_offsets (ctxt);
929 : }
|