Line data Source code
1 : /* Post reload partially redundant load elimination
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "target.h"
25 : #include "rtl.h"
26 : #include "tree.h"
27 : #include "predict.h"
28 : #include "df.h"
29 : #include "memmodel.h"
30 : #include "tm_p.h"
31 : #include "insn-config.h"
32 : #include "emit-rtl.h"
33 : #include "recog.h"
34 :
35 : #include "cfgrtl.h"
36 : #include "profile.h"
37 : #include "expr.h"
38 : #include "tree-pass.h"
39 : #include "dbgcnt.h"
40 : #include "intl.h"
41 : #include "gcse-common.h"
42 : #include "gcse.h"
43 : #include "regs.h"
44 : #include "function-abi.h"
45 :
46 : /* The following code implements gcse after reload, the purpose of this
47 : pass is to cleanup redundant loads generated by reload and other
48 : optimizations that come after gcse. It searches for simple inter-block
49 : redundancies and tries to eliminate them by adding moves and loads
50 : in cold places.
51 :
52 : Perform partially redundant load elimination, try to eliminate redundant
53 : loads created by the reload pass. We try to look for full or partial
54 : redundant loads fed by one or more loads/stores in predecessor BBs,
55 : and try adding loads to make them fully redundant. We also check if
56 : it's worth adding loads to be able to delete the redundant load.
57 :
58 : Algorithm:
59 : 1. Build available expressions hash table:
60 : For each load/store instruction, if the loaded/stored memory didn't
61 : change until the end of the basic block add this memory expression to
62 : the hash table.
63 : 2. Perform Redundancy elimination:
64 : For each load instruction do the following:
65 : perform partial redundancy elimination, check if it's worth adding
66 : loads to make the load fully redundant. If so add loads and
67 : register copies and delete the load.
68 : 3. Delete instructions made redundant in step 2.
69 :
70 : Future enhancement:
71 : If the loaded register is used/defined between load and some store,
72 : look for some other free register between load and all its stores,
73 : and replace the load with a copy from this register to the loaded
74 : register.
75 : */
76 :
77 :
78 : /* Keep statistics of this pass. */
79 : static struct
80 : {
81 : int moves_inserted;
82 : int copies_inserted;
83 : int insns_deleted;
84 : } stats;
85 :
86 : /* We need to keep a hash table of expressions. The table entries are of
87 : type 'struct expr', and for each expression there is a single linked
88 : list of occurrences. */
89 :
90 : /* Expression elements in the hash table. */
91 : struct expr
92 : {
93 : /* The expression (SET_SRC for expressions, PATTERN for assignments). */
94 : rtx expr;
95 :
96 : /* The same hash for this entry. */
97 : hashval_t hash;
98 :
99 : /* Index in the transparent bitmaps. */
100 : unsigned int bitmap_index;
101 :
102 : /* List of available occurrence in basic blocks in the function. */
103 : struct occr *avail_occr;
104 : };
105 :
106 : /* Hashtable helpers. */
107 :
108 : struct expr_hasher : nofree_ptr_hash <expr>
109 : {
110 : static inline hashval_t hash (const expr *);
111 : static inline bool equal (const expr *, const expr *);
112 : };
113 :
114 :
115 : /* Hash expression X.
116 : DO_NOT_RECORD_P is a boolean indicating if a volatile operand is found
117 : or if the expression contains something we don't want to insert in the
118 : table. */
119 :
120 : static hashval_t
121 877146 : hash_expr (rtx x, int *do_not_record_p)
122 : {
123 877146 : *do_not_record_p = 0;
124 877146 : return hash_rtx (x, GET_MODE (x), do_not_record_p,
125 877146 : NULL, /*have_reg_qty=*/false);
126 : }
127 :
128 : /* Callback for hashtab.
129 : Return the hash value for expression EXP. We don't actually hash
130 : here, we just return the cached hash value. */
131 :
132 : inline hashval_t
133 1595023 : expr_hasher::hash (const expr *exp)
134 : {
135 1595023 : return exp->hash;
136 : }
137 :
138 : /* Callback for hashtab.
139 : Return nonzero if exp1 is equivalent to exp2. */
140 :
141 : inline bool
142 2336997 : expr_hasher::equal (const expr *exp1, const expr *exp2)
143 : {
144 2336997 : bool equiv_p = exp_equiv_p (exp1->expr, exp2->expr, 0, true);
145 :
146 2336997 : gcc_assert (!equiv_p || exp1->hash == exp2->hash);
147 2336997 : return equiv_p;
148 : }
149 :
150 : /* The table itself. */
151 : static hash_table<expr_hasher> *expr_table;
152 :
153 :
154 : static struct obstack expr_obstack;
155 :
156 : /* Occurrence of an expression.
157 : There is at most one occurrence per basic block. If a pattern appears
158 : more than once, the last appearance is used. */
159 :
160 : struct occr
161 : {
162 : /* Next occurrence of this expression. */
163 : struct occr *next;
164 : /* The insn that computes the expression. */
165 : rtx_insn *insn;
166 : /* Nonzero if this [anticipatable] occurrence has been deleted. */
167 : char deleted_p;
168 : };
169 :
170 : static struct obstack occr_obstack;
171 :
172 : /* The following structure holds the information about the occurrences of
173 : the redundant instructions. */
174 : struct unoccr
175 : {
176 : struct unoccr *next;
177 : edge pred;
178 : rtx_insn *insn;
179 : };
180 :
181 : static struct obstack unoccr_obstack;
182 :
183 : /* Array where each element is the CUID if the insn that last set the hard
184 : register with the number of the element, since the start of the current
185 : basic block.
186 :
187 : This array is used during the building of the hash table (step 1) to
188 : determine if a reg is killed before the end of a basic block.
189 :
190 : It is also used when eliminating partial redundancies (step 2) to see
191 : if a reg was modified since the start of a basic block. */
192 : static int *reg_avail_info;
193 :
194 : /* A list of insns that may modify memory within the current basic block. */
195 : struct modifies_mem
196 : {
197 : rtx_insn *insn;
198 : struct modifies_mem *next;
199 : };
200 : static struct modifies_mem *modifies_mem_list;
201 :
202 : /* The modifies_mem structs also go on an obstack, only this obstack is
203 : freed each time after completing the analysis or transformations on
204 : a basic block. So we allocate a dummy modifies_mem_obstack_bottom
205 : object on the obstack to keep track of the bottom of the obstack. */
206 : static struct obstack modifies_mem_obstack;
207 : static struct modifies_mem *modifies_mem_obstack_bottom;
208 :
209 : /* Mapping of insn UIDs to CUIDs.
210 : CUIDs are like UIDs except they increase monotonically in each basic
211 : block, have no gaps, and only apply to real insns. */
212 : static int *uid_cuid;
213 : #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
214 :
215 : /* Bitmap of blocks which have memory stores. */
216 : static bitmap modify_mem_list_set;
217 :
218 : /* Bitmap of blocks which have calls. */
219 : static bitmap blocks_with_calls;
220 :
221 : /* Vector indexed by block # with a list of all the insns that
222 : modify memory within the block. */
223 : static vec<rtx_insn *> *modify_mem_list;
224 :
225 : /* Vector indexed by block # with a canonicalized list of insns
226 : that modify memory in the block. */
227 : static vec<modify_pair> *canon_modify_mem_list;
228 :
229 : /* Vector of simple bitmaps indexed by block number. Each component sbitmap
230 : indicates which expressions are transparent through the block. */
231 : static sbitmap *transp;
232 :
233 :
234 : /* Helpers for memory allocation/freeing. */
235 : static void alloc_mem (void);
236 : static void free_mem (void);
237 :
238 : /* Support for hash table construction and transformations. */
239 : static bool oprs_unchanged_p (rtx, rtx_insn *, bool);
240 : static void record_last_reg_set_info (rtx_insn *, rtx);
241 : static void record_last_reg_set_info_regno (rtx_insn *, int);
242 : static void record_last_mem_set_info (rtx_insn *);
243 : static void record_last_set_info (rtx, const_rtx, void *);
244 : static void record_opr_changes (rtx_insn *);
245 :
246 : static void find_mem_conflicts (rtx, const_rtx, void *);
247 : static bool load_killed_in_block_p (int, rtx, bool);
248 : static void reset_opr_set_tables (void);
249 :
250 : /* Hash table support. */
251 : static hashval_t hash_expr (rtx, int *);
252 : static void insert_expr_in_table (rtx, rtx_insn *);
253 : static struct expr *lookup_expr_in_table (rtx);
254 : static void dump_hash_table (FILE *);
255 :
256 : /* Helpers for eliminate_partially_redundant_load. */
257 : static bool reg_killed_on_edge (rtx, edge);
258 : static bool reg_used_on_edge (rtx, edge);
259 :
260 : static rtx get_avail_load_store_reg (rtx_insn *);
261 :
262 : static bool bb_has_well_behaved_predecessors (basic_block);
263 : static struct occr* get_bb_avail_insn (basic_block, struct occr *, int);
264 : static void hash_scan_set (rtx_insn *);
265 : static void compute_hash_table (void);
266 :
267 : /* The work horses of this pass. */
268 : static void eliminate_partially_redundant_load (basic_block,
269 : rtx_insn *,
270 : struct expr *);
271 : static void eliminate_partially_redundant_loads (void);
272 :
273 :
274 : /* Allocate memory for the CUID mapping array and register/memory
275 : tracking tables. */
276 :
277 : static void
278 89238 : alloc_mem (void)
279 : {
280 89238 : int i;
281 89238 : basic_block bb;
282 89238 : rtx_insn *insn;
283 :
284 : /* Find the largest UID and create a mapping from UIDs to CUIDs. */
285 89238 : uid_cuid = XCNEWVEC (int, get_max_uid () + 1);
286 89238 : i = 1;
287 1157749 : FOR_EACH_BB_FN (bb, cfun)
288 9768997 : FOR_BB_INSNS (bb, insn)
289 : {
290 8700486 : if (INSN_P (insn))
291 6484926 : uid_cuid[INSN_UID (insn)] = i++;
292 : else
293 2215560 : uid_cuid[INSN_UID (insn)] = i;
294 : }
295 :
296 : /* Allocate the available expressions hash table. We don't want to
297 : make the hash table too small, but unnecessarily making it too large
298 : also doesn't help. The i/4 is a gcse.cc relic, and seems like a
299 : reasonable choice. */
300 89238 : expr_table = new hash_table<expr_hasher> (MAX (i / 4, 13));
301 :
302 : /* We allocate everything on obstacks because we often can roll back
303 : the whole obstack to some point. Freeing obstacks is very fast. */
304 89238 : gcc_obstack_init (&expr_obstack);
305 89238 : gcc_obstack_init (&occr_obstack);
306 89238 : gcc_obstack_init (&unoccr_obstack);
307 89238 : gcc_obstack_init (&modifies_mem_obstack);
308 :
309 : /* Working array used to track the last set for each register
310 : in the current block. */
311 89238 : reg_avail_info = (int *) xmalloc (FIRST_PSEUDO_REGISTER * sizeof (int));
312 :
313 : /* Put a dummy modifies_mem object on the modifies_mem_obstack, so we
314 : can roll it back in reset_opr_set_tables. */
315 178476 : modifies_mem_obstack_bottom =
316 89238 : (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
317 : sizeof (struct modifies_mem));
318 :
319 89238 : blocks_with_calls = BITMAP_ALLOC (NULL);
320 89238 : modify_mem_list_set = BITMAP_ALLOC (NULL);
321 :
322 89238 : modify_mem_list = (vec_rtx_heap *) xcalloc (last_basic_block_for_fn (cfun),
323 : sizeof (vec_rtx_heap));
324 89238 : canon_modify_mem_list
325 89238 : = (vec_modify_pair_heap *) xcalloc (last_basic_block_for_fn (cfun),
326 : sizeof (vec_modify_pair_heap));
327 89238 : }
328 :
329 : /* Free memory allocated by alloc_mem. */
330 :
331 : static void
332 89238 : free_mem (void)
333 : {
334 89238 : free (uid_cuid);
335 :
336 89238 : delete expr_table;
337 89238 : expr_table = NULL;
338 :
339 89238 : obstack_free (&expr_obstack, NULL);
340 89238 : obstack_free (&occr_obstack, NULL);
341 89238 : obstack_free (&unoccr_obstack, NULL);
342 89238 : obstack_free (&modifies_mem_obstack, NULL);
343 :
344 89238 : unsigned i;
345 89238 : bitmap_iterator bi;
346 449749 : EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
347 : {
348 360511 : modify_mem_list[i].release ();
349 360511 : canon_modify_mem_list[i].release ();
350 : }
351 :
352 89238 : BITMAP_FREE (blocks_with_calls);
353 89238 : BITMAP_FREE (modify_mem_list_set);
354 89238 : free (reg_avail_info);
355 89238 : free (modify_mem_list);
356 89238 : free (canon_modify_mem_list);
357 89238 : }
358 :
359 :
360 : /* Insert expression X in INSN in the hash TABLE.
361 : If it is already present, record it as the last occurrence in INSN's
362 : basic block. */
363 :
364 : static void
365 586703 : insert_expr_in_table (rtx x, rtx_insn *insn)
366 : {
367 586703 : int do_not_record_p;
368 586703 : hashval_t hash;
369 586703 : struct expr *cur_expr, **slot;
370 586703 : struct occr *avail_occr;
371 :
372 586703 : hash = hash_expr (x, &do_not_record_p);
373 :
374 : /* Do not insert expression in the table if it contains volatile operands,
375 : or if hash_expr determines the expression is something we don't want
376 : to or can't handle. */
377 586703 : if (do_not_record_p)
378 30850 : return;
379 :
380 : /* We anticipate that redundant expressions are rare, so for convenience
381 : allocate a new hash table element here already and set its fields.
382 : If we don't do this, we need a hack with a static struct expr. Anyway,
383 : obstack_free is really fast and one more obstack_alloc doesn't hurt if
384 : we're going to see more expressions later on. */
385 555853 : cur_expr = (struct expr *) obstack_alloc (&expr_obstack,
386 : sizeof (struct expr));
387 555853 : cur_expr->expr = x;
388 555853 : cur_expr->hash = hash;
389 555853 : cur_expr->avail_occr = NULL;
390 :
391 555853 : slot = expr_table->find_slot_with_hash (cur_expr, hash, INSERT);
392 :
393 555853 : if (! (*slot))
394 : {
395 : /* The expression isn't found, so insert it. */
396 342940 : *slot = cur_expr;
397 :
398 : /* Anytime we add an entry to the table, record the index
399 : of the new entry. The bitmap index starts counting
400 : at zero. */
401 342940 : cur_expr->bitmap_index = expr_table->elements () - 1;
402 : }
403 : else
404 : {
405 : /* The expression is already in the table, so roll back the
406 : obstack and use the existing table entry. */
407 212913 : obstack_free (&expr_obstack, cur_expr);
408 212913 : cur_expr = *slot;
409 : }
410 :
411 : /* Search for another occurrence in the same basic block. We insert
412 : insns blockwise from start to end, so keep appending to the
413 : start of the list so we have to check only a single element. */
414 555853 : avail_occr = cur_expr->avail_occr;
415 555853 : if (avail_occr
416 555853 : && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
417 12958 : avail_occr->insn = insn;
418 : else
419 : {
420 : /* First occurrence of this expression in this basic block. */
421 542895 : avail_occr = (struct occr *) obstack_alloc (&occr_obstack,
422 : sizeof (struct occr));
423 542895 : avail_occr->insn = insn;
424 542895 : avail_occr->next = cur_expr->avail_occr;
425 542895 : avail_occr->deleted_p = 0;
426 542895 : cur_expr->avail_occr = avail_occr;
427 : }
428 : }
429 :
430 :
431 : /* Lookup pattern PAT in the expression hash table.
432 : The result is a pointer to the table entry, or NULL if not found. */
433 :
434 : static struct expr *
435 290443 : lookup_expr_in_table (rtx pat)
436 : {
437 290443 : int do_not_record_p;
438 290443 : struct expr **slot, *tmp_expr;
439 290443 : hashval_t hash = hash_expr (pat, &do_not_record_p);
440 :
441 290443 : if (do_not_record_p)
442 : return NULL;
443 :
444 290443 : tmp_expr = (struct expr *) obstack_alloc (&expr_obstack,
445 : sizeof (struct expr));
446 290443 : tmp_expr->expr = pat;
447 290443 : tmp_expr->hash = hash;
448 290443 : tmp_expr->avail_occr = NULL;
449 :
450 290443 : slot = expr_table->find_slot_with_hash (tmp_expr, hash, NO_INSERT);
451 290443 : obstack_free (&expr_obstack, tmp_expr);
452 :
453 290443 : if (!slot)
454 : return NULL;
455 : else
456 199821 : return (*slot);
457 : }
458 :
459 :
460 : /* Dump all expressions and occurrences that are currently in the
461 : expression hash table to FILE. */
462 :
463 : /* This helper is called via htab_traverse. */
464 : int
465 20 : dump_expr_hash_table_entry (expr **slot, FILE *file)
466 : {
467 20 : struct expr *exprs = *slot;
468 20 : struct occr *occr;
469 :
470 20 : fprintf (file, "expr: ");
471 20 : print_rtl (file, exprs->expr);
472 20 : fprintf (file,"\nhashcode: %u\n", exprs->hash);
473 20 : fprintf (file,"list of occurrences:\n");
474 20 : occr = exprs->avail_occr;
475 40 : while (occr)
476 : {
477 20 : rtx_insn *insn = occr->insn;
478 20 : print_rtl_single (file, insn);
479 20 : fprintf (file, "\n");
480 20 : occr = occr->next;
481 : }
482 20 : fprintf (file, "\n");
483 20 : return 1;
484 : }
485 :
486 : static void
487 8 : dump_hash_table (FILE *file)
488 : {
489 8 : fprintf (file, "\n\nexpression hash table\n");
490 8 : fprintf (file, "size " HOST_SIZE_T_PRINT_DEC ", " HOST_SIZE_T_PRINT_DEC
491 : " elements, %f collision/search ratio\n",
492 8 : (fmt_size_t) expr_table->size (),
493 8 : (fmt_size_t) expr_table->elements (),
494 : expr_table->collisions ());
495 8 : if (!expr_table->is_empty ())
496 : {
497 8 : fprintf (file, "\n\ntable entries:\n");
498 28 : expr_table->traverse <FILE *, dump_expr_hash_table_entry> (file);
499 : }
500 8 : fprintf (file, "\n");
501 8 : }
502 :
503 : /* Return true if register X is recorded as being set by an instruction
504 : whose CUID is greater than the one given. */
505 :
506 : static bool
507 1236398 : reg_changed_after_insn_p (rtx x, int cuid)
508 : {
509 1236398 : unsigned int regno, end_regno;
510 :
511 1236398 : regno = REGNO (x);
512 1236398 : end_regno = END_REGNO (x);
513 1236551 : do
514 1236551 : if (reg_avail_info[regno] > cuid)
515 : return true;
516 1008603 : while (++regno < end_regno);
517 : return false;
518 : }
519 :
520 : /* Return nonzero if the operands of expression X are unchanged
521 : 1) from the start of INSN's basic block up to but not including INSN
522 : if AFTER_INSN is false, or
523 : 2) from INSN to the end of INSN's basic block if AFTER_INSN is true. */
524 :
525 : static bool
526 4052992 : oprs_unchanged_p (rtx x, rtx_insn *insn, bool after_insn)
527 : {
528 4052992 : int i, j;
529 4052992 : enum rtx_code code;
530 4052992 : const char *fmt;
531 :
532 4052992 : if (x == 0)
533 : return true;
534 :
535 4052992 : code = GET_CODE (x);
536 4052992 : switch (code)
537 : {
538 1036577 : case REG:
539 : /* We are called after register allocation. */
540 1036577 : gcc_assert (REGNO (x) < FIRST_PSEUDO_REGISTER);
541 1036577 : if (after_insn)
542 705612 : return !reg_changed_after_insn_p (x, INSN_CUID (insn) - 1);
543 : else
544 330965 : return !reg_changed_after_insn_p (x, 0);
545 :
546 1149041 : case MEM:
547 1149041 : if (load_killed_in_block_p (INSN_CUID (insn), x, after_insn))
548 : return false;
549 : else
550 801114 : return oprs_unchanged_p (XEXP (x, 0), insn, after_insn);
551 :
552 : case PC:
553 : case CONST:
554 : CASE_CONST_ANY:
555 : case SYMBOL_REF:
556 : case LABEL_REF:
557 : case ADDR_VEC:
558 : case ADDR_DIFF_VEC:
559 : return true;
560 :
561 0 : case PRE_DEC:
562 0 : case PRE_INC:
563 0 : case POST_DEC:
564 0 : case POST_INC:
565 0 : case PRE_MODIFY:
566 0 : case POST_MODIFY:
567 0 : if (after_insn)
568 : return false;
569 : break;
570 :
571 : default:
572 : break;
573 : }
574 :
575 2529266 : for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
576 : {
577 1825247 : if (fmt[i] == 'e')
578 : {
579 1825247 : if (! oprs_unchanged_p (XEXP (x, i), insn, after_insn))
580 : return false;
581 : }
582 0 : else if (fmt[i] == 'E')
583 0 : for (j = 0; j < XVECLEN (x, i); j++)
584 0 : if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, after_insn))
585 : return false;
586 : }
587 :
588 : return true;
589 : }
590 :
591 :
592 : /* Used for communication between find_mem_conflicts and
593 : load_killed_in_block_p. Nonzero if find_mem_conflicts finds a
594 : conflict between two memory references.
595 : This is a bit of a hack to work around the limitations of note_stores. */
596 : static int mems_conflict_p;
597 :
598 : /* DEST is the output of an instruction. If it is a memory reference, and
599 : possibly conflicts with the load found in DATA, then set mems_conflict_p
600 : to a nonzero value. */
601 :
602 : static void
603 6642368 : find_mem_conflicts (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
604 : void *data)
605 : {
606 6642368 : rtx mem_op = (rtx) data;
607 :
608 6642368 : while (GET_CODE (dest) == SUBREG
609 6642368 : || GET_CODE (dest) == ZERO_EXTRACT
610 13284736 : || GET_CODE (dest) == STRICT_LOW_PART)
611 0 : dest = XEXP (dest, 0);
612 :
613 : /* If DEST is not a MEM, then it will not conflict with the load. Note
614 : that function calls are assumed to clobber memory, but are handled
615 : elsewhere. */
616 6642368 : if (! MEM_P (dest))
617 : return;
618 :
619 6599241 : if (true_dependence (dest, GET_MODE (dest), mem_op))
620 188618 : mems_conflict_p = 1;
621 : }
622 :
623 :
624 : /* Return nonzero if the expression in X (a memory reference) is killed
625 : in the current basic block before (if AFTER_INSN is false) or after
626 : (if AFTER_INSN is true) the insn with the CUID in UID_LIMIT.
627 :
628 : This function assumes that the modifies_mem table is flushed when
629 : the hash table construction or redundancy elimination phases start
630 : processing a new basic block. */
631 :
632 : static bool
633 1698262 : load_killed_in_block_p (int uid_limit, rtx x, bool after_insn)
634 : {
635 1698262 : struct modifies_mem *list_entry = modifies_mem_list;
636 :
637 14914773 : while (list_entry)
638 : {
639 13836069 : rtx_insn *setter = list_entry->insn;
640 :
641 : /* Ignore entries in the list that do not apply. */
642 20641957 : if ((after_insn
643 12398384 : && INSN_CUID (setter) < uid_limit)
644 13836069 : || (! after_insn
645 1437685 : && INSN_CUID (setter) > uid_limit))
646 : {
647 6805888 : list_entry = list_entry->next;
648 6805888 : continue;
649 : }
650 :
651 : /* If SETTER is a call everything is clobbered. Note that calls
652 : to pure functions are never put on the list, so we need not
653 : worry about them. */
654 7030181 : if (CALL_P (setter))
655 : return true;
656 :
657 : /* SETTER must be an insn of some kind that sets memory. Call
658 : note_stores to examine each hunk of memory that is modified.
659 : It will set mems_conflict_p to nonzero if there may be a
660 : conflict between X and SETTER. */
661 6599241 : mems_conflict_p = 0;
662 6599241 : note_stores (setter, find_mem_conflicts, x);
663 6599241 : if (mems_conflict_p)
664 : return true;
665 :
666 6410623 : list_entry = list_entry->next;
667 : }
668 : return false;
669 : }
670 :
671 :
672 : /* Record register first/last/block set information for REGNO in INSN. */
673 :
674 : static inline void
675 7490384 : record_last_reg_set_info (rtx_insn *insn, rtx reg)
676 : {
677 7490384 : unsigned int regno, end_regno;
678 :
679 7490384 : regno = REGNO (reg);
680 7490384 : end_regno = END_REGNO (reg);
681 7511155 : do
682 7511155 : reg_avail_info[regno] = INSN_CUID (insn);
683 7511155 : while (++regno < end_regno);
684 7490384 : }
685 :
686 : static inline void
687 37103112 : record_last_reg_set_info_regno (rtx_insn *insn, int regno)
688 : {
689 37103112 : reg_avail_info[regno] = INSN_CUID (insn);
690 37103112 : }
691 :
692 :
693 : /* Record memory modification information for INSN. We do not actually care
694 : about the memory location(s) that are set, or even how they are set (consider
695 : a CALL_INSN). We merely need to record which insns modify memory. */
696 :
697 : static void
698 1772535 : record_last_mem_set_info (rtx_insn *insn)
699 : {
700 1772535 : struct modifies_mem *list_entry;
701 :
702 1772535 : list_entry = (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
703 : sizeof (struct modifies_mem));
704 1772535 : list_entry->insn = insn;
705 1772535 : list_entry->next = modifies_mem_list;
706 1772535 : modifies_mem_list = list_entry;
707 :
708 1772535 : record_last_mem_set_info_common (insn, modify_mem_list,
709 : canon_modify_mem_list,
710 : modify_mem_list_set,
711 : blocks_with_calls);
712 1772535 : }
713 :
714 : /* Called from compute_hash_table via note_stores to handle one
715 : SET or CLOBBER in an insn. DATA is really the instruction in which
716 : the SET is taking place. */
717 :
718 : static void
719 10219960 : record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
720 : {
721 10219960 : rtx_insn *last_set_insn = (rtx_insn *) data;
722 :
723 10219960 : if (GET_CODE (dest) == SUBREG)
724 1177 : dest = SUBREG_REG (dest);
725 :
726 10219960 : if (REG_P (dest))
727 7490384 : record_last_reg_set_info (last_set_insn, dest);
728 2729576 : else if (MEM_P (dest))
729 : {
730 : /* Ignore pushes, they don't clobber memory. They may still
731 : clobber the stack pointer though. Some targets do argument
732 : pushes without adding REG_INC notes. See e.g. PR25196,
733 : where a pushsi2 on i386 doesn't have REG_INC notes. Note
734 : such changes here too. */
735 1400131 : if (! push_operand (dest, GET_MODE (dest)))
736 1366999 : record_last_mem_set_info (last_set_insn);
737 : else
738 33132 : record_last_reg_set_info_regno (last_set_insn, STACK_POINTER_REGNUM);
739 : }
740 10219960 : }
741 :
742 :
743 : /* Reset tables used to keep track of what's still available since the
744 : start of the block. */
745 :
746 : static void
747 1751210 : reset_opr_set_tables (void)
748 : {
749 1751210 : memset (reg_avail_info, 0, FIRST_PSEUDO_REGISTER * sizeof (int));
750 1751210 : obstack_free (&modifies_mem_obstack, modifies_mem_obstack_bottom);
751 1751210 : modifies_mem_list = NULL;
752 1751210 : }
753 :
754 :
755 : /* Record things set by INSN.
756 : This data is used by oprs_unchanged_p. */
757 :
758 : static void
759 10537471 : record_opr_changes (rtx_insn *insn)
760 : {
761 10537471 : rtx note;
762 :
763 : /* Find all stores and record them. */
764 10537471 : note_stores (insn, record_last_set_info, insn);
765 :
766 : /* Also record autoincremented REGs for this insn as changed. */
767 13270855 : for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
768 2733384 : if (REG_NOTE_KIND (note) == REG_INC)
769 0 : record_last_reg_set_info (insn, XEXP (note, 0));
770 :
771 : /* Finally, if this is a call, record all call clobbers. */
772 10537471 : if (CALL_P (insn))
773 : {
774 460643 : unsigned int regno;
775 460643 : hard_reg_set_iterator hrsi;
776 : /* We don't track modes of hard registers, so we need to be
777 : conservative and assume that partial kills are full kills. */
778 460643 : HARD_REG_SET callee_clobbers
779 460643 : = insn_callee_abi (insn).full_and_partial_reg_clobbers ();
780 37530623 : EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi)
781 37069980 : record_last_reg_set_info_regno (insn, regno);
782 :
783 890845 : if (! RTL_CONST_OR_PURE_CALL_P (insn)
784 64867 : || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
785 515877 : || can_throw_external (insn))
786 405536 : record_last_mem_set_info (insn);
787 : }
788 10537471 : }
789 :
790 :
791 : /* Scan the pattern of INSN and add an entry to the hash TABLE.
792 : After reload we are interested in loads/stores only. */
793 :
794 : static void
795 4643761 : hash_scan_set (rtx_insn *insn)
796 : {
797 4643761 : rtx pat = PATTERN (insn);
798 4643761 : rtx src = SET_SRC (pat);
799 4643761 : rtx dest = SET_DEST (pat);
800 :
801 : /* We are only interested in loads and stores. */
802 4643761 : if (! MEM_P (src) && ! MEM_P (dest))
803 : return;
804 :
805 : /* Don't mess with jumps and nops. */
806 1513704 : if (JUMP_P (insn) || set_noop_p (pat))
807 11 : return;
808 :
809 1513693 : if (REG_P (dest))
810 : {
811 702774 : if (/* Don't CSE something if we can't do a reg/reg copy. */
812 702774 : can_copy_p (GET_MODE (dest))
813 : /* Is SET_SRC something we want to gcse? */
814 702774 : && general_operand (src, GET_MODE (src))
815 : #ifdef STACK_REGS
816 : /* Never consider insns touching the register stack. It may
817 : create situations that reg-stack cannot handle (e.g. a stack
818 : register live across an abnormal edge). */
819 702762 : && (REGNO (dest) < FIRST_STACK_REG || REGNO (dest) > LAST_STACK_REG)
820 : #endif
821 : /* An expression is not available if its operands are
822 : subsequently modified, including this insn. */
823 1384770 : && oprs_unchanged_p (src, insn, true))
824 : {
825 372474 : insert_expr_in_table (src, insn);
826 : }
827 : }
828 810919 : else if (REG_P (src))
829 : {
830 : /* Only record sets of pseudo-regs in the hash table. */
831 562357 : if (/* Don't CSE something if we can't do a reg/reg copy. */
832 562357 : can_copy_p (GET_MODE (src))
833 : /* Is SET_DEST something we want to gcse? */
834 562357 : && general_operand (dest, GET_MODE (dest))
835 : #ifdef STACK_REGS
836 : /* As above for STACK_REGS. */
837 555414 : && (REGNO (src) < FIRST_STACK_REG || REGNO (src) > LAST_STACK_REG)
838 : #endif
839 549565 : && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
840 : /* Check if the memory expression is killed after insn. */
841 549221 : && ! load_killed_in_block_p (INSN_CUID (insn) + 1, dest, true)
842 839947 : && oprs_unchanged_p (XEXP (dest, 0), insn, true))
843 : {
844 214229 : insert_expr_in_table (dest, insn);
845 : }
846 : }
847 : }
848 :
849 :
850 : /* Create hash table of memory expressions available at end of basic
851 : blocks. Basically you should think of this hash table as the
852 : representation of AVAIL_OUT. This is the set of expressions that
853 : is generated in a basic block and not killed before the end of the
854 : same basic block. Notice that this is really a local computation. */
855 :
856 : static void
857 89238 : compute_hash_table (void)
858 : {
859 89238 : basic_block bb;
860 :
861 1157749 : FOR_EACH_BB_FN (bb, cfun)
862 : {
863 1068511 : rtx_insn *insn;
864 :
865 : /* First pass over the instructions records information used to
866 : determine when registers and memory are last set.
867 : Since we compute a "local" AVAIL_OUT, reset the tables that
868 : help us keep track of what has been modified since the start
869 : of the block. */
870 1068511 : reset_opr_set_tables ();
871 9768997 : FOR_BB_INSNS (bb, insn)
872 : {
873 8700486 : if (INSN_P (insn))
874 6484926 : record_opr_changes (insn);
875 : }
876 :
877 : /* The next pass actually builds the hash table. */
878 9768997 : FOR_BB_INSNS (bb, insn)
879 8700486 : if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
880 4643761 : hash_scan_set (insn);
881 : }
882 89238 : }
883 :
884 :
885 : /* Check if register REG is killed in any insn waiting to be inserted on
886 : edge E. This function is required to check that our data flow analysis
887 : is still valid prior to commit_edge_insertions. */
888 :
889 : static bool
890 32190 : reg_killed_on_edge (rtx reg, edge e)
891 : {
892 32190 : rtx_insn *insn;
893 :
894 32337 : for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
895 176 : if (INSN_P (insn) && reg_set_p (reg, insn))
896 : return true;
897 :
898 : return false;
899 : }
900 :
901 : /* Similar to above - check if register REG is used in any insn waiting
902 : to be inserted on edge E.
903 : Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p
904 : with PREV(insn),NEXT(insn) instead of calling reg_overlap_mentioned_p. */
905 :
906 : static bool
907 16241 : reg_used_on_edge (rtx reg, edge e)
908 : {
909 16241 : rtx_insn *insn;
910 :
911 16284 : for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
912 53 : if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn)))
913 : return true;
914 :
915 : return false;
916 : }
917 :
918 : /* Return the loaded/stored register of a load/store instruction. */
919 :
920 : static rtx
921 41100 : get_avail_load_store_reg (rtx_insn *insn)
922 : {
923 41100 : if (REG_P (SET_DEST (PATTERN (insn))))
924 : /* A load. */
925 : return SET_DEST (PATTERN (insn));
926 : else
927 : {
928 : /* A store. */
929 13831 : gcc_assert (REG_P (SET_SRC (PATTERN (insn))));
930 : return SET_SRC (PATTERN (insn));
931 : }
932 : }
933 :
934 : /* Return true if the predecessors of BB are "well behaved". */
935 :
936 : static bool
937 880664 : bb_has_well_behaved_predecessors (basic_block bb)
938 : {
939 880664 : edge pred;
940 880664 : edge_iterator ei;
941 :
942 880664 : if (EDGE_COUNT (bb->preds) == 0)
943 : return false;
944 :
945 2246251 : FOR_EACH_EDGE (pred, ei, bb->preds)
946 : {
947 : /* commit_one_edge_insertion refuses to insert on abnormal edges even if
948 : the source has only one successor so EDGE_CRITICAL_P is too weak. */
949 1367871 : if ((pred->flags & EDGE_ABNORMAL) && !single_pred_p (pred->dest))
950 : return false;
951 :
952 1366510 : if ((pred->flags & EDGE_ABNORMAL_CALL) && cfun->has_nonlocal_label)
953 : return false;
954 :
955 1366452 : if (tablejump_p (BB_END (pred->src), NULL, NULL))
956 : return false;
957 : }
958 : return true;
959 : }
960 :
961 :
962 : /* Search for the occurrences of expression in BB. */
963 :
964 : static struct occr*
965 972438 : get_bb_avail_insn (basic_block bb, struct occr *orig_occr, int bitmap_index)
966 : {
967 972438 : struct occr *occr = orig_occr;
968 :
969 8488001 : for (; occr != NULL; occr = occr->next)
970 7540815 : if (BLOCK_FOR_INSN (occr->insn) == bb)
971 : return occr;
972 :
973 : /* If we could not find an occurrence in BB, see if BB
974 : has a single predecessor with an occurrence that is
975 : transparent through BB. */
976 947186 : if (transp
977 929305 : && single_pred_p (bb)
978 791841 : && bitmap_bit_p (transp[bb->index], bitmap_index)
979 1670612 : && (occr = get_bb_avail_insn (single_pred (bb), orig_occr, bitmap_index)))
980 : {
981 23244 : rtx avail_reg = get_avail_load_store_reg (occr->insn);
982 46488 : if (!reg_set_between_p (avail_reg,
983 23244 : PREV_INSN (BB_HEAD (bb)),
984 23244 : NEXT_INSN (BB_END (bb)))
985 23244 : && !reg_killed_on_edge (avail_reg, single_pred_edge (bb)))
986 : return occr;
987 : }
988 :
989 : return NULL;
990 : }
991 :
992 :
993 : /* This helper is called via htab_traverse. */
994 : int
995 342940 : compute_expr_transp (expr **slot, FILE *dump_file ATTRIBUTE_UNUSED)
996 : {
997 342940 : struct expr *expr = *slot;
998 :
999 342940 : compute_transp (expr->expr, expr->bitmap_index, transp,
1000 : blocks_with_calls, modify_mem_list_set,
1001 : canon_modify_mem_list);
1002 342940 : return 1;
1003 : }
1004 :
1005 : /* This handles the case where several stores feed a partially redundant
1006 : load. It checks if the redundancy elimination is possible and if it's
1007 : worth it.
1008 :
1009 : Redundancy elimination is possible if,
1010 : 1) None of the operands of an insn have been modified since the start
1011 : of the current basic block.
1012 : 2) In any predecessor of the current basic block, the same expression
1013 : is generated.
1014 :
1015 : See the function body for the heuristics that determine if eliminating
1016 : a redundancy is also worth doing, assuming it is possible. */
1017 :
1018 : static void
1019 199821 : eliminate_partially_redundant_load (basic_block bb, rtx_insn *insn,
1020 : struct expr *expr)
1021 : {
1022 199821 : edge pred;
1023 199821 : rtx_insn *avail_insn = NULL;
1024 199821 : rtx avail_reg;
1025 199821 : rtx dest, pat;
1026 199821 : struct occr *a_occr;
1027 199821 : struct unoccr *occr, *avail_occrs = NULL;
1028 199821 : struct unoccr *unoccr, *unavail_occrs = NULL, *rollback_unoccr = NULL;
1029 199821 : int npred_ok = 0;
1030 199821 : profile_count ok_count = profile_count::zero ();
1031 : /* Redundant load execution count. */
1032 199821 : profile_count critical_count = profile_count::zero ();
1033 : /* Execution count of critical edges. */
1034 199821 : edge_iterator ei;
1035 199821 : bool critical_edge_split = false;
1036 :
1037 : /* The execution count of the loads to be added to make the
1038 : load fully redundant. */
1039 199821 : profile_count not_ok_count = profile_count::zero ();
1040 199821 : basic_block pred_bb;
1041 :
1042 199821 : pat = PATTERN (insn);
1043 199821 : dest = SET_DEST (pat);
1044 :
1045 : /* Check that the loaded register is not used, set, or killed from the
1046 : beginning of the block. */
1047 199821 : if (reg_changed_after_insn_p (dest, 0)
1048 199821 : || reg_used_between_p (dest, PREV_INSN (BB_HEAD (bb)), insn))
1049 30750 : return;
1050 :
1051 : /* Check potential for replacing load with copy for predecessors. */
1052 407426 : FOR_EACH_EDGE (pred, ei, bb->preds)
1053 : {
1054 238355 : rtx_insn *next_pred_bb_end;
1055 :
1056 238355 : avail_insn = NULL;
1057 238355 : avail_reg = NULL_RTX;
1058 238355 : pred_bb = pred->src;
1059 247043 : for (a_occr = get_bb_avail_insn (pred_bb,
1060 : expr->avail_occr,
1061 238355 : expr->bitmap_index);
1062 247043 : a_occr;
1063 8688 : a_occr = get_bb_avail_insn (pred_bb,
1064 : a_occr->next,
1065 8688 : expr->bitmap_index))
1066 : {
1067 : /* Check if the loaded register is not used. */
1068 16282 : avail_insn = a_occr->insn;
1069 16282 : avail_reg = get_avail_load_store_reg (avail_insn);
1070 16282 : gcc_assert (avail_reg);
1071 :
1072 : /* Make sure we can generate a move from register avail_reg to
1073 : dest. */
1074 16282 : rtx_insn *move = gen_move_insn (copy_rtx (dest),
1075 : copy_rtx (avail_reg));
1076 16282 : extract_insn (move);
1077 16282 : if (! constrain_operands (1, get_preferred_alternatives (insn,
1078 : pred_bb))
1079 16270 : || reg_killed_on_edge (avail_reg, pred)
1080 32523 : || reg_used_on_edge (dest, pred))
1081 : {
1082 51 : avail_insn = NULL;
1083 51 : continue;
1084 : }
1085 16231 : next_pred_bb_end = NEXT_INSN (BB_END (BLOCK_FOR_INSN (avail_insn)));
1086 16231 : if (!reg_set_between_p (avail_reg, avail_insn, next_pred_bb_end))
1087 : /* AVAIL_INSN remains non-null. */
1088 : break;
1089 : else
1090 : avail_insn = NULL;
1091 : }
1092 :
1093 238355 : if (EDGE_CRITICAL_P (pred) && pred->count ().initialized_p ())
1094 52694 : critical_count += pred->count ();
1095 :
1096 238355 : if (avail_insn != NULL_RTX)
1097 : {
1098 7594 : npred_ok++;
1099 7594 : if (pred->count ().initialized_p ())
1100 7583 : ok_count = ok_count + pred->count ();
1101 7594 : if (! set_noop_p (PATTERN (gen_move_insn (copy_rtx (dest),
1102 : copy_rtx (avail_reg)))))
1103 : {
1104 : /* Check if there is going to be a split. */
1105 4583 : if (EDGE_CRITICAL_P (pred))
1106 : critical_edge_split = true;
1107 : }
1108 : else /* Its a dead move no need to generate. */
1109 3011 : continue;
1110 4583 : occr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
1111 : sizeof (struct unoccr));
1112 4583 : occr->insn = avail_insn;
1113 4583 : occr->pred = pred;
1114 4583 : occr->next = avail_occrs;
1115 4583 : avail_occrs = occr;
1116 4583 : if (! rollback_unoccr)
1117 1957 : rollback_unoccr = occr;
1118 : }
1119 : else
1120 : {
1121 : /* Adding a load on a critical edge will cause a split. */
1122 230761 : if (EDGE_CRITICAL_P (pred))
1123 : critical_edge_split = true;
1124 230761 : if (pred->count ().initialized_p ())
1125 230628 : not_ok_count = not_ok_count + pred->count ();
1126 230761 : unoccr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
1127 : sizeof (struct unoccr));
1128 230761 : unoccr->insn = NULL;
1129 230761 : unoccr->pred = pred;
1130 230761 : unoccr->next = unavail_occrs;
1131 230761 : unavail_occrs = unoccr;
1132 230761 : if (! rollback_unoccr)
1133 166524 : rollback_unoccr = unoccr;
1134 : }
1135 : }
1136 :
1137 169071 : if (/* No load can be replaced by copy. */
1138 : npred_ok == 0
1139 : /* Prevent exploding the code. */
1140 6010 : || (optimize_bb_for_size_p (bb) && npred_ok > 1)
1141 : /* If we don't have profile information we cannot tell if splitting
1142 : a critical edge is profitable or not so don't do it. */
1143 175081 : || ((!profile_info || profile_status_for_fn (cfun) != PROFILE_READ
1144 0 : || targetm.cannot_modify_jumps_p ())
1145 6010 : && critical_edge_split))
1146 165358 : goto cleanup;
1147 :
1148 : /* Check if it's worth applying the partial redundancy elimination. */
1149 3713 : if (ok_count.to_gcov_type ()
1150 3713 : < param_gcse_after_reload_partial_fraction * not_ok_count.to_gcov_type ())
1151 1583 : goto cleanup;
1152 :
1153 2130 : gcov_type threshold;
1154 : #if (GCC_VERSION >= 5000)
1155 2130 : if (__builtin_mul_overflow (param_gcse_after_reload_critical_fraction,
1156 : critical_count.to_gcov_type (), &threshold))
1157 0 : threshold = profile_count::max_count;
1158 : #else
1159 : threshold
1160 : = (param_gcse_after_reload_critical_fraction
1161 : * critical_count.to_gcov_type ());
1162 : #endif
1163 :
1164 2130 : if (ok_count.to_gcov_type () < threshold)
1165 209 : goto cleanup;
1166 :
1167 : /* Generate moves to the loaded register from where
1168 : the memory is available. */
1169 3495 : for (occr = avail_occrs; occr; occr = occr->next)
1170 : {
1171 1574 : avail_insn = occr->insn;
1172 1574 : pred = occr->pred;
1173 : /* Set avail_reg to be the register having the value of the
1174 : memory. */
1175 1574 : avail_reg = get_avail_load_store_reg (avail_insn);
1176 1574 : gcc_assert (avail_reg);
1177 :
1178 1574 : insert_insn_on_edge (gen_move_insn (copy_rtx (dest),
1179 : copy_rtx (avail_reg)),
1180 : pred);
1181 1574 : stats.moves_inserted++;
1182 :
1183 1574 : if (dump_file)
1184 0 : fprintf (dump_file,
1185 : "generating move from %d to %d on edge from %d to %d\n",
1186 : REGNO (avail_reg),
1187 : REGNO (dest),
1188 0 : pred->src->index,
1189 0 : pred->dest->index);
1190 : }
1191 :
1192 : /* Regenerate loads where the memory is unavailable. */
1193 2274 : for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next)
1194 : {
1195 353 : pred = unoccr->pred;
1196 353 : insert_insn_on_edge (copy_insn (PATTERN (insn)), pred);
1197 353 : stats.copies_inserted++;
1198 :
1199 353 : if (dump_file)
1200 : {
1201 0 : fprintf (dump_file,
1202 : "generating on edge from %d to %d a copy of load: ",
1203 0 : pred->src->index,
1204 0 : pred->dest->index);
1205 0 : print_rtl (dump_file, PATTERN (insn));
1206 0 : fprintf (dump_file, "\n");
1207 : }
1208 : }
1209 :
1210 : /* Delete the insn if it is not available in this block and mark it
1211 : for deletion if it is available. If insn is available it may help
1212 : discover additional redundancies, so mark it for later deletion. */
1213 1921 : for (a_occr = get_bb_avail_insn (bb, expr->avail_occr, expr->bitmap_index);
1214 1969 : a_occr && (a_occr->insn != insn);
1215 48 : a_occr = get_bb_avail_insn (bb, a_occr->next, expr->bitmap_index))
1216 : ;
1217 :
1218 1921 : if (!a_occr)
1219 : {
1220 323 : stats.insns_deleted++;
1221 :
1222 323 : if (dump_file)
1223 : {
1224 0 : fprintf (dump_file, "deleting insn:\n");
1225 0 : print_rtl_single (dump_file, insn);
1226 0 : fprintf (dump_file, "\n");
1227 : }
1228 323 : delete_insn (insn);
1229 : }
1230 : else
1231 1598 : a_occr->deleted_p = 1;
1232 :
1233 169071 : cleanup:
1234 169071 : if (rollback_unoccr)
1235 168481 : obstack_free (&unoccr_obstack, rollback_unoccr);
1236 : }
1237 :
1238 : /* Performing the redundancy elimination as described before. */
1239 :
1240 : static void
1241 35604 : eliminate_partially_redundant_loads (void)
1242 : {
1243 35604 : rtx_insn *insn;
1244 35604 : basic_block bb;
1245 :
1246 : /* Note we start at block 1. */
1247 :
1248 35604 : if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1249 : return;
1250 :
1251 916268 : FOR_BB_BETWEEN (bb,
1252 : ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->next_bb,
1253 : EXIT_BLOCK_PTR_FOR_FN (cfun),
1254 : next_bb)
1255 : {
1256 : /* Don't try anything on basic blocks with strange predecessors. */
1257 880664 : if (! bb_has_well_behaved_predecessors (bb))
1258 1361 : continue;
1259 :
1260 : /* Do not try anything on cold basic blocks. */
1261 879303 : if (optimize_bb_for_size_p (bb))
1262 196604 : continue;
1263 :
1264 : /* Reset the table of things changed since the start of the current
1265 : basic block. */
1266 682699 : reset_opr_set_tables ();
1267 :
1268 : /* Look at all insns in the current basic block and see if there are
1269 : any loads in it that we can record. */
1270 6152516 : FOR_BB_INSNS (bb, insn)
1271 : {
1272 : /* Is it a load - of the form (set (reg) (mem))? */
1273 5469817 : if (NONJUMP_INSN_P (insn)
1274 2969590 : && GET_CODE (PATTERN (insn)) == SET
1275 2424060 : && REG_P (SET_DEST (PATTERN (insn)))
1276 7360130 : && MEM_P (SET_SRC (PATTERN (insn))))
1277 : {
1278 502854 : rtx pat = PATTERN (insn);
1279 502854 : rtx src = SET_SRC (pat);
1280 502854 : struct expr *expr;
1281 :
1282 502854 : if (!MEM_VOLATILE_P (src)
1283 467046 : && GET_MODE (src) != BLKmode
1284 467046 : && general_operand (src, GET_MODE (src))
1285 : /* Are the operands unchanged since the start of the
1286 : block? */
1287 467045 : && oprs_unchanged_p (src, insn, false)
1288 290839 : && !(cfun->can_throw_non_call_exceptions && may_trap_p (src))
1289 290443 : && !side_effects_p (src)
1290 : /* Is the expression recorded? */
1291 793297 : && (expr = lookup_expr_in_table (src)) != NULL)
1292 : {
1293 : /* We now have a load (insn) and an available memory at
1294 : its BB start (expr). Try to remove the loads if it is
1295 : redundant. */
1296 199821 : eliminate_partially_redundant_load (bb, insn, expr);
1297 : }
1298 : }
1299 :
1300 : /* Keep track of everything modified by this insn, so that we
1301 : know what has been modified since the start of the current
1302 : basic block. */
1303 5469817 : if (INSN_P (insn))
1304 4052545 : record_opr_changes (insn);
1305 : }
1306 : }
1307 :
1308 35604 : commit_edge_insertions ();
1309 : }
1310 :
1311 : /* Go over the expression hash table and delete insns that were
1312 : marked for later deletion. */
1313 :
1314 : /* This helper is called via htab_traverse. */
1315 : int
1316 342940 : delete_redundant_insns_1 (expr **slot, void *data ATTRIBUTE_UNUSED)
1317 : {
1318 342940 : struct expr *exprs = *slot;
1319 342940 : struct occr *occr;
1320 :
1321 885835 : for (occr = exprs->avail_occr; occr != NULL; occr = occr->next)
1322 : {
1323 542895 : if (occr->deleted_p && dbg_cnt (gcse2_delete))
1324 : {
1325 1598 : delete_insn (occr->insn);
1326 1598 : stats.insns_deleted++;
1327 :
1328 1598 : if (dump_file)
1329 : {
1330 0 : fprintf (dump_file, "deleting insn:\n");
1331 0 : print_rtl_single (dump_file, occr->insn);
1332 0 : fprintf (dump_file, "\n");
1333 : }
1334 : }
1335 : }
1336 :
1337 342940 : return 1;
1338 : }
1339 :
1340 : static void
1341 35604 : delete_redundant_insns (void)
1342 : {
1343 378544 : expr_table->traverse <void *, delete_redundant_insns_1> (NULL);
1344 35604 : if (dump_file)
1345 8 : fprintf (dump_file, "\n");
1346 35604 : }
1347 :
1348 : /* Main entry point of the GCSE after reload - clean some redundant loads
1349 : due to spilling. */
1350 :
1351 : static void
1352 89238 : gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
1353 : {
1354 : /* Disable computing transparentness if it is too expensive. */
1355 89238 : bool do_transp
1356 89238 : = !gcse_or_cprop_is_too_expensive (_("using simple load CSE after register "
1357 89238 : "allocation"));
1358 :
1359 89238 : memset (&stats, 0, sizeof (stats));
1360 :
1361 : /* Allocate memory for this pass.
1362 : Also computes and initializes the insns' CUIDs. */
1363 89238 : alloc_mem ();
1364 :
1365 : /* We need alias analysis. */
1366 89238 : init_alias_analysis ();
1367 :
1368 89238 : compute_hash_table ();
1369 :
1370 89238 : if (dump_file)
1371 8 : dump_hash_table (dump_file);
1372 :
1373 89238 : if (!expr_table->is_empty ())
1374 : {
1375 : /* Knowing which MEMs are transparent through a block can signifiantly
1376 : increase the number of redundant loads found. So compute transparency
1377 : information for each memory expression in the hash table. */
1378 35604 : df_analyze ();
1379 35604 : if (do_transp)
1380 : {
1381 : /* This cannot be part of the normal allocation routine because
1382 : we have to know the number of elements in the hash table. */
1383 71208 : transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
1384 35604 : expr_table->elements ());
1385 35604 : bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
1386 378544 : expr_table->traverse <FILE *, compute_expr_transp> (dump_file);
1387 : }
1388 : else
1389 0 : transp = NULL;
1390 35604 : eliminate_partially_redundant_loads ();
1391 35604 : delete_redundant_insns ();
1392 35604 : if (do_transp)
1393 35604 : sbitmap_vector_free (transp);
1394 :
1395 35604 : if (dump_file)
1396 : {
1397 8 : fprintf (dump_file, "GCSE AFTER RELOAD stats:\n");
1398 8 : fprintf (dump_file, "copies inserted: %d\n", stats.copies_inserted);
1399 8 : fprintf (dump_file, "moves inserted: %d\n", stats.moves_inserted);
1400 8 : fprintf (dump_file, "insns deleted: %d\n", stats.insns_deleted);
1401 8 : fprintf (dump_file, "\n\n");
1402 : }
1403 :
1404 35604 : statistics_counter_event (cfun, "copies inserted",
1405 : stats.copies_inserted);
1406 35604 : statistics_counter_event (cfun, "moves inserted",
1407 : stats.moves_inserted);
1408 35604 : statistics_counter_event (cfun, "insns deleted",
1409 : stats.insns_deleted);
1410 : }
1411 :
1412 : /* We are finished with alias. */
1413 89238 : end_alias_analysis ();
1414 :
1415 89238 : free_mem ();
1416 89238 : }
1417 :
1418 :
1419 :
1420 : static void
1421 89238 : rest_of_handle_gcse2 (void)
1422 : {
1423 89238 : gcse_after_reload_main (get_insns ());
1424 89238 : rebuild_jump_labels (get_insns ());
1425 89238 : }
1426 :
1427 : namespace {
1428 :
1429 : const pass_data pass_data_gcse2 =
1430 : {
1431 : RTL_PASS, /* type */
1432 : "gcse2", /* name */
1433 : OPTGROUP_NONE, /* optinfo_flags */
1434 : TV_GCSE_AFTER_RELOAD, /* tv_id */
1435 : 0, /* properties_required */
1436 : 0, /* properties_provided */
1437 : 0, /* properties_destroyed */
1438 : 0, /* todo_flags_start */
1439 : 0, /* todo_flags_finish */
1440 : };
1441 :
1442 : class pass_gcse2 : public rtl_opt_pass
1443 : {
1444 : public:
1445 285722 : pass_gcse2 (gcc::context *ctxt)
1446 571444 : : rtl_opt_pass (pass_data_gcse2, ctxt)
1447 : {}
1448 :
1449 : /* opt_pass methods: */
1450 1471370 : bool gate (function *fun) final override
1451 : {
1452 1043686 : return (optimize > 0 && flag_gcse_after_reload
1453 1561088 : && optimize_function_for_speed_p (fun));
1454 : }
1455 :
1456 89238 : unsigned int execute (function *) final override
1457 : {
1458 89238 : rest_of_handle_gcse2 ();
1459 89238 : return 0;
1460 : }
1461 :
1462 : }; // class pass_gcse2
1463 :
1464 : } // anon namespace
1465 :
1466 : rtl_opt_pass *
1467 285722 : make_pass_gcse2 (gcc::context *ctxt)
1468 : {
1469 285722 : return new pass_gcse2 (ctxt);
1470 : }
|